]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * Copyright (C) 2009 Apple Inc. All rights reserved. | |
3 | * | |
4 | * Redistribution and use in source and binary forms, with or without | |
5 | * modification, are permitted provided that the following conditions | |
6 | * are met: | |
7 | * 1. Redistributions of source code must retain the above copyright | |
8 | * notice, this list of conditions and the following disclaimer. | |
9 | * 2. Redistributions in binary form must reproduce the above copyright | |
10 | * notice, this list of conditions and the following disclaimer in the | |
11 | * documentation and/or other materials provided with the distribution. | |
12 | * | |
13 | * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY | |
14 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
15 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
16 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR | |
17 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
18 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
19 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
20 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
21 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
22 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
23 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
24 | */ | |
25 | ||
26 | #ifndef RegexPattern_h | |
27 | #define RegexPattern_h | |
28 | ||
29 | #include <wtf/Platform.h> | |
30 | ||
31 | #if ENABLE(YARR) | |
32 | ||
33 | #include <wtf/Vector.h> | |
34 | #include <wtf/unicode/Unicode.h> | |
35 | ||
36 | ||
37 | namespace JSC { namespace Yarr { | |
38 | ||
39 | #define RegexStackSpaceForBackTrackInfoPatternCharacter 1 // Only for !fixed quantifiers. | |
40 | #define RegexStackSpaceForBackTrackInfoCharacterClass 1 // Only for !fixed quantifiers. | |
41 | #define RegexStackSpaceForBackTrackInfoBackReference 2 | |
42 | #define RegexStackSpaceForBackTrackInfoAlternative 1 // One per alternative. | |
43 | #define RegexStackSpaceForBackTrackInfoParentheticalAssertion 1 | |
44 | #define RegexStackSpaceForBackTrackInfoParenthesesOnce 1 // Only for !fixed quantifiers. | |
45 | #define RegexStackSpaceForBackTrackInfoParentheses 4 | |
46 | ||
47 | struct PatternDisjunction; | |
48 | ||
49 | struct CharacterRange { | |
50 | UChar begin; | |
51 | UChar end; | |
52 | ||
53 | CharacterRange(UChar begin, UChar end) | |
54 | : begin(begin) | |
55 | , end(end) | |
56 | { | |
57 | } | |
58 | }; | |
59 | ||
60 | struct CharacterClass { | |
61 | Vector<UChar> m_matches; | |
62 | Vector<CharacterRange> m_ranges; | |
63 | Vector<UChar> m_matchesUnicode; | |
64 | Vector<CharacterRange> m_rangesUnicode; | |
65 | }; | |
66 | ||
67 | enum QuantifierType { | |
68 | QuantifierFixedCount, | |
69 | QuantifierGreedy, | |
70 | QuantifierNonGreedy, | |
71 | }; | |
72 | ||
73 | struct PatternTerm { | |
74 | enum Type { | |
75 | TypeAssertionBOL, | |
76 | TypeAssertionEOL, | |
77 | TypeAssertionWordBoundary, | |
78 | TypePatternCharacter, | |
79 | TypeCharacterClass, | |
80 | TypeBackReference, | |
81 | TypeForwardReference, | |
82 | TypeParenthesesSubpattern, | |
83 | TypeParentheticalAssertion, | |
84 | } type; | |
85 | bool invertOrCapture; | |
86 | union { | |
87 | UChar patternCharacter; | |
88 | CharacterClass* characterClass; | |
89 | unsigned subpatternId; | |
90 | struct { | |
91 | PatternDisjunction* disjunction; | |
92 | unsigned subpatternId; | |
93 | unsigned lastSubpatternId; | |
94 | bool isCopy; | |
95 | } parentheses; | |
96 | }; | |
97 | QuantifierType quantityType; | |
98 | unsigned quantityCount; | |
99 | int inputPosition; | |
100 | unsigned frameLocation; | |
101 | ||
102 | PatternTerm(UChar ch) | |
103 | : type(PatternTerm::TypePatternCharacter) | |
104 | { | |
105 | patternCharacter = ch; | |
106 | quantityType = QuantifierFixedCount; | |
107 | quantityCount = 1; | |
108 | } | |
109 | ||
110 | PatternTerm(CharacterClass* charClass, bool invert) | |
111 | : type(PatternTerm::TypeCharacterClass) | |
112 | , invertOrCapture(invert) | |
113 | { | |
114 | characterClass = charClass; | |
115 | quantityType = QuantifierFixedCount; | |
116 | quantityCount = 1; | |
117 | } | |
118 | ||
119 | PatternTerm(Type type, unsigned subpatternId, PatternDisjunction* disjunction, bool invertOrCapture) | |
120 | : type(type) | |
121 | , invertOrCapture(invertOrCapture) | |
122 | { | |
123 | parentheses.disjunction = disjunction; | |
124 | parentheses.subpatternId = subpatternId; | |
125 | parentheses.isCopy = false; | |
126 | quantityType = QuantifierFixedCount; | |
127 | quantityCount = 1; | |
128 | } | |
129 | ||
130 | PatternTerm(Type type, bool invert = false) | |
131 | : type(type) | |
132 | , invertOrCapture(invert) | |
133 | { | |
134 | quantityType = QuantifierFixedCount; | |
135 | quantityCount = 1; | |
136 | } | |
137 | ||
138 | PatternTerm(unsigned spatternId) | |
139 | : type(TypeBackReference) | |
140 | , invertOrCapture(invertOrCapture) | |
141 | { | |
142 | subpatternId = spatternId; | |
143 | quantityType = QuantifierFixedCount; | |
144 | quantityCount = 1; | |
145 | } | |
146 | ||
147 | static PatternTerm ForwardReference() | |
148 | { | |
149 | return PatternTerm(TypeForwardReference); | |
150 | } | |
151 | ||
152 | static PatternTerm BOL() | |
153 | { | |
154 | return PatternTerm(TypeAssertionBOL); | |
155 | } | |
156 | ||
157 | static PatternTerm EOL() | |
158 | { | |
159 | return PatternTerm(TypeAssertionEOL); | |
160 | } | |
161 | ||
162 | static PatternTerm WordBoundary(bool invert) | |
163 | { | |
164 | return PatternTerm(TypeAssertionWordBoundary, invert); | |
165 | } | |
166 | ||
167 | bool invert() | |
168 | { | |
169 | return invertOrCapture; | |
170 | } | |
171 | ||
172 | bool capture() | |
173 | { | |
174 | return invertOrCapture; | |
175 | } | |
176 | ||
177 | void quantify(unsigned count, QuantifierType type) | |
178 | { | |
179 | quantityCount = count; | |
180 | quantityType = type; | |
181 | } | |
182 | }; | |
183 | ||
184 | struct PatternAlternative { | |
185 | PatternAlternative(PatternDisjunction* disjunction) | |
186 | : m_parent(disjunction) | |
187 | { | |
188 | } | |
189 | ||
190 | PatternTerm& lastTerm() | |
191 | { | |
192 | ASSERT(m_terms.size()); | |
193 | return m_terms[m_terms.size() - 1]; | |
194 | } | |
195 | ||
196 | void removeLastTerm() | |
197 | { | |
198 | ASSERT(m_terms.size()); | |
199 | m_terms.shrink(m_terms.size() - 1); | |
200 | } | |
201 | ||
202 | Vector<PatternTerm> m_terms; | |
203 | PatternDisjunction* m_parent; | |
204 | unsigned m_minimumSize; | |
205 | bool m_hasFixedSize; | |
206 | }; | |
207 | ||
208 | struct PatternDisjunction { | |
209 | PatternDisjunction(PatternAlternative* parent = 0) | |
210 | : m_parent(parent) | |
211 | { | |
212 | } | |
213 | ||
214 | ~PatternDisjunction() | |
215 | { | |
216 | deleteAllValues(m_alternatives); | |
217 | } | |
218 | ||
219 | PatternAlternative* addNewAlternative() | |
220 | { | |
221 | PatternAlternative* alternative = new PatternAlternative(this); | |
222 | m_alternatives.append(alternative); | |
223 | return alternative; | |
224 | } | |
225 | ||
226 | Vector<PatternAlternative*> m_alternatives; | |
227 | PatternAlternative* m_parent; | |
228 | unsigned m_minimumSize; | |
229 | unsigned m_callFrameSize; | |
230 | bool m_hasFixedSize; | |
231 | }; | |
232 | ||
233 | // You probably don't want to be calling these functions directly | |
234 | // (please to be calling newlineCharacterClass() et al on your | |
235 | // friendly neighborhood RegexPattern instance to get nicely | |
236 | // cached copies). | |
237 | CharacterClass* newlineCreate(); | |
238 | CharacterClass* digitsCreate(); | |
239 | CharacterClass* spacesCreate(); | |
240 | CharacterClass* wordcharCreate(); | |
241 | CharacterClass* nondigitsCreate(); | |
242 | CharacterClass* nonspacesCreate(); | |
243 | CharacterClass* nonwordcharCreate(); | |
244 | ||
245 | struct RegexPattern { | |
246 | RegexPattern(bool ignoreCase, bool multiline) | |
247 | : m_ignoreCase(ignoreCase) | |
248 | , m_multiline(multiline) | |
249 | , m_numSubpatterns(0) | |
250 | , m_maxBackReference(0) | |
251 | , newlineCached(0) | |
252 | , digitsCached(0) | |
253 | , spacesCached(0) | |
254 | , wordcharCached(0) | |
255 | , nondigitsCached(0) | |
256 | , nonspacesCached(0) | |
257 | , nonwordcharCached(0) | |
258 | { | |
259 | } | |
260 | ||
261 | ~RegexPattern() | |
262 | { | |
263 | deleteAllValues(m_disjunctions); | |
264 | deleteAllValues(m_userCharacterClasses); | |
265 | } | |
266 | ||
267 | void reset() | |
268 | { | |
269 | m_numSubpatterns = 0; | |
270 | m_maxBackReference = 0; | |
271 | ||
272 | newlineCached = 0; | |
273 | digitsCached = 0; | |
274 | spacesCached = 0; | |
275 | wordcharCached = 0; | |
276 | nondigitsCached = 0; | |
277 | nonspacesCached = 0; | |
278 | nonwordcharCached = 0; | |
279 | ||
280 | deleteAllValues(m_disjunctions); | |
281 | m_disjunctions.clear(); | |
282 | deleteAllValues(m_userCharacterClasses); | |
283 | m_userCharacterClasses.clear(); | |
284 | } | |
285 | ||
286 | bool containsIllegalBackReference() | |
287 | { | |
288 | return m_maxBackReference > m_numSubpatterns; | |
289 | } | |
290 | ||
291 | CharacterClass* newlineCharacterClass() | |
292 | { | |
293 | if (!newlineCached) | |
294 | m_userCharacterClasses.append(newlineCached = newlineCreate()); | |
295 | return newlineCached; | |
296 | } | |
297 | CharacterClass* digitsCharacterClass() | |
298 | { | |
299 | if (!digitsCached) | |
300 | m_userCharacterClasses.append(digitsCached = digitsCreate()); | |
301 | return digitsCached; | |
302 | } | |
303 | CharacterClass* spacesCharacterClass() | |
304 | { | |
305 | if (!spacesCached) | |
306 | m_userCharacterClasses.append(spacesCached = spacesCreate()); | |
307 | return spacesCached; | |
308 | } | |
309 | CharacterClass* wordcharCharacterClass() | |
310 | { | |
311 | if (!wordcharCached) | |
312 | m_userCharacterClasses.append(wordcharCached = wordcharCreate()); | |
313 | return wordcharCached; | |
314 | } | |
315 | CharacterClass* nondigitsCharacterClass() | |
316 | { | |
317 | if (!nondigitsCached) | |
318 | m_userCharacterClasses.append(nondigitsCached = nondigitsCreate()); | |
319 | return nondigitsCached; | |
320 | } | |
321 | CharacterClass* nonspacesCharacterClass() | |
322 | { | |
323 | if (!nonspacesCached) | |
324 | m_userCharacterClasses.append(nonspacesCached = nonspacesCreate()); | |
325 | return nonspacesCached; | |
326 | } | |
327 | CharacterClass* nonwordcharCharacterClass() | |
328 | { | |
329 | if (!nonwordcharCached) | |
330 | m_userCharacterClasses.append(nonwordcharCached = nonwordcharCreate()); | |
331 | return nonwordcharCached; | |
332 | } | |
333 | ||
334 | bool m_ignoreCase; | |
335 | bool m_multiline; | |
336 | unsigned m_numSubpatterns; | |
337 | unsigned m_maxBackReference; | |
338 | PatternDisjunction* m_body; | |
339 | Vector<PatternDisjunction*, 4> m_disjunctions; | |
340 | Vector<CharacterClass*> m_userCharacterClasses; | |
341 | ||
342 | private: | |
343 | CharacterClass* newlineCached; | |
344 | CharacterClass* digitsCached; | |
345 | CharacterClass* spacesCached; | |
346 | CharacterClass* wordcharCached; | |
347 | CharacterClass* nondigitsCached; | |
348 | CharacterClass* nonspacesCached; | |
349 | CharacterClass* nonwordcharCached; | |
350 | }; | |
351 | ||
352 | } } // namespace JSC::Yarr | |
353 | ||
354 | #endif | |
355 | ||
356 | #endif // RegexPattern_h |