]> git.saurik.com Git - apple/javascriptcore.git/blame_incremental - yarr/RegexPattern.h
JavaScriptCore-584.tar.gz
[apple/javascriptcore.git] / yarr / RegexPattern.h
... / ...
CommitLineData
1/*
2 * Copyright (C) 2009 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#ifndef RegexPattern_h
27#define RegexPattern_h
28
29#include <wtf/Platform.h>
30
31#if ENABLE(YARR)
32
33#include <wtf/Vector.h>
34#include <wtf/unicode/Unicode.h>
35
36
37namespace JSC { namespace Yarr {
38
39#define RegexStackSpaceForBackTrackInfoPatternCharacter 1 // Only for !fixed quantifiers.
40#define RegexStackSpaceForBackTrackInfoCharacterClass 1 // Only for !fixed quantifiers.
41#define RegexStackSpaceForBackTrackInfoBackReference 2
42#define RegexStackSpaceForBackTrackInfoAlternative 1 // One per alternative.
43#define RegexStackSpaceForBackTrackInfoParentheticalAssertion 1
44#define RegexStackSpaceForBackTrackInfoParenthesesOnce 1 // Only for !fixed quantifiers.
45#define RegexStackSpaceForBackTrackInfoParentheses 4
46
47struct PatternDisjunction;
48
49struct CharacterRange {
50 UChar begin;
51 UChar end;
52
53 CharacterRange(UChar begin, UChar end)
54 : begin(begin)
55 , end(end)
56 {
57 }
58};
59
60struct CharacterClass : FastAllocBase {
61 Vector<UChar> m_matches;
62 Vector<CharacterRange> m_ranges;
63 Vector<UChar> m_matchesUnicode;
64 Vector<CharacterRange> m_rangesUnicode;
65};
66
67enum QuantifierType {
68 QuantifierFixedCount,
69 QuantifierGreedy,
70 QuantifierNonGreedy,
71};
72
73struct PatternTerm {
74 enum Type {
75 TypeAssertionBOL,
76 TypeAssertionEOL,
77 TypeAssertionWordBoundary,
78 TypePatternCharacter,
79 TypeCharacterClass,
80 TypeBackReference,
81 TypeForwardReference,
82 TypeParenthesesSubpattern,
83 TypeParentheticalAssertion,
84 } type;
85 bool invertOrCapture;
86 union {
87 UChar patternCharacter;
88 CharacterClass* characterClass;
89 unsigned subpatternId;
90 struct {
91 PatternDisjunction* disjunction;
92 unsigned subpatternId;
93 unsigned lastSubpatternId;
94 bool isCopy;
95 } parentheses;
96 };
97 QuantifierType quantityType;
98 unsigned quantityCount;
99 int inputPosition;
100 unsigned frameLocation;
101
102 PatternTerm(UChar ch)
103 : type(PatternTerm::TypePatternCharacter)
104 {
105 patternCharacter = ch;
106 quantityType = QuantifierFixedCount;
107 quantityCount = 1;
108 }
109
110 PatternTerm(CharacterClass* charClass, bool invert)
111 : type(PatternTerm::TypeCharacterClass)
112 , invertOrCapture(invert)
113 {
114 characterClass = charClass;
115 quantityType = QuantifierFixedCount;
116 quantityCount = 1;
117 }
118
119 PatternTerm(Type type, unsigned subpatternId, PatternDisjunction* disjunction, bool invertOrCapture)
120 : type(type)
121 , invertOrCapture(invertOrCapture)
122 {
123 parentheses.disjunction = disjunction;
124 parentheses.subpatternId = subpatternId;
125 parentheses.isCopy = false;
126 quantityType = QuantifierFixedCount;
127 quantityCount = 1;
128 }
129
130 PatternTerm(Type type, bool invert = false)
131 : type(type)
132 , invertOrCapture(invert)
133 {
134 quantityType = QuantifierFixedCount;
135 quantityCount = 1;
136 }
137
138 PatternTerm(unsigned spatternId)
139 : type(TypeBackReference)
140 , invertOrCapture(false)
141 {
142 subpatternId = spatternId;
143 quantityType = QuantifierFixedCount;
144 quantityCount = 1;
145 }
146
147 static PatternTerm ForwardReference()
148 {
149 return PatternTerm(TypeForwardReference);
150 }
151
152 static PatternTerm BOL()
153 {
154 return PatternTerm(TypeAssertionBOL);
155 }
156
157 static PatternTerm EOL()
158 {
159 return PatternTerm(TypeAssertionEOL);
160 }
161
162 static PatternTerm WordBoundary(bool invert)
163 {
164 return PatternTerm(TypeAssertionWordBoundary, invert);
165 }
166
167 bool invert()
168 {
169 return invertOrCapture;
170 }
171
172 bool capture()
173 {
174 return invertOrCapture;
175 }
176
177 void quantify(unsigned count, QuantifierType type)
178 {
179 quantityCount = count;
180 quantityType = type;
181 }
182};
183
184struct PatternAlternative : FastAllocBase {
185 PatternAlternative(PatternDisjunction* disjunction)
186 : m_parent(disjunction)
187 {
188 }
189
190 PatternTerm& lastTerm()
191 {
192 ASSERT(m_terms.size());
193 return m_terms[m_terms.size() - 1];
194 }
195
196 void removeLastTerm()
197 {
198 ASSERT(m_terms.size());
199 m_terms.shrink(m_terms.size() - 1);
200 }
201
202 Vector<PatternTerm> m_terms;
203 PatternDisjunction* m_parent;
204 unsigned m_minimumSize;
205 bool m_hasFixedSize;
206};
207
208struct PatternDisjunction : FastAllocBase {
209 PatternDisjunction(PatternAlternative* parent = 0)
210 : m_parent(parent)
211 {
212 }
213
214 ~PatternDisjunction()
215 {
216 deleteAllValues(m_alternatives);
217 }
218
219 PatternAlternative* addNewAlternative()
220 {
221 PatternAlternative* alternative = new PatternAlternative(this);
222 m_alternatives.append(alternative);
223 return alternative;
224 }
225
226 Vector<PatternAlternative*> m_alternatives;
227 PatternAlternative* m_parent;
228 unsigned m_minimumSize;
229 unsigned m_callFrameSize;
230 bool m_hasFixedSize;
231};
232
233// You probably don't want to be calling these functions directly
234// (please to be calling newlineCharacterClass() et al on your
235// friendly neighborhood RegexPattern instance to get nicely
236// cached copies).
237CharacterClass* newlineCreate();
238CharacterClass* digitsCreate();
239CharacterClass* spacesCreate();
240CharacterClass* wordcharCreate();
241CharacterClass* nondigitsCreate();
242CharacterClass* nonspacesCreate();
243CharacterClass* nonwordcharCreate();
244
245struct RegexPattern {
246 RegexPattern(bool ignoreCase, bool multiline)
247 : m_ignoreCase(ignoreCase)
248 , m_multiline(multiline)
249 , m_numSubpatterns(0)
250 , m_maxBackReference(0)
251 , newlineCached(0)
252 , digitsCached(0)
253 , spacesCached(0)
254 , wordcharCached(0)
255 , nondigitsCached(0)
256 , nonspacesCached(0)
257 , nonwordcharCached(0)
258 {
259 }
260
261 ~RegexPattern()
262 {
263 deleteAllValues(m_disjunctions);
264 deleteAllValues(m_userCharacterClasses);
265 }
266
267 void reset()
268 {
269 m_numSubpatterns = 0;
270 m_maxBackReference = 0;
271
272 newlineCached = 0;
273 digitsCached = 0;
274 spacesCached = 0;
275 wordcharCached = 0;
276 nondigitsCached = 0;
277 nonspacesCached = 0;
278 nonwordcharCached = 0;
279
280 deleteAllValues(m_disjunctions);
281 m_disjunctions.clear();
282 deleteAllValues(m_userCharacterClasses);
283 m_userCharacterClasses.clear();
284 }
285
286 bool containsIllegalBackReference()
287 {
288 return m_maxBackReference > m_numSubpatterns;
289 }
290
291 CharacterClass* newlineCharacterClass()
292 {
293 if (!newlineCached)
294 m_userCharacterClasses.append(newlineCached = newlineCreate());
295 return newlineCached;
296 }
297 CharacterClass* digitsCharacterClass()
298 {
299 if (!digitsCached)
300 m_userCharacterClasses.append(digitsCached = digitsCreate());
301 return digitsCached;
302 }
303 CharacterClass* spacesCharacterClass()
304 {
305 if (!spacesCached)
306 m_userCharacterClasses.append(spacesCached = spacesCreate());
307 return spacesCached;
308 }
309 CharacterClass* wordcharCharacterClass()
310 {
311 if (!wordcharCached)
312 m_userCharacterClasses.append(wordcharCached = wordcharCreate());
313 return wordcharCached;
314 }
315 CharacterClass* nondigitsCharacterClass()
316 {
317 if (!nondigitsCached)
318 m_userCharacterClasses.append(nondigitsCached = nondigitsCreate());
319 return nondigitsCached;
320 }
321 CharacterClass* nonspacesCharacterClass()
322 {
323 if (!nonspacesCached)
324 m_userCharacterClasses.append(nonspacesCached = nonspacesCreate());
325 return nonspacesCached;
326 }
327 CharacterClass* nonwordcharCharacterClass()
328 {
329 if (!nonwordcharCached)
330 m_userCharacterClasses.append(nonwordcharCached = nonwordcharCreate());
331 return nonwordcharCached;
332 }
333
334 bool m_ignoreCase;
335 bool m_multiline;
336 unsigned m_numSubpatterns;
337 unsigned m_maxBackReference;
338 PatternDisjunction* m_body;
339 Vector<PatternDisjunction*, 4> m_disjunctions;
340 Vector<CharacterClass*> m_userCharacterClasses;
341
342private:
343 CharacterClass* newlineCached;
344 CharacterClass* digitsCached;
345 CharacterClass* spacesCached;
346 CharacterClass* wordcharCached;
347 CharacterClass* nondigitsCached;
348 CharacterClass* nonspacesCached;
349 CharacterClass* nonwordcharCached;
350};
351
352} } // namespace JSC::Yarr
353
354#endif
355
356#endif // RegexPattern_h