]> git.saurik.com Git - apple/javascriptcore.git/blob - yarr/YarrPattern.h
JavaScriptCore-1097.3.tar.gz
[apple/javascriptcore.git] / yarr / YarrPattern.h
1 /*
2 * Copyright (C) 2009 Apple Inc. All rights reserved.
3 * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #ifndef YarrPattern_h
28 #define YarrPattern_h
29
30 #include <runtime/UString.h>
31 #include <wtf/CheckedArithmetic.h>
32 #include <wtf/RefCounted.h>
33 #include <wtf/Vector.h>
34 #include <wtf/unicode/Unicode.h>
35
36 namespace JSC { namespace Yarr {
37
38 struct PatternDisjunction;
39
40 struct CharacterRange {
41 UChar begin;
42 UChar end;
43
44 CharacterRange(UChar begin, UChar end)
45 : begin(begin)
46 , end(end)
47 {
48 }
49 };
50
51 struct CharacterClassTable : RefCounted<CharacterClassTable> {
52 const char* m_table;
53 bool m_inverted;
54 static PassRefPtr<CharacterClassTable> create(const char* table, bool inverted)
55 {
56 return adoptRef(new CharacterClassTable(table, inverted));
57 }
58
59 private:
60 CharacterClassTable(const char* table, bool inverted)
61 : m_table(table)
62 , m_inverted(inverted)
63 {
64 }
65 };
66
67 struct CharacterClass {
68 WTF_MAKE_FAST_ALLOCATED;
69 public:
70 // All CharacterClass instances have to have the full set of matches and ranges,
71 // they may have an optional table for faster lookups (which must match the
72 // specified matches and ranges)
73 CharacterClass(PassRefPtr<CharacterClassTable> table)
74 : m_table(table)
75 {
76 }
77 Vector<UChar> m_matches;
78 Vector<CharacterRange> m_ranges;
79 Vector<UChar> m_matchesUnicode;
80 Vector<CharacterRange> m_rangesUnicode;
81 RefPtr<CharacterClassTable> m_table;
82 };
83
84 enum QuantifierType {
85 QuantifierFixedCount,
86 QuantifierGreedy,
87 QuantifierNonGreedy,
88 };
89
90 struct PatternTerm {
91 enum Type {
92 TypeAssertionBOL,
93 TypeAssertionEOL,
94 TypeAssertionWordBoundary,
95 TypePatternCharacter,
96 TypeCharacterClass,
97 TypeBackReference,
98 TypeForwardReference,
99 TypeParenthesesSubpattern,
100 TypeParentheticalAssertion,
101 TypeDotStarEnclosure,
102 } type;
103 bool m_capture :1;
104 bool m_invert :1;
105 union {
106 UChar patternCharacter;
107 CharacterClass* characterClass;
108 unsigned backReferenceSubpatternId;
109 struct {
110 PatternDisjunction* disjunction;
111 unsigned subpatternId;
112 unsigned lastSubpatternId;
113 bool isCopy;
114 bool isTerminal;
115 } parentheses;
116 struct {
117 bool bolAnchor : 1;
118 bool eolAnchor : 1;
119 } anchors;
120 };
121 QuantifierType quantityType;
122 Checked<unsigned> quantityCount;
123 int inputPosition;
124 unsigned frameLocation;
125
126 PatternTerm(UChar ch)
127 : type(PatternTerm::TypePatternCharacter)
128 , m_capture(false)
129 , m_invert(false)
130 {
131 patternCharacter = ch;
132 quantityType = QuantifierFixedCount;
133 quantityCount = 1;
134 }
135
136 PatternTerm(CharacterClass* charClass, bool invert)
137 : type(PatternTerm::TypeCharacterClass)
138 , m_capture(false)
139 , m_invert(invert)
140 {
141 characterClass = charClass;
142 quantityType = QuantifierFixedCount;
143 quantityCount = 1;
144 }
145
146 PatternTerm(Type type, unsigned subpatternId, PatternDisjunction* disjunction, bool capture = false, bool invert = false)
147 : type(type)
148 , m_capture(capture)
149 , m_invert(invert)
150 {
151 parentheses.disjunction = disjunction;
152 parentheses.subpatternId = subpatternId;
153 parentheses.isCopy = false;
154 parentheses.isTerminal = false;
155 quantityType = QuantifierFixedCount;
156 quantityCount = 1;
157 }
158
159 PatternTerm(Type type, bool invert = false)
160 : type(type)
161 , m_capture(false)
162 , m_invert(invert)
163 {
164 quantityType = QuantifierFixedCount;
165 quantityCount = 1;
166 }
167
168 PatternTerm(unsigned spatternId)
169 : type(TypeBackReference)
170 , m_capture(false)
171 , m_invert(false)
172 {
173 backReferenceSubpatternId = spatternId;
174 quantityType = QuantifierFixedCount;
175 quantityCount = 1;
176 }
177
178 PatternTerm(bool bolAnchor, bool eolAnchor)
179 : type(TypeDotStarEnclosure)
180 , m_capture(false)
181 , m_invert(false)
182 {
183 anchors.bolAnchor = bolAnchor;
184 anchors.eolAnchor = eolAnchor;
185 quantityType = QuantifierFixedCount;
186 quantityCount = 1;
187 }
188
189 static PatternTerm ForwardReference()
190 {
191 return PatternTerm(TypeForwardReference);
192 }
193
194 static PatternTerm BOL()
195 {
196 return PatternTerm(TypeAssertionBOL);
197 }
198
199 static PatternTerm EOL()
200 {
201 return PatternTerm(TypeAssertionEOL);
202 }
203
204 static PatternTerm WordBoundary(bool invert)
205 {
206 return PatternTerm(TypeAssertionWordBoundary, invert);
207 }
208
209 bool invert()
210 {
211 return m_invert;
212 }
213
214 bool capture()
215 {
216 return m_capture;
217 }
218
219 void quantify(unsigned count, QuantifierType type)
220 {
221 quantityCount = count;
222 quantityType = type;
223 }
224 };
225
226 struct PatternAlternative {
227 WTF_MAKE_FAST_ALLOCATED;
228 public:
229 PatternAlternative(PatternDisjunction* disjunction)
230 : m_parent(disjunction)
231 , m_onceThrough(false)
232 , m_hasFixedSize(false)
233 , m_startsWithBOL(false)
234 , m_containsBOL(false)
235 {
236 }
237
238 PatternTerm& lastTerm()
239 {
240 ASSERT(m_terms.size());
241 return m_terms[m_terms.size() - 1];
242 }
243
244 void removeLastTerm()
245 {
246 ASSERT(m_terms.size());
247 m_terms.shrink(m_terms.size() - 1);
248 }
249
250 void setOnceThrough()
251 {
252 m_onceThrough = true;
253 }
254
255 bool onceThrough()
256 {
257 return m_onceThrough;
258 }
259
260 Vector<PatternTerm> m_terms;
261 PatternDisjunction* m_parent;
262 unsigned m_minimumSize;
263 bool m_onceThrough : 1;
264 bool m_hasFixedSize : 1;
265 bool m_startsWithBOL : 1;
266 bool m_containsBOL : 1;
267 };
268
269 struct PatternDisjunction {
270 WTF_MAKE_FAST_ALLOCATED;
271 public:
272 PatternDisjunction(PatternAlternative* parent = 0)
273 : m_parent(parent)
274 , m_hasFixedSize(false)
275 {
276 }
277
278 ~PatternDisjunction()
279 {
280 deleteAllValues(m_alternatives);
281 }
282
283 PatternAlternative* addNewAlternative()
284 {
285 PatternAlternative* alternative = new PatternAlternative(this);
286 m_alternatives.append(alternative);
287 return alternative;
288 }
289
290 Vector<PatternAlternative*> m_alternatives;
291 PatternAlternative* m_parent;
292 unsigned m_minimumSize;
293 unsigned m_callFrameSize;
294 bool m_hasFixedSize;
295 };
296
297 // You probably don't want to be calling these functions directly
298 // (please to be calling newlineCharacterClass() et al on your
299 // friendly neighborhood YarrPattern instance to get nicely
300 // cached copies).
301 CharacterClass* newlineCreate();
302 CharacterClass* digitsCreate();
303 CharacterClass* spacesCreate();
304 CharacterClass* wordcharCreate();
305 CharacterClass* nondigitsCreate();
306 CharacterClass* nonspacesCreate();
307 CharacterClass* nonwordcharCreate();
308
309 struct TermChain {
310 TermChain(PatternTerm term)
311 : term(term)
312 {}
313
314 PatternTerm term;
315 Vector<TermChain> hotTerms;
316 };
317
318 struct YarrPattern {
319 JS_EXPORT_PRIVATE YarrPattern(const UString& pattern, bool ignoreCase, bool multiline, const char** error);
320
321 ~YarrPattern()
322 {
323 deleteAllValues(m_disjunctions);
324 deleteAllValues(m_userCharacterClasses);
325 }
326
327 void reset()
328 {
329 m_numSubpatterns = 0;
330 m_maxBackReference = 0;
331
332 m_containsBackreferences = false;
333 m_containsBOL = false;
334
335 newlineCached = 0;
336 digitsCached = 0;
337 spacesCached = 0;
338 wordcharCached = 0;
339 nondigitsCached = 0;
340 nonspacesCached = 0;
341 nonwordcharCached = 0;
342
343 deleteAllValues(m_disjunctions);
344 m_disjunctions.clear();
345 deleteAllValues(m_userCharacterClasses);
346 m_userCharacterClasses.clear();
347 }
348
349 bool containsIllegalBackReference()
350 {
351 return m_maxBackReference > m_numSubpatterns;
352 }
353
354 CharacterClass* newlineCharacterClass()
355 {
356 if (!newlineCached)
357 m_userCharacterClasses.append(newlineCached = newlineCreate());
358 return newlineCached;
359 }
360 CharacterClass* digitsCharacterClass()
361 {
362 if (!digitsCached)
363 m_userCharacterClasses.append(digitsCached = digitsCreate());
364 return digitsCached;
365 }
366 CharacterClass* spacesCharacterClass()
367 {
368 if (!spacesCached)
369 m_userCharacterClasses.append(spacesCached = spacesCreate());
370 return spacesCached;
371 }
372 CharacterClass* wordcharCharacterClass()
373 {
374 if (!wordcharCached)
375 m_userCharacterClasses.append(wordcharCached = wordcharCreate());
376 return wordcharCached;
377 }
378 CharacterClass* nondigitsCharacterClass()
379 {
380 if (!nondigitsCached)
381 m_userCharacterClasses.append(nondigitsCached = nondigitsCreate());
382 return nondigitsCached;
383 }
384 CharacterClass* nonspacesCharacterClass()
385 {
386 if (!nonspacesCached)
387 m_userCharacterClasses.append(nonspacesCached = nonspacesCreate());
388 return nonspacesCached;
389 }
390 CharacterClass* nonwordcharCharacterClass()
391 {
392 if (!nonwordcharCached)
393 m_userCharacterClasses.append(nonwordcharCached = nonwordcharCreate());
394 return nonwordcharCached;
395 }
396
397 bool m_ignoreCase : 1;
398 bool m_multiline : 1;
399 bool m_containsBackreferences : 1;
400 bool m_containsBOL : 1;
401 unsigned m_numSubpatterns;
402 unsigned m_maxBackReference;
403 PatternDisjunction* m_body;
404 Vector<PatternDisjunction*, 4> m_disjunctions;
405 Vector<CharacterClass*> m_userCharacterClasses;
406
407 private:
408 const char* compile(const UString& patternString);
409
410 CharacterClass* newlineCached;
411 CharacterClass* digitsCached;
412 CharacterClass* spacesCached;
413 CharacterClass* wordcharCached;
414 CharacterClass* nondigitsCached;
415 CharacterClass* nonspacesCached;
416 CharacterClass* nonwordcharCached;
417 };
418
419 } } // namespace JSC::Yarr
420
421 #endif // YarrPattern_h