]> git.saurik.com Git - apple/javascriptcore.git/blob - yarr/YarrPattern.h
JavaScriptCore-903.tar.gz
[apple/javascriptcore.git] / yarr / YarrPattern.h
1 /*
2 * Copyright (C) 2009 Apple Inc. All rights reserved.
3 * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #ifndef YarrPattern_h
28 #define YarrPattern_h
29
30 #include <runtime/UString.h>
31 #include <wtf/Vector.h>
32 #include <wtf/unicode/Unicode.h>
33
34 namespace JSC { namespace Yarr {
35
36 struct PatternDisjunction;
37
38 struct CharacterRange {
39 UChar begin;
40 UChar end;
41
42 CharacterRange(UChar begin, UChar end)
43 : begin(begin)
44 , end(end)
45 {
46 }
47 };
48
49 struct CharacterClassTable : RefCounted<CharacterClassTable> {
50 const char* m_table;
51 bool m_inverted;
52 static PassRefPtr<CharacterClassTable> create(const char* table, bool inverted)
53 {
54 return adoptRef(new CharacterClassTable(table, inverted));
55 }
56
57 private:
58 CharacterClassTable(const char* table, bool inverted)
59 : m_table(table)
60 , m_inverted(inverted)
61 {
62 }
63 };
64
65 struct CharacterClass {
66 WTF_MAKE_FAST_ALLOCATED;
67 public:
68 // All CharacterClass instances have to have the full set of matches and ranges,
69 // they may have an optional table for faster lookups (which must match the
70 // specified matches and ranges)
71 CharacterClass(PassRefPtr<CharacterClassTable> table)
72 : m_table(table)
73 {
74 }
75 Vector<UChar> m_matches;
76 Vector<CharacterRange> m_ranges;
77 Vector<UChar> m_matchesUnicode;
78 Vector<CharacterRange> m_rangesUnicode;
79 RefPtr<CharacterClassTable> m_table;
80 };
81
82 enum QuantifierType {
83 QuantifierFixedCount,
84 QuantifierGreedy,
85 QuantifierNonGreedy,
86 };
87
88 struct PatternTerm {
89 enum Type {
90 TypeAssertionBOL,
91 TypeAssertionEOL,
92 TypeAssertionWordBoundary,
93 TypePatternCharacter,
94 TypeCharacterClass,
95 TypeBackReference,
96 TypeForwardReference,
97 TypeParenthesesSubpattern,
98 TypeParentheticalAssertion,
99 TypeDotStarEnclosure,
100 } type;
101 bool m_capture :1;
102 bool m_invert :1;
103 union {
104 UChar patternCharacter;
105 CharacterClass* characterClass;
106 unsigned backReferenceSubpatternId;
107 struct {
108 PatternDisjunction* disjunction;
109 unsigned subpatternId;
110 unsigned lastSubpatternId;
111 bool isCopy;
112 bool isTerminal;
113 } parentheses;
114 struct {
115 bool bolAnchor : 1;
116 bool eolAnchor : 1;
117 } anchors;
118 };
119 QuantifierType quantityType;
120 unsigned quantityCount;
121 int inputPosition;
122 unsigned frameLocation;
123
124 PatternTerm(UChar ch)
125 : type(PatternTerm::TypePatternCharacter)
126 , m_capture(false)
127 , m_invert(false)
128 {
129 patternCharacter = ch;
130 quantityType = QuantifierFixedCount;
131 quantityCount = 1;
132 }
133
134 PatternTerm(CharacterClass* charClass, bool invert)
135 : type(PatternTerm::TypeCharacterClass)
136 , m_capture(false)
137 , m_invert(invert)
138 {
139 characterClass = charClass;
140 quantityType = QuantifierFixedCount;
141 quantityCount = 1;
142 }
143
144 PatternTerm(Type type, unsigned subpatternId, PatternDisjunction* disjunction, bool capture = false, bool invert = false)
145 : type(type)
146 , m_capture(capture)
147 , m_invert(invert)
148 {
149 parentheses.disjunction = disjunction;
150 parentheses.subpatternId = subpatternId;
151 parentheses.isCopy = false;
152 parentheses.isTerminal = false;
153 quantityType = QuantifierFixedCount;
154 quantityCount = 1;
155 }
156
157 PatternTerm(Type type, bool invert = false)
158 : type(type)
159 , m_capture(false)
160 , m_invert(invert)
161 {
162 quantityType = QuantifierFixedCount;
163 quantityCount = 1;
164 }
165
166 PatternTerm(unsigned spatternId)
167 : type(TypeBackReference)
168 , m_capture(false)
169 , m_invert(false)
170 {
171 backReferenceSubpatternId = spatternId;
172 quantityType = QuantifierFixedCount;
173 quantityCount = 1;
174 }
175
176 PatternTerm(bool bolAnchor, bool eolAnchor)
177 : type(TypeDotStarEnclosure)
178 , m_capture(false)
179 , m_invert(false)
180 {
181 anchors.bolAnchor = bolAnchor;
182 anchors.eolAnchor = eolAnchor;
183 quantityType = QuantifierFixedCount;
184 quantityCount = 1;
185 }
186
187 static PatternTerm ForwardReference()
188 {
189 return PatternTerm(TypeForwardReference);
190 }
191
192 static PatternTerm BOL()
193 {
194 return PatternTerm(TypeAssertionBOL);
195 }
196
197 static PatternTerm EOL()
198 {
199 return PatternTerm(TypeAssertionEOL);
200 }
201
202 static PatternTerm WordBoundary(bool invert)
203 {
204 return PatternTerm(TypeAssertionWordBoundary, invert);
205 }
206
207 bool invert()
208 {
209 return m_invert;
210 }
211
212 bool capture()
213 {
214 return m_capture;
215 }
216
217 void quantify(unsigned count, QuantifierType type)
218 {
219 quantityCount = count;
220 quantityType = type;
221 }
222 };
223
224 struct PatternAlternative {
225 WTF_MAKE_FAST_ALLOCATED;
226 public:
227 PatternAlternative(PatternDisjunction* disjunction)
228 : m_parent(disjunction)
229 , m_onceThrough(false)
230 , m_hasFixedSize(false)
231 , m_startsWithBOL(false)
232 , m_containsBOL(false)
233 {
234 }
235
236 PatternTerm& lastTerm()
237 {
238 ASSERT(m_terms.size());
239 return m_terms[m_terms.size() - 1];
240 }
241
242 void removeLastTerm()
243 {
244 ASSERT(m_terms.size());
245 m_terms.shrink(m_terms.size() - 1);
246 }
247
248 void setOnceThrough()
249 {
250 m_onceThrough = true;
251 }
252
253 bool onceThrough()
254 {
255 return m_onceThrough;
256 }
257
258 Vector<PatternTerm> m_terms;
259 PatternDisjunction* m_parent;
260 unsigned m_minimumSize;
261 bool m_onceThrough : 1;
262 bool m_hasFixedSize : 1;
263 bool m_startsWithBOL : 1;
264 bool m_containsBOL : 1;
265 };
266
267 struct PatternDisjunction {
268 WTF_MAKE_FAST_ALLOCATED;
269 public:
270 PatternDisjunction(PatternAlternative* parent = 0)
271 : m_parent(parent)
272 , m_hasFixedSize(false)
273 {
274 }
275
276 ~PatternDisjunction()
277 {
278 deleteAllValues(m_alternatives);
279 }
280
281 PatternAlternative* addNewAlternative()
282 {
283 PatternAlternative* alternative = new PatternAlternative(this);
284 m_alternatives.append(alternative);
285 return alternative;
286 }
287
288 Vector<PatternAlternative*> m_alternatives;
289 PatternAlternative* m_parent;
290 unsigned m_minimumSize;
291 unsigned m_callFrameSize;
292 bool m_hasFixedSize;
293 };
294
295 // You probably don't want to be calling these functions directly
296 // (please to be calling newlineCharacterClass() et al on your
297 // friendly neighborhood YarrPattern instance to get nicely
298 // cached copies).
299 CharacterClass* newlineCreate();
300 CharacterClass* digitsCreate();
301 CharacterClass* spacesCreate();
302 CharacterClass* wordcharCreate();
303 CharacterClass* nondigitsCreate();
304 CharacterClass* nonspacesCreate();
305 CharacterClass* nonwordcharCreate();
306
307 struct TermChain {
308 TermChain(PatternTerm term)
309 : term(term)
310 {}
311
312 PatternTerm term;
313 Vector<TermChain> hotTerms;
314 };
315
316 struct YarrPattern {
317 YarrPattern(const UString& pattern, bool ignoreCase, bool multiline, const char** error);
318
319 ~YarrPattern()
320 {
321 deleteAllValues(m_disjunctions);
322 deleteAllValues(m_userCharacterClasses);
323 }
324
325 void reset()
326 {
327 m_numSubpatterns = 0;
328 m_maxBackReference = 0;
329
330 m_containsBackreferences = false;
331 m_containsBOL = false;
332
333 newlineCached = 0;
334 digitsCached = 0;
335 spacesCached = 0;
336 wordcharCached = 0;
337 nondigitsCached = 0;
338 nonspacesCached = 0;
339 nonwordcharCached = 0;
340
341 deleteAllValues(m_disjunctions);
342 m_disjunctions.clear();
343 deleteAllValues(m_userCharacterClasses);
344 m_userCharacterClasses.clear();
345 }
346
347 bool containsIllegalBackReference()
348 {
349 return m_maxBackReference > m_numSubpatterns;
350 }
351
352 CharacterClass* newlineCharacterClass()
353 {
354 if (!newlineCached)
355 m_userCharacterClasses.append(newlineCached = newlineCreate());
356 return newlineCached;
357 }
358 CharacterClass* digitsCharacterClass()
359 {
360 if (!digitsCached)
361 m_userCharacterClasses.append(digitsCached = digitsCreate());
362 return digitsCached;
363 }
364 CharacterClass* spacesCharacterClass()
365 {
366 if (!spacesCached)
367 m_userCharacterClasses.append(spacesCached = spacesCreate());
368 return spacesCached;
369 }
370 CharacterClass* wordcharCharacterClass()
371 {
372 if (!wordcharCached)
373 m_userCharacterClasses.append(wordcharCached = wordcharCreate());
374 return wordcharCached;
375 }
376 CharacterClass* nondigitsCharacterClass()
377 {
378 if (!nondigitsCached)
379 m_userCharacterClasses.append(nondigitsCached = nondigitsCreate());
380 return nondigitsCached;
381 }
382 CharacterClass* nonspacesCharacterClass()
383 {
384 if (!nonspacesCached)
385 m_userCharacterClasses.append(nonspacesCached = nonspacesCreate());
386 return nonspacesCached;
387 }
388 CharacterClass* nonwordcharCharacterClass()
389 {
390 if (!nonwordcharCached)
391 m_userCharacterClasses.append(nonwordcharCached = nonwordcharCreate());
392 return nonwordcharCached;
393 }
394
395 bool m_ignoreCase : 1;
396 bool m_multiline : 1;
397 bool m_containsBackreferences : 1;
398 bool m_containsBOL : 1;
399 unsigned m_numSubpatterns;
400 unsigned m_maxBackReference;
401 PatternDisjunction* m_body;
402 Vector<PatternDisjunction*, 4> m_disjunctions;
403 Vector<CharacterClass*> m_userCharacterClasses;
404
405 private:
406 const char* compile(const UString& patternString);
407
408 CharacterClass* newlineCached;
409 CharacterClass* digitsCached;
410 CharacterClass* spacesCached;
411 CharacterClass* wordcharCached;
412 CharacterClass* nondigitsCached;
413 CharacterClass* nonspacesCached;
414 CharacterClass* nonwordcharCached;
415 };
416
417 } } // namespace JSC::Yarr
418
419 #endif // YarrPattern_h