]> git.saurik.com Git - apple/javascriptcore.git/blob - yarr/YarrPattern.h
JavaScriptCore-1218.34.tar.gz
[apple/javascriptcore.git] / yarr / YarrPattern.h
1 /*
2 * Copyright (C) 2009, 2013 Apple Inc. All rights reserved.
3 * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #ifndef YarrPattern_h
28 #define YarrPattern_h
29
30 #include <wtf/CheckedArithmetic.h>
31 #include <wtf/OwnPtr.h>
32 #include <wtf/PassOwnPtr.h>
33 #include <wtf/RefCounted.h>
34 #include <wtf/Vector.h>
35 #include <wtf/text/WTFString.h>
36 #include <wtf/unicode/Unicode.h>
37
38 namespace JSC { namespace Yarr {
39
40 struct PatternDisjunction;
41
42 struct CharacterRange {
43 UChar begin;
44 UChar end;
45
46 CharacterRange(UChar begin, UChar end)
47 : begin(begin)
48 , end(end)
49 {
50 }
51 };
52
53 struct CharacterClass {
54 WTF_MAKE_FAST_ALLOCATED;
55 public:
56 // All CharacterClass instances have to have the full set of matches and ranges,
57 // they may have an optional m_table for faster lookups (which must match the
58 // specified matches and ranges)
59 CharacterClass()
60 : m_table(0)
61 {
62 }
63 CharacterClass(const char* table, bool inverted)
64 : m_table(table)
65 , m_tableInverted(inverted)
66 {
67 }
68 Vector<UChar> m_matches;
69 Vector<CharacterRange> m_ranges;
70 Vector<UChar> m_matchesUnicode;
71 Vector<CharacterRange> m_rangesUnicode;
72
73 const char* m_table;
74 bool m_tableInverted;
75 };
76
77 enum QuantifierType {
78 QuantifierFixedCount,
79 QuantifierGreedy,
80 QuantifierNonGreedy,
81 };
82
83 struct PatternTerm {
84 enum Type {
85 TypeAssertionBOL,
86 TypeAssertionEOL,
87 TypeAssertionWordBoundary,
88 TypePatternCharacter,
89 TypeCharacterClass,
90 TypeBackReference,
91 TypeForwardReference,
92 TypeParenthesesSubpattern,
93 TypeParentheticalAssertion,
94 TypeDotStarEnclosure,
95 } type;
96 bool m_capture :1;
97 bool m_invert :1;
98 union {
99 UChar patternCharacter;
100 CharacterClass* characterClass;
101 unsigned backReferenceSubpatternId;
102 struct {
103 PatternDisjunction* disjunction;
104 unsigned subpatternId;
105 unsigned lastSubpatternId;
106 bool isCopy;
107 bool isTerminal;
108 } parentheses;
109 struct {
110 bool bolAnchor : 1;
111 bool eolAnchor : 1;
112 } anchors;
113 };
114 QuantifierType quantityType;
115 Checked<unsigned> quantityCount;
116 int inputPosition;
117 unsigned frameLocation;
118
119 PatternTerm(UChar ch)
120 : type(PatternTerm::TypePatternCharacter)
121 , m_capture(false)
122 , m_invert(false)
123 {
124 patternCharacter = ch;
125 quantityType = QuantifierFixedCount;
126 quantityCount = 1;
127 }
128
129 PatternTerm(CharacterClass* charClass, bool invert)
130 : type(PatternTerm::TypeCharacterClass)
131 , m_capture(false)
132 , m_invert(invert)
133 {
134 characterClass = charClass;
135 quantityType = QuantifierFixedCount;
136 quantityCount = 1;
137 }
138
139 PatternTerm(Type type, unsigned subpatternId, PatternDisjunction* disjunction, bool capture = false, bool invert = false)
140 : type(type)
141 , m_capture(capture)
142 , m_invert(invert)
143 {
144 parentheses.disjunction = disjunction;
145 parentheses.subpatternId = subpatternId;
146 parentheses.isCopy = false;
147 parentheses.isTerminal = false;
148 quantityType = QuantifierFixedCount;
149 quantityCount = 1;
150 }
151
152 PatternTerm(Type type, bool invert = false)
153 : type(type)
154 , m_capture(false)
155 , m_invert(invert)
156 {
157 quantityType = QuantifierFixedCount;
158 quantityCount = 1;
159 }
160
161 PatternTerm(unsigned spatternId)
162 : type(TypeBackReference)
163 , m_capture(false)
164 , m_invert(false)
165 {
166 backReferenceSubpatternId = spatternId;
167 quantityType = QuantifierFixedCount;
168 quantityCount = 1;
169 }
170
171 PatternTerm(bool bolAnchor, bool eolAnchor)
172 : type(TypeDotStarEnclosure)
173 , m_capture(false)
174 , m_invert(false)
175 {
176 anchors.bolAnchor = bolAnchor;
177 anchors.eolAnchor = eolAnchor;
178 quantityType = QuantifierFixedCount;
179 quantityCount = 1;
180 }
181
182 static PatternTerm ForwardReference()
183 {
184 return PatternTerm(TypeForwardReference);
185 }
186
187 static PatternTerm BOL()
188 {
189 return PatternTerm(TypeAssertionBOL);
190 }
191
192 static PatternTerm EOL()
193 {
194 return PatternTerm(TypeAssertionEOL);
195 }
196
197 static PatternTerm WordBoundary(bool invert)
198 {
199 return PatternTerm(TypeAssertionWordBoundary, invert);
200 }
201
202 bool invert()
203 {
204 return m_invert;
205 }
206
207 bool capture()
208 {
209 return m_capture;
210 }
211
212 void quantify(unsigned count, QuantifierType type)
213 {
214 quantityCount = count;
215 quantityType = type;
216 }
217 };
218
219 struct PatternAlternative {
220 WTF_MAKE_FAST_ALLOCATED;
221 public:
222 PatternAlternative(PatternDisjunction* disjunction)
223 : m_parent(disjunction)
224 , m_onceThrough(false)
225 , m_hasFixedSize(false)
226 , m_startsWithBOL(false)
227 , m_containsBOL(false)
228 {
229 }
230
231 PatternTerm& lastTerm()
232 {
233 ASSERT(m_terms.size());
234 return m_terms[m_terms.size() - 1];
235 }
236
237 void removeLastTerm()
238 {
239 ASSERT(m_terms.size());
240 m_terms.shrink(m_terms.size() - 1);
241 }
242
243 void setOnceThrough()
244 {
245 m_onceThrough = true;
246 }
247
248 bool onceThrough()
249 {
250 return m_onceThrough;
251 }
252
253 Vector<PatternTerm> m_terms;
254 PatternDisjunction* m_parent;
255 unsigned m_minimumSize;
256 bool m_onceThrough : 1;
257 bool m_hasFixedSize : 1;
258 bool m_startsWithBOL : 1;
259 bool m_containsBOL : 1;
260 };
261
262 struct PatternDisjunction {
263 WTF_MAKE_FAST_ALLOCATED;
264 public:
265 PatternDisjunction(PatternAlternative* parent = 0)
266 : m_parent(parent)
267 , m_hasFixedSize(false)
268 {
269 }
270
271 PatternAlternative* addNewAlternative()
272 {
273 PatternAlternative* alternative = new PatternAlternative(this);
274 m_alternatives.append(adoptPtr(alternative));
275 return alternative;
276 }
277
278 Vector<OwnPtr<PatternAlternative> > m_alternatives;
279 PatternAlternative* m_parent;
280 unsigned m_minimumSize;
281 unsigned m_callFrameSize;
282 bool m_hasFixedSize;
283 };
284
285 // You probably don't want to be calling these functions directly
286 // (please to be calling newlineCharacterClass() et al on your
287 // friendly neighborhood YarrPattern instance to get nicely
288 // cached copies).
289 CharacterClass* newlineCreate();
290 CharacterClass* digitsCreate();
291 CharacterClass* spacesCreate();
292 CharacterClass* wordcharCreate();
293 CharacterClass* nondigitsCreate();
294 CharacterClass* nonspacesCreate();
295 CharacterClass* nonwordcharCreate();
296
297 struct TermChain {
298 TermChain(PatternTerm term)
299 : term(term)
300 {}
301
302 PatternTerm term;
303 Vector<TermChain> hotTerms;
304 };
305
306 struct YarrPattern {
307 JS_EXPORT_PRIVATE YarrPattern(const String& pattern, bool ignoreCase, bool multiline, const char** error);
308
309 void reset()
310 {
311 m_numSubpatterns = 0;
312 m_maxBackReference = 0;
313
314 m_containsBackreferences = false;
315 m_containsBOL = false;
316
317 newlineCached = 0;
318 digitsCached = 0;
319 spacesCached = 0;
320 wordcharCached = 0;
321 nondigitsCached = 0;
322 nonspacesCached = 0;
323 nonwordcharCached = 0;
324
325 m_disjunctions.clear();
326 m_userCharacterClasses.clear();
327 }
328
329 bool containsIllegalBackReference()
330 {
331 return m_maxBackReference > m_numSubpatterns;
332 }
333
334 CharacterClass* newlineCharacterClass()
335 {
336 if (!newlineCached)
337 m_userCharacterClasses.append(adoptPtr(newlineCached = newlineCreate()));
338 return newlineCached;
339 }
340 CharacterClass* digitsCharacterClass()
341 {
342 if (!digitsCached)
343 m_userCharacterClasses.append(adoptPtr(digitsCached = digitsCreate()));
344 return digitsCached;
345 }
346 CharacterClass* spacesCharacterClass()
347 {
348 if (!spacesCached)
349 m_userCharacterClasses.append(adoptPtr(spacesCached = spacesCreate()));
350 return spacesCached;
351 }
352 CharacterClass* wordcharCharacterClass()
353 {
354 if (!wordcharCached)
355 m_userCharacterClasses.append(adoptPtr(wordcharCached = wordcharCreate()));
356 return wordcharCached;
357 }
358 CharacterClass* nondigitsCharacterClass()
359 {
360 if (!nondigitsCached)
361 m_userCharacterClasses.append(adoptPtr(nondigitsCached = nondigitsCreate()));
362 return nondigitsCached;
363 }
364 CharacterClass* nonspacesCharacterClass()
365 {
366 if (!nonspacesCached)
367 m_userCharacterClasses.append(adoptPtr(nonspacesCached = nonspacesCreate()));
368 return nonspacesCached;
369 }
370 CharacterClass* nonwordcharCharacterClass()
371 {
372 if (!nonwordcharCached)
373 m_userCharacterClasses.append(adoptPtr(nonwordcharCached = nonwordcharCreate()));
374 return nonwordcharCached;
375 }
376
377 bool m_ignoreCase : 1;
378 bool m_multiline : 1;
379 bool m_containsBackreferences : 1;
380 bool m_containsBOL : 1;
381 unsigned m_numSubpatterns;
382 unsigned m_maxBackReference;
383 PatternDisjunction* m_body;
384 Vector<OwnPtr<PatternDisjunction>, 4> m_disjunctions;
385 Vector<OwnPtr<CharacterClass> > m_userCharacterClasses;
386
387 private:
388 const char* compile(const String& patternString);
389
390 CharacterClass* newlineCached;
391 CharacterClass* digitsCached;
392 CharacterClass* spacesCached;
393 CharacterClass* wordcharCached;
394 CharacterClass* nondigitsCached;
395 CharacterClass* nonspacesCached;
396 CharacterClass* nonwordcharCached;
397 };
398
399 } } // namespace JSC::Yarr
400
401 #endif // YarrPattern_h