]> git.saurik.com Git - apple/javascriptcore.git/blame - yarr/YarrPattern.h
JavaScriptCore-1218.35.tar.gz
[apple/javascriptcore.git] / yarr / YarrPattern.h
CommitLineData
ba379fdc 1/*
93a37866 2 * Copyright (C) 2009, 2013 Apple Inc. All rights reserved.
14957cd0 3 * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
ba379fdc
A
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
14957cd0
A
27#ifndef YarrPattern_h
28#define YarrPattern_h
ba379fdc 29
1df5f87f 30#include <wtf/CheckedArithmetic.h>
93a37866
A
31#include <wtf/OwnPtr.h>
32#include <wtf/PassOwnPtr.h>
6fe7ccc8 33#include <wtf/RefCounted.h>
ba379fdc 34#include <wtf/Vector.h>
93a37866 35#include <wtf/text/WTFString.h>
ba379fdc
A
36#include <wtf/unicode/Unicode.h>
37
ba379fdc
A
38namespace JSC { namespace Yarr {
39
ba379fdc
A
40struct PatternDisjunction;
41
42struct CharacterRange {
43 UChar begin;
44 UChar end;
45
46 CharacterRange(UChar begin, UChar end)
47 : begin(begin)
48 , end(end)
49 {
50 }
51};
52
14957cd0
A
53struct CharacterClass {
54 WTF_MAKE_FAST_ALLOCATED;
55public:
4e4e5a6f 56 // All CharacterClass instances have to have the full set of matches and ranges,
93a37866 57 // they may have an optional m_table for faster lookups (which must match the
4e4e5a6f 58 // specified matches and ranges)
93a37866
A
59 CharacterClass()
60 : m_table(0)
61 {
62 }
63 CharacterClass(const char* table, bool inverted)
4e4e5a6f 64 : m_table(table)
93a37866 65 , m_tableInverted(inverted)
4e4e5a6f
A
66 {
67 }
ba379fdc
A
68 Vector<UChar> m_matches;
69 Vector<CharacterRange> m_ranges;
70 Vector<UChar> m_matchesUnicode;
71 Vector<CharacterRange> m_rangesUnicode;
93a37866
A
72
73 const char* m_table;
74 bool m_tableInverted;
ba379fdc
A
75};
76
77enum QuantifierType {
78 QuantifierFixedCount,
79 QuantifierGreedy,
80 QuantifierNonGreedy,
81};
82
83struct PatternTerm {
84 enum Type {
85 TypeAssertionBOL,
86 TypeAssertionEOL,
87 TypeAssertionWordBoundary,
88 TypePatternCharacter,
89 TypeCharacterClass,
90 TypeBackReference,
91 TypeForwardReference,
92 TypeParenthesesSubpattern,
93 TypeParentheticalAssertion,
14957cd0 94 TypeDotStarEnclosure,
ba379fdc 95 } type;
14957cd0
A
96 bool m_capture :1;
97 bool m_invert :1;
ba379fdc
A
98 union {
99 UChar patternCharacter;
100 CharacterClass* characterClass;
14957cd0 101 unsigned backReferenceSubpatternId;
ba379fdc
A
102 struct {
103 PatternDisjunction* disjunction;
104 unsigned subpatternId;
105 unsigned lastSubpatternId;
106 bool isCopy;
14957cd0 107 bool isTerminal;
ba379fdc 108 } parentheses;
14957cd0
A
109 struct {
110 bool bolAnchor : 1;
111 bool eolAnchor : 1;
112 } anchors;
ba379fdc
A
113 };
114 QuantifierType quantityType;
1df5f87f 115 Checked<unsigned> quantityCount;
ba379fdc
A
116 int inputPosition;
117 unsigned frameLocation;
118
119 PatternTerm(UChar ch)
120 : type(PatternTerm::TypePatternCharacter)
14957cd0
A
121 , m_capture(false)
122 , m_invert(false)
ba379fdc
A
123 {
124 patternCharacter = ch;
125 quantityType = QuantifierFixedCount;
126 quantityCount = 1;
127 }
128
129 PatternTerm(CharacterClass* charClass, bool invert)
130 : type(PatternTerm::TypeCharacterClass)
14957cd0
A
131 , m_capture(false)
132 , m_invert(invert)
ba379fdc
A
133 {
134 characterClass = charClass;
135 quantityType = QuantifierFixedCount;
136 quantityCount = 1;
137 }
138
14957cd0 139 PatternTerm(Type type, unsigned subpatternId, PatternDisjunction* disjunction, bool capture = false, bool invert = false)
ba379fdc 140 : type(type)
14957cd0
A
141 , m_capture(capture)
142 , m_invert(invert)
ba379fdc
A
143 {
144 parentheses.disjunction = disjunction;
145 parentheses.subpatternId = subpatternId;
146 parentheses.isCopy = false;
14957cd0 147 parentheses.isTerminal = false;
ba379fdc
A
148 quantityType = QuantifierFixedCount;
149 quantityCount = 1;
150 }
151
152 PatternTerm(Type type, bool invert = false)
153 : type(type)
14957cd0
A
154 , m_capture(false)
155 , m_invert(invert)
ba379fdc
A
156 {
157 quantityType = QuantifierFixedCount;
158 quantityCount = 1;
159 }
160
161 PatternTerm(unsigned spatternId)
162 : type(TypeBackReference)
14957cd0
A
163 , m_capture(false)
164 , m_invert(false)
ba379fdc 165 {
14957cd0 166 backReferenceSubpatternId = spatternId;
ba379fdc
A
167 quantityType = QuantifierFixedCount;
168 quantityCount = 1;
169 }
170
14957cd0
A
171 PatternTerm(bool bolAnchor, bool eolAnchor)
172 : type(TypeDotStarEnclosure)
173 , m_capture(false)
174 , m_invert(false)
175 {
176 anchors.bolAnchor = bolAnchor;
177 anchors.eolAnchor = eolAnchor;
178 quantityType = QuantifierFixedCount;
179 quantityCount = 1;
180 }
181
ba379fdc
A
182 static PatternTerm ForwardReference()
183 {
184 return PatternTerm(TypeForwardReference);
185 }
186
187 static PatternTerm BOL()
188 {
189 return PatternTerm(TypeAssertionBOL);
190 }
191
192 static PatternTerm EOL()
193 {
194 return PatternTerm(TypeAssertionEOL);
195 }
196
197 static PatternTerm WordBoundary(bool invert)
198 {
199 return PatternTerm(TypeAssertionWordBoundary, invert);
200 }
201
202 bool invert()
203 {
14957cd0 204 return m_invert;
ba379fdc
A
205 }
206
207 bool capture()
208 {
14957cd0 209 return m_capture;
ba379fdc
A
210 }
211
212 void quantify(unsigned count, QuantifierType type)
213 {
214 quantityCount = count;
215 quantityType = type;
216 }
217};
218
14957cd0
A
219struct PatternAlternative {
220 WTF_MAKE_FAST_ALLOCATED;
221public:
ba379fdc
A
222 PatternAlternative(PatternDisjunction* disjunction)
223 : m_parent(disjunction)
14957cd0
A
224 , m_onceThrough(false)
225 , m_hasFixedSize(false)
226 , m_startsWithBOL(false)
227 , m_containsBOL(false)
ba379fdc
A
228 {
229 }
230
231 PatternTerm& lastTerm()
232 {
233 ASSERT(m_terms.size());
234 return m_terms[m_terms.size() - 1];
235 }
236
237 void removeLastTerm()
238 {
239 ASSERT(m_terms.size());
240 m_terms.shrink(m_terms.size() - 1);
241 }
14957cd0
A
242
243 void setOnceThrough()
244 {
245 m_onceThrough = true;
246 }
247
248 bool onceThrough()
249 {
250 return m_onceThrough;
251 }
ba379fdc
A
252
253 Vector<PatternTerm> m_terms;
254 PatternDisjunction* m_parent;
255 unsigned m_minimumSize;
14957cd0
A
256 bool m_onceThrough : 1;
257 bool m_hasFixedSize : 1;
258 bool m_startsWithBOL : 1;
259 bool m_containsBOL : 1;
ba379fdc
A
260};
261
14957cd0
A
262struct PatternDisjunction {
263 WTF_MAKE_FAST_ALLOCATED;
264public:
ba379fdc
A
265 PatternDisjunction(PatternAlternative* parent = 0)
266 : m_parent(parent)
14957cd0 267 , m_hasFixedSize(false)
ba379fdc
A
268 {
269 }
270
ba379fdc
A
271 PatternAlternative* addNewAlternative()
272 {
273 PatternAlternative* alternative = new PatternAlternative(this);
93a37866 274 m_alternatives.append(adoptPtr(alternative));
ba379fdc
A
275 return alternative;
276 }
277
93a37866 278 Vector<OwnPtr<PatternAlternative> > m_alternatives;
ba379fdc
A
279 PatternAlternative* m_parent;
280 unsigned m_minimumSize;
281 unsigned m_callFrameSize;
282 bool m_hasFixedSize;
283};
284
285// You probably don't want to be calling these functions directly
286// (please to be calling newlineCharacterClass() et al on your
14957cd0 287// friendly neighborhood YarrPattern instance to get nicely
ba379fdc
A
288// cached copies).
289CharacterClass* newlineCreate();
290CharacterClass* digitsCreate();
291CharacterClass* spacesCreate();
292CharacterClass* wordcharCreate();
293CharacterClass* nondigitsCreate();
294CharacterClass* nonspacesCreate();
295CharacterClass* nonwordcharCreate();
296
14957cd0
A
297struct TermChain {
298 TermChain(PatternTerm term)
299 : term(term)
300 {}
301
302 PatternTerm term;
303 Vector<TermChain> hotTerms;
304};
ba379fdc 305
14957cd0 306struct YarrPattern {
93a37866 307 JS_EXPORT_PRIVATE YarrPattern(const String& pattern, bool ignoreCase, bool multiline, const char** error);
ba379fdc
A
308
309 void reset()
310 {
311 m_numSubpatterns = 0;
312 m_maxBackReference = 0;
313
14957cd0
A
314 m_containsBackreferences = false;
315 m_containsBOL = false;
4e4e5a6f 316
ba379fdc
A
317 newlineCached = 0;
318 digitsCached = 0;
319 spacesCached = 0;
320 wordcharCached = 0;
321 nondigitsCached = 0;
322 nonspacesCached = 0;
323 nonwordcharCached = 0;
324
ba379fdc 325 m_disjunctions.clear();
ba379fdc
A
326 m_userCharacterClasses.clear();
327 }
328
329 bool containsIllegalBackReference()
330 {
331 return m_maxBackReference > m_numSubpatterns;
332 }
333
334 CharacterClass* newlineCharacterClass()
335 {
336 if (!newlineCached)
93a37866 337 m_userCharacterClasses.append(adoptPtr(newlineCached = newlineCreate()));
ba379fdc
A
338 return newlineCached;
339 }
340 CharacterClass* digitsCharacterClass()
341 {
342 if (!digitsCached)
93a37866 343 m_userCharacterClasses.append(adoptPtr(digitsCached = digitsCreate()));
ba379fdc
A
344 return digitsCached;
345 }
346 CharacterClass* spacesCharacterClass()
347 {
348 if (!spacesCached)
93a37866 349 m_userCharacterClasses.append(adoptPtr(spacesCached = spacesCreate()));
ba379fdc
A
350 return spacesCached;
351 }
352 CharacterClass* wordcharCharacterClass()
353 {
354 if (!wordcharCached)
93a37866 355 m_userCharacterClasses.append(adoptPtr(wordcharCached = wordcharCreate()));
ba379fdc
A
356 return wordcharCached;
357 }
358 CharacterClass* nondigitsCharacterClass()
359 {
360 if (!nondigitsCached)
93a37866 361 m_userCharacterClasses.append(adoptPtr(nondigitsCached = nondigitsCreate()));
ba379fdc
A
362 return nondigitsCached;
363 }
364 CharacterClass* nonspacesCharacterClass()
365 {
366 if (!nonspacesCached)
93a37866 367 m_userCharacterClasses.append(adoptPtr(nonspacesCached = nonspacesCreate()));
ba379fdc
A
368 return nonspacesCached;
369 }
370 CharacterClass* nonwordcharCharacterClass()
371 {
372 if (!nonwordcharCached)
93a37866 373 m_userCharacterClasses.append(adoptPtr(nonwordcharCached = nonwordcharCreate()));
ba379fdc
A
374 return nonwordcharCached;
375 }
376
14957cd0
A
377 bool m_ignoreCase : 1;
378 bool m_multiline : 1;
379 bool m_containsBackreferences : 1;
380 bool m_containsBOL : 1;
ba379fdc
A
381 unsigned m_numSubpatterns;
382 unsigned m_maxBackReference;
383 PatternDisjunction* m_body;
93a37866
A
384 Vector<OwnPtr<PatternDisjunction>, 4> m_disjunctions;
385 Vector<OwnPtr<CharacterClass> > m_userCharacterClasses;
ba379fdc
A
386
387private:
93a37866 388 const char* compile(const String& patternString);
14957cd0 389
ba379fdc
A
390 CharacterClass* newlineCached;
391 CharacterClass* digitsCached;
392 CharacterClass* spacesCached;
393 CharacterClass* wordcharCached;
394 CharacterClass* nondigitsCached;
395 CharacterClass* nonspacesCached;
396 CharacterClass* nonwordcharCached;
397};
398
399} } // namespace JSC::Yarr
400
14957cd0 401#endif // YarrPattern_h