]> git.saurik.com Git - apple/javascriptcore.git/blame - yarr/YarrPattern.h
JavaScriptCore-7600.1.4.13.1.tar.gz
[apple/javascriptcore.git] / yarr / YarrPattern.h
CommitLineData
ba379fdc 1/*
93a37866 2 * Copyright (C) 2009, 2013 Apple Inc. All rights reserved.
14957cd0 3 * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
ba379fdc
A
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
14957cd0
A
27#ifndef YarrPattern_h
28#define YarrPattern_h
ba379fdc 29
1df5f87f 30#include <wtf/CheckedArithmetic.h>
93a37866
A
31#include <wtf/OwnPtr.h>
32#include <wtf/PassOwnPtr.h>
6fe7ccc8 33#include <wtf/RefCounted.h>
ba379fdc 34#include <wtf/Vector.h>
93a37866 35#include <wtf/text/WTFString.h>
ba379fdc 36
ba379fdc
A
37namespace JSC { namespace Yarr {
38
ba379fdc
A
39struct PatternDisjunction;
40
41struct CharacterRange {
42 UChar begin;
43 UChar end;
44
45 CharacterRange(UChar begin, UChar end)
46 : begin(begin)
47 , end(end)
48 {
49 }
50};
51
14957cd0
A
52struct CharacterClass {
53 WTF_MAKE_FAST_ALLOCATED;
54public:
4e4e5a6f 55 // All CharacterClass instances have to have the full set of matches and ranges,
93a37866 56 // they may have an optional m_table for faster lookups (which must match the
4e4e5a6f 57 // specified matches and ranges)
93a37866
A
58 CharacterClass()
59 : m_table(0)
60 {
61 }
62 CharacterClass(const char* table, bool inverted)
4e4e5a6f 63 : m_table(table)
93a37866 64 , m_tableInverted(inverted)
4e4e5a6f
A
65 {
66 }
ba379fdc
A
67 Vector<UChar> m_matches;
68 Vector<CharacterRange> m_ranges;
69 Vector<UChar> m_matchesUnicode;
70 Vector<CharacterRange> m_rangesUnicode;
93a37866
A
71
72 const char* m_table;
73 bool m_tableInverted;
ba379fdc
A
74};
75
76enum QuantifierType {
77 QuantifierFixedCount,
78 QuantifierGreedy,
79 QuantifierNonGreedy,
80};
81
82struct PatternTerm {
83 enum Type {
84 TypeAssertionBOL,
85 TypeAssertionEOL,
86 TypeAssertionWordBoundary,
87 TypePatternCharacter,
88 TypeCharacterClass,
89 TypeBackReference,
90 TypeForwardReference,
91 TypeParenthesesSubpattern,
92 TypeParentheticalAssertion,
14957cd0 93 TypeDotStarEnclosure,
ba379fdc 94 } type;
14957cd0
A
95 bool m_capture :1;
96 bool m_invert :1;
ba379fdc
A
97 union {
98 UChar patternCharacter;
99 CharacterClass* characterClass;
14957cd0 100 unsigned backReferenceSubpatternId;
ba379fdc
A
101 struct {
102 PatternDisjunction* disjunction;
103 unsigned subpatternId;
104 unsigned lastSubpatternId;
105 bool isCopy;
14957cd0 106 bool isTerminal;
ba379fdc 107 } parentheses;
14957cd0
A
108 struct {
109 bool bolAnchor : 1;
110 bool eolAnchor : 1;
111 } anchors;
ba379fdc
A
112 };
113 QuantifierType quantityType;
1df5f87f 114 Checked<unsigned> quantityCount;
ba379fdc
A
115 int inputPosition;
116 unsigned frameLocation;
117
118 PatternTerm(UChar ch)
119 : type(PatternTerm::TypePatternCharacter)
14957cd0
A
120 , m_capture(false)
121 , m_invert(false)
ba379fdc
A
122 {
123 patternCharacter = ch;
124 quantityType = QuantifierFixedCount;
125 quantityCount = 1;
126 }
127
128 PatternTerm(CharacterClass* charClass, bool invert)
129 : type(PatternTerm::TypeCharacterClass)
14957cd0
A
130 , m_capture(false)
131 , m_invert(invert)
ba379fdc
A
132 {
133 characterClass = charClass;
134 quantityType = QuantifierFixedCount;
135 quantityCount = 1;
136 }
137
14957cd0 138 PatternTerm(Type type, unsigned subpatternId, PatternDisjunction* disjunction, bool capture = false, bool invert = false)
ba379fdc 139 : type(type)
14957cd0
A
140 , m_capture(capture)
141 , m_invert(invert)
ba379fdc
A
142 {
143 parentheses.disjunction = disjunction;
144 parentheses.subpatternId = subpatternId;
145 parentheses.isCopy = false;
14957cd0 146 parentheses.isTerminal = false;
ba379fdc
A
147 quantityType = QuantifierFixedCount;
148 quantityCount = 1;
149 }
150
151 PatternTerm(Type type, bool invert = false)
152 : type(type)
14957cd0
A
153 , m_capture(false)
154 , m_invert(invert)
ba379fdc
A
155 {
156 quantityType = QuantifierFixedCount;
157 quantityCount = 1;
158 }
159
160 PatternTerm(unsigned spatternId)
161 : type(TypeBackReference)
14957cd0
A
162 , m_capture(false)
163 , m_invert(false)
ba379fdc 164 {
14957cd0 165 backReferenceSubpatternId = spatternId;
ba379fdc
A
166 quantityType = QuantifierFixedCount;
167 quantityCount = 1;
168 }
169
14957cd0
A
170 PatternTerm(bool bolAnchor, bool eolAnchor)
171 : type(TypeDotStarEnclosure)
172 , m_capture(false)
173 , m_invert(false)
174 {
175 anchors.bolAnchor = bolAnchor;
176 anchors.eolAnchor = eolAnchor;
177 quantityType = QuantifierFixedCount;
178 quantityCount = 1;
179 }
180
ba379fdc
A
181 static PatternTerm ForwardReference()
182 {
183 return PatternTerm(TypeForwardReference);
184 }
185
186 static PatternTerm BOL()
187 {
188 return PatternTerm(TypeAssertionBOL);
189 }
190
191 static PatternTerm EOL()
192 {
193 return PatternTerm(TypeAssertionEOL);
194 }
195
196 static PatternTerm WordBoundary(bool invert)
197 {
198 return PatternTerm(TypeAssertionWordBoundary, invert);
199 }
200
201 bool invert()
202 {
14957cd0 203 return m_invert;
ba379fdc
A
204 }
205
206 bool capture()
207 {
14957cd0 208 return m_capture;
ba379fdc
A
209 }
210
211 void quantify(unsigned count, QuantifierType type)
212 {
213 quantityCount = count;
214 quantityType = type;
215 }
216};
217
14957cd0
A
218struct PatternAlternative {
219 WTF_MAKE_FAST_ALLOCATED;
220public:
ba379fdc
A
221 PatternAlternative(PatternDisjunction* disjunction)
222 : m_parent(disjunction)
14957cd0
A
223 , m_onceThrough(false)
224 , m_hasFixedSize(false)
225 , m_startsWithBOL(false)
226 , m_containsBOL(false)
ba379fdc
A
227 {
228 }
229
230 PatternTerm& lastTerm()
231 {
232 ASSERT(m_terms.size());
233 return m_terms[m_terms.size() - 1];
234 }
235
236 void removeLastTerm()
237 {
238 ASSERT(m_terms.size());
239 m_terms.shrink(m_terms.size() - 1);
240 }
14957cd0
A
241
242 void setOnceThrough()
243 {
244 m_onceThrough = true;
245 }
246
247 bool onceThrough()
248 {
249 return m_onceThrough;
250 }
ba379fdc
A
251
252 Vector<PatternTerm> m_terms;
253 PatternDisjunction* m_parent;
254 unsigned m_minimumSize;
14957cd0
A
255 bool m_onceThrough : 1;
256 bool m_hasFixedSize : 1;
257 bool m_startsWithBOL : 1;
258 bool m_containsBOL : 1;
ba379fdc
A
259};
260
14957cd0
A
261struct PatternDisjunction {
262 WTF_MAKE_FAST_ALLOCATED;
263public:
ba379fdc
A
264 PatternDisjunction(PatternAlternative* parent = 0)
265 : m_parent(parent)
14957cd0 266 , m_hasFixedSize(false)
ba379fdc
A
267 {
268 }
269
ba379fdc
A
270 PatternAlternative* addNewAlternative()
271 {
272 PatternAlternative* alternative = new PatternAlternative(this);
93a37866 273 m_alternatives.append(adoptPtr(alternative));
ba379fdc
A
274 return alternative;
275 }
276
81345200 277 Vector<OwnPtr<PatternAlternative>> m_alternatives;
ba379fdc
A
278 PatternAlternative* m_parent;
279 unsigned m_minimumSize;
280 unsigned m_callFrameSize;
281 bool m_hasFixedSize;
282};
283
284// You probably don't want to be calling these functions directly
285// (please to be calling newlineCharacterClass() et al on your
14957cd0 286// friendly neighborhood YarrPattern instance to get nicely
ba379fdc
A
287// cached copies).
288CharacterClass* newlineCreate();
289CharacterClass* digitsCreate();
290CharacterClass* spacesCreate();
291CharacterClass* wordcharCreate();
292CharacterClass* nondigitsCreate();
293CharacterClass* nonspacesCreate();
294CharacterClass* nonwordcharCreate();
295
14957cd0
A
296struct TermChain {
297 TermChain(PatternTerm term)
298 : term(term)
299 {}
300
301 PatternTerm term;
302 Vector<TermChain> hotTerms;
303};
ba379fdc 304
14957cd0 305struct YarrPattern {
93a37866 306 JS_EXPORT_PRIVATE YarrPattern(const String& pattern, bool ignoreCase, bool multiline, const char** error);
ba379fdc
A
307
308 void reset()
309 {
310 m_numSubpatterns = 0;
311 m_maxBackReference = 0;
312
14957cd0
A
313 m_containsBackreferences = false;
314 m_containsBOL = false;
81345200 315 m_containsUnsignedLengthPattern = false;
4e4e5a6f 316
ba379fdc
A
317 newlineCached = 0;
318 digitsCached = 0;
319 spacesCached = 0;
320 wordcharCached = 0;
321 nondigitsCached = 0;
322 nonspacesCached = 0;
323 nonwordcharCached = 0;
324
ba379fdc 325 m_disjunctions.clear();
ba379fdc
A
326 m_userCharacterClasses.clear();
327 }
328
329 bool containsIllegalBackReference()
330 {
331 return m_maxBackReference > m_numSubpatterns;
332 }
333
81345200
A
334 bool containsUnsignedLengthPattern()
335 {
336 return m_containsUnsignedLengthPattern;
337 }
338
ba379fdc
A
339 CharacterClass* newlineCharacterClass()
340 {
341 if (!newlineCached)
93a37866 342 m_userCharacterClasses.append(adoptPtr(newlineCached = newlineCreate()));
ba379fdc
A
343 return newlineCached;
344 }
345 CharacterClass* digitsCharacterClass()
346 {
347 if (!digitsCached)
93a37866 348 m_userCharacterClasses.append(adoptPtr(digitsCached = digitsCreate()));
ba379fdc
A
349 return digitsCached;
350 }
351 CharacterClass* spacesCharacterClass()
352 {
353 if (!spacesCached)
93a37866 354 m_userCharacterClasses.append(adoptPtr(spacesCached = spacesCreate()));
ba379fdc
A
355 return spacesCached;
356 }
357 CharacterClass* wordcharCharacterClass()
358 {
359 if (!wordcharCached)
93a37866 360 m_userCharacterClasses.append(adoptPtr(wordcharCached = wordcharCreate()));
ba379fdc
A
361 return wordcharCached;
362 }
363 CharacterClass* nondigitsCharacterClass()
364 {
365 if (!nondigitsCached)
93a37866 366 m_userCharacterClasses.append(adoptPtr(nondigitsCached = nondigitsCreate()));
ba379fdc
A
367 return nondigitsCached;
368 }
369 CharacterClass* nonspacesCharacterClass()
370 {
371 if (!nonspacesCached)
93a37866 372 m_userCharacterClasses.append(adoptPtr(nonspacesCached = nonspacesCreate()));
ba379fdc
A
373 return nonspacesCached;
374 }
375 CharacterClass* nonwordcharCharacterClass()
376 {
377 if (!nonwordcharCached)
93a37866 378 m_userCharacterClasses.append(adoptPtr(nonwordcharCached = nonwordcharCreate()));
ba379fdc
A
379 return nonwordcharCached;
380 }
381
14957cd0
A
382 bool m_ignoreCase : 1;
383 bool m_multiline : 1;
384 bool m_containsBackreferences : 1;
385 bool m_containsBOL : 1;
81345200 386 bool m_containsUnsignedLengthPattern : 1;
ba379fdc
A
387 unsigned m_numSubpatterns;
388 unsigned m_maxBackReference;
389 PatternDisjunction* m_body;
93a37866 390 Vector<OwnPtr<PatternDisjunction>, 4> m_disjunctions;
81345200 391 Vector<OwnPtr<CharacterClass>> m_userCharacterClasses;
ba379fdc
A
392
393private:
93a37866 394 const char* compile(const String& patternString);
14957cd0 395
ba379fdc
A
396 CharacterClass* newlineCached;
397 CharacterClass* digitsCached;
398 CharacterClass* spacesCached;
399 CharacterClass* wordcharCached;
400 CharacterClass* nondigitsCached;
401 CharacterClass* nonspacesCached;
402 CharacterClass* nonwordcharCached;
403};
404
405} } // namespace JSC::Yarr
406
14957cd0 407#endif // YarrPattern_h