]> git.saurik.com Git - apple/javascriptcore.git/blame - yarr/YarrPattern.h
JavaScriptCore-7601.1.46.3.tar.gz
[apple/javascriptcore.git] / yarr / YarrPattern.h
CommitLineData
ba379fdc 1/*
93a37866 2 * Copyright (C) 2009, 2013 Apple Inc. All rights reserved.
14957cd0 3 * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
ba379fdc
A
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
14957cd0
A
27#ifndef YarrPattern_h
28#define YarrPattern_h
ba379fdc 29
1df5f87f 30#include <wtf/CheckedArithmetic.h>
6fe7ccc8 31#include <wtf/RefCounted.h>
ba379fdc 32#include <wtf/Vector.h>
93a37866 33#include <wtf/text/WTFString.h>
ba379fdc 34
ba379fdc
A
35namespace JSC { namespace Yarr {
36
ba379fdc
A
37struct PatternDisjunction;
38
39struct CharacterRange {
40 UChar begin;
41 UChar end;
42
43 CharacterRange(UChar begin, UChar end)
44 : begin(begin)
45 , end(end)
46 {
47 }
48};
49
14957cd0
A
50struct CharacterClass {
51 WTF_MAKE_FAST_ALLOCATED;
52public:
4e4e5a6f 53 // All CharacterClass instances have to have the full set of matches and ranges,
93a37866 54 // they may have an optional m_table for faster lookups (which must match the
4e4e5a6f 55 // specified matches and ranges)
93a37866
A
56 CharacterClass()
57 : m_table(0)
58 {
59 }
60 CharacterClass(const char* table, bool inverted)
4e4e5a6f 61 : m_table(table)
93a37866 62 , m_tableInverted(inverted)
4e4e5a6f
A
63 {
64 }
ba379fdc
A
65 Vector<UChar> m_matches;
66 Vector<CharacterRange> m_ranges;
67 Vector<UChar> m_matchesUnicode;
68 Vector<CharacterRange> m_rangesUnicode;
93a37866
A
69
70 const char* m_table;
71 bool m_tableInverted;
ba379fdc
A
72};
73
74enum QuantifierType {
75 QuantifierFixedCount,
76 QuantifierGreedy,
77 QuantifierNonGreedy,
78};
79
80struct PatternTerm {
81 enum Type {
82 TypeAssertionBOL,
83 TypeAssertionEOL,
84 TypeAssertionWordBoundary,
85 TypePatternCharacter,
86 TypeCharacterClass,
87 TypeBackReference,
88 TypeForwardReference,
89 TypeParenthesesSubpattern,
90 TypeParentheticalAssertion,
14957cd0 91 TypeDotStarEnclosure,
ba379fdc 92 } type;
14957cd0
A
93 bool m_capture :1;
94 bool m_invert :1;
ba379fdc
A
95 union {
96 UChar patternCharacter;
97 CharacterClass* characterClass;
14957cd0 98 unsigned backReferenceSubpatternId;
ba379fdc
A
99 struct {
100 PatternDisjunction* disjunction;
101 unsigned subpatternId;
102 unsigned lastSubpatternId;
103 bool isCopy;
14957cd0 104 bool isTerminal;
ba379fdc 105 } parentheses;
14957cd0
A
106 struct {
107 bool bolAnchor : 1;
108 bool eolAnchor : 1;
109 } anchors;
ba379fdc
A
110 };
111 QuantifierType quantityType;
1df5f87f 112 Checked<unsigned> quantityCount;
ba379fdc
A
113 int inputPosition;
114 unsigned frameLocation;
115
116 PatternTerm(UChar ch)
117 : type(PatternTerm::TypePatternCharacter)
14957cd0
A
118 , m_capture(false)
119 , m_invert(false)
ba379fdc
A
120 {
121 patternCharacter = ch;
122 quantityType = QuantifierFixedCount;
123 quantityCount = 1;
124 }
125
126 PatternTerm(CharacterClass* charClass, bool invert)
127 : type(PatternTerm::TypeCharacterClass)
14957cd0
A
128 , m_capture(false)
129 , m_invert(invert)
ba379fdc
A
130 {
131 characterClass = charClass;
132 quantityType = QuantifierFixedCount;
133 quantityCount = 1;
134 }
135
14957cd0 136 PatternTerm(Type type, unsigned subpatternId, PatternDisjunction* disjunction, bool capture = false, bool invert = false)
ba379fdc 137 : type(type)
14957cd0
A
138 , m_capture(capture)
139 , m_invert(invert)
ba379fdc
A
140 {
141 parentheses.disjunction = disjunction;
142 parentheses.subpatternId = subpatternId;
143 parentheses.isCopy = false;
14957cd0 144 parentheses.isTerminal = false;
ba379fdc
A
145 quantityType = QuantifierFixedCount;
146 quantityCount = 1;
147 }
148
149 PatternTerm(Type type, bool invert = false)
150 : type(type)
14957cd0
A
151 , m_capture(false)
152 , m_invert(invert)
ba379fdc
A
153 {
154 quantityType = QuantifierFixedCount;
155 quantityCount = 1;
156 }
157
158 PatternTerm(unsigned spatternId)
159 : type(TypeBackReference)
14957cd0
A
160 , m_capture(false)
161 , m_invert(false)
ba379fdc 162 {
14957cd0 163 backReferenceSubpatternId = spatternId;
ba379fdc
A
164 quantityType = QuantifierFixedCount;
165 quantityCount = 1;
166 }
167
14957cd0
A
168 PatternTerm(bool bolAnchor, bool eolAnchor)
169 : type(TypeDotStarEnclosure)
170 , m_capture(false)
171 , m_invert(false)
172 {
173 anchors.bolAnchor = bolAnchor;
174 anchors.eolAnchor = eolAnchor;
175 quantityType = QuantifierFixedCount;
176 quantityCount = 1;
177 }
178
ba379fdc
A
179 static PatternTerm ForwardReference()
180 {
181 return PatternTerm(TypeForwardReference);
182 }
183
184 static PatternTerm BOL()
185 {
186 return PatternTerm(TypeAssertionBOL);
187 }
188
189 static PatternTerm EOL()
190 {
191 return PatternTerm(TypeAssertionEOL);
192 }
193
194 static PatternTerm WordBoundary(bool invert)
195 {
196 return PatternTerm(TypeAssertionWordBoundary, invert);
197 }
198
199 bool invert()
200 {
14957cd0 201 return m_invert;
ba379fdc
A
202 }
203
204 bool capture()
205 {
14957cd0 206 return m_capture;
ba379fdc
A
207 }
208
209 void quantify(unsigned count, QuantifierType type)
210 {
211 quantityCount = count;
212 quantityType = type;
213 }
214};
215
14957cd0
A
216struct PatternAlternative {
217 WTF_MAKE_FAST_ALLOCATED;
218public:
ba379fdc
A
219 PatternAlternative(PatternDisjunction* disjunction)
220 : m_parent(disjunction)
14957cd0
A
221 , m_onceThrough(false)
222 , m_hasFixedSize(false)
223 , m_startsWithBOL(false)
224 , m_containsBOL(false)
ba379fdc
A
225 {
226 }
227
228 PatternTerm& lastTerm()
229 {
230 ASSERT(m_terms.size());
231 return m_terms[m_terms.size() - 1];
232 }
233
234 void removeLastTerm()
235 {
236 ASSERT(m_terms.size());
237 m_terms.shrink(m_terms.size() - 1);
238 }
14957cd0
A
239
240 void setOnceThrough()
241 {
242 m_onceThrough = true;
243 }
244
245 bool onceThrough()
246 {
247 return m_onceThrough;
248 }
ba379fdc
A
249
250 Vector<PatternTerm> m_terms;
251 PatternDisjunction* m_parent;
252 unsigned m_minimumSize;
14957cd0
A
253 bool m_onceThrough : 1;
254 bool m_hasFixedSize : 1;
255 bool m_startsWithBOL : 1;
256 bool m_containsBOL : 1;
ba379fdc
A
257};
258
14957cd0
A
259struct PatternDisjunction {
260 WTF_MAKE_FAST_ALLOCATED;
261public:
ba379fdc
A
262 PatternDisjunction(PatternAlternative* parent = 0)
263 : m_parent(parent)
14957cd0 264 , m_hasFixedSize(false)
ba379fdc
A
265 {
266 }
267
ba379fdc
A
268 PatternAlternative* addNewAlternative()
269 {
ed1e77d3
A
270 m_alternatives.append(std::make_unique<PatternAlternative>(this));
271 return static_cast<PatternAlternative*>(m_alternatives.last().get());
ba379fdc
A
272 }
273
ed1e77d3 274 Vector<std::unique_ptr<PatternAlternative>> m_alternatives;
ba379fdc
A
275 PatternAlternative* m_parent;
276 unsigned m_minimumSize;
277 unsigned m_callFrameSize;
278 bool m_hasFixedSize;
279};
280
281// You probably don't want to be calling these functions directly
282// (please to be calling newlineCharacterClass() et al on your
14957cd0 283// friendly neighborhood YarrPattern instance to get nicely
ba379fdc 284// cached copies).
ed1e77d3
A
285std::unique_ptr<CharacterClass> newlineCreate();
286std::unique_ptr<CharacterClass> digitsCreate();
287std::unique_ptr<CharacterClass> spacesCreate();
288std::unique_ptr<CharacterClass> wordcharCreate();
289std::unique_ptr<CharacterClass> nondigitsCreate();
290std::unique_ptr<CharacterClass> nonspacesCreate();
291std::unique_ptr<CharacterClass> nonwordcharCreate();
ba379fdc 292
14957cd0
A
293struct TermChain {
294 TermChain(PatternTerm term)
295 : term(term)
296 {}
297
298 PatternTerm term;
299 Vector<TermChain> hotTerms;
300};
ba379fdc 301
14957cd0 302struct YarrPattern {
93a37866 303 JS_EXPORT_PRIVATE YarrPattern(const String& pattern, bool ignoreCase, bool multiline, const char** error);
ba379fdc
A
304
305 void reset()
306 {
307 m_numSubpatterns = 0;
308 m_maxBackReference = 0;
309
14957cd0
A
310 m_containsBackreferences = false;
311 m_containsBOL = false;
81345200 312 m_containsUnsignedLengthPattern = false;
4e4e5a6f 313
ba379fdc
A
314 newlineCached = 0;
315 digitsCached = 0;
316 spacesCached = 0;
317 wordcharCached = 0;
318 nondigitsCached = 0;
319 nonspacesCached = 0;
320 nonwordcharCached = 0;
321
ba379fdc 322 m_disjunctions.clear();
ba379fdc
A
323 m_userCharacterClasses.clear();
324 }
325
326 bool containsIllegalBackReference()
327 {
328 return m_maxBackReference > m_numSubpatterns;
329 }
330
81345200
A
331 bool containsUnsignedLengthPattern()
332 {
333 return m_containsUnsignedLengthPattern;
334 }
335
ba379fdc
A
336 CharacterClass* newlineCharacterClass()
337 {
ed1e77d3
A
338 if (!newlineCached) {
339 m_userCharacterClasses.append(newlineCreate());
340 newlineCached = m_userCharacterClasses.last().get();
341 }
ba379fdc
A
342 return newlineCached;
343 }
344 CharacterClass* digitsCharacterClass()
345 {
ed1e77d3
A
346 if (!digitsCached) {
347 m_userCharacterClasses.append(digitsCreate());
348 digitsCached = m_userCharacterClasses.last().get();
349 }
ba379fdc
A
350 return digitsCached;
351 }
352 CharacterClass* spacesCharacterClass()
353 {
ed1e77d3
A
354 if (!spacesCached) {
355 m_userCharacterClasses.append(spacesCreate());
356 spacesCached = m_userCharacterClasses.last().get();
357 }
ba379fdc
A
358 return spacesCached;
359 }
360 CharacterClass* wordcharCharacterClass()
361 {
ed1e77d3
A
362 if (!wordcharCached) {
363 m_userCharacterClasses.append(wordcharCreate());
364 wordcharCached = m_userCharacterClasses.last().get();
365 }
ba379fdc
A
366 return wordcharCached;
367 }
368 CharacterClass* nondigitsCharacterClass()
369 {
ed1e77d3
A
370 if (!nondigitsCached) {
371 m_userCharacterClasses.append(nondigitsCreate());
372 nondigitsCached = m_userCharacterClasses.last().get();
373 }
ba379fdc
A
374 return nondigitsCached;
375 }
376 CharacterClass* nonspacesCharacterClass()
377 {
ed1e77d3
A
378 if (!nonspacesCached) {
379 m_userCharacterClasses.append(nonspacesCreate());
380 nonspacesCached = m_userCharacterClasses.last().get();
381 }
ba379fdc
A
382 return nonspacesCached;
383 }
384 CharacterClass* nonwordcharCharacterClass()
385 {
ed1e77d3
A
386 if (!nonwordcharCached) {
387 m_userCharacterClasses.append(nonwordcharCreate());
388 nonwordcharCached = m_userCharacterClasses.last().get();
389 }
ba379fdc
A
390 return nonwordcharCached;
391 }
392
14957cd0
A
393 bool m_ignoreCase : 1;
394 bool m_multiline : 1;
395 bool m_containsBackreferences : 1;
396 bool m_containsBOL : 1;
81345200 397 bool m_containsUnsignedLengthPattern : 1;
ba379fdc
A
398 unsigned m_numSubpatterns;
399 unsigned m_maxBackReference;
400 PatternDisjunction* m_body;
ed1e77d3
A
401 Vector<std::unique_ptr<PatternDisjunction>, 4> m_disjunctions;
402 Vector<std::unique_ptr<CharacterClass>> m_userCharacterClasses;
ba379fdc
A
403
404private:
93a37866 405 const char* compile(const String& patternString);
14957cd0 406
ba379fdc
A
407 CharacterClass* newlineCached;
408 CharacterClass* digitsCached;
409 CharacterClass* spacesCached;
410 CharacterClass* wordcharCached;
411 CharacterClass* nondigitsCached;
412 CharacterClass* nonspacesCached;
413 CharacterClass* nonwordcharCached;
414};
415
416} } // namespace JSC::Yarr
417
14957cd0 418#endif // YarrPattern_h