2 * Copyright (C) 2009 Apple Inc. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 #ifndef RegexPattern_h
27 #define RegexPattern_h
32 #include <wtf/Vector.h>
33 #include <wtf/unicode/Unicode.h>
36 namespace JSC
{ namespace Yarr
{
// Integer constants, one per kind of backtracking record.
// NOTE(review): judging by the names, each value is the amount of stack space
// reserved for that kind of term's backtracking state — confirm the unit
// against the code that consumes these macros (not visible in this header).
38 #define RegexStackSpaceForBackTrackInfoPatternCharacter 1 // Only for !fixed quantifiers.
39 #define RegexStackSpaceForBackTrackInfoCharacterClass 1 // Only for !fixed quantifiers.
40 #define RegexStackSpaceForBackTrackInfoBackReference 2
41 #define RegexStackSpaceForBackTrackInfoAlternative 1 // One per alternative.
42 #define RegexStackSpaceForBackTrackInfoParentheticalAssertion 1
43 #define RegexStackSpaceForBackTrackInfoParenthesesOnce 1 // Only for !fixed quantifiers.
44 #define RegexStackSpaceForBackTrackInfoParentheses 4
// Forward declaration: PatternDisjunction is referred to by pointer before its
// full definition further down in this header.
46 struct PatternDisjunction
;
// A range of characters described by a `begin` and an `end` UChar, both passed
// to the constructor.
// NOTE(review): the member declarations and the constructor body are not
// visible in this excerpt, so whether `end` is inclusive cannot be confirmed here.
48 struct CharacterRange
{
52 CharacterRange(UChar begin
, UChar end
)
// Reference-counted object (derives from RefCounted<CharacterClassTable>) built
// from a `const char*` table and an `inverted` flag.
59 struct CharacterClassTable
: RefCounted
<CharacterClassTable
> {
// Factory: allocates a CharacterClassTable with `new`, passes it to adoptRef()
// and returns the result as a PassRefPtr.
62 static PassRefPtr
<CharacterClassTable
> create(const char* table
, bool inverted
)
64 return adoptRef(new CharacterClassTable(table
, inverted
));
// Constructor: `inverted` is stored in m_inverted.
// NOTE(review): the initializer that stores `table` is not visible in this excerpt.
68 CharacterClassTable(const char* table
, bool inverted
)
70 , m_inverted(inverted
)
// Description of a character class: vectors of individual characters and of
// character ranges, plus an optional CharacterClassTable.
75 struct CharacterClass
: FastAllocBase
{
76 // All CharacterClass instances have to have the full set of matches and ranges,
77 // they may have an optional table for faster lookups (which must match the
78 // specified matches and ranges)
// Constructor taking the (optional) lookup table.
// NOTE(review): the initializer list and body are not visible in this excerpt.
79 CharacterClass(PassRefPtr
<CharacterClassTable
> table
)
// Individual characters and character ranges belonging to the class.
83 Vector
<UChar
> m_matches
;
84 Vector
<CharacterRange
> m_ranges
;
// NOTE(review): presumably the non-ASCII counterparts of m_matches / m_ranges —
// confirm against the code that fills them.
85 Vector
<UChar
> m_matchesUnicode
;
86 Vector
<CharacterRange
> m_rangesUnicode
;
// Optional table for faster lookups (see the comment at the top of the struct).
87 RefPtr
<CharacterClassTable
> m_table
;
// Excerpt of PatternTerm. The struct header, the start of the term-type enum and
// the wrappers around some members are not visible here; what follows are the
// remaining enumerators, the data members, the constructors and a few helpers.
100 TypeAssertionWordBoundary
,
101 TypePatternCharacter
,
104 TypeForwardReference
,
105 TypeParenthesesSubpattern
,
106 TypeParentheticalAssertion
,
// Single flag that the constructors fill either from an `invert` argument or
// from an `invertOrCapture` argument, depending on the term kind.
108 bool invertOrCapture
;
// Per-kind payload: set by the constructors for a pattern character, a
// character class and a back reference respectively.
110 UChar patternCharacter
;
111 CharacterClass
* characterClass
;
112 unsigned subpatternId
;
// Payload for parentheses terms; the constructors reach these members as
// `parentheses.disjunction`, `parentheses.subpatternId`, ...
// NOTE(review): the enclosing struct/union declaration is not visible here.
114 PatternDisjunction
* disjunction
;
115 unsigned subpatternId
;
116 unsigned lastSubpatternId
;
// Quantifier kind and count; every constructor starts with QuantifierFixedCount.
120 QuantifierType quantityType
;
121 unsigned quantityCount
;
123 unsigned frameLocation
;
// Builds a TypePatternCharacter term for the single character `ch`.
125 PatternTerm(UChar ch
)
126 : type(PatternTerm::TypePatternCharacter
)
128 patternCharacter
= ch
;
129 quantityType
= QuantifierFixedCount
;
// Builds a TypeCharacterClass term for `charClass`; `invert` is stored in
// invertOrCapture.
133 PatternTerm(CharacterClass
* charClass
, bool invert
)
134 : type(PatternTerm::TypeCharacterClass
)
135 , invertOrCapture(invert
)
137 characterClass
= charClass
;
138 quantityType
= QuantifierFixedCount
;
// Builds a parentheses-style term of the given `type`: records the nested
// disjunction and the subpattern id, and marks the term as not being a copy.
142 PatternTerm(Type type
, unsigned subpatternId
, PatternDisjunction
* disjunction
, bool invertOrCapture
)
144 , invertOrCapture(invertOrCapture
)
146 parentheses
.disjunction
= disjunction
;
147 parentheses
.subpatternId
= subpatternId
;
148 parentheses
.isCopy
= false;
149 quantityType
= QuantifierFixedCount
;
// Builds a term of the given `type` with no payload; `invert` defaults to false.
153 PatternTerm(Type type
, bool invert
= false)
155 , invertOrCapture(invert
)
157 quantityType
= QuantifierFixedCount
;
// Builds a TypeBackReference term that refers to subpattern `spatternId`.
161 PatternTerm(unsigned spatternId
)
162 : type(TypeBackReference
)
163 , invertOrCapture(false)
165 subpatternId
= spatternId
;
166 quantityType
= QuantifierFixedCount
;
// Named factories wrapping the (Type, bool) constructor.
170 static PatternTerm
ForwardReference()
172 return PatternTerm(TypeForwardReference
);
175 static PatternTerm
BOL()
177 return PatternTerm(TypeAssertionBOL
);
180 static PatternTerm
EOL()
182 return PatternTerm(TypeAssertionEOL
);
// Word-boundary assertion; `invert` is forwarded to the constructor.
185 static PatternTerm
WordBoundary(bool invert
)
187 return PatternTerm(TypeAssertionWordBoundary
, invert
);
// Bodies of two accessors that both return invertOrCapture.
// NOTE(review): their signatures are not visible in this excerpt.
192 return invertOrCapture
;
197 return invertOrCapture
;
// Applies a quantifier to the term: stores `count` in quantityCount.
// NOTE(review): the statement that uses `type` is not visible in this excerpt.
200 void quantify(unsigned count
, QuantifierType type
)
202 quantityCount
= count
;
// One alternative of a disjunction: a vector of PatternTerm values plus a
// pointer back to the PatternDisjunction passed to the constructor.
207 struct PatternAlternative
: FastAllocBase
{
// Stores the owning disjunction in m_parent.
208 PatternAlternative(PatternDisjunction
* disjunction
)
209 : m_parent(disjunction
)
// Returns a reference to the last term; asserts that m_terms is non-empty.
213 PatternTerm
& lastTerm()
215 ASSERT(m_terms
.size());
216 return m_terms
[m_terms
.size() - 1];
// Drops the last term by shrinking m_terms by one; asserts that m_terms is
// non-empty.
219 void removeLastTerm()
221 ASSERT(m_terms
.size());
222 m_terms
.shrink(m_terms
.size() - 1);
225 Vector
<PatternTerm
> m_terms
;
226 PatternDisjunction
* m_parent
;
// NOTE(review): not written anywhere in this excerpt; presumably filled in by
// code outside this header.
227 unsigned m_minimumSize
;
// A set of alternatives. The alternatives are allocated by addNewAlternative()
// and released by the destructor via deleteAllValues().
231 struct PatternDisjunction
: FastAllocBase
{
// `parent` is optional and defaults to 0.
// NOTE(review): the initializer list and body are not visible in this excerpt.
232 PatternDisjunction(PatternAlternative
* parent
= 0)
// Deletes every PatternAlternative held in m_alternatives.
237 ~PatternDisjunction()
239 deleteAllValues(m_alternatives
);
// Allocates a new PatternAlternative whose parent is this disjunction and
// appends it to m_alternatives.
// NOTE(review): the return statement is not visible in this excerpt.
242 PatternAlternative
* addNewAlternative()
244 PatternAlternative
* alternative
= new PatternAlternative(this);
245 m_alternatives
.append(alternative
);
249 Vector
<PatternAlternative
*> m_alternatives
;
250 PatternAlternative
* m_parent
;
// NOTE(review): neither field is written in this excerpt; presumably they are
// filled in by code outside this header.
251 unsigned m_minimumSize
;
252 unsigned m_callFrameSize
;
256 // You probably don't want to call these functions directly; call
257 // newlineCharacterClass() et al. on your RegexPattern instance instead,
258 // which caches the returned class and registers it for later deletion.
// Each function returns a CharacterClass pointer for one built-in class.
// NOTE(review): the definitions live outside this header; presumably each call
// creates a fresh instance — confirm.
260 CharacterClass
* newlineCreate();
261 CharacterClass
* digitsCreate();
262 CharacterClass
* spacesCreate();
263 CharacterClass
* wordcharCreate();
264 CharacterClass
* nondigitsCreate();
265 CharacterClass
* nonspacesCreate();
266 CharacterClass
* nonwordcharCreate();
// Top-level description of a regular expression: flags, subpattern and
// back-reference counters, the disjunctions and character classes it owns, and
// cached pointers to the built-in character classes.
268 struct RegexPattern
{
// Stores the two flags and starts with zeroed counters and no fallback request.
// NOTE(review): only the last cached-pointer initializer is visible in this
// excerpt.
269 RegexPattern(bool ignoreCase
, bool multiline
)
270 : m_ignoreCase(ignoreCase
)
271 , m_multiline(multiline
)
272 , m_numSubpatterns(0)
273 , m_maxBackReference(0)
274 , m_shouldFallBack(false)
281 , nonwordcharCached(0)
// Deletes every owned disjunction and character class.
// NOTE(review): presumably the destructor body; its signature is not visible.
287 deleteAllValues(m_disjunctions
);
288 deleteAllValues(m_userCharacterClasses
);
// Returns the pattern to an empty state: zeroes the counters, clears the
// fallback flag and cached pointers, then deletes and clears both owned vectors.
// NOTE(review): the enclosing function's signature is not visible in this excerpt.
293 m_numSubpatterns
= 0;
294 m_maxBackReference
= 0;
296 m_shouldFallBack
= false;
304 nonwordcharCached
= 0;
306 deleteAllValues(m_disjunctions
);
307 m_disjunctions
.clear();
308 deleteAllValues(m_userCharacterClasses
);
309 m_userCharacterClasses
.clear();
// True when the highest back reference seen exceeds the number of subpatterns.
312 bool containsIllegalBackReference()
314 return m_maxBackReference
> m_numSubpatterns
;
// Accessors for the built-in character classes. Each one stores the result of
// the matching *Create() call in its cached pointer and appends it to
// m_userCharacterClasses, so the class is deleted together with the pattern.
// NOTE(review): for the first four accessors the `if (!...Cached)` guard and
// some return statements are not visible in this excerpt.
317 CharacterClass
* newlineCharacterClass()
320 m_userCharacterClasses
.append(newlineCached
= newlineCreate());
321 return newlineCached
;
323 CharacterClass
* digitsCharacterClass()
326 m_userCharacterClasses
.append(digitsCached
= digitsCreate());
329 CharacterClass
* spacesCharacterClass()
332 m_userCharacterClasses
.append(spacesCached
= spacesCreate());
335 CharacterClass
* wordcharCharacterClass()
338 m_userCharacterClasses
.append(wordcharCached
= wordcharCreate());
339 return wordcharCached
;
// The remaining accessors create the class only while the cached pointer is
// still null, then return the cached pointer.
341 CharacterClass
* nondigitsCharacterClass()
343 if (!nondigitsCached
)
344 m_userCharacterClasses
.append(nondigitsCached
= nondigitsCreate());
345 return nondigitsCached
;
347 CharacterClass
* nonspacesCharacterClass()
349 if (!nonspacesCached
)
350 m_userCharacterClasses
.append(nonspacesCached
= nonspacesCreate());
351 return nonspacesCached
;
353 CharacterClass
* nonwordcharCharacterClass()
355 if (!nonwordcharCached
)
356 m_userCharacterClasses
.append(nonwordcharCached
= nonwordcharCreate());
357 return nonwordcharCached
;
// Counters compared by containsIllegalBackReference().
362 unsigned m_numSubpatterns
;
363 unsigned m_maxBackReference
;
364 bool m_shouldFallBack
;
// NOTE(review): m_body is not assigned in this excerpt; presumably the root
// disjunction of the pattern — confirm against the pattern constructor code.
365 PatternDisjunction
* m_body
;
// Owned objects: both vectors are passed to deleteAllValues() above.
366 Vector
<PatternDisjunction
*, 4> m_disjunctions
;
367 Vector
<CharacterClass
*> m_userCharacterClasses
;
// Cached built-in character classes, filled in by the accessors above.
370 CharacterClass
* newlineCached
;
371 CharacterClass
* digitsCached
;
372 CharacterClass
* spacesCached
;
373 CharacterClass
* wordcharCached
;
374 CharacterClass
* nondigitsCached
;
375 CharacterClass
* nonspacesCached
;
376 CharacterClass
* nonwordcharCached
;
379 } } // namespace JSC::Yarr
383 #endif // RegexPattern_h