2 * Copyright (C) 2009 Apple Inc. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 #ifndef RegexPattern_h
27 #define RegexPattern_h
29 #include <wtf/Platform.h>
33 #include <wtf/Vector.h>
34 #include <wtf/unicode/Unicode.h>
37 namespace JSC
{ namespace Yarr
{
// Per-term space constants, one for each kind of BackTrackInfo record.
// NOTE(review): the unit (presumably slots on the regex backtracking stack)
// and the code that consumes these constants are not visible in this file —
// confirm against the users before changing any value.
39 #define RegexStackSpaceForBackTrackInfoPatternCharacter 1 // Only for !fixed quantifiers.
40 #define RegexStackSpaceForBackTrackInfoCharacterClass 1 // Only for !fixed quantifiers.
41 #define RegexStackSpaceForBackTrackInfoBackReference 2
42 #define RegexStackSpaceForBackTrackInfoAlternative 1 // One per alternative.
43 #define RegexStackSpaceForBackTrackInfoParentheticalAssertion 1
44 #define RegexStackSpaceForBackTrackInfoParenthesesOnce 1 // Only for !fixed quantifiers.
45 #define RegexStackSpaceForBackTrackInfoParentheses 4
// Forward declaration: PatternDisjunction is referred to by pointer in the
// declarations that follow, before its full definition later in this file.
47 struct PatternDisjunction
;
// A range of characters described by two UChar bounds.
// NOTE(review): the member declarations and the constructor body are not
// present in this view; whether `end` is inclusive cannot be told from here.
49 struct CharacterRange
{
// Builds a range from its `begin` and `end` bounds.
53 CharacterRange(UChar begin
, UChar end
)
// A character class, stored as vectors of single characters (m_matches*)
// and vectors of CharacterRange (m_ranges*). Derives from FastAllocBase.
// NOTE(review): presumably the plain vectors hold the low (ASCII) entries and
// the *Unicode vectors hold the rest — the code that fills them is not in
// this file, so confirm before relying on that split.
60 struct CharacterClass
: FastAllocBase
{
// Single characters in the class.
61 Vector
<UChar
> m_matches
;
// Character ranges in the class.
62 Vector
<CharacterRange
> m_ranges
;
// Single characters, "Unicode" variant (see the note on the struct).
63 Vector
<UChar
> m_matchesUnicode
;
// Character ranges, "Unicode" variant (see the note on the struct).
64 Vector
<CharacterRange
> m_rangesUnicode
;
// Fragment of PatternTerm's declarations (the struct header and several
// lines are not present in this view).
// The Type* names below are values of the term's `type`; the other Type*
// values used elsewhere in this file are declared on lines not shown here.
77 TypeAssertionWordBoundary
,
82 TypeParenthesesSubpattern
,
83 TypeParentheticalAssertion
,
// Payload fields. The constructors in this file show which one each kind of
// term sets.
// Set by the PatternTerm(UChar) constructor.
87 UChar patternCharacter
;
// Set by the PatternTerm(CharacterClass*, bool) constructor.
88 CharacterClass
* characterClass
;
// Set by the PatternTerm(unsigned) back-reference constructor.
89 unsigned subpatternId
;
// The next fields are reached as `parentheses.disjunction` and
// `parentheses.subpatternId` by the parenthesised-term constructor.
// NOTE(review): lastSubpatternId presumably lives in the same group; its
// writer is not visible in this file.
91 PatternDisjunction
* disjunction
;
92 unsigned subpatternId
;
93 unsigned lastSubpatternId
;
// Quantifier state: every visible constructor sets quantityType to
// QuantifierFixedCount, and quantify() assigns quantityCount.
97 QuantifierType quantityType
;
98 unsigned quantityCount
;
// NOTE(review): frameLocation is not assigned anywhere in the visible code.
100 unsigned frameLocation
;
// Constructs a TypePatternCharacter term for `ch`: stores the character in
// patternCharacter and sets quantityType to QuantifierFixedCount.
// NOTE(review): some lines of this constructor (braces, possibly further
// assignments) are not present in this view.
102 PatternTerm(UChar ch
)
103 : type(PatternTerm::TypePatternCharacter
)
105 patternCharacter
= ch
;
106 quantityType
= QuantifierFixedCount
;
// Constructs a TypeCharacterClass term referring to `charClass`.
// `invert` is stored in invertOrCapture; quantityType is set to
// QuantifierFixedCount.
// NOTE(review): some lines of this constructor are not present in this view.
110 PatternTerm(CharacterClass
* charClass
, bool invert
)
111 : type(PatternTerm::TypeCharacterClass
)
112 , invertOrCapture(invert
)
114 characterClass
= charClass
;
115 quantityType
= QuantifierFixedCount
;
// Constructs a parenthesised term of the given `type`.
// Stores `disjunction` and `subpatternId` in the `parentheses` group, marks
// the term as not a copy (parentheses.isCopy = false), stores
// `invertOrCapture`, and sets quantityType to QuantifierFixedCount.
// NOTE(review): the initializer for `type` and other lines of this
// constructor are not present in this view.
119 PatternTerm(Type type
, unsigned subpatternId
, PatternDisjunction
* disjunction
, bool invertOrCapture
)
121 , invertOrCapture(invertOrCapture
)
123 parentheses
.disjunction
= disjunction
;
124 parentheses
.subpatternId
= subpatternId
;
125 parentheses
.isCopy
= false;
126 quantityType
= QuantifierFixedCount
;
// Constructs a term of the given `type` with no payload; `invert` (default
// false) is stored in invertOrCapture and quantityType is set to
// QuantifierFixedCount. The static helpers ForwardReference(), BOL(), EOL()
// and WordBoundary() call this constructor.
// NOTE(review): the initializer for `type` is not present in this view.
130 PatternTerm(Type type
, bool invert
= false)
132 , invertOrCapture(invert
)
134 quantityType
= QuantifierFixedCount
;
// Constructs a TypeBackReference term that refers to subpattern
// `spatternId`; invertOrCapture is false and quantityType is set to
// QuantifierFixedCount.
// NOTE(review): some lines of this constructor are not present in this view.
138 PatternTerm(unsigned spatternId
)
139 : type(TypeBackReference
)
140 , invertOrCapture(false)
142 subpatternId
= spatternId
;
143 quantityType
= QuantifierFixedCount
;
// Returns a term of type TypeForwardReference (invert defaults to false).
147 static PatternTerm
ForwardReference()
149 return PatternTerm(TypeForwardReference
);
// Returns a term of type TypeAssertionBOL (invert defaults to false).
152 static PatternTerm
BOL()
154 return PatternTerm(TypeAssertionBOL
);
// Returns a term of type TypeAssertionEOL (invert defaults to false).
157 static PatternTerm
EOL()
159 return PatternTerm(TypeAssertionEOL
);
// Returns a term of type TypeAssertionWordBoundary; `invert` is forwarded to
// the constructor, which stores it in invertOrCapture.
162 static PatternTerm
WordBoundary(bool invert
)
164 return PatternTerm(TypeAssertionWordBoundary
, invert
);
// NOTE(review): bodies of two accessors that both return invertOrCapture.
// Their signatures are not present in this view; presumably one exposes the
// flag as "invert" and the other as "capture" — confirm against the full file.
169 return invertOrCapture
;
174 return invertOrCapture
;
// Applies a quantifier to this term: stores `count` in quantityCount.
// NOTE(review): what happens to `type` is not present in this view
// (presumably it is stored in quantityType) — confirm.
177 void quantify(unsigned count
, QuantifierType type
)
179 quantityCount
= count
;
// One alternative of a disjunction: an ordered list of PatternTerm values
// plus a pointer to the PatternDisjunction it was created for.
// Derives from FastAllocBase.
// NOTE(review): braces and some lines of this struct are not present in
// this view.
184 struct PatternAlternative
: FastAllocBase
{
// Records `disjunction` as this alternative's parent.
185 PatternAlternative(PatternDisjunction
* disjunction
)
186 : m_parent(disjunction
)
// Returns a reference to the last term. Asserts that m_terms is non-empty,
// so callers must not call this on an empty alternative.
190 PatternTerm
& lastTerm()
192 ASSERT(m_terms
.size());
193 return m_terms
[m_terms
.size() - 1];
// Drops the last term by shrinking m_terms by one element. Asserts that
// m_terms is non-empty.
196 void removeLastTerm()
198 ASSERT(m_terms
.size());
199 m_terms
.shrink(m_terms
.size() - 1);
// The terms of this alternative, stored by value.
202 Vector
<PatternTerm
> m_terms
;
// The disjunction passed to the constructor.
203 PatternDisjunction
* m_parent
;
// NOTE(review): not written anywhere in the visible code; presumably filled
// in by a later analysis pass — confirm.
204 unsigned m_minimumSize
;
// A disjunction: a list of PatternAlternative objects. Derives from
// FastAllocBase.
// NOTE(review): braces and some lines of this struct are not present in
// this view.
208 struct PatternDisjunction
: FastAllocBase
{
// `parent` defaults to 0. NOTE(review): the initializer list is not present
// in this view; presumably it stores `parent` in m_parent.
209 PatternDisjunction(PatternAlternative
* parent
= 0)
// Passes m_alternatives to deleteAllValues(): the alternatives allocated by
// addNewAlternative() are released here.
214 ~PatternDisjunction()
216 deleteAllValues(m_alternatives
);
// Allocates a new PatternAlternative whose parent is this disjunction and
// appends it to m_alternatives.
// NOTE(review): the return statement is not present in this view;
// presumably the new alternative is returned.
219 PatternAlternative
* addNewAlternative()
221 PatternAlternative
* alternative
= new PatternAlternative(this);
222 m_alternatives
.append(alternative
);
// Alternatives held by pointer; see the destructor for their release.
226 Vector
<PatternAlternative
*> m_alternatives
;
// The alternative this disjunction belongs to, if any (the constructor's
// `parent` parameter defaults to 0).
227 PatternAlternative
* m_parent
;
// NOTE(review): neither of the next two fields is written in the visible
// code; presumably both are computed by a later pass — confirm.
228 unsigned m_minimumSize
;
229 unsigned m_callFrameSize
;
233 // Creation functions for the predefined character classes (defined
234 // elsewhere). You probably don't want to call these directly: call
235 // newlineCharacterClass() et al. on a RegexPattern instance instead,
// which stores the created class in a *Cached member and returns it.
237 CharacterClass
* newlineCreate();
238 CharacterClass
* digitsCreate();
239 CharacterClass
* spacesCreate();
240 CharacterClass
* wordcharCreate();
241 CharacterClass
* nondigitsCreate();
242 CharacterClass
* nonspacesCreate();
243 CharacterClass
* nonwordcharCreate();
// A regular-expression pattern: flags, subpattern and back-reference
// counters, the disjunctions and character classes it holds, and cached
// pointers to the predefined character classes.
// NOTE(review): braces, several function headers and some member lines are
// not present in this view; the comments below describe only what is visible.
245 struct RegexPattern
{
// Stores the two flags, zeroes both counters and nulls the cached class
// pointers (only the nonwordcharCached initializer is visible here).
246 RegexPattern(bool ignoreCase
, bool multiline
)
247 : m_ignoreCase(ignoreCase
)
248 , m_multiline(multiline
)
249 , m_numSubpatterns(0)
250 , m_maxBackReference(0)
257 , nonwordcharCached(0)
// NOTE(review): the enclosing function header is not present in this view
// (presumably the destructor). Passes both pointer vectors to
// deleteAllValues().
263 deleteAllValues(m_disjunctions
);
264 deleteAllValues(m_userCharacterClasses
);
// NOTE(review): the enclosing function header is not present in this view
// (presumably a reset function). Zeroes the counters, nulls the cached class
// pointers, then releases and clears both pointer vectors. The cached
// pointers must be nulled because the objects they point to were appended to
// m_userCharacterClasses by the accessors below and are released here.
269 m_numSubpatterns
= 0;
270 m_maxBackReference
= 0;
278 nonwordcharCached
= 0;
280 deleteAllValues(m_disjunctions
);
281 m_disjunctions
.clear();
282 deleteAllValues(m_userCharacterClasses
);
283 m_userCharacterClasses
.clear();
// True when the highest back-reference number recorded exceeds the number
// of subpatterns recorded.
286 bool containsIllegalBackReference()
288 return m_maxBackReference
> m_numSubpatterns
;
// Accessors for the predefined character classes. Where the whole body is
// visible (nondigits, nonspaces, nonwordchar), the accessor calls the
// matching *Create() function only while its *Cached pointer is null,
// appends the new class to m_userCharacterClasses, and returns the cached
// pointer.
// NOTE(review): for the first four accessors the null check and/or the
// return line are not present in this view; presumably they follow the same
// pattern — confirm against the full file.
291 CharacterClass
* newlineCharacterClass()
294 m_userCharacterClasses
.append(newlineCached
= newlineCreate());
295 return newlineCached
;
297 CharacterClass
* digitsCharacterClass()
300 m_userCharacterClasses
.append(digitsCached
= digitsCreate());
303 CharacterClass
* spacesCharacterClass()
306 m_userCharacterClasses
.append(spacesCached
= spacesCreate());
309 CharacterClass
* wordcharCharacterClass()
312 m_userCharacterClasses
.append(wordcharCached
= wordcharCreate());
313 return wordcharCached
;
315 CharacterClass
* nondigitsCharacterClass()
317 if (!nondigitsCached
)
318 m_userCharacterClasses
.append(nondigitsCached
= nondigitsCreate());
319 return nondigitsCached
;
321 CharacterClass
* nonspacesCharacterClass()
323 if (!nonspacesCached
)
324 m_userCharacterClasses
.append(nonspacesCached
= nonspacesCreate());
325 return nonspacesCached
;
327 CharacterClass
* nonwordcharCharacterClass()
329 if (!nonwordcharCached
)
330 m_userCharacterClasses
.append(nonwordcharCached
= nonwordcharCreate());
331 return nonwordcharCached
;
// Counters compared by containsIllegalBackReference().
336 unsigned m_numSubpatterns
;
337 unsigned m_maxBackReference
;
// NOTE(review): m_body is not initialised by the visible part of the
// constructor — confirm it is always assigned before use.
338 PatternDisjunction
* m_body
;
// Disjunctions held by pointer and released with deleteAllValues() above.
339 Vector
<PatternDisjunction
*, 4> m_disjunctions
;
// Character classes held by pointer and released with deleteAllValues()
// above. Despite the name, the predefined classes created by the accessors
// are appended here too.
340 Vector
<CharacterClass
*> m_userCharacterClasses
;
// Cached pointers to the predefined classes; the pointed-to objects are also
// stored in m_userCharacterClasses, so they are not released through these.
343 CharacterClass
* newlineCached
;
344 CharacterClass
* digitsCached
;
345 CharacterClass
* spacesCached
;
346 CharacterClass
* wordcharCached
;
347 CharacterClass
* nondigitsCached
;
348 CharacterClass
* nonspacesCached
;
349 CharacterClass
* nonwordcharCached
;
352 } } // namespace JSC::Yarr
356 #endif // RegexPattern_h