2 * Copyright (C) 2009 Apple Inc. All rights reserved.
3 * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 #include <runtime/UString.h>
31 #include <wtf/Vector.h>
32 #include <wtf/unicode/Unicode.h>
34 namespace JSC
{ namespace Yarr
{
36 struct PatternDisjunction
;
38 struct CharacterRange
{
42 CharacterRange(UChar begin
, UChar end
)
49 struct CharacterClassTable
: RefCounted
<CharacterClassTable
> {
52 static PassRefPtr
<CharacterClassTable
> create(const char* table
, bool inverted
)
54 return adoptRef(new CharacterClassTable(table
, inverted
));
58 CharacterClassTable(const char* table
, bool inverted
)
60 , m_inverted(inverted
)
65 struct CharacterClass
{
66 WTF_MAKE_FAST_ALLOCATED
;
68 // All CharacterClass instances have to have the full set of matches and ranges,
69 // they may have an optional table for faster lookups (which must match the
70 // specified matches and ranges)
71 CharacterClass(PassRefPtr
<CharacterClassTable
> table
)
75 Vector
<UChar
> m_matches
;
76 Vector
<CharacterRange
> m_ranges
;
77 Vector
<UChar
> m_matchesUnicode
;
78 Vector
<CharacterRange
> m_rangesUnicode
;
79 RefPtr
<CharacterClassTable
> m_table
;
92 TypeAssertionWordBoundary
,
97 TypeParenthesesSubpattern
,
98 TypeParentheticalAssertion
,
104 UChar patternCharacter
;
105 CharacterClass
* characterClass
;
106 unsigned backReferenceSubpatternId
;
108 PatternDisjunction
* disjunction
;
109 unsigned subpatternId
;
110 unsigned lastSubpatternId
;
119 QuantifierType quantityType
;
120 unsigned quantityCount
;
122 unsigned frameLocation
;
124 PatternTerm(UChar ch
)
125 : type(PatternTerm::TypePatternCharacter
)
129 patternCharacter
= ch
;
130 quantityType
= QuantifierFixedCount
;
134 PatternTerm(CharacterClass
* charClass
, bool invert
)
135 : type(PatternTerm::TypeCharacterClass
)
139 characterClass
= charClass
;
140 quantityType
= QuantifierFixedCount
;
144 PatternTerm(Type type
, unsigned subpatternId
, PatternDisjunction
* disjunction
, bool capture
= false, bool invert
= false)
149 parentheses
.disjunction
= disjunction
;
150 parentheses
.subpatternId
= subpatternId
;
151 parentheses
.isCopy
= false;
152 parentheses
.isTerminal
= false;
153 quantityType
= QuantifierFixedCount
;
157 PatternTerm(Type type
, bool invert
= false)
162 quantityType
= QuantifierFixedCount
;
166 PatternTerm(unsigned spatternId
)
167 : type(TypeBackReference
)
171 backReferenceSubpatternId
= spatternId
;
172 quantityType
= QuantifierFixedCount
;
176 PatternTerm(bool bolAnchor
, bool eolAnchor
)
177 : type(TypeDotStarEnclosure
)
181 anchors
.bolAnchor
= bolAnchor
;
182 anchors
.eolAnchor
= eolAnchor
;
183 quantityType
= QuantifierFixedCount
;
187 static PatternTerm
ForwardReference()
189 return PatternTerm(TypeForwardReference
);
192 static PatternTerm
BOL()
194 return PatternTerm(TypeAssertionBOL
);
197 static PatternTerm
EOL()
199 return PatternTerm(TypeAssertionEOL
);
202 static PatternTerm
WordBoundary(bool invert
)
204 return PatternTerm(TypeAssertionWordBoundary
, invert
);
217 void quantify(unsigned count
, QuantifierType type
)
219 quantityCount
= count
;
224 struct PatternAlternative
{
225 WTF_MAKE_FAST_ALLOCATED
;
227 PatternAlternative(PatternDisjunction
* disjunction
)
228 : m_parent(disjunction
)
229 , m_onceThrough(false)
230 , m_hasFixedSize(false)
231 , m_startsWithBOL(false)
232 , m_containsBOL(false)
236 PatternTerm
& lastTerm()
238 ASSERT(m_terms
.size());
239 return m_terms
[m_terms
.size() - 1];
242 void removeLastTerm()
244 ASSERT(m_terms
.size());
245 m_terms
.shrink(m_terms
.size() - 1);
248 void setOnceThrough()
250 m_onceThrough
= true;
255 return m_onceThrough
;
258 Vector
<PatternTerm
> m_terms
;
259 PatternDisjunction
* m_parent
;
260 unsigned m_minimumSize
;
261 bool m_onceThrough
: 1;
262 bool m_hasFixedSize
: 1;
263 bool m_startsWithBOL
: 1;
264 bool m_containsBOL
: 1;
267 struct PatternDisjunction
{
268 WTF_MAKE_FAST_ALLOCATED
;
270 PatternDisjunction(PatternAlternative
* parent
= 0)
272 , m_hasFixedSize(false)
276 ~PatternDisjunction()
278 deleteAllValues(m_alternatives
);
281 PatternAlternative
* addNewAlternative()
283 PatternAlternative
* alternative
= new PatternAlternative(this);
284 m_alternatives
.append(alternative
);
288 Vector
<PatternAlternative
*> m_alternatives
;
289 PatternAlternative
* m_parent
;
290 unsigned m_minimumSize
;
291 unsigned m_callFrameSize
;
295 // You probably don't want to be calling these functions directly
296 // (please to be calling newlineCharacterClass() et al on your
297 // friendly neighborhood YarrPattern instance to get nicely
299 CharacterClass
* newlineCreate();
300 CharacterClass
* digitsCreate();
301 CharacterClass
* spacesCreate();
302 CharacterClass
* wordcharCreate();
303 CharacterClass
* nondigitsCreate();
304 CharacterClass
* nonspacesCreate();
305 CharacterClass
* nonwordcharCreate();
308 TermChain(PatternTerm term
)
313 Vector
<TermChain
> hotTerms
;
317 YarrPattern(const UString
& pattern
, bool ignoreCase
, bool multiline
, const char** error
);
321 deleteAllValues(m_disjunctions
);
322 deleteAllValues(m_userCharacterClasses
);
327 m_numSubpatterns
= 0;
328 m_maxBackReference
= 0;
330 m_containsBackreferences
= false;
331 m_containsBOL
= false;
339 nonwordcharCached
= 0;
341 deleteAllValues(m_disjunctions
);
342 m_disjunctions
.clear();
343 deleteAllValues(m_userCharacterClasses
);
344 m_userCharacterClasses
.clear();
347 bool containsIllegalBackReference()
349 return m_maxBackReference
> m_numSubpatterns
;
352 CharacterClass
* newlineCharacterClass()
355 m_userCharacterClasses
.append(newlineCached
= newlineCreate());
356 return newlineCached
;
358 CharacterClass
* digitsCharacterClass()
361 m_userCharacterClasses
.append(digitsCached
= digitsCreate());
364 CharacterClass
* spacesCharacterClass()
367 m_userCharacterClasses
.append(spacesCached
= spacesCreate());
370 CharacterClass
* wordcharCharacterClass()
373 m_userCharacterClasses
.append(wordcharCached
= wordcharCreate());
374 return wordcharCached
;
376 CharacterClass
* nondigitsCharacterClass()
378 if (!nondigitsCached
)
379 m_userCharacterClasses
.append(nondigitsCached
= nondigitsCreate());
380 return nondigitsCached
;
382 CharacterClass
* nonspacesCharacterClass()
384 if (!nonspacesCached
)
385 m_userCharacterClasses
.append(nonspacesCached
= nonspacesCreate());
386 return nonspacesCached
;
388 CharacterClass
* nonwordcharCharacterClass()
390 if (!nonwordcharCached
)
391 m_userCharacterClasses
.append(nonwordcharCached
= nonwordcharCreate());
392 return nonwordcharCached
;
395 bool m_ignoreCase
: 1;
396 bool m_multiline
: 1;
397 bool m_containsBackreferences
: 1;
398 bool m_containsBOL
: 1;
399 unsigned m_numSubpatterns
;
400 unsigned m_maxBackReference
;
401 PatternDisjunction
* m_body
;
402 Vector
<PatternDisjunction
*, 4> m_disjunctions
;
403 Vector
<CharacterClass
*> m_userCharacterClasses
;
406 const char* compile(const UString
& patternString
);
408 CharacterClass
* newlineCached
;
409 CharacterClass
* digitsCached
;
410 CharacterClass
* spacesCached
;
411 CharacterClass
* wordcharCached
;
412 CharacterClass
* nondigitsCached
;
413 CharacterClass
* nonspacesCached
;
414 CharacterClass
* nonwordcharCached
;
417 } } // namespace JSC::Yarr
419 #endif // YarrPattern_h