2 * Copyright (C) 2009 Apple Inc. All rights reserved.
3 * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 #include <runtime/UString.h>
31 #include <wtf/CheckedArithmetic.h>
32 #include <wtf/Vector.h>
33 #include <wtf/unicode/Unicode.h>
35 namespace JSC
{ namespace Yarr
{
37 struct PatternDisjunction
;
39 struct CharacterRange
{
43 CharacterRange(UChar begin
, UChar end
)
50 struct CharacterClassTable
: RefCounted
<CharacterClassTable
> {
53 static PassRefPtr
<CharacterClassTable
> create(const char* table
, bool inverted
)
55 return adoptRef(new CharacterClassTable(table
, inverted
));
59 CharacterClassTable(const char* table
, bool inverted
)
61 , m_inverted(inverted
)
66 struct CharacterClass
{
67 WTF_MAKE_FAST_ALLOCATED
;
69 // All CharacterClass instances have to have the full set of matches and ranges,
70 // they may have an optional table for faster lookups (which must match the
71 // specified matches and ranges)
72 CharacterClass(PassRefPtr
<CharacterClassTable
> table
)
76 Vector
<UChar
> m_matches
;
77 Vector
<CharacterRange
> m_ranges
;
78 Vector
<UChar
> m_matchesUnicode
;
79 Vector
<CharacterRange
> m_rangesUnicode
;
80 RefPtr
<CharacterClassTable
> m_table
;
93 TypeAssertionWordBoundary
,
98 TypeParenthesesSubpattern
,
99 TypeParentheticalAssertion
,
100 TypeDotStarEnclosure
,
105 UChar patternCharacter
;
106 CharacterClass
* characterClass
;
107 unsigned backReferenceSubpatternId
;
109 PatternDisjunction
* disjunction
;
110 unsigned subpatternId
;
111 unsigned lastSubpatternId
;
120 QuantifierType quantityType
;
121 Checked
<unsigned> quantityCount
;
123 unsigned frameLocation
;
125 PatternTerm(UChar ch
)
126 : type(PatternTerm::TypePatternCharacter
)
130 patternCharacter
= ch
;
131 quantityType
= QuantifierFixedCount
;
135 PatternTerm(CharacterClass
* charClass
, bool invert
)
136 : type(PatternTerm::TypeCharacterClass
)
140 characterClass
= charClass
;
141 quantityType
= QuantifierFixedCount
;
145 PatternTerm(Type type
, unsigned subpatternId
, PatternDisjunction
* disjunction
, bool capture
= false, bool invert
= false)
150 parentheses
.disjunction
= disjunction
;
151 parentheses
.subpatternId
= subpatternId
;
152 parentheses
.isCopy
= false;
153 parentheses
.isTerminal
= false;
154 quantityType
= QuantifierFixedCount
;
158 PatternTerm(Type type
, bool invert
= false)
163 quantityType
= QuantifierFixedCount
;
167 PatternTerm(unsigned spatternId
)
168 : type(TypeBackReference
)
172 backReferenceSubpatternId
= spatternId
;
173 quantityType
= QuantifierFixedCount
;
177 PatternTerm(bool bolAnchor
, bool eolAnchor
)
178 : type(TypeDotStarEnclosure
)
182 anchors
.bolAnchor
= bolAnchor
;
183 anchors
.eolAnchor
= eolAnchor
;
184 quantityType
= QuantifierFixedCount
;
188 static PatternTerm
ForwardReference()
190 return PatternTerm(TypeForwardReference
);
193 static PatternTerm
BOL()
195 return PatternTerm(TypeAssertionBOL
);
198 static PatternTerm
EOL()
200 return PatternTerm(TypeAssertionEOL
);
203 static PatternTerm
WordBoundary(bool invert
)
205 return PatternTerm(TypeAssertionWordBoundary
, invert
);
218 void quantify(unsigned count
, QuantifierType type
)
220 quantityCount
= count
;
225 struct PatternAlternative
{
226 WTF_MAKE_FAST_ALLOCATED
;
228 PatternAlternative(PatternDisjunction
* disjunction
)
229 : m_parent(disjunction
)
230 , m_onceThrough(false)
231 , m_hasFixedSize(false)
232 , m_startsWithBOL(false)
233 , m_containsBOL(false)
237 PatternTerm
& lastTerm()
239 ASSERT(m_terms
.size());
240 return m_terms
[m_terms
.size() - 1];
243 void removeLastTerm()
245 ASSERT(m_terms
.size());
246 m_terms
.shrink(m_terms
.size() - 1);
249 void setOnceThrough()
251 m_onceThrough
= true;
256 return m_onceThrough
;
259 Vector
<PatternTerm
> m_terms
;
260 PatternDisjunction
* m_parent
;
261 unsigned m_minimumSize
;
262 bool m_onceThrough
: 1;
263 bool m_hasFixedSize
: 1;
264 bool m_startsWithBOL
: 1;
265 bool m_containsBOL
: 1;
268 struct PatternDisjunction
{
269 WTF_MAKE_FAST_ALLOCATED
;
271 PatternDisjunction(PatternAlternative
* parent
= 0)
273 , m_hasFixedSize(false)
277 ~PatternDisjunction()
279 deleteAllValues(m_alternatives
);
282 PatternAlternative
* addNewAlternative()
284 PatternAlternative
* alternative
= new PatternAlternative(this);
285 m_alternatives
.append(alternative
);
289 Vector
<PatternAlternative
*> m_alternatives
;
290 PatternAlternative
* m_parent
;
291 unsigned m_minimumSize
;
292 unsigned m_callFrameSize
;
296 // You probably don't want to be calling these functions directly
297 // (please to be calling newlineCharacterClass() et al on your
298 // friendly neighborhood YarrPattern instance to get nicely
300 CharacterClass
* newlineCreate();
301 CharacterClass
* digitsCreate();
302 CharacterClass
* spacesCreate();
303 CharacterClass
* wordcharCreate();
304 CharacterClass
* nondigitsCreate();
305 CharacterClass
* nonspacesCreate();
306 CharacterClass
* nonwordcharCreate();
309 TermChain(PatternTerm term
)
314 Vector
<TermChain
> hotTerms
;
318 YarrPattern(const UString
& pattern
, bool ignoreCase
, bool multiline
, const char** error
);
322 deleteAllValues(m_disjunctions
);
323 deleteAllValues(m_userCharacterClasses
);
328 m_numSubpatterns
= 0;
329 m_maxBackReference
= 0;
331 m_containsBackreferences
= false;
332 m_containsBOL
= false;
340 nonwordcharCached
= 0;
342 deleteAllValues(m_disjunctions
);
343 m_disjunctions
.clear();
344 deleteAllValues(m_userCharacterClasses
);
345 m_userCharacterClasses
.clear();
348 bool containsIllegalBackReference()
350 return m_maxBackReference
> m_numSubpatterns
;
353 CharacterClass
* newlineCharacterClass()
356 m_userCharacterClasses
.append(newlineCached
= newlineCreate());
357 return newlineCached
;
359 CharacterClass
* digitsCharacterClass()
362 m_userCharacterClasses
.append(digitsCached
= digitsCreate());
365 CharacterClass
* spacesCharacterClass()
368 m_userCharacterClasses
.append(spacesCached
= spacesCreate());
371 CharacterClass
* wordcharCharacterClass()
374 m_userCharacterClasses
.append(wordcharCached
= wordcharCreate());
375 return wordcharCached
;
377 CharacterClass
* nondigitsCharacterClass()
379 if (!nondigitsCached
)
380 m_userCharacterClasses
.append(nondigitsCached
= nondigitsCreate());
381 return nondigitsCached
;
383 CharacterClass
* nonspacesCharacterClass()
385 if (!nonspacesCached
)
386 m_userCharacterClasses
.append(nonspacesCached
= nonspacesCreate());
387 return nonspacesCached
;
389 CharacterClass
* nonwordcharCharacterClass()
391 if (!nonwordcharCached
)
392 m_userCharacterClasses
.append(nonwordcharCached
= nonwordcharCreate());
393 return nonwordcharCached
;
396 bool m_ignoreCase
: 1;
397 bool m_multiline
: 1;
398 bool m_containsBackreferences
: 1;
399 bool m_containsBOL
: 1;
400 unsigned m_numSubpatterns
;
401 unsigned m_maxBackReference
;
402 PatternDisjunction
* m_body
;
403 Vector
<PatternDisjunction
*, 4> m_disjunctions
;
404 Vector
<CharacterClass
*> m_userCharacterClasses
;
407 const char* compile(const UString
& patternString
);
409 CharacterClass
* newlineCached
;
410 CharacterClass
* digitsCached
;
411 CharacterClass
* spacesCached
;
412 CharacterClass
* wordcharCached
;
413 CharacterClass
* nondigitsCached
;
414 CharacterClass
* nonspacesCached
;
415 CharacterClass
* nonwordcharCached
;
418 } } // namespace JSC::Yarr
420 #endif // YarrPattern_h