2 * Copyright (C) 2009 Apple Inc. All rights reserved.
3 * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 #include <runtime/UString.h>
31 #include <wtf/CheckedArithmetic.h>
32 #include <wtf/RefCounted.h>
33 #include <wtf/Vector.h>
34 #include <wtf/unicode/Unicode.h>
36 namespace JSC
{ namespace Yarr
{
38 struct PatternDisjunction
;
40 struct CharacterRange
{
44 CharacterRange(UChar begin
, UChar end
)
51 struct CharacterClassTable
: RefCounted
<CharacterClassTable
> {
54 static PassRefPtr
<CharacterClassTable
> create(const char* table
, bool inverted
)
56 return adoptRef(new CharacterClassTable(table
, inverted
));
60 CharacterClassTable(const char* table
, bool inverted
)
62 , m_inverted(inverted
)
67 struct CharacterClass
{
68 WTF_MAKE_FAST_ALLOCATED
;
70 // All CharacterClass instances have to have the full set of matches and ranges,
71 // they may have an optional table for faster lookups (which must match the
72 // specified matches and ranges)
73 CharacterClass(PassRefPtr
<CharacterClassTable
> table
)
77 Vector
<UChar
> m_matches
;
78 Vector
<CharacterRange
> m_ranges
;
79 Vector
<UChar
> m_matchesUnicode
;
80 Vector
<CharacterRange
> m_rangesUnicode
;
81 RefPtr
<CharacterClassTable
> m_table
;
94 TypeAssertionWordBoundary
,
99 TypeParenthesesSubpattern
,
100 TypeParentheticalAssertion
,
101 TypeDotStarEnclosure
,
106 UChar patternCharacter
;
107 CharacterClass
* characterClass
;
108 unsigned backReferenceSubpatternId
;
110 PatternDisjunction
* disjunction
;
111 unsigned subpatternId
;
112 unsigned lastSubpatternId
;
121 QuantifierType quantityType
;
122 Checked
<unsigned> quantityCount
;
124 unsigned frameLocation
;
126 PatternTerm(UChar ch
)
127 : type(PatternTerm::TypePatternCharacter
)
131 patternCharacter
= ch
;
132 quantityType
= QuantifierFixedCount
;
136 PatternTerm(CharacterClass
* charClass
, bool invert
)
137 : type(PatternTerm::TypeCharacterClass
)
141 characterClass
= charClass
;
142 quantityType
= QuantifierFixedCount
;
146 PatternTerm(Type type
, unsigned subpatternId
, PatternDisjunction
* disjunction
, bool capture
= false, bool invert
= false)
151 parentheses
.disjunction
= disjunction
;
152 parentheses
.subpatternId
= subpatternId
;
153 parentheses
.isCopy
= false;
154 parentheses
.isTerminal
= false;
155 quantityType
= QuantifierFixedCount
;
159 PatternTerm(Type type
, bool invert
= false)
164 quantityType
= QuantifierFixedCount
;
168 PatternTerm(unsigned spatternId
)
169 : type(TypeBackReference
)
173 backReferenceSubpatternId
= spatternId
;
174 quantityType
= QuantifierFixedCount
;
178 PatternTerm(bool bolAnchor
, bool eolAnchor
)
179 : type(TypeDotStarEnclosure
)
183 anchors
.bolAnchor
= bolAnchor
;
184 anchors
.eolAnchor
= eolAnchor
;
185 quantityType
= QuantifierFixedCount
;
189 static PatternTerm
ForwardReference()
191 return PatternTerm(TypeForwardReference
);
194 static PatternTerm
BOL()
196 return PatternTerm(TypeAssertionBOL
);
199 static PatternTerm
EOL()
201 return PatternTerm(TypeAssertionEOL
);
204 static PatternTerm
WordBoundary(bool invert
)
206 return PatternTerm(TypeAssertionWordBoundary
, invert
);
219 void quantify(unsigned count
, QuantifierType type
)
221 quantityCount
= count
;
226 struct PatternAlternative
{
227 WTF_MAKE_FAST_ALLOCATED
;
229 PatternAlternative(PatternDisjunction
* disjunction
)
230 : m_parent(disjunction
)
231 , m_onceThrough(false)
232 , m_hasFixedSize(false)
233 , m_startsWithBOL(false)
234 , m_containsBOL(false)
238 PatternTerm
& lastTerm()
240 ASSERT(m_terms
.size());
241 return m_terms
[m_terms
.size() - 1];
244 void removeLastTerm()
246 ASSERT(m_terms
.size());
247 m_terms
.shrink(m_terms
.size() - 1);
250 void setOnceThrough()
252 m_onceThrough
= true;
257 return m_onceThrough
;
260 Vector
<PatternTerm
> m_terms
;
261 PatternDisjunction
* m_parent
;
262 unsigned m_minimumSize
;
263 bool m_onceThrough
: 1;
264 bool m_hasFixedSize
: 1;
265 bool m_startsWithBOL
: 1;
266 bool m_containsBOL
: 1;
269 struct PatternDisjunction
{
270 WTF_MAKE_FAST_ALLOCATED
;
272 PatternDisjunction(PatternAlternative
* parent
= 0)
274 , m_hasFixedSize(false)
278 ~PatternDisjunction()
280 deleteAllValues(m_alternatives
);
283 PatternAlternative
* addNewAlternative()
285 PatternAlternative
* alternative
= new PatternAlternative(this);
286 m_alternatives
.append(alternative
);
290 Vector
<PatternAlternative
*> m_alternatives
;
291 PatternAlternative
* m_parent
;
292 unsigned m_minimumSize
;
293 unsigned m_callFrameSize
;
297 // You probably don't want to be calling these functions directly
298 // (please to be calling newlineCharacterClass() et al on your
299 // friendly neighborhood YarrPattern instance to get nicely
301 CharacterClass
* newlineCreate();
302 CharacterClass
* digitsCreate();
303 CharacterClass
* spacesCreate();
304 CharacterClass
* wordcharCreate();
305 CharacterClass
* nondigitsCreate();
306 CharacterClass
* nonspacesCreate();
307 CharacterClass
* nonwordcharCreate();
310 TermChain(PatternTerm term
)
315 Vector
<TermChain
> hotTerms
;
319 JS_EXPORT_PRIVATE
YarrPattern(const UString
& pattern
, bool ignoreCase
, bool multiline
, const char** error
);
323 deleteAllValues(m_disjunctions
);
324 deleteAllValues(m_userCharacterClasses
);
329 m_numSubpatterns
= 0;
330 m_maxBackReference
= 0;
332 m_containsBackreferences
= false;
333 m_containsBOL
= false;
341 nonwordcharCached
= 0;
343 deleteAllValues(m_disjunctions
);
344 m_disjunctions
.clear();
345 deleteAllValues(m_userCharacterClasses
);
346 m_userCharacterClasses
.clear();
349 bool containsIllegalBackReference()
351 return m_maxBackReference
> m_numSubpatterns
;
354 CharacterClass
* newlineCharacterClass()
357 m_userCharacterClasses
.append(newlineCached
= newlineCreate());
358 return newlineCached
;
360 CharacterClass
* digitsCharacterClass()
363 m_userCharacterClasses
.append(digitsCached
= digitsCreate());
366 CharacterClass
* spacesCharacterClass()
369 m_userCharacterClasses
.append(spacesCached
= spacesCreate());
372 CharacterClass
* wordcharCharacterClass()
375 m_userCharacterClasses
.append(wordcharCached
= wordcharCreate());
376 return wordcharCached
;
378 CharacterClass
* nondigitsCharacterClass()
380 if (!nondigitsCached
)
381 m_userCharacterClasses
.append(nondigitsCached
= nondigitsCreate());
382 return nondigitsCached
;
384 CharacterClass
* nonspacesCharacterClass()
386 if (!nonspacesCached
)
387 m_userCharacterClasses
.append(nonspacesCached
= nonspacesCreate());
388 return nonspacesCached
;
390 CharacterClass
* nonwordcharCharacterClass()
392 if (!nonwordcharCached
)
393 m_userCharacterClasses
.append(nonwordcharCached
= nonwordcharCreate());
394 return nonwordcharCached
;
397 bool m_ignoreCase
: 1;
398 bool m_multiline
: 1;
399 bool m_containsBackreferences
: 1;
400 bool m_containsBOL
: 1;
401 unsigned m_numSubpatterns
;
402 unsigned m_maxBackReference
;
403 PatternDisjunction
* m_body
;
404 Vector
<PatternDisjunction
*, 4> m_disjunctions
;
405 Vector
<CharacterClass
*> m_userCharacterClasses
;
408 const char* compile(const UString
& patternString
);
410 CharacterClass
* newlineCached
;
411 CharacterClass
* digitsCached
;
412 CharacterClass
* spacesCached
;
413 CharacterClass
* wordcharCached
;
414 CharacterClass
* nondigitsCached
;
415 CharacterClass
* nonspacesCached
;
416 CharacterClass
* nonwordcharCached
;
419 } } // namespace JSC::Yarr
421 #endif // YarrPattern_h