2 * Copyright (C) 2009, 2013 Apple Inc. All rights reserved.
3 * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 #include <wtf/CheckedArithmetic.h>
31 #include <wtf/OwnPtr.h>
32 #include <wtf/PassOwnPtr.h>
33 #include <wtf/RefCounted.h>
34 #include <wtf/Vector.h>
35 #include <wtf/text/WTFString.h>
37 namespace JSC
{ namespace Yarr
{
39 struct PatternDisjunction
;
41 struct CharacterRange
{
45 CharacterRange(UChar begin
, UChar end
)
52 struct CharacterClass
{
53 WTF_MAKE_FAST_ALLOCATED
;
55 // All CharacterClass instances have to have the full set of matches and ranges,
56 // they may have an optional m_table for faster lookups (which must match the
57 // specified matches and ranges)
62 CharacterClass(const char* table
, bool inverted
)
64 , m_tableInverted(inverted
)
67 Vector
<UChar
> m_matches
;
68 Vector
<CharacterRange
> m_ranges
;
69 Vector
<UChar
> m_matchesUnicode
;
70 Vector
<CharacterRange
> m_rangesUnicode
;
86 TypeAssertionWordBoundary
,
91 TypeParenthesesSubpattern
,
92 TypeParentheticalAssertion
,
98 UChar patternCharacter
;
99 CharacterClass
* characterClass
;
100 unsigned backReferenceSubpatternId
;
102 PatternDisjunction
* disjunction
;
103 unsigned subpatternId
;
104 unsigned lastSubpatternId
;
113 QuantifierType quantityType
;
114 Checked
<unsigned> quantityCount
;
116 unsigned frameLocation
;
118 PatternTerm(UChar ch
)
119 : type(PatternTerm::TypePatternCharacter
)
123 patternCharacter
= ch
;
124 quantityType
= QuantifierFixedCount
;
128 PatternTerm(CharacterClass
* charClass
, bool invert
)
129 : type(PatternTerm::TypeCharacterClass
)
133 characterClass
= charClass
;
134 quantityType
= QuantifierFixedCount
;
138 PatternTerm(Type type
, unsigned subpatternId
, PatternDisjunction
* disjunction
, bool capture
= false, bool invert
= false)
143 parentheses
.disjunction
= disjunction
;
144 parentheses
.subpatternId
= subpatternId
;
145 parentheses
.isCopy
= false;
146 parentheses
.isTerminal
= false;
147 quantityType
= QuantifierFixedCount
;
151 PatternTerm(Type type
, bool invert
= false)
156 quantityType
= QuantifierFixedCount
;
160 PatternTerm(unsigned spatternId
)
161 : type(TypeBackReference
)
165 backReferenceSubpatternId
= spatternId
;
166 quantityType
= QuantifierFixedCount
;
170 PatternTerm(bool bolAnchor
, bool eolAnchor
)
171 : type(TypeDotStarEnclosure
)
175 anchors
.bolAnchor
= bolAnchor
;
176 anchors
.eolAnchor
= eolAnchor
;
177 quantityType
= QuantifierFixedCount
;
181 static PatternTerm
ForwardReference()
183 return PatternTerm(TypeForwardReference
);
186 static PatternTerm
BOL()
188 return PatternTerm(TypeAssertionBOL
);
191 static PatternTerm
EOL()
193 return PatternTerm(TypeAssertionEOL
);
196 static PatternTerm
WordBoundary(bool invert
)
198 return PatternTerm(TypeAssertionWordBoundary
, invert
);
211 void quantify(unsigned count
, QuantifierType type
)
213 quantityCount
= count
;
218 struct PatternAlternative
{
219 WTF_MAKE_FAST_ALLOCATED
;
221 PatternAlternative(PatternDisjunction
* disjunction
)
222 : m_parent(disjunction
)
223 , m_onceThrough(false)
224 , m_hasFixedSize(false)
225 , m_startsWithBOL(false)
226 , m_containsBOL(false)
230 PatternTerm
& lastTerm()
232 ASSERT(m_terms
.size());
233 return m_terms
[m_terms
.size() - 1];
236 void removeLastTerm()
238 ASSERT(m_terms
.size());
239 m_terms
.shrink(m_terms
.size() - 1);
242 void setOnceThrough()
244 m_onceThrough
= true;
249 return m_onceThrough
;
252 Vector
<PatternTerm
> m_terms
;
253 PatternDisjunction
* m_parent
;
254 unsigned m_minimumSize
;
255 bool m_onceThrough
: 1;
256 bool m_hasFixedSize
: 1;
257 bool m_startsWithBOL
: 1;
258 bool m_containsBOL
: 1;
261 struct PatternDisjunction
{
262 WTF_MAKE_FAST_ALLOCATED
;
264 PatternDisjunction(PatternAlternative
* parent
= 0)
266 , m_hasFixedSize(false)
270 PatternAlternative
* addNewAlternative()
272 PatternAlternative
* alternative
= new PatternAlternative(this);
273 m_alternatives
.append(adoptPtr(alternative
));
277 Vector
<OwnPtr
<PatternAlternative
>> m_alternatives
;
278 PatternAlternative
* m_parent
;
279 unsigned m_minimumSize
;
280 unsigned m_callFrameSize
;
284 // You probably don't want to be calling these functions directly
285 // (please to be calling newlineCharacterClass() et al on your
286 // friendly neighborhood YarrPattern instance to get nicely
288 CharacterClass
* newlineCreate();
289 CharacterClass
* digitsCreate();
290 CharacterClass
* spacesCreate();
291 CharacterClass
* wordcharCreate();
292 CharacterClass
* nondigitsCreate();
293 CharacterClass
* nonspacesCreate();
294 CharacterClass
* nonwordcharCreate();
297 TermChain(PatternTerm term
)
302 Vector
<TermChain
> hotTerms
;
306 JS_EXPORT_PRIVATE
YarrPattern(const String
& pattern
, bool ignoreCase
, bool multiline
, const char** error
);
310 m_numSubpatterns
= 0;
311 m_maxBackReference
= 0;
313 m_containsBackreferences
= false;
314 m_containsBOL
= false;
315 m_containsUnsignedLengthPattern
= false;
323 nonwordcharCached
= 0;
325 m_disjunctions
.clear();
326 m_userCharacterClasses
.clear();
329 bool containsIllegalBackReference()
331 return m_maxBackReference
> m_numSubpatterns
;
334 bool containsUnsignedLengthPattern()
336 return m_containsUnsignedLengthPattern
;
339 CharacterClass
* newlineCharacterClass()
342 m_userCharacterClasses
.append(adoptPtr(newlineCached
= newlineCreate()));
343 return newlineCached
;
345 CharacterClass
* digitsCharacterClass()
348 m_userCharacterClasses
.append(adoptPtr(digitsCached
= digitsCreate()));
351 CharacterClass
* spacesCharacterClass()
354 m_userCharacterClasses
.append(adoptPtr(spacesCached
= spacesCreate()));
357 CharacterClass
* wordcharCharacterClass()
360 m_userCharacterClasses
.append(adoptPtr(wordcharCached
= wordcharCreate()));
361 return wordcharCached
;
363 CharacterClass
* nondigitsCharacterClass()
365 if (!nondigitsCached
)
366 m_userCharacterClasses
.append(adoptPtr(nondigitsCached
= nondigitsCreate()));
367 return nondigitsCached
;
369 CharacterClass
* nonspacesCharacterClass()
371 if (!nonspacesCached
)
372 m_userCharacterClasses
.append(adoptPtr(nonspacesCached
= nonspacesCreate()));
373 return nonspacesCached
;
375 CharacterClass
* nonwordcharCharacterClass()
377 if (!nonwordcharCached
)
378 m_userCharacterClasses
.append(adoptPtr(nonwordcharCached
= nonwordcharCreate()));
379 return nonwordcharCached
;
382 bool m_ignoreCase
: 1;
383 bool m_multiline
: 1;
384 bool m_containsBackreferences
: 1;
385 bool m_containsBOL
: 1;
386 bool m_containsUnsignedLengthPattern
: 1;
387 unsigned m_numSubpatterns
;
388 unsigned m_maxBackReference
;
389 PatternDisjunction
* m_body
;
390 Vector
<OwnPtr
<PatternDisjunction
>, 4> m_disjunctions
;
391 Vector
<OwnPtr
<CharacterClass
>> m_userCharacterClasses
;
394 const char* compile(const String
& patternString
);
396 CharacterClass
* newlineCached
;
397 CharacterClass
* digitsCached
;
398 CharacterClass
* spacesCached
;
399 CharacterClass
* wordcharCached
;
400 CharacterClass
* nondigitsCached
;
401 CharacterClass
* nonspacesCached
;
402 CharacterClass
* nonwordcharCached
;
405 } } // namespace JSC::Yarr
407 #endif // YarrPattern_h