2 * Copyright (C) 2009, 2013 Apple Inc. All rights reserved.
3 * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 #include <wtf/CheckedArithmetic.h>
31 #include <wtf/OwnPtr.h>
32 #include <wtf/PassOwnPtr.h>
33 #include <wtf/RefCounted.h>
34 #include <wtf/Vector.h>
35 #include <wtf/text/WTFString.h>
36 #include <wtf/unicode/Unicode.h>
38 namespace JSC
{ namespace Yarr
{
40 struct PatternDisjunction
;
42 struct CharacterRange
{
46 CharacterRange(UChar begin
, UChar end
)
53 struct CharacterClass
{
54 WTF_MAKE_FAST_ALLOCATED
;
56 // All CharacterClass instances have to have the full set of matches and ranges,
57 // they may have an optional m_table for faster lookups (which must match the
58 // specified matches and ranges)
63 CharacterClass(const char* table
, bool inverted
)
65 , m_tableInverted(inverted
)
68 Vector
<UChar
> m_matches
;
69 Vector
<CharacterRange
> m_ranges
;
70 Vector
<UChar
> m_matchesUnicode
;
71 Vector
<CharacterRange
> m_rangesUnicode
;
87 TypeAssertionWordBoundary
,
92 TypeParenthesesSubpattern
,
93 TypeParentheticalAssertion
,
99 UChar patternCharacter
;
100 CharacterClass
* characterClass
;
101 unsigned backReferenceSubpatternId
;
103 PatternDisjunction
* disjunction
;
104 unsigned subpatternId
;
105 unsigned lastSubpatternId
;
114 QuantifierType quantityType
;
115 Checked
<unsigned> quantityCount
;
117 unsigned frameLocation
;
119 PatternTerm(UChar ch
)
120 : type(PatternTerm::TypePatternCharacter
)
124 patternCharacter
= ch
;
125 quantityType
= QuantifierFixedCount
;
129 PatternTerm(CharacterClass
* charClass
, bool invert
)
130 : type(PatternTerm::TypeCharacterClass
)
134 characterClass
= charClass
;
135 quantityType
= QuantifierFixedCount
;
139 PatternTerm(Type type
, unsigned subpatternId
, PatternDisjunction
* disjunction
, bool capture
= false, bool invert
= false)
144 parentheses
.disjunction
= disjunction
;
145 parentheses
.subpatternId
= subpatternId
;
146 parentheses
.isCopy
= false;
147 parentheses
.isTerminal
= false;
148 quantityType
= QuantifierFixedCount
;
152 PatternTerm(Type type
, bool invert
= false)
157 quantityType
= QuantifierFixedCount
;
161 PatternTerm(unsigned spatternId
)
162 : type(TypeBackReference
)
166 backReferenceSubpatternId
= spatternId
;
167 quantityType
= QuantifierFixedCount
;
171 PatternTerm(bool bolAnchor
, bool eolAnchor
)
172 : type(TypeDotStarEnclosure
)
176 anchors
.bolAnchor
= bolAnchor
;
177 anchors
.eolAnchor
= eolAnchor
;
178 quantityType
= QuantifierFixedCount
;
182 static PatternTerm
ForwardReference()
184 return PatternTerm(TypeForwardReference
);
187 static PatternTerm
BOL()
189 return PatternTerm(TypeAssertionBOL
);
192 static PatternTerm
EOL()
194 return PatternTerm(TypeAssertionEOL
);
197 static PatternTerm
WordBoundary(bool invert
)
199 return PatternTerm(TypeAssertionWordBoundary
, invert
);
212 void quantify(unsigned count
, QuantifierType type
)
214 quantityCount
= count
;
219 struct PatternAlternative
{
220 WTF_MAKE_FAST_ALLOCATED
;
222 PatternAlternative(PatternDisjunction
* disjunction
)
223 : m_parent(disjunction
)
224 , m_onceThrough(false)
225 , m_hasFixedSize(false)
226 , m_startsWithBOL(false)
227 , m_containsBOL(false)
231 PatternTerm
& lastTerm()
233 ASSERT(m_terms
.size());
234 return m_terms
[m_terms
.size() - 1];
237 void removeLastTerm()
239 ASSERT(m_terms
.size());
240 m_terms
.shrink(m_terms
.size() - 1);
243 void setOnceThrough()
245 m_onceThrough
= true;
250 return m_onceThrough
;
253 Vector
<PatternTerm
> m_terms
;
254 PatternDisjunction
* m_parent
;
255 unsigned m_minimumSize
;
256 bool m_onceThrough
: 1;
257 bool m_hasFixedSize
: 1;
258 bool m_startsWithBOL
: 1;
259 bool m_containsBOL
: 1;
262 struct PatternDisjunction
{
263 WTF_MAKE_FAST_ALLOCATED
;
265 PatternDisjunction(PatternAlternative
* parent
= 0)
267 , m_hasFixedSize(false)
271 PatternAlternative
* addNewAlternative()
273 PatternAlternative
* alternative
= new PatternAlternative(this);
274 m_alternatives
.append(adoptPtr(alternative
));
278 Vector
<OwnPtr
<PatternAlternative
> > m_alternatives
;
279 PatternAlternative
* m_parent
;
280 unsigned m_minimumSize
;
281 unsigned m_callFrameSize
;
285 // You probably don't want to be calling these functions directly
286 // (please to be calling newlineCharacterClass() et al on your
287 // friendly neighborhood YarrPattern instance to get nicely
289 CharacterClass
* newlineCreate();
290 CharacterClass
* digitsCreate();
291 CharacterClass
* spacesCreate();
292 CharacterClass
* wordcharCreate();
293 CharacterClass
* nondigitsCreate();
294 CharacterClass
* nonspacesCreate();
295 CharacterClass
* nonwordcharCreate();
298 TermChain(PatternTerm term
)
303 Vector
<TermChain
> hotTerms
;
307 JS_EXPORT_PRIVATE
YarrPattern(const String
& pattern
, bool ignoreCase
, bool multiline
, const char** error
);
311 m_numSubpatterns
= 0;
312 m_maxBackReference
= 0;
314 m_containsBackreferences
= false;
315 m_containsBOL
= false;
323 nonwordcharCached
= 0;
325 m_disjunctions
.clear();
326 m_userCharacterClasses
.clear();
329 bool containsIllegalBackReference()
331 return m_maxBackReference
> m_numSubpatterns
;
334 CharacterClass
* newlineCharacterClass()
337 m_userCharacterClasses
.append(adoptPtr(newlineCached
= newlineCreate()));
338 return newlineCached
;
340 CharacterClass
* digitsCharacterClass()
343 m_userCharacterClasses
.append(adoptPtr(digitsCached
= digitsCreate()));
346 CharacterClass
* spacesCharacterClass()
349 m_userCharacterClasses
.append(adoptPtr(spacesCached
= spacesCreate()));
352 CharacterClass
* wordcharCharacterClass()
355 m_userCharacterClasses
.append(adoptPtr(wordcharCached
= wordcharCreate()));
356 return wordcharCached
;
358 CharacterClass
* nondigitsCharacterClass()
360 if (!nondigitsCached
)
361 m_userCharacterClasses
.append(adoptPtr(nondigitsCached
= nondigitsCreate()));
362 return nondigitsCached
;
364 CharacterClass
* nonspacesCharacterClass()
366 if (!nonspacesCached
)
367 m_userCharacterClasses
.append(adoptPtr(nonspacesCached
= nonspacesCreate()));
368 return nonspacesCached
;
370 CharacterClass
* nonwordcharCharacterClass()
372 if (!nonwordcharCached
)
373 m_userCharacterClasses
.append(adoptPtr(nonwordcharCached
= nonwordcharCreate()));
374 return nonwordcharCached
;
377 bool m_ignoreCase
: 1;
378 bool m_multiline
: 1;
379 bool m_containsBackreferences
: 1;
380 bool m_containsBOL
: 1;
381 unsigned m_numSubpatterns
;
382 unsigned m_maxBackReference
;
383 PatternDisjunction
* m_body
;
384 Vector
<OwnPtr
<PatternDisjunction
>, 4> m_disjunctions
;
385 Vector
<OwnPtr
<CharacterClass
> > m_userCharacterClasses
;
388 const char* compile(const String
& patternString
);
390 CharacterClass
* newlineCached
;
391 CharacterClass
* digitsCached
;
392 CharacterClass
* spacesCached
;
393 CharacterClass
* wordcharCached
;
394 CharacterClass
* nondigitsCached
;
395 CharacterClass
* nonspacesCached
;
396 CharacterClass
* nonwordcharCached
;
399 } } // namespace JSC::Yarr
401 #endif // YarrPattern_h