2 * Copyright (C) 2009, 2013 Apple Inc. All rights reserved.
3 * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 #include <wtf/CheckedArithmetic.h>
31 #include <wtf/RefCounted.h>
32 #include <wtf/Vector.h>
33 #include <wtf/text/WTFString.h>
35 namespace JSC
{ namespace Yarr
{
37 struct PatternDisjunction
;
39 struct CharacterRange
{
43 CharacterRange(UChar begin
, UChar end
)
50 struct CharacterClass
{
51 WTF_MAKE_FAST_ALLOCATED
;
53 // All CharacterClass instances have to have the full set of matches and ranges,
54 // they may have an optional m_table for faster lookups (which must match the
55 // specified matches and ranges)
60 CharacterClass(const char* table
, bool inverted
)
62 , m_tableInverted(inverted
)
65 Vector
<UChar
> m_matches
;
66 Vector
<CharacterRange
> m_ranges
;
67 Vector
<UChar
> m_matchesUnicode
;
68 Vector
<CharacterRange
> m_rangesUnicode
;
84 TypeAssertionWordBoundary
,
89 TypeParenthesesSubpattern
,
90 TypeParentheticalAssertion
,
96 UChar patternCharacter
;
97 CharacterClass
* characterClass
;
98 unsigned backReferenceSubpatternId
;
100 PatternDisjunction
* disjunction
;
101 unsigned subpatternId
;
102 unsigned lastSubpatternId
;
111 QuantifierType quantityType
;
112 Checked
<unsigned> quantityCount
;
114 unsigned frameLocation
;
116 PatternTerm(UChar ch
)
117 : type(PatternTerm::TypePatternCharacter
)
121 patternCharacter
= ch
;
122 quantityType
= QuantifierFixedCount
;
126 PatternTerm(CharacterClass
* charClass
, bool invert
)
127 : type(PatternTerm::TypeCharacterClass
)
131 characterClass
= charClass
;
132 quantityType
= QuantifierFixedCount
;
136 PatternTerm(Type type
, unsigned subpatternId
, PatternDisjunction
* disjunction
, bool capture
= false, bool invert
= false)
141 parentheses
.disjunction
= disjunction
;
142 parentheses
.subpatternId
= subpatternId
;
143 parentheses
.isCopy
= false;
144 parentheses
.isTerminal
= false;
145 quantityType
= QuantifierFixedCount
;
149 PatternTerm(Type type
, bool invert
= false)
154 quantityType
= QuantifierFixedCount
;
158 PatternTerm(unsigned spatternId
)
159 : type(TypeBackReference
)
163 backReferenceSubpatternId
= spatternId
;
164 quantityType
= QuantifierFixedCount
;
168 PatternTerm(bool bolAnchor
, bool eolAnchor
)
169 : type(TypeDotStarEnclosure
)
173 anchors
.bolAnchor
= bolAnchor
;
174 anchors
.eolAnchor
= eolAnchor
;
175 quantityType
= QuantifierFixedCount
;
179 static PatternTerm
ForwardReference()
181 return PatternTerm(TypeForwardReference
);
184 static PatternTerm
BOL()
186 return PatternTerm(TypeAssertionBOL
);
189 static PatternTerm
EOL()
191 return PatternTerm(TypeAssertionEOL
);
194 static PatternTerm
WordBoundary(bool invert
)
196 return PatternTerm(TypeAssertionWordBoundary
, invert
);
209 void quantify(unsigned count
, QuantifierType type
)
211 quantityCount
= count
;
216 struct PatternAlternative
{
217 WTF_MAKE_FAST_ALLOCATED
;
219 PatternAlternative(PatternDisjunction
* disjunction
)
220 : m_parent(disjunction
)
221 , m_onceThrough(false)
222 , m_hasFixedSize(false)
223 , m_startsWithBOL(false)
224 , m_containsBOL(false)
228 PatternTerm
& lastTerm()
230 ASSERT(m_terms
.size());
231 return m_terms
[m_terms
.size() - 1];
234 void removeLastTerm()
236 ASSERT(m_terms
.size());
237 m_terms
.shrink(m_terms
.size() - 1);
240 void setOnceThrough()
242 m_onceThrough
= true;
247 return m_onceThrough
;
250 Vector
<PatternTerm
> m_terms
;
251 PatternDisjunction
* m_parent
;
252 unsigned m_minimumSize
;
253 bool m_onceThrough
: 1;
254 bool m_hasFixedSize
: 1;
255 bool m_startsWithBOL
: 1;
256 bool m_containsBOL
: 1;
259 struct PatternDisjunction
{
260 WTF_MAKE_FAST_ALLOCATED
;
262 PatternDisjunction(PatternAlternative
* parent
= 0)
264 , m_hasFixedSize(false)
268 PatternAlternative
* addNewAlternative()
270 m_alternatives
.append(std::make_unique
<PatternAlternative
>(this));
271 return static_cast<PatternAlternative
*>(m_alternatives
.last().get());
274 Vector
<std::unique_ptr
<PatternAlternative
>> m_alternatives
;
275 PatternAlternative
* m_parent
;
276 unsigned m_minimumSize
;
277 unsigned m_callFrameSize
;
281 // You probably don't want to be calling these functions directly
282 // (please to be calling newlineCharacterClass() et al on your
283 // friendly neighborhood YarrPattern instance to get nicely
285 std::unique_ptr
<CharacterClass
> newlineCreate();
286 std::unique_ptr
<CharacterClass
> digitsCreate();
287 std::unique_ptr
<CharacterClass
> spacesCreate();
288 std::unique_ptr
<CharacterClass
> wordcharCreate();
289 std::unique_ptr
<CharacterClass
> nondigitsCreate();
290 std::unique_ptr
<CharacterClass
> nonspacesCreate();
291 std::unique_ptr
<CharacterClass
> nonwordcharCreate();
294 TermChain(PatternTerm term
)
299 Vector
<TermChain
> hotTerms
;
303 JS_EXPORT_PRIVATE
YarrPattern(const String
& pattern
, bool ignoreCase
, bool multiline
, const char** error
);
307 m_numSubpatterns
= 0;
308 m_maxBackReference
= 0;
310 m_containsBackreferences
= false;
311 m_containsBOL
= false;
312 m_containsUnsignedLengthPattern
= false;
320 nonwordcharCached
= 0;
322 m_disjunctions
.clear();
323 m_userCharacterClasses
.clear();
326 bool containsIllegalBackReference()
328 return m_maxBackReference
> m_numSubpatterns
;
331 bool containsUnsignedLengthPattern()
333 return m_containsUnsignedLengthPattern
;
336 CharacterClass
* newlineCharacterClass()
338 if (!newlineCached
) {
339 m_userCharacterClasses
.append(newlineCreate());
340 newlineCached
= m_userCharacterClasses
.last().get();
342 return newlineCached
;
344 CharacterClass
* digitsCharacterClass()
347 m_userCharacterClasses
.append(digitsCreate());
348 digitsCached
= m_userCharacterClasses
.last().get();
352 CharacterClass
* spacesCharacterClass()
355 m_userCharacterClasses
.append(spacesCreate());
356 spacesCached
= m_userCharacterClasses
.last().get();
360 CharacterClass
* wordcharCharacterClass()
362 if (!wordcharCached
) {
363 m_userCharacterClasses
.append(wordcharCreate());
364 wordcharCached
= m_userCharacterClasses
.last().get();
366 return wordcharCached
;
368 CharacterClass
* nondigitsCharacterClass()
370 if (!nondigitsCached
) {
371 m_userCharacterClasses
.append(nondigitsCreate());
372 nondigitsCached
= m_userCharacterClasses
.last().get();
374 return nondigitsCached
;
376 CharacterClass
* nonspacesCharacterClass()
378 if (!nonspacesCached
) {
379 m_userCharacterClasses
.append(nonspacesCreate());
380 nonspacesCached
= m_userCharacterClasses
.last().get();
382 return nonspacesCached
;
384 CharacterClass
* nonwordcharCharacterClass()
386 if (!nonwordcharCached
) {
387 m_userCharacterClasses
.append(nonwordcharCreate());
388 nonwordcharCached
= m_userCharacterClasses
.last().get();
390 return nonwordcharCached
;
393 bool m_ignoreCase
: 1;
394 bool m_multiline
: 1;
395 bool m_containsBackreferences
: 1;
396 bool m_containsBOL
: 1;
397 bool m_containsUnsignedLengthPattern
: 1;
398 unsigned m_numSubpatterns
;
399 unsigned m_maxBackReference
;
400 PatternDisjunction
* m_body
;
401 Vector
<std::unique_ptr
<PatternDisjunction
>, 4> m_disjunctions
;
402 Vector
<std::unique_ptr
<CharacterClass
>> m_userCharacterClasses
;
405 const char* compile(const String
& patternString
);
407 CharacterClass
* newlineCached
;
408 CharacterClass
* digitsCached
;
409 CharacterClass
* spacesCached
;
410 CharacterClass
* wordcharCached
;
411 CharacterClass
* nondigitsCached
;
412 CharacterClass
* nonspacesCached
;
413 CharacterClass
* nonwordcharCached
;
416 } } // namespace JSC::Yarr
418 #endif // YarrPattern_h