2 * Copyright (C) 2009, 2010 Apple Inc. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 #ifndef YarrInterpreter_h
27 #define YarrInterpreter_h
29 #include "YarrPattern.h"
// Forward declaration of the allocator type. This header only ever names it
// through pointers (BytecodePattern's constructor and member, byteCompile()).
32 class BumpPointerAllocator
;
// Lets the code below refer to WTF::BumpPointerAllocator without qualification.
34 using WTF::BumpPointerAllocator
;
36 namespace JSC
{ namespace Yarr
{
// Forward declaration: ByteTerm holds a ByteDisjunction pointer
// (parenthesesDisjunction) before the class is defined later in this header.
38 class ByteDisjunction
;
// Enumerators naming the kind of a ByteTerm. The constructors and static
// factory functions in this header store one of these values in `type`.
// NOTE(review): several enumerators used elsewhere in this header (for example
// TypeAlternativeBegin, TypeAlternativeEnd, TypeSubpatternBegin,
// TypeAssertionBOL, TypeCharacterClass, TypeBackReference, TypeCheckInput,
// TypeDotStarEnclosure) are not present in this listing - confirm the full
// enumerator list and its order against the complete header.

// Markers used by the BodyAlternative* factory functions.
42 TypeBodyAlternativeBegin
,
43 TypeBodyAlternativeDisjunction
,
44 TypeBodyAlternativeEnd
,
// Marker used by the AlternativeDisjunction() factory function.
46 TypeAlternativeDisjunction
,
// Assertion created by WordBoundary().
52 TypeAssertionWordBoundary
,
// Single pattern character; the variant is chosen from the quantifier by the
// ByteTerm(UChar ch, ...) constructor.
53 TypePatternCharacterOnce
,
54 TypePatternCharacterFixed
,
55 TypePatternCharacterGreedy
,
56 TypePatternCharacterNonGreedy
,
// Cased character (a lo/hi pair); the variant is chosen from the quantifier by
// the ByteTerm(UChar lo, UChar hi, ...) constructor.
57 TypePatternCasedCharacterOnce
,
58 TypePatternCasedCharacterFixed
,
59 TypePatternCasedCharacterGreedy
,
60 TypePatternCasedCharacterNonGreedy
,
// Parentheses-related term kinds. Nothing in this header selects between them;
// presumably the caller passes one as the `type` constructor argument - confirm.
63 TypeParenthesesSubpattern
,
64 TypeParenthesesSubpatternOnceBegin
,
65 TypeParenthesesSubpatternOnceEnd
,
66 TypeParenthesesSubpatternTerminalBegin
,
67 TypeParenthesesSubpatternTerminalEnd
,
68 TypeParentheticalAssertionBegin
,
69 TypeParentheticalAssertionEnd
,
// Data members of ByteTerm.
// NOTE(review): most of these are accessed as `atom.<member>` by the
// constructors below, so they are presumably nested inside a struct/union whose
// declaration is not visible in this listing - confirm the exact layout against
// the complete header.

// Character stored by the ByteTerm(UChar ch, ...) constructor.
77 UChar patternCharacter
;
// Character class stored by the ByteTerm(CharacterClass*, ...) constructor.
82 CharacterClass
* characterClass
;
// Subpattern identifier stored by the constructors that take `subpatternId`.
83 unsigned subpatternId
;
// Nested disjunction stored by the constructor that takes `parenthesesInfo`.
86 ByteDisjunction
* parenthesesDisjunction
;
// Not assigned anywhere in this header.
87 unsigned parenthesesWidth
;
// Quantifier kind and repeat count; every constructor in this header sets both.
89 QuantifierType quantityType
;
90 unsigned quantityCount
;
// Count stored by the CheckInput() / UncheckInput() factory functions.
101 unsigned checkInputCount
;
// Initialized from the `frameLocation` argument of the two character constructors.
103 unsigned frameLocation
;
// Assigned from an `int inputPos` argument by the constructors and factories,
// so the value undergoes an int -> unsigned conversion on assignment.
106 unsigned inputPosition
;
// Constructs a term for the single pattern character `ch`.
//
// `type` is derived from `quantityType`:
//   - QuantifierFixedCount: TypePatternCharacterOnce when quantityCount == 1,
//     otherwise TypePatternCharacterFixed;
//   - QuantifierGreedy:     TypePatternCharacterGreedy;
//   - QuantifierNonGreedy:  TypePatternCharacterNonGreedy.
// The character, quantifier kind and count are stored in `atom`, and `inputPos`
// is recorded in inputPosition.
108 ByteTerm(UChar ch
, int inputPos
, unsigned frameLocation
, Checked
<unsigned> quantityCount
, QuantifierType quantityType
)
// The member frameLocation is initialized from the same-named parameter.
109 : frameLocation(frameLocation
)
// NOTE(review): no statement separating the case labels is visible in this
// listing - confirm against the complete header that each case ends its branch.
113 switch (quantityType
) {
114 case QuantifierFixedCount
:
115 type
= (quantityCount
== 1) ? ByteTerm::TypePatternCharacterOnce
: ByteTerm::TypePatternCharacterFixed
;
117 case QuantifierGreedy
:
118 type
= ByteTerm::TypePatternCharacterGreedy
;
120 case QuantifierNonGreedy
:
121 type
= ByteTerm::TypePatternCharacterNonGreedy
;
125 atom
.patternCharacter
= ch
;
126 atom
.quantityType
= quantityType
;
// Unwraps the Checked<unsigned>; presumably bypasses its overflow check -
// confirm against WTF's Checked documentation.
127 atom
.quantityCount
= quantityCount
.unsafeGet();
// int -> unsigned conversion: inputPosition is declared unsigned.
128 inputPosition
= inputPos
;
// Constructs a term for a cased character given as the pair `lo` / `hi`.
//
// `type` is derived from `quantityType`:
//   - QuantifierFixedCount: TypePatternCasedCharacterOnce when
//     quantityCount == 1, otherwise TypePatternCasedCharacterFixed;
//   - QuantifierGreedy:     TypePatternCasedCharacterGreedy;
//   - QuantifierNonGreedy:  TypePatternCasedCharacterNonGreedy.
// Both characters, the quantifier kind and the count are stored in `atom`, and
// `inputPos` is recorded in inputPosition.
131 ByteTerm(UChar lo
, UChar hi
, int inputPos
, unsigned frameLocation
, Checked
<unsigned> quantityCount
, QuantifierType quantityType
)
// The member frameLocation is initialized from the same-named parameter.
132 : frameLocation(frameLocation
)
// NOTE(review): no statement separating the case labels is visible in this
// listing - confirm against the complete header that each case ends its branch.
136 switch (quantityType
) {
137 case QuantifierFixedCount
:
138 type
= (quantityCount
== 1) ? ByteTerm::TypePatternCasedCharacterOnce
: ByteTerm::TypePatternCasedCharacterFixed
;
140 case QuantifierGreedy
:
141 type
= ByteTerm::TypePatternCasedCharacterGreedy
;
143 case QuantifierNonGreedy
:
144 type
= ByteTerm::TypePatternCasedCharacterNonGreedy
;
148 atom
.casedCharacter
.lo
= lo
;
149 atom
.casedCharacter
.hi
= hi
;
150 atom
.quantityType
= quantityType
;
// Unwraps the Checked<unsigned>; presumably bypasses its overflow check -
// confirm against WTF's Checked documentation.
151 atom
.quantityCount
= quantityCount
.unsafeGet();
// int -> unsigned conversion: inputPosition is declared unsigned.
152 inputPosition
= inputPos
;
// Constructs a TypeCharacterClass term that refers to `characterClass`.
// The quantifier is fixed at QuantifierFixedCount with a count of 1, and
// `inputPos` is recorded in inputPosition.
// NOTE(review): the use of `invert` is not visible in this listing; presumably
// it is consumed by a member initializer - confirm against the complete header.
155 ByteTerm(CharacterClass
* characterClass
, bool invert
, int inputPos
)
156 : type(ByteTerm::TypeCharacterClass
)
// Stores the pointer only; this constructor does not copy the class.
160 atom
.characterClass
= characterClass
;
161 atom
.quantityType
= QuantifierFixedCount
;
162 atom
.quantityCount
= 1;
163 inputPosition
= inputPos
;
// Constructs a term that refers to a nested disjunction.
// Stores `subpatternId` and the `parenthesesInfo` pointer in `atom`, fixes the
// quantifier at QuantifierFixedCount with a count of 1, and records `inputPos`.
// NOTE(review): the uses of `type` and `capture` are not visible in this
// listing; presumably they are consumed by member initializers - confirm
// against the complete header.
166 ByteTerm(Type type
, unsigned subpatternId
, ByteDisjunction
* parenthesesInfo
, bool capture
, int inputPos
)
171 atom
.subpatternId
= subpatternId
;
// Stores the pointer only; nothing here takes ownership of the disjunction.
172 atom
.parenthesesDisjunction
= parenthesesInfo
;
173 atom
.quantityType
= QuantifierFixedCount
;
174 atom
.quantityCount
= 1;
175 inputPosition
= inputPos
;
// Constructs a term of the given `type` with the quantifier fixed at
// QuantifierFixedCount and a count of 1. `invert` defaults to false.
// The static factory functions in this header use this constructor, e.g.
// `ByteTerm term(TypeAssertionBOL)`.
// NOTE(review): the uses of `type` and `invert` are not visible in this
// listing; presumably they are consumed by member initializers - confirm.
178 ByteTerm(Type type
, bool invert
= false)
183 atom
.quantityType
= QuantifierFixedCount
;
184 atom
.quantityCount
= 1;
// Constructs a term of the given `type` that carries a subpattern identifier.
// Stores `subpatternId` in `atom`, fixes the quantifier at QuantifierFixedCount
// with a count of 1, and records `inputPos`. BackReference() uses this
// constructor.
// NOTE(review): the uses of `type`, `capture` and `invert` are not visible in
// this listing; presumably they are consumed by member initializers - confirm.
187 ByteTerm(Type type
, unsigned subpatternId
, bool capture
, bool invert
, int inputPos
)
192 atom
.subpatternId
= subpatternId
;
193 atom
.quantityType
= QuantifierFixedCount
;
194 atom
.quantityCount
= 1;
195 inputPosition
= inputPos
;
// Factory: builds a TypeAssertionBOL term and records `inputPos` in its
// inputPosition member.
198 static ByteTerm
BOL(int inputPos
)
200 ByteTerm
term(TypeAssertionBOL
);
201 term
.inputPosition
= inputPos
;
// Factory: builds a TypeCheckInput term whose checkInputCount is `count`.
205 static ByteTerm
CheckInput(Checked
<unsigned> count
)
207 ByteTerm
term(TypeCheckInput
);
// Unwraps the Checked<unsigned>; presumably bypasses its overflow check -
// confirm against WTF's Checked documentation.
208 term
.checkInputCount
= count
.unsafeGet();
// Factory: builds a TypeUncheckInput term whose checkInputCount is `count`.
212 static ByteTerm
UncheckInput(Checked
<unsigned> count
)
214 ByteTerm
term(TypeUncheckInput
);
// Unwraps the Checked<unsigned>; presumably bypasses its overflow check -
// confirm against WTF's Checked documentation.
215 term
.checkInputCount
= count
.unsafeGet();
// Factory: builds a TypeAssertionEOL term and records `inputPos` in its
// inputPosition member.
219 static ByteTerm
EOL(int inputPos
)
221 ByteTerm
term(TypeAssertionEOL
);
222 term
.inputPosition
= inputPos
;
// Factory: builds a TypeAssertionWordBoundary term, forwarding `invert` to the
// (Type, bool) constructor, and records `inputPos` in its inputPosition member.
226 static ByteTerm
WordBoundary(bool invert
, int inputPos
)
228 ByteTerm
term(TypeAssertionWordBoundary
, invert
);
229 term
.inputPosition
= inputPos
;
// Factory: returns a TypeBackReference term for `subpatternId` at `inputPos`.
// Both the capture and invert arguments are passed as false.
233 static ByteTerm
BackReference(unsigned subpatternId
, int inputPos
)
235 return ByteTerm(TypeBackReference
, subpatternId
, false, false, inputPos
);
// Factory: builds a TypeBodyAlternativeBegin term.
// alternative.next and alternative.end are initialized to 0, and
// alternative.onceThrough takes the caller-supplied value. Nothing visible in
// this header assigns next/end any other value.
238 static ByteTerm
BodyAlternativeBegin(bool onceThrough
)
240 ByteTerm
term(TypeBodyAlternativeBegin
);
241 term
.alternative
.next
= 0;
242 term
.alternative
.end
= 0;
243 term
.alternative
.onceThrough
= onceThrough
;
// Factory: builds a TypeBodyAlternativeDisjunction term.
// alternative.next and alternative.end are initialized to 0, and
// alternative.onceThrough takes the caller-supplied value.
247 static ByteTerm
BodyAlternativeDisjunction(bool onceThrough
)
249 ByteTerm
term(TypeBodyAlternativeDisjunction
);
250 term
.alternative
.next
= 0;
251 term
.alternative
.end
= 0;
252 term
.alternative
.onceThrough
= onceThrough
;
// Factory: builds a TypeBodyAlternativeEnd term.
// alternative.next and alternative.end are initialized to 0; unlike the
// Begin/Disjunction body factories, onceThrough is always false here.
256 static ByteTerm
BodyAlternativeEnd()
258 ByteTerm
term(TypeBodyAlternativeEnd
);
259 term
.alternative
.next
= 0;
260 term
.alternative
.end
= 0;
261 term
.alternative
.onceThrough
= false;
// Factory: builds a TypeAlternativeBegin term with alternative.next and
// alternative.end set to 0 and alternative.onceThrough set to false.
265 static ByteTerm
AlternativeBegin()
267 ByteTerm
term(TypeAlternativeBegin
);
268 term
.alternative
.next
= 0;
269 term
.alternative
.end
= 0;
270 term
.alternative
.onceThrough
= false;
// Factory: builds a TypeAlternativeDisjunction term with alternative.next and
// alternative.end set to 0 and alternative.onceThrough set to false.
274 static ByteTerm
AlternativeDisjunction()
276 ByteTerm
term(TypeAlternativeDisjunction
);
277 term
.alternative
.next
= 0;
278 term
.alternative
.end
= 0;
279 term
.alternative
.onceThrough
= false;
// Factory: builds a TypeAlternativeEnd term with alternative.next and
// alternative.end set to 0 and alternative.onceThrough set to false.
283 static ByteTerm
AlternativeEnd()
285 ByteTerm
term(TypeAlternativeEnd
);
286 term
.alternative
.next
= 0;
287 term
.alternative
.end
= 0;
288 term
.alternative
.onceThrough
= false;
// Factory: returns a TypeSubpatternBegin term built with the (Type, bool)
// constructor, so `invert` takes its default of false.
292 static ByteTerm
SubpatternBegin()
294 return ByteTerm(TypeSubpatternBegin
);
// Factory: returns a TypeSubpatternEnd term built with the (Type, bool)
// constructor, so `invert` takes its default of false.
297 static ByteTerm
SubpatternEnd()
299 return ByteTerm(TypeSubpatternEnd
);
// Factory: builds a TypeDotStarEnclosure term and stores the two anchor flags
// in anchors.m_bol and anchors.m_eol.
302 static ByteTerm
DotStarEnclosure(bool bolAnchor
, bool eolAnchor
)
304 ByteTerm
term(TypeDotStarEnclosure
);
305 term
.anchors
.m_bol
= bolAnchor
;
306 term
.anchors
.m_eol
= eolAnchor
;
// A sequence of ByteTerms together with the subpattern count and frame size
// supplied at construction.
321 class ByteDisjunction
{
// Presumably opts this class into WTF's fast allocator - confirm against the
// macro's definition.
322 WTF_MAKE_FAST_ALLOCATED
;
// Records the two counts; `terms` starts out default-constructed.
324 ByteDisjunction(unsigned numSubpatterns
, unsigned frameSize
)
325 : m_numSubpatterns(numSubpatterns
)
326 , m_frameSize(frameSize
)
// The terms making up this disjunction.
330 Vector
<ByteTerm
> terms
;
// Value of the constructor's numSubpatterns argument.
331 unsigned m_numSubpatterns
;
// Value of the constructor's frameSize argument.
332 unsigned m_frameSize
;
// A compiled pattern: the body disjunction, the nested disjunctions and user
// character classes it adopts, flags copied from the source YarrPattern, and
// the allocator pointer supplied at construction.
335 struct BytecodePattern
{
// Presumably opts this struct into WTF's fast allocator - confirm against the
// macro's definition.
336 WTF_MAKE_FAST_ALLOCATED
;
// Takes ownership of `body`, and swaps the contents of `parenthesesInfoToAdopt`
// and `pattern.m_userCharacterClasses` into this object. Both arguments are
// taken by non-const reference and are modified by the swaps.
338 BytecodePattern(std::unique_ptr
<ByteDisjunction
> body
, Vector
<std::unique_ptr
<ByteDisjunction
>>& parenthesesInfoToAdopt
, YarrPattern
& pattern
, BumpPointerAllocator
* allocator
)
339 : m_body(WTF::move(body
))
// NOTE(review): the declarations of m_ignoreCase and m_multiline are not
// visible in this listing - confirm against the complete header.
340 , m_ignoreCase(pattern
.m_ignoreCase
)
341 , m_multiline(pattern
.m_multiline
)
342 , m_allocator(allocator
)
// m_body is dereferenced unconditionally, so `body` must be non-null.
344 m_body
->terms
.shrinkToFit();
// Pointers obtained from `pattern`; nothing here takes ownership of them.
346 newlineCharacterClass
= pattern
.newlineCharacterClass();
347 wordcharCharacterClass
= pattern
.wordcharCharacterClass();
// Swap rather than copy: the elements are std::unique_ptr. After the swap the
// caller's vector holds whatever m_allParenthesesInfo held before.
349 m_allParenthesesInfo
.swap(parenthesesInfoToAdopt
);
350 m_allParenthesesInfo
.shrinkToFit();
// Likewise exchanges the user character classes with `pattern`.
352 m_userCharacterClasses
.swap(pattern
.m_userCharacterClasses
);
353 m_userCharacterClasses
.shrinkToFit();
// The owned body disjunction.
356 std::unique_ptr
<ByteDisjunction
> m_body
;
359 // Each BytecodePattern is associated with a RegExp, each RegExp is associated
360 // with a VM. Cache a pointer to our VM's m_regExpAllocator.
361 BumpPointerAllocator
* m_allocator
;
// Character classes obtained from the YarrPattern in the constructor.
363 CharacterClass
* newlineCharacterClass
;
364 CharacterClass
* wordcharCharacterClass
;
// Owned nested disjunctions and user character classes, adopted via swap in
// the constructor.
367 Vector
<std::unique_ptr
<ByteDisjunction
>> m_allParenthesesInfo
;
368 Vector
<std::unique_ptr
<CharacterClass
>> m_userCharacterClasses
;
// Entry points; only declared in this header.

// Produces an owned BytecodePattern from a YarrPattern and an allocator
// pointer. The YarrPattern is taken by non-const reference; note that
// BytecodePattern's constructor swaps data out of the pattern it is given.
371 JS_EXPORT_PRIVATE
std::unique_ptr
<BytecodePattern
> byteCompile(YarrPattern
&, BumpPointerAllocator
*);
// Overloads of interpret() for three input representations: a String, an
// LChar buffer with explicit length, and a UChar buffer with explicit length.
// NOTE(review): the meaning of the unsigned return value and the required size
// of the `output` buffer are not visible in this header - confirm against the
// implementation. Only this String overload carries JS_EXPORT_PRIVATE.
372 JS_EXPORT_PRIVATE
unsigned interpret(BytecodePattern
*, const String
& input
, unsigned start
, unsigned* output
);
// LChar buffer overload.
373 unsigned interpret(BytecodePattern
*, const LChar
* input
, unsigned length
, unsigned start
, unsigned* output
);
// UChar buffer overload.
374 unsigned interpret(BytecodePattern
*, const UChar
* input
, unsigned length
, unsigned start
, unsigned* output
);
376 } } // namespace JSC::Yarr
378 #endif // YarrInterpreter_h