2 * Copyright (C) 2009, 2010 Apple Inc. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 #ifndef YarrInterpreter_h
27 #define YarrInterpreter_h
29 #include "YarrPattern.h"
30 #include <wtf/PassOwnPtr.h>
// Forward declaration of BumpPointerAllocator, followed by a using-declaration
// that lets the rest of this header name WTF::BumpPointerAllocator unqualified.
// NOTE(review): the enclosing `namespace WTF` wrapper is not visible in this
// excerpt — confirm against the full file.
33 class BumpPointerAllocator
;
35 using WTF::BumpPointerAllocator
;
37 namespace JSC
{ namespace Yarr
{
// Forward declaration: ByteTerm holds a ByteDisjunction* (parenthesesDisjunction)
// before the class itself is defined later in this header.
39 class ByteDisjunction
;
// Kinds of byte-code term, referred to elsewhere in this header as
// ByteTerm::Type... values.
// NOTE(review): the enum header and several enumerators used later in this
// file (e.g. TypeAssertionBOL, TypeCheckInput, TypeCharacterClass,
// TypeBackReference) are not visible in this excerpt.
43 TypeBodyAlternativeBegin
,
44 TypeBodyAlternativeDisjunction
,
45 TypeBodyAlternativeEnd
,
47 TypeAlternativeDisjunction
,
53 TypeAssertionWordBoundary
,
// Single pattern character; the ByteTerm(UChar ch, ...) constructor picks one
// of these four from the quantifier type and count.
54 TypePatternCharacterOnce
,
55 TypePatternCharacterFixed
,
56 TypePatternCharacterGreedy
,
57 TypePatternCharacterNonGreedy
,
// Cased (lo/hi pair) character; the ByteTerm(UChar lo, UChar hi, ...)
// constructor picks one of these four in the same way.
58 TypePatternCasedCharacterOnce
,
59 TypePatternCasedCharacterFixed
,
60 TypePatternCasedCharacterGreedy
,
61 TypePatternCasedCharacterNonGreedy
,
64 TypeParenthesesSubpattern
,
65 TypeParenthesesSubpatternOnceBegin
,
66 TypeParenthesesSubpatternOnceEnd
,
67 TypeParenthesesSubpatternTerminalBegin
,
68 TypeParenthesesSubpatternTerminalEnd
,
69 TypeParentheticalAssertionBegin
,
70 TypeParentheticalAssertionEnd
,
// Payload fields of a ByteTerm.
// NOTE(review): the struct/union nesting around these members is not visible
// in this excerpt. The constructors reach patternCharacter, characterClass,
// subpatternId, parenthesesDisjunction, quantityType and quantityCount through
// `atom.`, and checkInputCount / frameLocation / inputPosition directly.
78 UChar patternCharacter
;
83 CharacterClass
* characterClass
;
84 unsigned subpatternId
;
87 ByteDisjunction
* parenthesesDisjunction
;
88 unsigned parenthesesWidth
;
90 QuantifierType quantityType
;
91 unsigned quantityCount
;
// Written by the CheckInput / UncheckInput factories.
102 unsigned checkInputCount
;
// Initialised from the constructor argument of the same name.
104 unsigned frameLocation
;
107 unsigned inputPosition
;
// Builds a term that matches the single character `ch`.
// The term type is chosen from quantityType:
//   QuantifierFixedCount -> TypePatternCharacterOnce when quantityCount == 1,
//                           otherwise TypePatternCharacterFixed
//   QuantifierGreedy     -> TypePatternCharacterGreedy
//   QuantifierNonGreedy  -> TypePatternCharacterNonGreedy
// quantityCount arrives as Checked<unsigned> and is stored via unsafeGet().
// NOTE(review): the lines between the case labels (presumably `break;`) and
// the rest of the initializer list are missing from this excerpt.
109 ByteTerm(UChar ch
, int inputPos
, unsigned frameLocation
, Checked
<unsigned> quantityCount
, QuantifierType quantityType
)
110 : frameLocation(frameLocation
)
114 switch (quantityType
) {
115 case QuantifierFixedCount
:
116 type
= (quantityCount
== 1) ? ByteTerm::TypePatternCharacterOnce
: ByteTerm::TypePatternCharacterFixed
;
118 case QuantifierGreedy
:
119 type
= ByteTerm::TypePatternCharacterGreedy
;
121 case QuantifierNonGreedy
:
122 type
= ByteTerm::TypePatternCharacterNonGreedy
;
126 atom
.patternCharacter
= ch
;
127 atom
.quantityType
= quantityType
;
128 atom
.quantityCount
= quantityCount
.unsafeGet();
129 inputPosition
= inputPos
;
// Builds a cased-character term from the pair (lo, hi), stored in
// atom.casedCharacter.lo / .hi.
// The term type is chosen from quantityType exactly as in the single-character
// constructor, but using the TypePatternCasedCharacter* enumerators.
// quantityCount arrives as Checked<unsigned> and is stored via unsafeGet().
// NOTE(review): the lines between the case labels (presumably `break;`) and
// the rest of the initializer list are missing from this excerpt.
132 ByteTerm(UChar lo
, UChar hi
, int inputPos
, unsigned frameLocation
, Checked
<unsigned> quantityCount
, QuantifierType quantityType
)
133 : frameLocation(frameLocation
)
137 switch (quantityType
) {
138 case QuantifierFixedCount
:
139 type
= (quantityCount
== 1) ? ByteTerm::TypePatternCasedCharacterOnce
: ByteTerm::TypePatternCasedCharacterFixed
;
141 case QuantifierGreedy
:
142 type
= ByteTerm::TypePatternCasedCharacterGreedy
;
144 case QuantifierNonGreedy
:
145 type
= ByteTerm::TypePatternCasedCharacterNonGreedy
;
149 atom
.casedCharacter
.lo
= lo
;
150 atom
.casedCharacter
.hi
= hi
;
151 atom
.quantityType
= quantityType
;
152 atom
.quantityCount
= quantityCount
.unsafeGet();
153 inputPosition
= inputPos
;
// Builds a TypeCharacterClass term for `characterClass`, with a fixed
// quantity of exactly one, positioned at inputPos.
// NOTE(review): how `invert` is stored is not visible in this excerpt.
156 ByteTerm(CharacterClass
* characterClass
, bool invert
, int inputPos
)
157 : type(ByteTerm::TypeCharacterClass
)
161 atom
.characterClass
= characterClass
;
162 atom
.quantityType
= QuantifierFixedCount
;
163 atom
.quantityCount
= 1;
164 inputPosition
= inputPos
;
// Builds a term of the caller-supplied `type` that refers to a nested
// disjunction: records subpatternId and the parenthesesInfo pointer, and
// starts with a fixed quantity of exactly one at inputPos.
// NOTE(review): the initializer list (where `type` and `capture` are
// presumably stored) is not visible in this excerpt.
167 ByteTerm(Type type
, unsigned subpatternId
, ByteDisjunction
* parenthesesInfo
, bool capture
, int inputPos
)
172 atom
.subpatternId
= subpatternId
;
173 atom
.parenthesesDisjunction
= parenthesesInfo
;
174 atom
.quantityType
= QuantifierFixedCount
;
175 atom
.quantityCount
= 1;
176 inputPosition
= inputPos
;
// Builds a term of the given `type` with a fixed quantity of exactly one.
// The static factory functions in this header (BOL, EOL, CheckInput,
// WordBoundary, ...) construct their result through this overload.
// NOTE(review): the initializer list (where `type` and `invert` are presumably
// stored) is not visible in this excerpt.
179 ByteTerm(Type type
, bool invert
= false)
184 atom
.quantityType
= QuantifierFixedCount
;
185 atom
.quantityCount
= 1;
// Builds a term of the given `type` that carries a subpatternId, with a fixed
// quantity of exactly one at inputPos. BackReference() uses this overload.
// NOTE(review): the initializer list (where `type`, `capture` and `invert` are
// presumably stored) is not visible in this excerpt.
188 ByteTerm(Type type
, unsigned subpatternId
, bool capture
, bool invert
, int inputPos
)
193 atom
.subpatternId
= subpatternId
;
194 atom
.quantityType
= QuantifierFixedCount
;
195 atom
.quantityCount
= 1;
196 inputPosition
= inputPos
;
// Factory: creates a TypeAssertionBOL term and sets its inputPosition to
// inputPos.
199 static ByteTerm
BOL(int inputPos
)
201 ByteTerm
term(TypeAssertionBOL
);
202 term
.inputPosition
= inputPos
;
// Factory: creates a TypeCheckInput term whose checkInputCount is the raw
// value of `count` (obtained with unsafeGet()).
206 static ByteTerm
CheckInput(Checked
<unsigned> count
)
208 ByteTerm
term(TypeCheckInput
);
209 term
.checkInputCount
= count
.unsafeGet();
// Factory: creates a TypeUncheckInput term whose checkInputCount is the raw
// value of `count` (obtained with unsafeGet()).
213 static ByteTerm
UncheckInput(Checked
<unsigned> count
)
215 ByteTerm
term(TypeUncheckInput
);
216 term
.checkInputCount
= count
.unsafeGet();
// Factory: creates a TypeAssertionEOL term and sets its inputPosition to
// inputPos.
220 static ByteTerm
EOL(int inputPos
)
222 ByteTerm
term(TypeAssertionEOL
);
223 term
.inputPosition
= inputPos
;
// Factory: creates a TypeAssertionWordBoundary term, forwarding `invert` to
// the (Type, bool) constructor, and sets its inputPosition to inputPos.
227 static ByteTerm
WordBoundary(bool invert
, int inputPos
)
229 ByteTerm
term(TypeAssertionWordBoundary
, invert
);
230 term
.inputPosition
= inputPos
;
// Factory: returns a TypeBackReference term for subpatternId at inputPos.
// The two `false` arguments are the constructor's `capture` and `invert`
// parameters.
234 static ByteTerm
BackReference(unsigned subpatternId
, int inputPos
)
236 return ByteTerm(TypeBackReference
, subpatternId
, false, false, inputPos
);
// Factory: creates a TypeBodyAlternativeBegin term with alternative.next and
// alternative.end zeroed and alternative.onceThrough taken from the argument.
239 static ByteTerm
BodyAlternativeBegin(bool onceThrough
)
241 ByteTerm
term(TypeBodyAlternativeBegin
);
242 term
.alternative
.next
= 0;
243 term
.alternative
.end
= 0;
244 term
.alternative
.onceThrough
= onceThrough
;
// Factory: creates a TypeBodyAlternativeDisjunction term with
// alternative.next and alternative.end zeroed and alternative.onceThrough
// taken from the argument.
248 static ByteTerm
BodyAlternativeDisjunction(bool onceThrough
)
250 ByteTerm
term(TypeBodyAlternativeDisjunction
);
251 term
.alternative
.next
= 0;
252 term
.alternative
.end
= 0;
253 term
.alternative
.onceThrough
= onceThrough
;
// Factory: creates a TypeBodyAlternativeEnd term with alternative.next and
// alternative.end zeroed and alternative.onceThrough set to false.
257 static ByteTerm
BodyAlternativeEnd()
259 ByteTerm
term(TypeBodyAlternativeEnd
);
260 term
.alternative
.next
= 0;
261 term
.alternative
.end
= 0;
262 term
.alternative
.onceThrough
= false;
// Factory: creates a TypeAlternativeBegin term with alternative.next and
// alternative.end zeroed and alternative.onceThrough set to false.
266 static ByteTerm
AlternativeBegin()
268 ByteTerm
term(TypeAlternativeBegin
);
269 term
.alternative
.next
= 0;
270 term
.alternative
.end
= 0;
271 term
.alternative
.onceThrough
= false;
// Factory: creates a TypeAlternativeDisjunction term with alternative.next
// and alternative.end zeroed and alternative.onceThrough set to false.
275 static ByteTerm
AlternativeDisjunction()
277 ByteTerm
term(TypeAlternativeDisjunction
);
278 term
.alternative
.next
= 0;
279 term
.alternative
.end
= 0;
280 term
.alternative
.onceThrough
= false;
// Factory: creates a TypeAlternativeEnd term with alternative.next and
// alternative.end zeroed and alternative.onceThrough set to false.
284 static ByteTerm
AlternativeEnd()
286 ByteTerm
term(TypeAlternativeEnd
);
287 term
.alternative
.next
= 0;
288 term
.alternative
.end
= 0;
289 term
.alternative
.onceThrough
= false;
// Factory: returns a TypeSubpatternBegin term with no additional payload set.
293 static ByteTerm
SubpatternBegin()
295 return ByteTerm(TypeSubpatternBegin
);
// Factory: returns a TypeSubpatternEnd term with no additional payload set.
298 static ByteTerm
SubpatternEnd()
300 return ByteTerm(TypeSubpatternEnd
);
// Factory: creates a TypeDotStarEnclosure term and records the two anchor
// flags in anchors.m_bol and anchors.m_eol.
303 static ByteTerm
DotStarEnclosure(bool bolAnchor
, bool eolAnchor
)
305 ByteTerm
term(TypeDotStarEnclosure
);
306 term
.anchors
.m_bol
= bolAnchor
;
307 term
.anchors
.m_eol
= eolAnchor
;
// A list of ByteTerms (`terms`) together with the subpattern count and frame
// size supplied at construction.
// NOTE(review): the access specifier, constructor body and closing brace are
// not visible in this excerpt.
322 class ByteDisjunction
{
323 WTF_MAKE_FAST_ALLOCATED
;
// Stores both arguments; `terms` starts out default-constructed.
325 ByteDisjunction(unsigned numSubpatterns
, unsigned frameSize
)
326 : m_numSubpatterns(numSubpatterns
)
327 , m_frameSize(frameSize
)
331 Vector
<ByteTerm
> terms
;
332 unsigned m_numSubpatterns
;
333 unsigned m_frameSize
;
// The compiled form of a pattern: the body disjunction, the pattern flags,
// the allocator pointer, and the parentheses disjunctions and user character
// classes adopted from the arguments.
// NOTE(review): the first member initializer (presumably m_body), the
// declarations of m_ignoreCase / m_multiline and the closing brace are not
// visible in this excerpt.
336 struct BytecodePattern
{
337 WTF_MAKE_FAST_ALLOCATED
;
// Copies the ignoreCase and multiline flags from `pattern`, stores
// `allocator`, and takes the newline and wordchar character classes from
// `pattern`. The contents of parenthesesInfoToAdopt and
// pattern.m_userCharacterClasses are swapped into this object, so both
// arguments are modified. Each adopted vector, and the body's term list, is
// then shrunk to fit.
339 BytecodePattern(PassOwnPtr
<ByteDisjunction
> body
, Vector
<OwnPtr
<ByteDisjunction
>>& parenthesesInfoToAdopt
, YarrPattern
& pattern
, BumpPointerAllocator
* allocator
)
341 , m_ignoreCase(pattern
.m_ignoreCase
)
342 , m_multiline(pattern
.m_multiline
)
343 , m_allocator(allocator
)
345 m_body
->terms
.shrinkToFit();
347 newlineCharacterClass
= pattern
.newlineCharacterClass();
348 wordcharCharacterClass
= pattern
.wordcharCharacterClass();
350 m_allParenthesesInfo
.swap(parenthesesInfoToAdopt
);
351 m_allParenthesesInfo
.shrinkToFit();
353 m_userCharacterClasses
.swap(pattern
.m_userCharacterClasses
);
354 m_userCharacterClasses
.shrinkToFit();
357 OwnPtr
<ByteDisjunction
> m_body
;
360 // Each BytecodePattern is associated with a RegExp, each RegExp is associated
361 // with a VM. Cache a pointer to our VM's m_regExpAllocator.
362 BumpPointerAllocator
* m_allocator
;
// Raw pointers obtained from `pattern` in the constructor.
364 CharacterClass
* newlineCharacterClass
;
365 CharacterClass
* wordcharCharacterClass
;
// Owning vectors filled by swap() in the constructor.
368 Vector
<OwnPtr
<ByteDisjunction
>> m_allParenthesesInfo
;
369 Vector
<OwnPtr
<CharacterClass
>> m_userCharacterClasses
;
// Declaration only: takes a YarrPattern and a BumpPointerAllocator* and
// returns a PassOwnPtr<BytecodePattern>. The definition is not in this header.
372 JS_EXPORT_PRIVATE PassOwnPtr
<BytecodePattern
> byteCompile(YarrPattern
&, BumpPointerAllocator
*);
// Declarations only: three overloads of interpret(), each taking a
// BytecodePattern*, the input, a `start` offset and an `output` array, and
// returning unsigned. The input is either a String, or an LChar / UChar
// buffer with an explicit `length`. Only the String overload carries
// JS_EXPORT_PRIVATE.
// NOTE(review): the meaning of the return value and the required size of
// `output` are not visible in this header — confirm against the definition.
373 JS_EXPORT_PRIVATE
unsigned interpret(BytecodePattern
*, const String
& input
, unsigned start
, unsigned* output
);
374 unsigned interpret(BytecodePattern
*, const LChar
* input
, unsigned length
, unsigned start
, unsigned* output
);
375 unsigned interpret(BytecodePattern
*, const UChar
* input
, unsigned length
, unsigned start
, unsigned* output
);
377 } } // namespace JSC::Yarr
379 #endif // YarrInterpreter_h