2 * Copyright (C) 2009, 2010 Apple Inc. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 #ifndef YarrInterpreter_h
27 #define YarrInterpreter_h
29 #include "YarrPattern.h"
30 #include <wtf/PassOwnPtr.h>
31 #include <wtf/unicode/Unicode.h>
34 class BumpPointerAllocator
;
36 using WTF::BumpPointerAllocator
;
38 namespace JSC
{ namespace Yarr
{
40 class ByteDisjunction
;
// Enumerators naming the kinds of ByteTerm. The ByteTerm constructors and static
// factory helpers select among them (e.g. ByteTerm::TypePatternCharacterOnce).
// NOTE(review): the enclosing enum declaration and some enumerators that are used
// elsewhere in this header (e.g. TypeAssertionBOL, TypeCheckInput) are not visible
// in this excerpt - confirm against the full header.
44 TypeBodyAlternativeBegin
,
45 TypeBodyAlternativeDisjunction
,
46 TypeBodyAlternativeEnd
,
48 TypeAlternativeDisjunction
,
54 TypeAssertionWordBoundary
,
// Single-character terms, one enumerator per quantifier flavour.
55 TypePatternCharacterOnce
,
56 TypePatternCharacterFixed
,
57 TypePatternCharacterGreedy
,
58 TypePatternCharacterNonGreedy
,
// Cased-character (lo/hi pair) terms, one enumerator per quantifier flavour.
59 TypePatternCasedCharacterOnce
,
60 TypePatternCasedCharacterFixed
,
61 TypePatternCasedCharacterGreedy
,
62 TypePatternCasedCharacterNonGreedy
,
65 TypeParenthesesSubpattern
,
66 TypeParenthesesSubpatternOnceBegin
,
67 TypeParenthesesSubpatternOnceEnd
,
68 TypeParenthesesSubpatternTerminalBegin
,
69 TypeParenthesesSubpatternTerminalEnd
,
70 TypeParentheticalAssertionBegin
,
71 TypeParentheticalAssertionEnd
,
// Data members of ByteTerm. The constructors in this header reach
// patternCharacter, characterClass, subpatternId, parenthesesDisjunction,
// quantityType and quantityCount through a member named `atom`, while
// checkInputCount, frameLocation and inputPosition are accessed directly.
// NOTE(review): the union/struct declarations that group these fields are not
// visible in this excerpt - confirm the exact layout against the full header.
79 UChar patternCharacter
;
84 CharacterClass
* characterClass
;
85 unsigned subpatternId
;
88 ByteDisjunction
* parenthesesDisjunction
;
89 unsigned parenthesesWidth
;
// Quantifier applied to the atom (kind and repeat count).
91 QuantifierType quantityType
;
92 unsigned quantityCount
;
// Count written by the CheckInput / UncheckInput factory helpers.
103 unsigned checkInputCount
;
105 unsigned frameLocation
;
108 unsigned inputPosition
;
// Constructs a term matching the single character `ch`.
// frameLocation is stored through the member initializer. The term type is derived
// from quantityType: QuantifierFixedCount gives TypePatternCharacterOnce when
// quantityCount == 1 and TypePatternCharacterFixed otherwise, QuantifierGreedy gives
// TypePatternCharacterGreedy, and QuantifierNonGreedy gives
// TypePatternCharacterNonGreedy.
// NOTE(review): braces, any `break` statements and any further member initializers
// are not visible in this excerpt - confirm against the full header.
110 ByteTerm(UChar ch
, int inputPos
, unsigned frameLocation
, Checked
<unsigned> quantityCount
, QuantifierType quantityType
)
111 : frameLocation(frameLocation
)
// Choose the term type from the quantifier kind.
115 switch (quantityType
) {
116 case QuantifierFixedCount
:
117 type
= (quantityCount
== 1) ? ByteTerm::TypePatternCharacterOnce
: ByteTerm::TypePatternCharacterFixed
;
119 case QuantifierGreedy
:
120 type
= ByteTerm::TypePatternCharacterGreedy
;
122 case QuantifierNonGreedy
:
123 type
= ByteTerm::TypePatternCharacterNonGreedy
;
// Record the character and its quantifier in the atom.
127 atom
.patternCharacter
= ch
;
128 atom
.quantityType
= quantityType
;
// The count is stored as the plain unsigned obtained from the Checked<unsigned>
// wrapper via unsafeGet().
129 atom
.quantityCount
= quantityCount
.unsafeGet();
130 inputPosition
= inputPos
;
// Constructs a cased-character term from the pair of characters `lo` and `hi`.
// frameLocation is stored through the member initializer. The term type is derived
// from quantityType: QuantifierFixedCount gives TypePatternCasedCharacterOnce when
// quantityCount == 1 and TypePatternCasedCharacterFixed otherwise, QuantifierGreedy
// gives TypePatternCasedCharacterGreedy, and QuantifierNonGreedy gives
// TypePatternCasedCharacterNonGreedy.
// NOTE(review): presumably lo/hi are the lower- and upper-case forms of one
// character - verify against callers. Braces, any `break` statements and any
// further member initializers are not visible in this excerpt.
133 ByteTerm(UChar lo
, UChar hi
, int inputPos
, unsigned frameLocation
, Checked
<unsigned> quantityCount
, QuantifierType quantityType
)
134 : frameLocation(frameLocation
)
// Choose the term type from the quantifier kind.
138 switch (quantityType
) {
139 case QuantifierFixedCount
:
140 type
= (quantityCount
== 1) ? ByteTerm::TypePatternCasedCharacterOnce
: ByteTerm::TypePatternCasedCharacterFixed
;
142 case QuantifierGreedy
:
143 type
= ByteTerm::TypePatternCasedCharacterGreedy
;
145 case QuantifierNonGreedy
:
146 type
= ByteTerm::TypePatternCasedCharacterNonGreedy
;
// Record both characters and the quantifier in the atom.
150 atom
.casedCharacter
.lo
= lo
;
151 atom
.casedCharacter
.hi
= hi
;
152 atom
.quantityType
= quantityType
;
// The count is stored as the plain unsigned obtained from the Checked<unsigned>
// wrapper via unsafeGet().
153 atom
.quantityCount
= quantityCount
.unsafeGet();
154 inputPosition
= inputPos
;
// Constructs a TypeCharacterClass term for `characterClass`, quantified as
// QuantifierFixedCount with a count of 1, at input position `inputPos`.
// NOTE(review): how `invert` is stored is not visible in this excerpt (the
// remaining member initializers are missing) - confirm against the full header.
157 ByteTerm(CharacterClass
* characterClass
, bool invert
, int inputPos
)
158 : type(ByteTerm::TypeCharacterClass
)
162 atom
.characterClass
= characterClass
;
163 atom
.quantityType
= QuantifierFixedCount
;
164 atom
.quantityCount
= 1;
165 inputPosition
= inputPos
;
// Constructs a parentheses term: records the subpattern id and the nested
// ByteDisjunction (`parenthesesInfo`) in the atom, with a QuantifierFixedCount
// quantifier of 1 and the given input position.
// NOTE(review): the member initializers that store `type` and `capture` are not
// visible in this excerpt - confirm against the full header.
168 ByteTerm(Type type
, unsigned subpatternId
, ByteDisjunction
* parenthesesInfo
, bool capture
, int inputPos
)
173 atom
.subpatternId
= subpatternId
;
174 atom
.parenthesesDisjunction
= parenthesesInfo
;
175 atom
.quantityType
= QuantifierFixedCount
;
176 atom
.quantityCount
= 1;
177 inputPosition
= inputPos
;
// Constructs a term of the given `type` with a QuantifierFixedCount quantifier
// of 1. `invert` defaults to false. The static factory helpers in this header
// use this constructor to create their terms.
// NOTE(review): the member initializers that store `type` and `invert` are not
// visible in this excerpt - confirm against the full header.
180 ByteTerm(Type type
, bool invert
= false)
185 atom
.quantityType
= QuantifierFixedCount
;
186 atom
.quantityCount
= 1;
// Constructs a term of the given `type` that refers to subpattern `subpatternId`,
// with a QuantifierFixedCount quantifier of 1 and the given input position.
// NOTE(review): the member initializers that store `type`, `capture` and `invert`
// are not visible in this excerpt - confirm against the full header.
189 ByteTerm(Type type
, unsigned subpatternId
, bool capture
, bool invert
, int inputPos
)
194 atom
.subpatternId
= subpatternId
;
195 atom
.quantityType
= QuantifierFixedCount
;
196 atom
.quantityCount
= 1;
197 inputPosition
= inputPos
;
// Factory: builds a TypeAssertionBOL term and sets its inputPosition to inputPos.
// NOTE(review): the return statement is not visible in this excerpt; presumably
// the term is returned by value.
200 static ByteTerm
BOL(int inputPos
)
202 ByteTerm
term(TypeAssertionBOL
);
203 term
.inputPosition
= inputPos
;
// Factory: builds a TypeCheckInput term whose checkInputCount is the plain
// unsigned obtained from `count` via unsafeGet().
// NOTE(review): the return statement is not visible in this excerpt; presumably
// the term is returned by value.
207 static ByteTerm
CheckInput(Checked
<unsigned> count
)
209 ByteTerm
term(TypeCheckInput
);
210 term
.checkInputCount
= count
.unsafeGet();
// Factory: builds a TypeUncheckInput term whose checkInputCount is the plain
// unsigned obtained from `count` via unsafeGet().
// NOTE(review): the return statement is not visible in this excerpt; presumably
// the term is returned by value.
214 static ByteTerm
UncheckInput(Checked
<unsigned> count
)
216 ByteTerm
term(TypeUncheckInput
);
217 term
.checkInputCount
= count
.unsafeGet();
// Factory: builds a TypeAssertionEOL term and sets its inputPosition to inputPos.
// NOTE(review): the return statement is not visible in this excerpt; presumably
// the term is returned by value.
221 static ByteTerm
EOL(int inputPos
)
223 ByteTerm
term(TypeAssertionEOL
);
224 term
.inputPosition
= inputPos
;
// Factory: builds a TypeAssertionWordBoundary term, forwarding `invert` to the
// ByteTerm(Type, bool) constructor, and sets its inputPosition to inputPos.
// NOTE(review): the return statement is not visible in this excerpt; presumably
// the term is returned by value.
228 static ByteTerm
WordBoundary(bool invert
, int inputPos
)
230 ByteTerm
term(TypeAssertionWordBoundary
, invert
);
231 term
.inputPosition
= inputPos
;
// Factory: returns a TypeBackReference term for subpattern `subpatternId` at
// input position `inputPos`, passing false for both the capture and invert
// arguments of the five-argument constructor.
235 static ByteTerm
BackReference(unsigned subpatternId
, int inputPos
)
237 return ByteTerm(TypeBackReference
, subpatternId
, false, false, inputPos
);
// Factory: builds a TypeBodyAlternativeBegin term with alternative.next and
// alternative.end zeroed and alternative.onceThrough set from the argument.
// NOTE(review): the return statement is not visible in this excerpt; presumably
// the term is returned by value.
240 static ByteTerm
BodyAlternativeBegin(bool onceThrough
)
242 ByteTerm
term(TypeBodyAlternativeBegin
);
243 term
.alternative
.next
= 0;
244 term
.alternative
.end
= 0;
245 term
.alternative
.onceThrough
= onceThrough
;
// Factory: builds a TypeBodyAlternativeDisjunction term with alternative.next and
// alternative.end zeroed and alternative.onceThrough set from the argument.
// NOTE(review): the return statement is not visible in this excerpt; presumably
// the term is returned by value.
249 static ByteTerm
BodyAlternativeDisjunction(bool onceThrough
)
251 ByteTerm
term(TypeBodyAlternativeDisjunction
);
252 term
.alternative
.next
= 0;
253 term
.alternative
.end
= 0;
254 term
.alternative
.onceThrough
= onceThrough
;
// Factory: builds a TypeBodyAlternativeEnd term with alternative.next and
// alternative.end zeroed and alternative.onceThrough set to false.
// NOTE(review): the return statement is not visible in this excerpt; presumably
// the term is returned by value.
258 static ByteTerm
BodyAlternativeEnd()
260 ByteTerm
term(TypeBodyAlternativeEnd
);
261 term
.alternative
.next
= 0;
262 term
.alternative
.end
= 0;
263 term
.alternative
.onceThrough
= false;
// Factory: builds a TypeAlternativeBegin term with alternative.next and
// alternative.end zeroed and alternative.onceThrough set to false.
// NOTE(review): the return statement is not visible in this excerpt; presumably
// the term is returned by value.
267 static ByteTerm
AlternativeBegin()
269 ByteTerm
term(TypeAlternativeBegin
);
270 term
.alternative
.next
= 0;
271 term
.alternative
.end
= 0;
272 term
.alternative
.onceThrough
= false;
// Factory: builds a TypeAlternativeDisjunction term with alternative.next and
// alternative.end zeroed and alternative.onceThrough set to false.
// NOTE(review): the return statement is not visible in this excerpt; presumably
// the term is returned by value.
276 static ByteTerm
AlternativeDisjunction()
278 ByteTerm
term(TypeAlternativeDisjunction
);
279 term
.alternative
.next
= 0;
280 term
.alternative
.end
= 0;
281 term
.alternative
.onceThrough
= false;
// Factory: builds a TypeAlternativeEnd term with alternative.next and
// alternative.end zeroed and alternative.onceThrough set to false.
// NOTE(review): the return statement is not visible in this excerpt; presumably
// the term is returned by value.
285 static ByteTerm
AlternativeEnd()
287 ByteTerm
term(TypeAlternativeEnd
);
288 term
.alternative
.next
= 0;
289 term
.alternative
.end
= 0;
290 term
.alternative
.onceThrough
= false;
// Factory: returns a term of type TypeSubpatternBegin, built with the
// ByteTerm(Type, bool) constructor and its default `invert` value.
294 static ByteTerm
SubpatternBegin()
296 return ByteTerm(TypeSubpatternBegin
);
// Factory: returns a term of type TypeSubpatternEnd, built with the
// ByteTerm(Type, bool) constructor and its default `invert` value.
299 static ByteTerm
SubpatternEnd()
301 return ByteTerm(TypeSubpatternEnd
);
// Factory: builds a TypeDotStarEnclosure term and records the two anchor flags
// in anchors.m_bol and anchors.m_eol.
// NOTE(review): the return statement is not visible in this excerpt; presumably
// the term is returned by value.
304 static ByteTerm
DotStarEnclosure(bool bolAnchor
, bool eolAnchor
)
306 ByteTerm
term(TypeDotStarEnclosure
);
307 term
.anchors
.m_bol
= bolAnchor
;
308 term
.anchors
.m_eol
= eolAnchor
;
// A sequence of ByteTerms (`terms`) together with the subpattern count and frame
// size it was constructed with.
// NOTE(review): access specifiers, the constructor body and the closing brace are
// not visible in this excerpt - confirm against the full header.
323 class ByteDisjunction
{
324 WTF_MAKE_FAST_ALLOCATED
;
// Stores numSubpatterns and frameSize; `terms` is not touched by the visible
// initializers.
326 ByteDisjunction(unsigned numSubpatterns
, unsigned frameSize
)
327 : m_numSubpatterns(numSubpatterns
)
328 , m_frameSize(frameSize
)
332 Vector
<ByteTerm
> terms
;
333 unsigned m_numSubpatterns
;
334 unsigned m_frameSize
;
// A byte-compiled pattern: the body ByteDisjunction plus the nested parentheses
// disjunctions and user character classes it owns, the newline / word-character
// classes taken from the YarrPattern, and the allocator pointer passed at
// construction.
// NOTE(review): the m_body initializer, the m_ignoreCase / m_multiline member
// declarations, braces and the closing of the struct are not visible in this
// excerpt - confirm against the full header.
337 struct BytecodePattern
{
338 WTF_MAKE_FAST_ALLOCATED
;
// Copies the ignore-case and multiline flags from `pattern`, stores `allocator`,
// and takes over the contents of `parenthesesInfoToAdopt` and
// `pattern.m_userCharacterClasses` by swapping them into this object's vectors
// (so both arguments are modified).
340 BytecodePattern(PassOwnPtr
<ByteDisjunction
> body
, Vector
<OwnPtr
<ByteDisjunction
> >& parenthesesInfoToAdopt
, YarrPattern
& pattern
, BumpPointerAllocator
* allocator
)
342 , m_ignoreCase(pattern
.m_ignoreCase
)
343 , m_multiline(pattern
.m_multiline
)
344 , m_allocator(allocator
)
346 m_body
->terms
.shrinkToFit();
348 newlineCharacterClass
= pattern
.newlineCharacterClass();
349 wordcharCharacterClass
= pattern
.wordcharCharacterClass();
// Swap rather than copy: the vectors hold OwnPtr elements.
351 m_allParenthesesInfo
.swap(parenthesesInfoToAdopt
);
352 m_allParenthesesInfo
.shrinkToFit();
354 m_userCharacterClasses
.swap(pattern
.m_userCharacterClasses
);
355 m_userCharacterClasses
.shrinkToFit();
358 OwnPtr
<ByteDisjunction
> m_body
;
361 // Each BytecodePattern is associated with a RegExp, and each RegExp is associated
362 // with a VM. Cache a pointer to our VM's m_regExpAllocator.
363 BumpPointerAllocator
* m_allocator
;
// Character classes obtained from the YarrPattern in the constructor.
365 CharacterClass
* newlineCharacterClass
;
366 CharacterClass
* wordcharCharacterClass
;
// Storage swapped in from the constructor arguments.
369 Vector
<OwnPtr
<ByteDisjunction
> > m_allParenthesesInfo
;
370 Vector
<OwnPtr
<CharacterClass
> > m_userCharacterClasses
;
// Declaration: takes a YarrPattern by non-const reference and a
// BumpPointerAllocator pointer, and returns a PassOwnPtr<BytecodePattern>.
// NOTE(review): presumably compiles the pattern into bytecode and may modify the
// YarrPattern (the BytecodePattern constructor swaps out its user character
// classes) - confirm against the implementation.
373 JS_EXPORT_PRIVATE PassOwnPtr
<BytecodePattern
> byteCompile(YarrPattern
&, BumpPointerAllocator
*);
// Declarations of the three interpret() overloads. Each takes a BytecodePattern,
// an input (a String, or an LChar / UChar buffer with an explicit length), a
// `start` offset and an `output` array, and returns an unsigned.
// NOTE(review): the meaning of the return value and the required size/layout of
// `output` are not visible in this header - confirm against the implementation.
374 JS_EXPORT_PRIVATE
unsigned interpret(BytecodePattern
*, const String
& input
, unsigned start
, unsigned* output
);
// Overload for LChar input buffers.
375 unsigned interpret(BytecodePattern
*, const LChar
* input
, unsigned length
, unsigned start
, unsigned* output
);
// Overload for UChar input buffers.
376 unsigned interpret(BytecodePattern
*, const UChar
* input
, unsigned length
, unsigned start
, unsigned* output
);
378 } } // namespace JSC::Yarr
380 #endif // YarrInterpreter_h