2 * Copyright (C) 2009 Apple Inc. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 #ifndef RegexInterpreter_h
27 #define RegexInterpreter_h
31 #include "RegexParser.h"
32 #include "RegexPattern.h"
33 #include <wtf/unicode/Unicode.h>
35 namespace JSC
{ namespace Yarr
{
// Forward declaration: a ByteDisjunction pointer is stored as a term field
// (parenthesesDisjunction) before the class itself is defined further down.
37 class ByteDisjunction
;
// Enumerators identifying the kind of a term. The constructors and static
// factory functions that follow pass or assign these values as the term's type.

// Markers for the alternatives of the pattern body; see the
// BodyAlternative*() factories.
41 TypeBodyAlternativeBegin
,
42 TypeBodyAlternativeDisjunction
,
43 TypeBodyAlternativeEnd
,
// Marker used by the AlternativeDisjunction() factory.
45 TypeAlternativeDisjunction
,
// Used by the WordBoundary() factory.
51 TypeAssertionWordBoundary
,
// Single-character kinds; chosen from the quantifier by the
// ByteTerm(UChar ch, ...) constructor.
52 TypePatternCharacterOnce
,
53 TypePatternCharacterFixed
,
54 TypePatternCharacterGreedy
,
55 TypePatternCharacterNonGreedy
,
// Two-character (lo/hi) kinds; chosen from the quantifier by the
// ByteTerm(UChar lo, UChar hi, ...) constructor.
56 TypePatternCasedCharacterOnce
,
57 TypePatternCasedCharacterFixed
,
58 TypePatternCasedCharacterGreedy
,
59 TypePatternCasedCharacterNonGreedy
,
// Parentheses-related term kinds.
62 TypeParenthesesSubpattern
,
63 TypeParenthesesSubpatternOnceBegin
,
64 TypeParenthesesSubpatternOnceEnd
,
65 TypeParentheticalAssertionBegin
,
66 TypeParentheticalAssertionEnd
,
// Data fields of a term. The constructors below reach most of them through
// `atom.` (patternCharacter, characterClass, subpatternId,
// parenthesesDisjunction, quantityType, quantityCount).

// Character to match; written by the ByteTerm(UChar ch, ...) constructor.
73 UChar patternCharacter
;
// Character class to match; written by the ByteTerm(CharacterClass*, ...) constructor.
78 CharacterClass
* characterClass
;
// Subpattern index; written by the constructors that take a subpatternId.
79 unsigned subpatternId
;
// Nested disjunction for a parentheses term; written from the
// `parenthesesInfo` constructor argument.
82 ByteDisjunction
* parenthesesDisjunction
;
// NOTE(review): no visible code in this header writes parenthesesWidth.
83 unsigned parenthesesWidth
;
// Quantifier kind and count. Constructors that take no quantifier set these
// to QuantifierFixedCount and 1.
85 QuantifierType quantityType
;
86 unsigned quantityCount
;
// Written by the CheckInput() factory from its `count` argument.
92 unsigned checkInputCount
;
// Initialized from the `frameLocation` argument of the character constructors.
94 unsigned frameLocation
;
// Builds a term that matches the single character `ch`.
//   ch            - character stored in atom.patternCharacter
//   inputPos      - stored in inputPosition
//   frameLocation - stored in the frameLocation member
//   quantityCount - stored in atom.quantityCount
//   quantityType  - stored in atom.quantityType; also selects the term type
97 ByteTerm(UChar ch
, int inputPos
, unsigned frameLocation
, unsigned quantityCount
, QuantifierType quantityType
)
98 : frameLocation(frameLocation
)
// Pick the TypePatternCharacter* kind that corresponds to the quantifier.
100 switch (quantityType
) {
101 case QuantifierFixedCount
:
// A fixed count of exactly 1 gets the dedicated "Once" kind.
102 type
= (quantityCount
== 1) ? ByteTerm::TypePatternCharacterOnce
: ByteTerm::TypePatternCharacterFixed
;
104 case QuantifierGreedy
:
105 type
= ByteTerm::TypePatternCharacterGreedy
;
107 case QuantifierNonGreedy
:
108 type
= ByteTerm::TypePatternCharacterNonGreedy
;
112 atom
.patternCharacter
= ch
;
113 atom
.quantityType
= quantityType
;
114 atom
.quantityCount
= quantityCount
;
115 inputPosition
= inputPos
;
// Builds a term that matches either of two characters, `lo` or `hi`
// (stored in atom.casedCharacter.lo / .hi).
//   lo, hi        - the two characters stored in atom.casedCharacter
//   inputPos      - stored in inputPosition
//   frameLocation - stored in the frameLocation member
//   quantityCount - stored in atom.quantityCount
//   quantityType  - stored in atom.quantityType; also selects the term type
118 ByteTerm(UChar lo
, UChar hi
, int inputPos
, unsigned frameLocation
, unsigned quantityCount
, QuantifierType quantityType
)
119 : frameLocation(frameLocation
)
// Pick the TypePatternCasedCharacter* kind that corresponds to the quantifier.
121 switch (quantityType
) {
122 case QuantifierFixedCount
:
// A fixed count of exactly 1 gets the dedicated "Once" kind.
123 type
= (quantityCount
== 1) ? ByteTerm::TypePatternCasedCharacterOnce
: ByteTerm::TypePatternCasedCharacterFixed
;
125 case QuantifierGreedy
:
126 type
= ByteTerm::TypePatternCasedCharacterGreedy
;
128 case QuantifierNonGreedy
:
129 type
= ByteTerm::TypePatternCasedCharacterNonGreedy
;
133 atom
.casedCharacter
.lo
= lo
;
134 atom
.casedCharacter
.hi
= hi
;
135 atom
.quantityType
= quantityType
;
136 atom
.quantityCount
= quantityCount
;
137 inputPosition
= inputPos
;
// Builds a TypeCharacterClass term.
//   characterClass - stored in atom.characterClass (pointer is copied, not the class)
//   invert         - stored in the shared invertOrCapture flag
//   inputPos       - stored in inputPosition
// The quantifier is always set to QuantifierFixedCount with a count of 1.
140 ByteTerm(CharacterClass
* characterClass
, bool invert
, int inputPos
)
141 : type(ByteTerm::TypeCharacterClass
)
142 , invertOrCapture(invert
)
144 atom
.characterClass
= characterClass
;
145 atom
.quantityType
= QuantifierFixedCount
;
146 atom
.quantityCount
= 1;
147 inputPosition
= inputPos
;
// Builds a term that refers to a nested disjunction.
//   type            - term kind supplied by the caller
//   subpatternId    - stored in atom.subpatternId
//   parenthesesInfo - stored in atom.parenthesesDisjunction (pointer is copied)
//   invertOrCapture - stored in the member of the same name
//   inputPos        - stored in inputPosition
// The quantifier is always set to QuantifierFixedCount with a count of 1.
150 ByteTerm(Type type
, unsigned subpatternId
, ByteDisjunction
* parenthesesInfo
, bool invertOrCapture
, int inputPos
)
152 , invertOrCapture(invertOrCapture
)
154 atom
.subpatternId
= subpatternId
;
155 atom
.parenthesesDisjunction
= parenthesesInfo
;
156 atom
.quantityType
= QuantifierFixedCount
;
157 atom
.quantityCount
= 1;
158 inputPosition
= inputPos
;
// Builds a term of the given kind with no payload.
//   type   - term kind supplied by the caller
//   invert - stored in invertOrCapture; defaults to false
// The quantifier is set to QuantifierFixedCount with a count of 1. This
// constructor does not assign inputPosition; the static factories below that
// use it set that field afterwards where they need it.
161 ByteTerm(Type type
, bool invert
= false)
163 , invertOrCapture(invert
)
165 atom
.quantityType
= QuantifierFixedCount
;
166 atom
.quantityCount
= 1;
// Builds a term that carries a subpattern id but no nested disjunction
// (used by BackReference() below).
//   type            - term kind supplied by the caller
//   subpatternId    - stored in atom.subpatternId
//   invertOrCapture - stored in the member of the same name
//   inputPos        - stored in inputPosition
// The quantifier is always set to QuantifierFixedCount with a count of 1.
169 ByteTerm(Type type
, unsigned subpatternId
, bool invertOrCapture
, int inputPos
)
171 , invertOrCapture(invertOrCapture
)
173 atom
.subpatternId
= subpatternId
;
174 atom
.quantityType
= QuantifierFixedCount
;
175 atom
.quantityCount
= 1;
176 inputPosition
= inputPos
;
// Factory: builds a TypeAssertionBOL term whose inputPosition is `inputPos`.
179 static ByteTerm
BOL(int inputPos
)
181 ByteTerm
term(TypeAssertionBOL
);
182 term
.inputPosition
= inputPos
;
// Factory: builds a TypeCheckInput term whose checkInputCount is `count`.
186 static ByteTerm
CheckInput(unsigned count
)
188 ByteTerm
term(TypeCheckInput
);
189 term
.checkInputCount
= count
;
// Factory: builds a TypeAssertionEOL term whose inputPosition is `inputPos`.
193 static ByteTerm
EOL(int inputPos
)
195 ByteTerm
term(TypeAssertionEOL
);
196 term
.inputPosition
= inputPos
;
// Factory: builds a TypeAssertionWordBoundary term.
//   invert   - forwarded to the (Type, bool) constructor, which stores it in
//              invertOrCapture
//   inputPos - stored in inputPosition
200 static ByteTerm
WordBoundary(bool invert
, int inputPos
)
202 ByteTerm
term(TypeAssertionWordBoundary
, invert
);
203 term
.inputPosition
= inputPos
;
// Factory: returns a TypeBackReference term for `subpatternId` at `inputPos`.
// The invertOrCapture argument is passed as false.
207 static ByteTerm
BackReference(unsigned subpatternId
, int inputPos
)
209 return ByteTerm(TypeBackReference
, subpatternId
, false, inputPos
);
// Factory: builds a TypeBodyAlternativeBegin term with alternative.next and
// alternative.end both zeroed.
212 static ByteTerm
BodyAlternativeBegin()
214 ByteTerm
term(TypeBodyAlternativeBegin
);
215 term
.alternative
.next
= 0;
216 term
.alternative
.end
= 0;
// Factory: builds a TypeBodyAlternativeDisjunction term with alternative.next
// and alternative.end both zeroed.
220 static ByteTerm
BodyAlternativeDisjunction()
222 ByteTerm
term(TypeBodyAlternativeDisjunction
);
223 term
.alternative
.next
= 0;
224 term
.alternative
.end
= 0;
// Factory: builds a TypeBodyAlternativeEnd term with alternative.next and
// alternative.end both zeroed.
228 static ByteTerm
BodyAlternativeEnd()
230 ByteTerm
term(TypeBodyAlternativeEnd
);
231 term
.alternative
.next
= 0;
232 term
.alternative
.end
= 0;
// Factory: builds a TypeAlternativeBegin term with alternative.next and
// alternative.end both zeroed.
236 static ByteTerm
AlternativeBegin()
238 ByteTerm
term(TypeAlternativeBegin
);
239 term
.alternative
.next
= 0;
240 term
.alternative
.end
= 0;
// Factory: builds a TypeAlternativeDisjunction term with alternative.next and
// alternative.end both zeroed.
244 static ByteTerm
AlternativeDisjunction()
246 ByteTerm
term(TypeAlternativeDisjunction
);
247 term
.alternative
.next
= 0;
248 term
.alternative
.end
= 0;
// Factory: builds a TypeAlternativeEnd term with alternative.next and
// alternative.end both zeroed.
252 static ByteTerm
AlternativeEnd()
254 ByteTerm
term(TypeAlternativeEnd
);
255 term
.alternative
.next
= 0;
256 term
.alternative
.end
= 0;
// Factory: returns a TypeSubpatternBegin term built with the (Type, bool)
// constructor, so invertOrCapture takes its default of false.
260 static ByteTerm
SubpatternBegin()
262 return ByteTerm(TypeSubpatternBegin
);
// Factory: returns a TypeSubpatternEnd term built with the (Type, bool)
// constructor, so invertOrCapture takes its default of false.
265 static ByteTerm
SubpatternEnd()
267 return ByteTerm(TypeSubpatternEnd
);
272 return invertOrCapture
;
277 return invertOrCapture
;
// A sequence of ByteTerms together with the subpattern count and frame size
// recorded for it. Derives publicly from FastAllocBase.
281 class ByteDisjunction
: public FastAllocBase
{
// Stores both arguments in the matching members; `terms` starts out
// default-constructed.
283 ByteDisjunction(unsigned numSubpatterns
, unsigned frameSize
)
284 : m_numSubpatterns(numSubpatterns
)
285 , m_frameSize(frameSize
)
// The terms making up this disjunction.
289 Vector
<ByteTerm
> terms
;
// Values supplied to the constructor.
290 unsigned m_numSubpatterns
;
291 unsigned m_frameSize
;
// Bundles a compiled regex body with the flags and auxiliary objects copied or
// taken over from the RegexPattern it was built from. Derives from FastAllocBase.
294 struct BytecodePattern
: FastAllocBase
{
// Constructor.
//   body               - top-level disjunction (m_body is an OwnPtr<ByteDisjunction>)
//   allParenthesesInfo - disjunction pointers appended to m_allParenthesesInfo
//   pattern            - source of the flags and character classes; it is
//                        MODIFIED: its m_userCharacterClasses is cleared
// NOTE(review): allParenthesesInfo is taken by value, so the Vector is copied
// at the call and its contents copied again by append(); a const reference
// would avoid the first copy.
295 BytecodePattern(ByteDisjunction
* body
, Vector
<ByteDisjunction
*> allParenthesesInfo
, RegexPattern
& pattern
)
297 , m_ignoreCase(pattern
.m_ignoreCase
)
298 , m_multiline(pattern
.m_multiline
)
// Cache the pattern's built-in character class pointers.
300 newlineCharacterClass
= pattern
.newlineCharacterClass();
301 wordcharCharacterClass
= pattern
.wordcharCharacterClass();
303 m_allParenthesesInfo
.append(allParenthesesInfo
);
304 m_userCharacterClasses
.append(pattern
.m_userCharacterClasses
);
305 // 'Steal' the RegexPattern's CharacterClasses! We clear its
306 // array, so that it won't delete them on destruction. We'll
307 // take responsibility for that.
308 pattern
.m_userCharacterClasses
.clear();
// Cleanup of the pointers held in the two vectors below (presumably the
// destructor body; its signature is not visible here).
313 deleteAllValues(m_allParenthesesInfo
);
314 deleteAllValues(m_userCharacterClasses
);
// Top-level disjunction, held through an OwnPtr.
317 OwnPtr
<ByteDisjunction
> m_body
;
// Pointers obtained from the RegexPattern in the constructor. They are not
// passed to deleteAllValues above.
321 CharacterClass
* newlineCharacterClass
;
322 CharacterClass
* wordcharCharacterClass
;
// Pointer collections released with deleteAllValues above.
324 Vector
<ByteDisjunction
*> m_allParenthesesInfo
;
325 Vector
<CharacterClass
*> m_userCharacterClasses
;
// Declaration only; the definition is not in this header.
//   pattern        - regex source text
//   numSubpatterns - passed by non-const reference (presumably an output; confirm
//                    against the definition)
//   error          - passed by non-const reference (presumably receives an error
//                    message on failure; confirm against the definition)
//   ignoreCase     - defaults to false
//   multiline      - defaults to false
// Returns a BytecodePattern pointer. NOTE(review): ownership of the returned
// object and the failure return value are not visible here.
328 BytecodePattern
* byteCompileRegex(const UString
& pattern
, unsigned& numSubpatterns
, const char*& error
, bool ignoreCase
= false, bool multiline
= false);
// Declaration only; the definition is not in this header.
//   v_regex - compiled pattern (e.g. as returned by byteCompileRegex)
//   input   - UChar buffer to match against
//   start   - unsigned offset into the input
//   length  - unsigned length of the input
//   output  - int buffer written by the interpreter
// NOTE(review): the meaning of the int return value and the required size and
// layout of `output` are not visible here; check the definition.
329 int interpretRegex(BytecodePattern
* v_regex
, const UChar
* input
, unsigned start
, unsigned length
, int* output
);
331 } } // namespace JSC::Yarr
335 #endif // RegexInterpreter_h