2 * Copyright (C) 2009 Apple Inc. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 #ifndef RegexInterpreter_h
27 #define RegexInterpreter_h
29 #include <wtf/Platform.h>
33 #include <wtf/unicode/Unicode.h>
34 #include "RegexParser.h"
35 #include "RegexPattern.h"
37 namespace JSC
{ namespace Yarr
{
// Forward declaration: ByteDisjunction is referred to by pointer in the term
// fields and constructors below, and is fully defined later in this file.
39 class ByteDisjunction
;
// Term-type enumerators. Code further down selects among them with the
// ByteTerm:: qualifier (e.g. ByteTerm::TypePatternCharacterOnce).
// NOTE(review): the enclosing enum header is not visible in this excerpt, and
// several enumerators used below (TypeAssertionBOL, TypeAssertionEOL,
// TypeCheckInput, TypeCharacterClass, TypeBackReference, TypeAlternativeBegin,
// TypeAlternativeEnd, TypeSubpatternBegin, TypeSubpatternEnd) are not listed
// here - confirm against the full file.

// Produced by BodyAlternativeBegin(), BodyAlternativeDisjunction() and
// BodyAlternativeEnd() respectively.
43 TypeBodyAlternativeBegin
,
44 TypeBodyAlternativeDisjunction
,
45 TypeBodyAlternativeEnd
,
// Produced by AlternativeDisjunction().
47 TypeAlternativeDisjunction
,
// Produced by WordBoundary().
53 TypeAssertionWordBoundary
,
// Chosen by the (UChar ch, ...) constructor according to the quantifier.
54 TypePatternCharacterOnce
,
55 TypePatternCharacterFixed
,
56 TypePatternCharacterGreedy
,
57 TypePatternCharacterNonGreedy
,
// Chosen by the (UChar lo, UChar hi, ...) constructor according to the quantifier.
58 TypePatternCasedCharacterOnce
,
59 TypePatternCasedCharacterFixed
,
60 TypePatternCasedCharacterGreedy
,
61 TypePatternCasedCharacterNonGreedy
,
// Parentheses-related term types; no use of these is visible in this excerpt.
64 TypeParenthesesSubpattern
,
65 TypeParenthesesSubpatternOnceBegin
,
66 TypeParenthesesSubpatternOnceEnd
,
67 TypeParentheticalAssertionBegin
,
68 TypeParentheticalAssertionEnd
,
// Per-term payload fields. The constructors below write most of them through
// the `atom` member (atom.patternCharacter, atom.characterClass, ...).
// NOTE(review): the enclosing union/struct declarations, and the declarations
// of casedCharacter, alternative, type, invertOrCapture and inputPosition that
// the code below uses, are not visible in this excerpt.

// Written by the (UChar ch, ...) constructor.
75 UChar patternCharacter
;
// Written by the (CharacterClass*, bool, int) constructor.
80 CharacterClass
* characterClass
;
// Written by the two constructors that take a subpatternId argument.
81 unsigned subpatternId
;
// Written from the `parenthesesInfo` constructor argument.
84 ByteDisjunction
* parenthesesDisjunction
;
// No write to this field is visible in this excerpt.
85 unsigned parenthesesWidth
;
// Quantifier kind and count. The character constructors copy them from their
// arguments; every other constructor sets QuantifierFixedCount and 1.
87 QuantifierType quantityType
;
88 unsigned quantityCount
;
// Written by CheckInput().
94 unsigned checkInputCount
;
// Initialized by the two character constructors from their frameLocation argument.
96 unsigned frameLocation
;
// Constructs a pattern-character term for `ch`.
//
// The term type is derived from `quantityType`:
//   - QuantifierFixedCount: TypePatternCharacterOnce when quantityCount == 1,
//     otherwise TypePatternCharacterFixed;
//   - QuantifierGreedy:     TypePatternCharacterGreedy;
//   - QuantifierNonGreedy:  TypePatternCharacterNonGreedy.
// The character, quantifier kind and count are stored in `atom`; frameLocation
// and inputPosition are stored directly on the term.
//
// NOTE(review): the statements that separate the switch cases (and the end of
// the switch) are not visible in this excerpt - confirm each case terminates
// before the next one begins.
99 ByteTerm(UChar ch
, int inputPos
, unsigned frameLocation
, unsigned quantityCount
, QuantifierType quantityType
)
100 : frameLocation(frameLocation
)
102 switch (quantityType
) {
103 case QuantifierFixedCount
:
// A fixed count of exactly one gets the dedicated "Once" type.
104 type
= (quantityCount
== 1) ? ByteTerm::TypePatternCharacterOnce
: ByteTerm::TypePatternCharacterFixed
;
106 case QuantifierGreedy
:
107 type
= ByteTerm::TypePatternCharacterGreedy
;
109 case QuantifierNonGreedy
:
110 type
= ByteTerm::TypePatternCharacterNonGreedy
;
// Record the payload after the type has been chosen.
114 atom
.patternCharacter
= ch
;
115 atom
.quantityType
= quantityType
;
116 atom
.quantityCount
= quantityCount
;
117 inputPosition
= inputPos
;
// Constructs a "cased" pattern-character term from a lo/hi character pair.
//
// The term type is derived from `quantityType` in the same way as the
// single-character constructor, but using the TypePatternCasedCharacter*
// enumerators (Once when a fixed count equals 1, otherwise Fixed; Greedy;
// NonGreedy). `lo` and `hi` are stored in atom.casedCharacter, the quantifier
// in atom, and frameLocation / inputPosition directly on the term.
//
// NOTE(review): the statements that separate the switch cases (and the end of
// the switch) are not visible in this excerpt - confirm each case terminates
// before the next one begins.
120 ByteTerm(UChar lo
, UChar hi
, int inputPos
, unsigned frameLocation
, unsigned quantityCount
, QuantifierType quantityType
)
121 : frameLocation(frameLocation
)
123 switch (quantityType
) {
124 case QuantifierFixedCount
:
// A fixed count of exactly one gets the dedicated "Once" type.
125 type
= (quantityCount
== 1) ? ByteTerm::TypePatternCasedCharacterOnce
: ByteTerm::TypePatternCasedCharacterFixed
;
127 case QuantifierGreedy
:
128 type
= ByteTerm::TypePatternCasedCharacterGreedy
;
130 case QuantifierNonGreedy
:
131 type
= ByteTerm::TypePatternCasedCharacterNonGreedy
;
// Record both characters of the pair, then the quantifier and position.
135 atom
.casedCharacter
.lo
= lo
;
136 atom
.casedCharacter
.hi
= hi
;
137 atom
.quantityType
= quantityType
;
138 atom
.quantityCount
= quantityCount
;
139 inputPosition
= inputPos
;
// Constructs a TypeCharacterClass term for `characterClass`.
// `invert` is stored in invertOrCapture; the quantifier is set to a fixed
// count of 1 and inputPosition is taken from `inputPos`.
142 ByteTerm(CharacterClass
* characterClass
, bool invert
, int inputPos
)
143 : type(ByteTerm::TypeCharacterClass
)
144 , invertOrCapture(invert
)
// Only the pointer is stored; nothing here copies or frees the class.
146 atom
.characterClass
= characterClass
;
147 atom
.quantityType
= QuantifierFixedCount
;
148 atom
.quantityCount
= 1;
149 inputPosition
= inputPos
;
// Constructs a term that carries a subpattern id and a pointer to a
// ByteDisjunction (`parenthesesInfo`).
// invertOrCapture is stored as given; the quantifier is set to a fixed count
// of 1 and inputPosition is taken from `inputPos`.
// NOTE(review): the initializer that consumes the `type` argument is not
// visible in this excerpt.
152 ByteTerm(Type type
, unsigned subpatternId
, ByteDisjunction
* parenthesesInfo
, bool invertOrCapture
, int inputPos
)
154 , invertOrCapture(invertOrCapture
)
156 atom
.subpatternId
= subpatternId
;
// Only the pointer is stored here.
157 atom
.parenthesesDisjunction
= parenthesesInfo
;
158 atom
.quantityType
= QuantifierFixedCount
;
159 atom
.quantityCount
= 1;
160 inputPosition
= inputPos
;
// Constructs a term from a type and an optional `invert` flag (default false).
// The static factory helpers below (BOL, EOL, CheckInput, ...) build their
// terms through this constructor. The quantifier is set to a fixed count of 1;
// inputPosition is not assigned in the lines visible here.
// NOTE(review): the initializer that consumes the `type` argument is not
// visible in this excerpt.
163 ByteTerm(Type type
, bool invert
= false)
165 , invertOrCapture(invert
)
167 atom
.quantityType
= QuantifierFixedCount
;
168 atom
.quantityCount
= 1;
// Constructs a term from a type, a subpattern id, the invertOrCapture flag
// and an input position; BackReference() builds its term through this
// constructor. The quantifier is set to a fixed count of 1.
// NOTE(review): the initializer that consumes the `type` argument is not
// visible in this excerpt.
171 ByteTerm(Type type
, unsigned subpatternId
, bool invertOrCapture
, int inputPos
)
173 , invertOrCapture(invertOrCapture
)
175 atom
.subpatternId
= subpatternId
;
176 atom
.quantityType
= QuantifierFixedCount
;
177 atom
.quantityCount
= 1;
178 inputPosition
= inputPos
;
// Factory: builds a TypeAssertionBOL term and sets its inputPosition to `inputPos`.
181 static ByteTerm
BOL(int inputPos
)
183 ByteTerm
term(TypeAssertionBOL
);
184 term
.inputPosition
= inputPos
;
// Factory: builds a TypeCheckInput term and stores `count` in checkInputCount.
188 static ByteTerm
CheckInput(unsigned count
)
190 ByteTerm
term(TypeCheckInput
);
191 term
.checkInputCount
= count
;
// Factory: builds a TypeAssertionEOL term and sets its inputPosition to `inputPos`.
195 static ByteTerm
EOL(int inputPos
)
197 ByteTerm
term(TypeAssertionEOL
);
198 term
.inputPosition
= inputPos
;
// Factory: builds a TypeAssertionWordBoundary term, forwarding `invert` to the
// (Type, bool) constructor, and sets its inputPosition to `inputPos`.
202 static ByteTerm
WordBoundary(bool invert
, int inputPos
)
204 ByteTerm
term(TypeAssertionWordBoundary
, invert
);
205 term
.inputPosition
= inputPos
;
// Factory: returns a TypeBackReference term for `subpatternId` at `inputPos`.
// The invertOrCapture argument of the constructor is always passed as false.
209 static ByteTerm
BackReference(unsigned subpatternId
, int inputPos
)
211 return ByteTerm(TypeBackReference
, subpatternId
, false, inputPos
);
// Factory: builds a TypeBodyAlternativeBegin term with alternative.next and
// alternative.end both set to 0.
214 static ByteTerm
BodyAlternativeBegin()
216 ByteTerm
term(TypeBodyAlternativeBegin
);
217 term
.alternative
.next
= 0;
218 term
.alternative
.end
= 0;
// Factory: builds a TypeBodyAlternativeDisjunction term with alternative.next
// and alternative.end both set to 0.
222 static ByteTerm
BodyAlternativeDisjunction()
224 ByteTerm
term(TypeBodyAlternativeDisjunction
);
225 term
.alternative
.next
= 0;
226 term
.alternative
.end
= 0;
// Factory: builds a TypeBodyAlternativeEnd term with alternative.next and
// alternative.end both set to 0.
230 static ByteTerm
BodyAlternativeEnd()
232 ByteTerm
term(TypeBodyAlternativeEnd
);
233 term
.alternative
.next
= 0;
234 term
.alternative
.end
= 0;
// Factory: builds a TypeAlternativeBegin term with alternative.next and
// alternative.end both set to 0.
238 static ByteTerm
AlternativeBegin()
240 ByteTerm
term(TypeAlternativeBegin
);
241 term
.alternative
.next
= 0;
242 term
.alternative
.end
= 0;
// Factory: builds a TypeAlternativeDisjunction term with alternative.next and
// alternative.end both set to 0.
246 static ByteTerm
AlternativeDisjunction()
248 ByteTerm
term(TypeAlternativeDisjunction
);
249 term
.alternative
.next
= 0;
250 term
.alternative
.end
= 0;
// Factory: builds a TypeAlternativeEnd term with alternative.next and
// alternative.end both set to 0.
254 static ByteTerm
AlternativeEnd()
256 ByteTerm
term(TypeAlternativeEnd
);
257 term
.alternative
.next
= 0;
258 term
.alternative
.end
= 0;
// Factory: returns a term constructed with TypeSubpatternBegin and the default
// (false) invert flag.
262 static ByteTerm
SubpatternBegin()
264 return ByteTerm(TypeSubpatternBegin
);
// Factory: returns a term constructed with TypeSubpatternEnd and the default
// (false) invert flag.
267 static ByteTerm
SubpatternEnd()
269 return ByteTerm(TypeSubpatternEnd
);
// Two accessor bodies, both returning the shared invertOrCapture flag.
// NOTE(review): the accessor signatures are not visible in this excerpt;
// presumably one exposes the flag as "invert" and the other as "capture" -
// confirm against the full file.
274 return invertOrCapture
;
279 return invertOrCapture
;
// Holds a vector of ByteTerms together with the subpattern count and frame
// size supplied at construction. Derives from FastAllocBase.
283 class ByteDisjunction
: public FastAllocBase
{
// Stores both arguments in the matching m_ members; `terms` starts out
// default-constructed.
285 ByteDisjunction(unsigned numSubpatterns
, unsigned frameSize
)
286 : m_numSubpatterns(numSubpatterns
)
287 , m_frameSize(frameSize
)
// The terms of this disjunction, held by value.
291 Vector
<ByteTerm
> terms
;
292 unsigned m_numSubpatterns
;
293 unsigned m_frameSize
;
// Top-level pattern object: owns a body ByteDisjunction (m_body), keeps the
// list of parentheses disjunctions and the user character classes, and holds
// the newline / wordchar character class pointers obtained from a
// RegexPattern. Derives from FastAllocBase.
296 struct BytecodePattern
: FastAllocBase
{
// Copies the ignore-case and multiline flags from `pattern`, fetches its
// newline and wordchar character classes, appends `allParenthesesInfo` to
// m_allParenthesesInfo, and takes over `pattern`'s user character classes
// (appending them here, then clearing the source vector).
// Note that `allParenthesesInfo` is received by value.
// NOTE(review): the initializer that consumes `body` is not visible in this
// excerpt; presumably it initialises m_body - confirm.
297 BytecodePattern(ByteDisjunction
* body
, Vector
<ByteDisjunction
*> allParenthesesInfo
, RegexPattern
& pattern
)
299 , m_ignoreCase(pattern
.m_ignoreCase
)
300 , m_multiline(pattern
.m_multiline
)
302 newlineCharacterClass
= pattern
.newlineCharacterClass();
303 wordcharCharacterClass
= pattern
.wordcharCharacterClass();
305 m_allParenthesesInfo
.append(allParenthesesInfo
);
306 m_userCharacterClasses
.append(pattern
.m_userCharacterClasses
);
307 // 'Steal' the RegexPattern's CharacterClasses! We clear its
308 // array, so that it won't delete them on destruction. We'll
309 // take responsibility for that.
310 pattern
.m_userCharacterClasses
.clear();
// Deletes every pointer held in the two vectors.
// NOTE(review): presumably the destructor body - its signature is not
// visible in this excerpt.
315 deleteAllValues(m_allParenthesesInfo
);
316 deleteAllValues(m_userCharacterClasses
);
// Body disjunction, held through OwnPtr.
319 OwnPtr
<ByteDisjunction
> m_body
;
// Assigned in the constructor from pattern.newlineCharacterClass() and
// pattern.wordcharCharacterClass(); not passed to deleteAllValues above.
323 CharacterClass
* newlineCharacterClass
;
324 CharacterClass
* wordcharCharacterClass
;
// Pointer vectors whose elements are deleted via deleteAllValues above.
326 Vector
<ByteDisjunction
*> m_allParenthesesInfo
;
327 Vector
<CharacterClass
*> m_userCharacterClasses
;
// Declaration only (the definition is not in this header excerpt).
// Takes the pattern string plus optional ignoreCase / multiline flags (both
// default to false) and returns a BytecodePattern pointer.
// `numSubpatterns` and `error` are non-const references; presumably they are
// outputs filled in by the call - confirm against the definition.
330 BytecodePattern
* byteCompileRegex(const UString
& pattern
, unsigned& numSubpatterns
, const char*& error
, bool ignoreCase
= false, bool multiline
= false);
// Declaration only (the definition is not in this header excerpt).
// Takes a BytecodePattern, a UChar input buffer with `start` and `length`
// values, and an int output buffer; returns an int.
// NOTE(review): the meaning of the return value and the layout of `output`
// are not established by this header - see the definition.
331 int interpretRegex(BytecodePattern
* v_regex
, const UChar
* input
, unsigned start
, unsigned length
, int* output
);
333 } } // namespace JSC::Yarr
337 #endif // RegexInterpreter_h