]> git.saurik.com Git - apple/javascriptcore.git/blob - yarr/RegexInterpreter.h
JavaScriptCore-621.1.tar.gz
[apple/javascriptcore.git] / yarr / RegexInterpreter.h
1 /*
2 * Copyright (C) 2009 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26 #ifndef RegexInterpreter_h
27 #define RegexInterpreter_h
28
29 #if ENABLE(YARR)
30
31 #include "RegexParser.h"
32 #include "RegexPattern.h"
33 #include <wtf/unicode/Unicode.h>
34
35 namespace JSC { namespace Yarr {
36
37 class ByteDisjunction;
38
39 struct ByteTerm {
40 enum Type {
41 TypeBodyAlternativeBegin,
42 TypeBodyAlternativeDisjunction,
43 TypeBodyAlternativeEnd,
44 TypeAlternativeBegin,
45 TypeAlternativeDisjunction,
46 TypeAlternativeEnd,
47 TypeSubpatternBegin,
48 TypeSubpatternEnd,
49 TypeAssertionBOL,
50 TypeAssertionEOL,
51 TypeAssertionWordBoundary,
52 TypePatternCharacterOnce,
53 TypePatternCharacterFixed,
54 TypePatternCharacterGreedy,
55 TypePatternCharacterNonGreedy,
56 TypePatternCasedCharacterOnce,
57 TypePatternCasedCharacterFixed,
58 TypePatternCasedCharacterGreedy,
59 TypePatternCasedCharacterNonGreedy,
60 TypeCharacterClass,
61 TypeBackReference,
62 TypeParenthesesSubpattern,
63 TypeParenthesesSubpatternOnceBegin,
64 TypeParenthesesSubpatternOnceEnd,
65 TypeParentheticalAssertionBegin,
66 TypeParentheticalAssertionEnd,
67 TypeCheckInput,
68 } type;
69 bool invertOrCapture;
70 union {
71 struct {
72 union {
73 UChar patternCharacter;
74 struct {
75 UChar lo;
76 UChar hi;
77 } casedCharacter;
78 CharacterClass* characterClass;
79 unsigned subpatternId;
80 };
81 union {
82 ByteDisjunction* parenthesesDisjunction;
83 unsigned parenthesesWidth;
84 };
85 QuantifierType quantityType;
86 unsigned quantityCount;
87 } atom;
88 struct {
89 int next;
90 int end;
91 } alternative;
92 unsigned checkInputCount;
93 };
94 unsigned frameLocation;
95 int inputPosition;
96
97 ByteTerm(UChar ch, int inputPos, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
98 : frameLocation(frameLocation)
99 {
100 switch (quantityType) {
101 case QuantifierFixedCount:
102 type = (quantityCount == 1) ? ByteTerm::TypePatternCharacterOnce : ByteTerm::TypePatternCharacterFixed;
103 break;
104 case QuantifierGreedy:
105 type = ByteTerm::TypePatternCharacterGreedy;
106 break;
107 case QuantifierNonGreedy:
108 type = ByteTerm::TypePatternCharacterNonGreedy;
109 break;
110 }
111
112 atom.patternCharacter = ch;
113 atom.quantityType = quantityType;
114 atom.quantityCount = quantityCount;
115 inputPosition = inputPos;
116 }
117
118 ByteTerm(UChar lo, UChar hi, int inputPos, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
119 : frameLocation(frameLocation)
120 {
121 switch (quantityType) {
122 case QuantifierFixedCount:
123 type = (quantityCount == 1) ? ByteTerm::TypePatternCasedCharacterOnce : ByteTerm::TypePatternCasedCharacterFixed;
124 break;
125 case QuantifierGreedy:
126 type = ByteTerm::TypePatternCasedCharacterGreedy;
127 break;
128 case QuantifierNonGreedy:
129 type = ByteTerm::TypePatternCasedCharacterNonGreedy;
130 break;
131 }
132
133 atom.casedCharacter.lo = lo;
134 atom.casedCharacter.hi = hi;
135 atom.quantityType = quantityType;
136 atom.quantityCount = quantityCount;
137 inputPosition = inputPos;
138 }
139
140 ByteTerm(CharacterClass* characterClass, bool invert, int inputPos)
141 : type(ByteTerm::TypeCharacterClass)
142 , invertOrCapture(invert)
143 {
144 atom.characterClass = characterClass;
145 atom.quantityType = QuantifierFixedCount;
146 atom.quantityCount = 1;
147 inputPosition = inputPos;
148 }
149
150 ByteTerm(Type type, unsigned subpatternId, ByteDisjunction* parenthesesInfo, bool invertOrCapture, int inputPos)
151 : type(type)
152 , invertOrCapture(invertOrCapture)
153 {
154 atom.subpatternId = subpatternId;
155 atom.parenthesesDisjunction = parenthesesInfo;
156 atom.quantityType = QuantifierFixedCount;
157 atom.quantityCount = 1;
158 inputPosition = inputPos;
159 }
160
161 ByteTerm(Type type, bool invert = false)
162 : type(type)
163 , invertOrCapture(invert)
164 {
165 atom.quantityType = QuantifierFixedCount;
166 atom.quantityCount = 1;
167 }
168
169 ByteTerm(Type type, unsigned subpatternId, bool invertOrCapture, int inputPos)
170 : type(type)
171 , invertOrCapture(invertOrCapture)
172 {
173 atom.subpatternId = subpatternId;
174 atom.quantityType = QuantifierFixedCount;
175 atom.quantityCount = 1;
176 inputPosition = inputPos;
177 }
178
179 static ByteTerm BOL(int inputPos)
180 {
181 ByteTerm term(TypeAssertionBOL);
182 term.inputPosition = inputPos;
183 return term;
184 }
185
186 static ByteTerm CheckInput(unsigned count)
187 {
188 ByteTerm term(TypeCheckInput);
189 term.checkInputCount = count;
190 return term;
191 }
192
193 static ByteTerm EOL(int inputPos)
194 {
195 ByteTerm term(TypeAssertionEOL);
196 term.inputPosition = inputPos;
197 return term;
198 }
199
200 static ByteTerm WordBoundary(bool invert, int inputPos)
201 {
202 ByteTerm term(TypeAssertionWordBoundary, invert);
203 term.inputPosition = inputPos;
204 return term;
205 }
206
207 static ByteTerm BackReference(unsigned subpatternId, int inputPos)
208 {
209 return ByteTerm(TypeBackReference, subpatternId, false, inputPos);
210 }
211
212 static ByteTerm BodyAlternativeBegin()
213 {
214 ByteTerm term(TypeBodyAlternativeBegin);
215 term.alternative.next = 0;
216 term.alternative.end = 0;
217 return term;
218 }
219
220 static ByteTerm BodyAlternativeDisjunction()
221 {
222 ByteTerm term(TypeBodyAlternativeDisjunction);
223 term.alternative.next = 0;
224 term.alternative.end = 0;
225 return term;
226 }
227
228 static ByteTerm BodyAlternativeEnd()
229 {
230 ByteTerm term(TypeBodyAlternativeEnd);
231 term.alternative.next = 0;
232 term.alternative.end = 0;
233 return term;
234 }
235
236 static ByteTerm AlternativeBegin()
237 {
238 ByteTerm term(TypeAlternativeBegin);
239 term.alternative.next = 0;
240 term.alternative.end = 0;
241 return term;
242 }
243
244 static ByteTerm AlternativeDisjunction()
245 {
246 ByteTerm term(TypeAlternativeDisjunction);
247 term.alternative.next = 0;
248 term.alternative.end = 0;
249 return term;
250 }
251
252 static ByteTerm AlternativeEnd()
253 {
254 ByteTerm term(TypeAlternativeEnd);
255 term.alternative.next = 0;
256 term.alternative.end = 0;
257 return term;
258 }
259
260 static ByteTerm SubpatternBegin()
261 {
262 return ByteTerm(TypeSubpatternBegin);
263 }
264
265 static ByteTerm SubpatternEnd()
266 {
267 return ByteTerm(TypeSubpatternEnd);
268 }
269
270 bool invert()
271 {
272 return invertOrCapture;
273 }
274
275 bool capture()
276 {
277 return invertOrCapture;
278 }
279 };
280
281 class ByteDisjunction : public FastAllocBase {
282 public:
283 ByteDisjunction(unsigned numSubpatterns, unsigned frameSize)
284 : m_numSubpatterns(numSubpatterns)
285 , m_frameSize(frameSize)
286 {
287 }
288
289 Vector<ByteTerm> terms;
290 unsigned m_numSubpatterns;
291 unsigned m_frameSize;
292 };
293
294 struct BytecodePattern : FastAllocBase {
295 BytecodePattern(ByteDisjunction* body, Vector<ByteDisjunction*> allParenthesesInfo, RegexPattern& pattern)
296 : m_body(body)
297 , m_ignoreCase(pattern.m_ignoreCase)
298 , m_multiline(pattern.m_multiline)
299 {
300 newlineCharacterClass = pattern.newlineCharacterClass();
301 wordcharCharacterClass = pattern.wordcharCharacterClass();
302
303 m_allParenthesesInfo.append(allParenthesesInfo);
304 m_userCharacterClasses.append(pattern.m_userCharacterClasses);
305 // 'Steal' the RegexPattern's CharacterClasses! We clear its
306 // array, so that it won't delete them on destruction. We'll
307 // take responsibility for that.
308 pattern.m_userCharacterClasses.clear();
309 }
310
311 ~BytecodePattern()
312 {
313 deleteAllValues(m_allParenthesesInfo);
314 deleteAllValues(m_userCharacterClasses);
315 }
316
317 OwnPtr<ByteDisjunction> m_body;
318 bool m_ignoreCase;
319 bool m_multiline;
320
321 CharacterClass* newlineCharacterClass;
322 CharacterClass* wordcharCharacterClass;
323 private:
324 Vector<ByteDisjunction*> m_allParenthesesInfo;
325 Vector<CharacterClass*> m_userCharacterClasses;
326 };
327
328 BytecodePattern* byteCompileRegex(const UString& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase = false, bool multiline = false);
329 int interpretRegex(BytecodePattern* v_regex, const UChar* input, unsigned start, unsigned length, int* output);
330
331 } } // namespace JSC::Yarr
332
333 #endif
334
335 #endif // RegexInterpreter_h