]> git.saurik.com Git - apple/javascriptcore.git/blob - yarr/RegexInterpreter.h
JavaScriptCore-584.tar.gz
[apple/javascriptcore.git] / yarr / RegexInterpreter.h
1 /*
2 * Copyright (C) 2009 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26 #ifndef RegexInterpreter_h
27 #define RegexInterpreter_h
28
29 #include <wtf/Platform.h>
30
31 #if ENABLE(YARR)
32
33 #include <wtf/unicode/Unicode.h>
34 #include "RegexParser.h"
35 #include "RegexPattern.h"
36
37 namespace JSC { namespace Yarr {
38
39 class ByteDisjunction;
40
41 struct ByteTerm {
42 enum Type {
43 TypeBodyAlternativeBegin,
44 TypeBodyAlternativeDisjunction,
45 TypeBodyAlternativeEnd,
46 TypeAlternativeBegin,
47 TypeAlternativeDisjunction,
48 TypeAlternativeEnd,
49 TypeSubpatternBegin,
50 TypeSubpatternEnd,
51 TypeAssertionBOL,
52 TypeAssertionEOL,
53 TypeAssertionWordBoundary,
54 TypePatternCharacterOnce,
55 TypePatternCharacterFixed,
56 TypePatternCharacterGreedy,
57 TypePatternCharacterNonGreedy,
58 TypePatternCasedCharacterOnce,
59 TypePatternCasedCharacterFixed,
60 TypePatternCasedCharacterGreedy,
61 TypePatternCasedCharacterNonGreedy,
62 TypeCharacterClass,
63 TypeBackReference,
64 TypeParenthesesSubpattern,
65 TypeParenthesesSubpatternOnceBegin,
66 TypeParenthesesSubpatternOnceEnd,
67 TypeParentheticalAssertionBegin,
68 TypeParentheticalAssertionEnd,
69 TypeCheckInput,
70 } type;
71 bool invertOrCapture;
72 union {
73 struct {
74 union {
75 UChar patternCharacter;
76 struct {
77 UChar lo;
78 UChar hi;
79 } casedCharacter;
80 CharacterClass* characterClass;
81 unsigned subpatternId;
82 };
83 union {
84 ByteDisjunction* parenthesesDisjunction;
85 unsigned parenthesesWidth;
86 };
87 QuantifierType quantityType;
88 unsigned quantityCount;
89 } atom;
90 struct {
91 int next;
92 int end;
93 } alternative;
94 unsigned checkInputCount;
95 };
96 unsigned frameLocation;
97 int inputPosition;
98
99 ByteTerm(UChar ch, int inputPos, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
100 : frameLocation(frameLocation)
101 {
102 switch (quantityType) {
103 case QuantifierFixedCount:
104 type = (quantityCount == 1) ? ByteTerm::TypePatternCharacterOnce : ByteTerm::TypePatternCharacterFixed;
105 break;
106 case QuantifierGreedy:
107 type = ByteTerm::TypePatternCharacterGreedy;
108 break;
109 case QuantifierNonGreedy:
110 type = ByteTerm::TypePatternCharacterNonGreedy;
111 break;
112 }
113
114 atom.patternCharacter = ch;
115 atom.quantityType = quantityType;
116 atom.quantityCount = quantityCount;
117 inputPosition = inputPos;
118 }
119
120 ByteTerm(UChar lo, UChar hi, int inputPos, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
121 : frameLocation(frameLocation)
122 {
123 switch (quantityType) {
124 case QuantifierFixedCount:
125 type = (quantityCount == 1) ? ByteTerm::TypePatternCasedCharacterOnce : ByteTerm::TypePatternCasedCharacterFixed;
126 break;
127 case QuantifierGreedy:
128 type = ByteTerm::TypePatternCasedCharacterGreedy;
129 break;
130 case QuantifierNonGreedy:
131 type = ByteTerm::TypePatternCasedCharacterNonGreedy;
132 break;
133 }
134
135 atom.casedCharacter.lo = lo;
136 atom.casedCharacter.hi = hi;
137 atom.quantityType = quantityType;
138 atom.quantityCount = quantityCount;
139 inputPosition = inputPos;
140 }
141
142 ByteTerm(CharacterClass* characterClass, bool invert, int inputPos)
143 : type(ByteTerm::TypeCharacterClass)
144 , invertOrCapture(invert)
145 {
146 atom.characterClass = characterClass;
147 atom.quantityType = QuantifierFixedCount;
148 atom.quantityCount = 1;
149 inputPosition = inputPos;
150 }
151
152 ByteTerm(Type type, unsigned subpatternId, ByteDisjunction* parenthesesInfo, bool invertOrCapture, int inputPos)
153 : type(type)
154 , invertOrCapture(invertOrCapture)
155 {
156 atom.subpatternId = subpatternId;
157 atom.parenthesesDisjunction = parenthesesInfo;
158 atom.quantityType = QuantifierFixedCount;
159 atom.quantityCount = 1;
160 inputPosition = inputPos;
161 }
162
163 ByteTerm(Type type, bool invert = false)
164 : type(type)
165 , invertOrCapture(invert)
166 {
167 atom.quantityType = QuantifierFixedCount;
168 atom.quantityCount = 1;
169 }
170
171 ByteTerm(Type type, unsigned subpatternId, bool invertOrCapture, int inputPos)
172 : type(type)
173 , invertOrCapture(invertOrCapture)
174 {
175 atom.subpatternId = subpatternId;
176 atom.quantityType = QuantifierFixedCount;
177 atom.quantityCount = 1;
178 inputPosition = inputPos;
179 }
180
181 static ByteTerm BOL(int inputPos)
182 {
183 ByteTerm term(TypeAssertionBOL);
184 term.inputPosition = inputPos;
185 return term;
186 }
187
188 static ByteTerm CheckInput(unsigned count)
189 {
190 ByteTerm term(TypeCheckInput);
191 term.checkInputCount = count;
192 return term;
193 }
194
195 static ByteTerm EOL(int inputPos)
196 {
197 ByteTerm term(TypeAssertionEOL);
198 term.inputPosition = inputPos;
199 return term;
200 }
201
202 static ByteTerm WordBoundary(bool invert, int inputPos)
203 {
204 ByteTerm term(TypeAssertionWordBoundary, invert);
205 term.inputPosition = inputPos;
206 return term;
207 }
208
209 static ByteTerm BackReference(unsigned subpatternId, int inputPos)
210 {
211 return ByteTerm(TypeBackReference, subpatternId, false, inputPos);
212 }
213
214 static ByteTerm BodyAlternativeBegin()
215 {
216 ByteTerm term(TypeBodyAlternativeBegin);
217 term.alternative.next = 0;
218 term.alternative.end = 0;
219 return term;
220 }
221
222 static ByteTerm BodyAlternativeDisjunction()
223 {
224 ByteTerm term(TypeBodyAlternativeDisjunction);
225 term.alternative.next = 0;
226 term.alternative.end = 0;
227 return term;
228 }
229
230 static ByteTerm BodyAlternativeEnd()
231 {
232 ByteTerm term(TypeBodyAlternativeEnd);
233 term.alternative.next = 0;
234 term.alternative.end = 0;
235 return term;
236 }
237
238 static ByteTerm AlternativeBegin()
239 {
240 ByteTerm term(TypeAlternativeBegin);
241 term.alternative.next = 0;
242 term.alternative.end = 0;
243 return term;
244 }
245
246 static ByteTerm AlternativeDisjunction()
247 {
248 ByteTerm term(TypeAlternativeDisjunction);
249 term.alternative.next = 0;
250 term.alternative.end = 0;
251 return term;
252 }
253
254 static ByteTerm AlternativeEnd()
255 {
256 ByteTerm term(TypeAlternativeEnd);
257 term.alternative.next = 0;
258 term.alternative.end = 0;
259 return term;
260 }
261
262 static ByteTerm SubpatternBegin()
263 {
264 return ByteTerm(TypeSubpatternBegin);
265 }
266
267 static ByteTerm SubpatternEnd()
268 {
269 return ByteTerm(TypeSubpatternEnd);
270 }
271
272 bool invert()
273 {
274 return invertOrCapture;
275 }
276
277 bool capture()
278 {
279 return invertOrCapture;
280 }
281 };
282
283 class ByteDisjunction : public FastAllocBase {
284 public:
285 ByteDisjunction(unsigned numSubpatterns, unsigned frameSize)
286 : m_numSubpatterns(numSubpatterns)
287 , m_frameSize(frameSize)
288 {
289 }
290
291 Vector<ByteTerm> terms;
292 unsigned m_numSubpatterns;
293 unsigned m_frameSize;
294 };
295
296 struct BytecodePattern : FastAllocBase {
297 BytecodePattern(ByteDisjunction* body, Vector<ByteDisjunction*> allParenthesesInfo, RegexPattern& pattern)
298 : m_body(body)
299 , m_ignoreCase(pattern.m_ignoreCase)
300 , m_multiline(pattern.m_multiline)
301 {
302 newlineCharacterClass = pattern.newlineCharacterClass();
303 wordcharCharacterClass = pattern.wordcharCharacterClass();
304
305 m_allParenthesesInfo.append(allParenthesesInfo);
306 m_userCharacterClasses.append(pattern.m_userCharacterClasses);
307 // 'Steal' the RegexPattern's CharacterClasses! We clear its
308 // array, so that it won't delete them on destruction. We'll
309 // take responsibility for that.
310 pattern.m_userCharacterClasses.clear();
311 }
312
313 ~BytecodePattern()
314 {
315 deleteAllValues(m_allParenthesesInfo);
316 deleteAllValues(m_userCharacterClasses);
317 }
318
319 OwnPtr<ByteDisjunction> m_body;
320 bool m_ignoreCase;
321 bool m_multiline;
322
323 CharacterClass* newlineCharacterClass;
324 CharacterClass* wordcharCharacterClass;
325 private:
326 Vector<ByteDisjunction*> m_allParenthesesInfo;
327 Vector<CharacterClass*> m_userCharacterClasses;
328 };
329
330 BytecodePattern* byteCompileRegex(const UString& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase = false, bool multiline = false);
331 int interpretRegex(BytecodePattern* v_regex, const UChar* input, unsigned start, unsigned length, int* output);
332
333 } } // namespace JSC::Yarr
334
335 #endif
336
337 #endif // RegexInterpreter_h