]> git.saurik.com Git - apple/javascriptcore.git/blob - yarr/YarrInterpreter.h
JavaScriptCore-1218.34.tar.gz
[apple/javascriptcore.git] / yarr / YarrInterpreter.h
1 /*
2 * Copyright (C) 2009, 2010 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26 #ifndef YarrInterpreter_h
27 #define YarrInterpreter_h
28
29 #include "YarrPattern.h"
30 #include <wtf/PassOwnPtr.h>
31 #include <wtf/unicode/Unicode.h>
32
33 namespace WTF {
34 class BumpPointerAllocator;
35 }
36 using WTF::BumpPointerAllocator;
37
38 namespace JSC { namespace Yarr {
39
40 class ByteDisjunction;
41
42 struct ByteTerm {
43 enum Type {
44 TypeBodyAlternativeBegin,
45 TypeBodyAlternativeDisjunction,
46 TypeBodyAlternativeEnd,
47 TypeAlternativeBegin,
48 TypeAlternativeDisjunction,
49 TypeAlternativeEnd,
50 TypeSubpatternBegin,
51 TypeSubpatternEnd,
52 TypeAssertionBOL,
53 TypeAssertionEOL,
54 TypeAssertionWordBoundary,
55 TypePatternCharacterOnce,
56 TypePatternCharacterFixed,
57 TypePatternCharacterGreedy,
58 TypePatternCharacterNonGreedy,
59 TypePatternCasedCharacterOnce,
60 TypePatternCasedCharacterFixed,
61 TypePatternCasedCharacterGreedy,
62 TypePatternCasedCharacterNonGreedy,
63 TypeCharacterClass,
64 TypeBackReference,
65 TypeParenthesesSubpattern,
66 TypeParenthesesSubpatternOnceBegin,
67 TypeParenthesesSubpatternOnceEnd,
68 TypeParenthesesSubpatternTerminalBegin,
69 TypeParenthesesSubpatternTerminalEnd,
70 TypeParentheticalAssertionBegin,
71 TypeParentheticalAssertionEnd,
72 TypeCheckInput,
73 TypeUncheckInput,
74 TypeDotStarEnclosure,
75 } type;
76 union {
77 struct {
78 union {
79 UChar patternCharacter;
80 struct {
81 UChar lo;
82 UChar hi;
83 } casedCharacter;
84 CharacterClass* characterClass;
85 unsigned subpatternId;
86 };
87 union {
88 ByteDisjunction* parenthesesDisjunction;
89 unsigned parenthesesWidth;
90 };
91 QuantifierType quantityType;
92 unsigned quantityCount;
93 } atom;
94 struct {
95 int next;
96 int end;
97 bool onceThrough;
98 } alternative;
99 struct {
100 bool m_bol : 1;
101 bool m_eol : 1;
102 } anchors;
103 unsigned checkInputCount;
104 };
105 unsigned frameLocation;
106 bool m_capture : 1;
107 bool m_invert : 1;
108 unsigned inputPosition;
109
110 ByteTerm(UChar ch, int inputPos, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
111 : frameLocation(frameLocation)
112 , m_capture(false)
113 , m_invert(false)
114 {
115 switch (quantityType) {
116 case QuantifierFixedCount:
117 type = (quantityCount == 1) ? ByteTerm::TypePatternCharacterOnce : ByteTerm::TypePatternCharacterFixed;
118 break;
119 case QuantifierGreedy:
120 type = ByteTerm::TypePatternCharacterGreedy;
121 break;
122 case QuantifierNonGreedy:
123 type = ByteTerm::TypePatternCharacterNonGreedy;
124 break;
125 }
126
127 atom.patternCharacter = ch;
128 atom.quantityType = quantityType;
129 atom.quantityCount = quantityCount.unsafeGet();
130 inputPosition = inputPos;
131 }
132
133 ByteTerm(UChar lo, UChar hi, int inputPos, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
134 : frameLocation(frameLocation)
135 , m_capture(false)
136 , m_invert(false)
137 {
138 switch (quantityType) {
139 case QuantifierFixedCount:
140 type = (quantityCount == 1) ? ByteTerm::TypePatternCasedCharacterOnce : ByteTerm::TypePatternCasedCharacterFixed;
141 break;
142 case QuantifierGreedy:
143 type = ByteTerm::TypePatternCasedCharacterGreedy;
144 break;
145 case QuantifierNonGreedy:
146 type = ByteTerm::TypePatternCasedCharacterNonGreedy;
147 break;
148 }
149
150 atom.casedCharacter.lo = lo;
151 atom.casedCharacter.hi = hi;
152 atom.quantityType = quantityType;
153 atom.quantityCount = quantityCount.unsafeGet();
154 inputPosition = inputPos;
155 }
156
157 ByteTerm(CharacterClass* characterClass, bool invert, int inputPos)
158 : type(ByteTerm::TypeCharacterClass)
159 , m_capture(false)
160 , m_invert(invert)
161 {
162 atom.characterClass = characterClass;
163 atom.quantityType = QuantifierFixedCount;
164 atom.quantityCount = 1;
165 inputPosition = inputPos;
166 }
167
168 ByteTerm(Type type, unsigned subpatternId, ByteDisjunction* parenthesesInfo, bool capture, int inputPos)
169 : type(type)
170 , m_capture(capture)
171 , m_invert(false)
172 {
173 atom.subpatternId = subpatternId;
174 atom.parenthesesDisjunction = parenthesesInfo;
175 atom.quantityType = QuantifierFixedCount;
176 atom.quantityCount = 1;
177 inputPosition = inputPos;
178 }
179
180 ByteTerm(Type type, bool invert = false)
181 : type(type)
182 , m_capture(false)
183 , m_invert(invert)
184 {
185 atom.quantityType = QuantifierFixedCount;
186 atom.quantityCount = 1;
187 }
188
189 ByteTerm(Type type, unsigned subpatternId, bool capture, bool invert, int inputPos)
190 : type(type)
191 , m_capture(capture)
192 , m_invert(invert)
193 {
194 atom.subpatternId = subpatternId;
195 atom.quantityType = QuantifierFixedCount;
196 atom.quantityCount = 1;
197 inputPosition = inputPos;
198 }
199
200 static ByteTerm BOL(int inputPos)
201 {
202 ByteTerm term(TypeAssertionBOL);
203 term.inputPosition = inputPos;
204 return term;
205 }
206
207 static ByteTerm CheckInput(Checked<unsigned> count)
208 {
209 ByteTerm term(TypeCheckInput);
210 term.checkInputCount = count.unsafeGet();
211 return term;
212 }
213
214 static ByteTerm UncheckInput(Checked<unsigned> count)
215 {
216 ByteTerm term(TypeUncheckInput);
217 term.checkInputCount = count.unsafeGet();
218 return term;
219 }
220
221 static ByteTerm EOL(int inputPos)
222 {
223 ByteTerm term(TypeAssertionEOL);
224 term.inputPosition = inputPos;
225 return term;
226 }
227
228 static ByteTerm WordBoundary(bool invert, int inputPos)
229 {
230 ByteTerm term(TypeAssertionWordBoundary, invert);
231 term.inputPosition = inputPos;
232 return term;
233 }
234
235 static ByteTerm BackReference(unsigned subpatternId, int inputPos)
236 {
237 return ByteTerm(TypeBackReference, subpatternId, false, false, inputPos);
238 }
239
240 static ByteTerm BodyAlternativeBegin(bool onceThrough)
241 {
242 ByteTerm term(TypeBodyAlternativeBegin);
243 term.alternative.next = 0;
244 term.alternative.end = 0;
245 term.alternative.onceThrough = onceThrough;
246 return term;
247 }
248
249 static ByteTerm BodyAlternativeDisjunction(bool onceThrough)
250 {
251 ByteTerm term(TypeBodyAlternativeDisjunction);
252 term.alternative.next = 0;
253 term.alternative.end = 0;
254 term.alternative.onceThrough = onceThrough;
255 return term;
256 }
257
258 static ByteTerm BodyAlternativeEnd()
259 {
260 ByteTerm term(TypeBodyAlternativeEnd);
261 term.alternative.next = 0;
262 term.alternative.end = 0;
263 term.alternative.onceThrough = false;
264 return term;
265 }
266
267 static ByteTerm AlternativeBegin()
268 {
269 ByteTerm term(TypeAlternativeBegin);
270 term.alternative.next = 0;
271 term.alternative.end = 0;
272 term.alternative.onceThrough = false;
273 return term;
274 }
275
276 static ByteTerm AlternativeDisjunction()
277 {
278 ByteTerm term(TypeAlternativeDisjunction);
279 term.alternative.next = 0;
280 term.alternative.end = 0;
281 term.alternative.onceThrough = false;
282 return term;
283 }
284
285 static ByteTerm AlternativeEnd()
286 {
287 ByteTerm term(TypeAlternativeEnd);
288 term.alternative.next = 0;
289 term.alternative.end = 0;
290 term.alternative.onceThrough = false;
291 return term;
292 }
293
294 static ByteTerm SubpatternBegin()
295 {
296 return ByteTerm(TypeSubpatternBegin);
297 }
298
299 static ByteTerm SubpatternEnd()
300 {
301 return ByteTerm(TypeSubpatternEnd);
302 }
303
304 static ByteTerm DotStarEnclosure(bool bolAnchor, bool eolAnchor)
305 {
306 ByteTerm term(TypeDotStarEnclosure);
307 term.anchors.m_bol = bolAnchor;
308 term.anchors.m_eol = eolAnchor;
309 return term;
310 }
311
312 bool invert()
313 {
314 return m_invert;
315 }
316
317 bool capture()
318 {
319 return m_capture;
320 }
321 };
322
323 class ByteDisjunction {
324 WTF_MAKE_FAST_ALLOCATED;
325 public:
326 ByteDisjunction(unsigned numSubpatterns, unsigned frameSize)
327 : m_numSubpatterns(numSubpatterns)
328 , m_frameSize(frameSize)
329 {
330 }
331
332 Vector<ByteTerm> terms;
333 unsigned m_numSubpatterns;
334 unsigned m_frameSize;
335 };
336
337 struct BytecodePattern {
338 WTF_MAKE_FAST_ALLOCATED;
339 public:
340 BytecodePattern(PassOwnPtr<ByteDisjunction> body, Vector<OwnPtr<ByteDisjunction> >& parenthesesInfoToAdopt, YarrPattern& pattern, BumpPointerAllocator* allocator)
341 : m_body(body)
342 , m_ignoreCase(pattern.m_ignoreCase)
343 , m_multiline(pattern.m_multiline)
344 , m_allocator(allocator)
345 {
346 m_body->terms.shrinkToFit();
347
348 newlineCharacterClass = pattern.newlineCharacterClass();
349 wordcharCharacterClass = pattern.wordcharCharacterClass();
350
351 m_allParenthesesInfo.swap(parenthesesInfoToAdopt);
352 m_allParenthesesInfo.shrinkToFit();
353
354 m_userCharacterClasses.swap(pattern.m_userCharacterClasses);
355 m_userCharacterClasses.shrinkToFit();
356 }
357
358 OwnPtr<ByteDisjunction> m_body;
359 bool m_ignoreCase;
360 bool m_multiline;
361 // Each BytecodePattern is associated with a RegExp, each RegExp is associated
362 // with a VM. Cache a pointer to out VM's m_regExpAllocator.
363 BumpPointerAllocator* m_allocator;
364
365 CharacterClass* newlineCharacterClass;
366 CharacterClass* wordcharCharacterClass;
367
368 private:
369 Vector<OwnPtr<ByteDisjunction> > m_allParenthesesInfo;
370 Vector<OwnPtr<CharacterClass> > m_userCharacterClasses;
371 };
372
373 JS_EXPORT_PRIVATE PassOwnPtr<BytecodePattern> byteCompile(YarrPattern&, BumpPointerAllocator*);
374 JS_EXPORT_PRIVATE unsigned interpret(BytecodePattern*, const String& input, unsigned start, unsigned* output);
375 unsigned interpret(BytecodePattern*, const LChar* input, unsigned length, unsigned start, unsigned* output);
376 unsigned interpret(BytecodePattern*, const UChar* input, unsigned length, unsigned start, unsigned* output);
377
378 } } // namespace JSC::Yarr
379
380 #endif // YarrInterpreter_h