]> git.saurik.com Git - apple/javascriptcore.git/blame_incremental - yarr/YarrInterpreter.h
JavaScriptCore-7600.1.4.11.8.tar.gz
[apple/javascriptcore.git] / yarr / YarrInterpreter.h
... / ...
CommitLineData
1/*
2 * Copyright (C) 2009, 2010 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#ifndef YarrInterpreter_h
27#define YarrInterpreter_h
28
29#include "YarrPattern.h"
30#include <wtf/PassOwnPtr.h>
31
32namespace WTF {
33class BumpPointerAllocator;
34}
35using WTF::BumpPointerAllocator;
36
37namespace JSC { namespace Yarr {
38
39class ByteDisjunction;
40
41struct ByteTerm {
42 enum Type {
43 TypeBodyAlternativeBegin,
44 TypeBodyAlternativeDisjunction,
45 TypeBodyAlternativeEnd,
46 TypeAlternativeBegin,
47 TypeAlternativeDisjunction,
48 TypeAlternativeEnd,
49 TypeSubpatternBegin,
50 TypeSubpatternEnd,
51 TypeAssertionBOL,
52 TypeAssertionEOL,
53 TypeAssertionWordBoundary,
54 TypePatternCharacterOnce,
55 TypePatternCharacterFixed,
56 TypePatternCharacterGreedy,
57 TypePatternCharacterNonGreedy,
58 TypePatternCasedCharacterOnce,
59 TypePatternCasedCharacterFixed,
60 TypePatternCasedCharacterGreedy,
61 TypePatternCasedCharacterNonGreedy,
62 TypeCharacterClass,
63 TypeBackReference,
64 TypeParenthesesSubpattern,
65 TypeParenthesesSubpatternOnceBegin,
66 TypeParenthesesSubpatternOnceEnd,
67 TypeParenthesesSubpatternTerminalBegin,
68 TypeParenthesesSubpatternTerminalEnd,
69 TypeParentheticalAssertionBegin,
70 TypeParentheticalAssertionEnd,
71 TypeCheckInput,
72 TypeUncheckInput,
73 TypeDotStarEnclosure,
74 } type;
75 union {
76 struct {
77 union {
78 UChar patternCharacter;
79 struct {
80 UChar lo;
81 UChar hi;
82 } casedCharacter;
83 CharacterClass* characterClass;
84 unsigned subpatternId;
85 };
86 union {
87 ByteDisjunction* parenthesesDisjunction;
88 unsigned parenthesesWidth;
89 };
90 QuantifierType quantityType;
91 unsigned quantityCount;
92 } atom;
93 struct {
94 int next;
95 int end;
96 bool onceThrough;
97 } alternative;
98 struct {
99 bool m_bol : 1;
100 bool m_eol : 1;
101 } anchors;
102 unsigned checkInputCount;
103 };
104 unsigned frameLocation;
105 bool m_capture : 1;
106 bool m_invert : 1;
107 unsigned inputPosition;
108
109 ByteTerm(UChar ch, int inputPos, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
110 : frameLocation(frameLocation)
111 , m_capture(false)
112 , m_invert(false)
113 {
114 switch (quantityType) {
115 case QuantifierFixedCount:
116 type = (quantityCount == 1) ? ByteTerm::TypePatternCharacterOnce : ByteTerm::TypePatternCharacterFixed;
117 break;
118 case QuantifierGreedy:
119 type = ByteTerm::TypePatternCharacterGreedy;
120 break;
121 case QuantifierNonGreedy:
122 type = ByteTerm::TypePatternCharacterNonGreedy;
123 break;
124 }
125
126 atom.patternCharacter = ch;
127 atom.quantityType = quantityType;
128 atom.quantityCount = quantityCount.unsafeGet();
129 inputPosition = inputPos;
130 }
131
132 ByteTerm(UChar lo, UChar hi, int inputPos, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
133 : frameLocation(frameLocation)
134 , m_capture(false)
135 , m_invert(false)
136 {
137 switch (quantityType) {
138 case QuantifierFixedCount:
139 type = (quantityCount == 1) ? ByteTerm::TypePatternCasedCharacterOnce : ByteTerm::TypePatternCasedCharacterFixed;
140 break;
141 case QuantifierGreedy:
142 type = ByteTerm::TypePatternCasedCharacterGreedy;
143 break;
144 case QuantifierNonGreedy:
145 type = ByteTerm::TypePatternCasedCharacterNonGreedy;
146 break;
147 }
148
149 atom.casedCharacter.lo = lo;
150 atom.casedCharacter.hi = hi;
151 atom.quantityType = quantityType;
152 atom.quantityCount = quantityCount.unsafeGet();
153 inputPosition = inputPos;
154 }
155
156 ByteTerm(CharacterClass* characterClass, bool invert, int inputPos)
157 : type(ByteTerm::TypeCharacterClass)
158 , m_capture(false)
159 , m_invert(invert)
160 {
161 atom.characterClass = characterClass;
162 atom.quantityType = QuantifierFixedCount;
163 atom.quantityCount = 1;
164 inputPosition = inputPos;
165 }
166
167 ByteTerm(Type type, unsigned subpatternId, ByteDisjunction* parenthesesInfo, bool capture, int inputPos)
168 : type(type)
169 , m_capture(capture)
170 , m_invert(false)
171 {
172 atom.subpatternId = subpatternId;
173 atom.parenthesesDisjunction = parenthesesInfo;
174 atom.quantityType = QuantifierFixedCount;
175 atom.quantityCount = 1;
176 inputPosition = inputPos;
177 }
178
179 ByteTerm(Type type, bool invert = false)
180 : type(type)
181 , m_capture(false)
182 , m_invert(invert)
183 {
184 atom.quantityType = QuantifierFixedCount;
185 atom.quantityCount = 1;
186 }
187
188 ByteTerm(Type type, unsigned subpatternId, bool capture, bool invert, int inputPos)
189 : type(type)
190 , m_capture(capture)
191 , m_invert(invert)
192 {
193 atom.subpatternId = subpatternId;
194 atom.quantityType = QuantifierFixedCount;
195 atom.quantityCount = 1;
196 inputPosition = inputPos;
197 }
198
199 static ByteTerm BOL(int inputPos)
200 {
201 ByteTerm term(TypeAssertionBOL);
202 term.inputPosition = inputPos;
203 return term;
204 }
205
206 static ByteTerm CheckInput(Checked<unsigned> count)
207 {
208 ByteTerm term(TypeCheckInput);
209 term.checkInputCount = count.unsafeGet();
210 return term;
211 }
212
213 static ByteTerm UncheckInput(Checked<unsigned> count)
214 {
215 ByteTerm term(TypeUncheckInput);
216 term.checkInputCount = count.unsafeGet();
217 return term;
218 }
219
220 static ByteTerm EOL(int inputPos)
221 {
222 ByteTerm term(TypeAssertionEOL);
223 term.inputPosition = inputPos;
224 return term;
225 }
226
227 static ByteTerm WordBoundary(bool invert, int inputPos)
228 {
229 ByteTerm term(TypeAssertionWordBoundary, invert);
230 term.inputPosition = inputPos;
231 return term;
232 }
233
234 static ByteTerm BackReference(unsigned subpatternId, int inputPos)
235 {
236 return ByteTerm(TypeBackReference, subpatternId, false, false, inputPos);
237 }
238
239 static ByteTerm BodyAlternativeBegin(bool onceThrough)
240 {
241 ByteTerm term(TypeBodyAlternativeBegin);
242 term.alternative.next = 0;
243 term.alternative.end = 0;
244 term.alternative.onceThrough = onceThrough;
245 return term;
246 }
247
248 static ByteTerm BodyAlternativeDisjunction(bool onceThrough)
249 {
250 ByteTerm term(TypeBodyAlternativeDisjunction);
251 term.alternative.next = 0;
252 term.alternative.end = 0;
253 term.alternative.onceThrough = onceThrough;
254 return term;
255 }
256
257 static ByteTerm BodyAlternativeEnd()
258 {
259 ByteTerm term(TypeBodyAlternativeEnd);
260 term.alternative.next = 0;
261 term.alternative.end = 0;
262 term.alternative.onceThrough = false;
263 return term;
264 }
265
266 static ByteTerm AlternativeBegin()
267 {
268 ByteTerm term(TypeAlternativeBegin);
269 term.alternative.next = 0;
270 term.alternative.end = 0;
271 term.alternative.onceThrough = false;
272 return term;
273 }
274
275 static ByteTerm AlternativeDisjunction()
276 {
277 ByteTerm term(TypeAlternativeDisjunction);
278 term.alternative.next = 0;
279 term.alternative.end = 0;
280 term.alternative.onceThrough = false;
281 return term;
282 }
283
284 static ByteTerm AlternativeEnd()
285 {
286 ByteTerm term(TypeAlternativeEnd);
287 term.alternative.next = 0;
288 term.alternative.end = 0;
289 term.alternative.onceThrough = false;
290 return term;
291 }
292
293 static ByteTerm SubpatternBegin()
294 {
295 return ByteTerm(TypeSubpatternBegin);
296 }
297
298 static ByteTerm SubpatternEnd()
299 {
300 return ByteTerm(TypeSubpatternEnd);
301 }
302
303 static ByteTerm DotStarEnclosure(bool bolAnchor, bool eolAnchor)
304 {
305 ByteTerm term(TypeDotStarEnclosure);
306 term.anchors.m_bol = bolAnchor;
307 term.anchors.m_eol = eolAnchor;
308 return term;
309 }
310
311 bool invert()
312 {
313 return m_invert;
314 }
315
316 bool capture()
317 {
318 return m_capture;
319 }
320};
321
322class ByteDisjunction {
323 WTF_MAKE_FAST_ALLOCATED;
324public:
325 ByteDisjunction(unsigned numSubpatterns, unsigned frameSize)
326 : m_numSubpatterns(numSubpatterns)
327 , m_frameSize(frameSize)
328 {
329 }
330
331 Vector<ByteTerm> terms;
332 unsigned m_numSubpatterns;
333 unsigned m_frameSize;
334};
335
336struct BytecodePattern {
337 WTF_MAKE_FAST_ALLOCATED;
338public:
339 BytecodePattern(PassOwnPtr<ByteDisjunction> body, Vector<OwnPtr<ByteDisjunction>>& parenthesesInfoToAdopt, YarrPattern& pattern, BumpPointerAllocator* allocator)
340 : m_body(body)
341 , m_ignoreCase(pattern.m_ignoreCase)
342 , m_multiline(pattern.m_multiline)
343 , m_allocator(allocator)
344 {
345 m_body->terms.shrinkToFit();
346
347 newlineCharacterClass = pattern.newlineCharacterClass();
348 wordcharCharacterClass = pattern.wordcharCharacterClass();
349
350 m_allParenthesesInfo.swap(parenthesesInfoToAdopt);
351 m_allParenthesesInfo.shrinkToFit();
352
353 m_userCharacterClasses.swap(pattern.m_userCharacterClasses);
354 m_userCharacterClasses.shrinkToFit();
355 }
356
357 OwnPtr<ByteDisjunction> m_body;
358 bool m_ignoreCase;
359 bool m_multiline;
360 // Each BytecodePattern is associated with a RegExp, each RegExp is associated
361 // with a VM. Cache a pointer to out VM's m_regExpAllocator.
362 BumpPointerAllocator* m_allocator;
363
364 CharacterClass* newlineCharacterClass;
365 CharacterClass* wordcharCharacterClass;
366
367private:
368 Vector<OwnPtr<ByteDisjunction>> m_allParenthesesInfo;
369 Vector<OwnPtr<CharacterClass>> m_userCharacterClasses;
370};
371
372JS_EXPORT_PRIVATE PassOwnPtr<BytecodePattern> byteCompile(YarrPattern&, BumpPointerAllocator*);
373JS_EXPORT_PRIVATE unsigned interpret(BytecodePattern*, const String& input, unsigned start, unsigned* output);
374unsigned interpret(BytecodePattern*, const LChar* input, unsigned length, unsigned start, unsigned* output);
375unsigned interpret(BytecodePattern*, const UChar* input, unsigned length, unsigned start, unsigned* output);
376
377} } // namespace JSC::Yarr
378
379#endif // YarrInterpreter_h