]> git.saurik.com Git - apple/javascriptcore.git/blame - yarr/YarrInterpreter.h
JavaScriptCore-7600.1.4.17.5.tar.gz
[apple/javascriptcore.git] / yarr / YarrInterpreter.h
CommitLineData
ba379fdc 1/*
14957cd0 2 * Copyright (C) 2009, 2010 Apple Inc. All rights reserved.
ba379fdc
A
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
14957cd0
A
26#ifndef YarrInterpreter_h
27#define YarrInterpreter_h
ba379fdc 28
14957cd0
A
29#include "YarrPattern.h"
30#include <wtf/PassOwnPtr.h>
ba379fdc 31
14957cd0
A
32namespace WTF {
33class BumpPointerAllocator;
34}
35using WTF::BumpPointerAllocator;
36
ba379fdc
A
37namespace JSC { namespace Yarr {
38
39class ByteDisjunction;
40
41struct ByteTerm {
42 enum Type {
43 TypeBodyAlternativeBegin,
44 TypeBodyAlternativeDisjunction,
45 TypeBodyAlternativeEnd,
46 TypeAlternativeBegin,
47 TypeAlternativeDisjunction,
48 TypeAlternativeEnd,
49 TypeSubpatternBegin,
50 TypeSubpatternEnd,
51 TypeAssertionBOL,
52 TypeAssertionEOL,
53 TypeAssertionWordBoundary,
54 TypePatternCharacterOnce,
55 TypePatternCharacterFixed,
56 TypePatternCharacterGreedy,
57 TypePatternCharacterNonGreedy,
58 TypePatternCasedCharacterOnce,
59 TypePatternCasedCharacterFixed,
60 TypePatternCasedCharacterGreedy,
61 TypePatternCasedCharacterNonGreedy,
62 TypeCharacterClass,
63 TypeBackReference,
64 TypeParenthesesSubpattern,
65 TypeParenthesesSubpatternOnceBegin,
66 TypeParenthesesSubpatternOnceEnd,
14957cd0
A
67 TypeParenthesesSubpatternTerminalBegin,
68 TypeParenthesesSubpatternTerminalEnd,
ba379fdc
A
69 TypeParentheticalAssertionBegin,
70 TypeParentheticalAssertionEnd,
71 TypeCheckInput,
14957cd0
A
72 TypeUncheckInput,
73 TypeDotStarEnclosure,
ba379fdc 74 } type;
ba379fdc
A
75 union {
76 struct {
77 union {
78 UChar patternCharacter;
79 struct {
80 UChar lo;
81 UChar hi;
82 } casedCharacter;
83 CharacterClass* characterClass;
84 unsigned subpatternId;
85 };
86 union {
87 ByteDisjunction* parenthesesDisjunction;
88 unsigned parenthesesWidth;
89 };
90 QuantifierType quantityType;
91 unsigned quantityCount;
92 } atom;
93 struct {
94 int next;
95 int end;
14957cd0 96 bool onceThrough;
ba379fdc 97 } alternative;
14957cd0
A
98 struct {
99 bool m_bol : 1;
100 bool m_eol : 1;
101 } anchors;
ba379fdc
A
102 unsigned checkInputCount;
103 };
104 unsigned frameLocation;
14957cd0
A
105 bool m_capture : 1;
106 bool m_invert : 1;
6fe7ccc8 107 unsigned inputPosition;
ba379fdc 108
1df5f87f 109 ByteTerm(UChar ch, int inputPos, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
ba379fdc 110 : frameLocation(frameLocation)
14957cd0
A
111 , m_capture(false)
112 , m_invert(false)
ba379fdc
A
113 {
114 switch (quantityType) {
115 case QuantifierFixedCount:
116 type = (quantityCount == 1) ? ByteTerm::TypePatternCharacterOnce : ByteTerm::TypePatternCharacterFixed;
117 break;
118 case QuantifierGreedy:
119 type = ByteTerm::TypePatternCharacterGreedy;
120 break;
121 case QuantifierNonGreedy:
122 type = ByteTerm::TypePatternCharacterNonGreedy;
123 break;
124 }
125
126 atom.patternCharacter = ch;
127 atom.quantityType = quantityType;
1df5f87f 128 atom.quantityCount = quantityCount.unsafeGet();
ba379fdc
A
129 inputPosition = inputPos;
130 }
131
1df5f87f 132 ByteTerm(UChar lo, UChar hi, int inputPos, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
ba379fdc 133 : frameLocation(frameLocation)
14957cd0
A
134 , m_capture(false)
135 , m_invert(false)
ba379fdc
A
136 {
137 switch (quantityType) {
138 case QuantifierFixedCount:
139 type = (quantityCount == 1) ? ByteTerm::TypePatternCasedCharacterOnce : ByteTerm::TypePatternCasedCharacterFixed;
140 break;
141 case QuantifierGreedy:
142 type = ByteTerm::TypePatternCasedCharacterGreedy;
143 break;
144 case QuantifierNonGreedy:
145 type = ByteTerm::TypePatternCasedCharacterNonGreedy;
146 break;
147 }
148
149 atom.casedCharacter.lo = lo;
150 atom.casedCharacter.hi = hi;
151 atom.quantityType = quantityType;
1df5f87f 152 atom.quantityCount = quantityCount.unsafeGet();
ba379fdc
A
153 inputPosition = inputPos;
154 }
155
156 ByteTerm(CharacterClass* characterClass, bool invert, int inputPos)
157 : type(ByteTerm::TypeCharacterClass)
14957cd0
A
158 , m_capture(false)
159 , m_invert(invert)
ba379fdc
A
160 {
161 atom.characterClass = characterClass;
162 atom.quantityType = QuantifierFixedCount;
163 atom.quantityCount = 1;
164 inputPosition = inputPos;
165 }
166
14957cd0 167 ByteTerm(Type type, unsigned subpatternId, ByteDisjunction* parenthesesInfo, bool capture, int inputPos)
ba379fdc 168 : type(type)
14957cd0
A
169 , m_capture(capture)
170 , m_invert(false)
ba379fdc
A
171 {
172 atom.subpatternId = subpatternId;
173 atom.parenthesesDisjunction = parenthesesInfo;
174 atom.quantityType = QuantifierFixedCount;
175 atom.quantityCount = 1;
176 inputPosition = inputPos;
177 }
178
179 ByteTerm(Type type, bool invert = false)
180 : type(type)
14957cd0
A
181 , m_capture(false)
182 , m_invert(invert)
ba379fdc
A
183 {
184 atom.quantityType = QuantifierFixedCount;
185 atom.quantityCount = 1;
186 }
187
14957cd0 188 ByteTerm(Type type, unsigned subpatternId, bool capture, bool invert, int inputPos)
ba379fdc 189 : type(type)
14957cd0
A
190 , m_capture(capture)
191 , m_invert(invert)
ba379fdc
A
192 {
193 atom.subpatternId = subpatternId;
194 atom.quantityType = QuantifierFixedCount;
195 atom.quantityCount = 1;
196 inputPosition = inputPos;
197 }
198
199 static ByteTerm BOL(int inputPos)
200 {
201 ByteTerm term(TypeAssertionBOL);
202 term.inputPosition = inputPos;
203 return term;
204 }
205
1df5f87f 206 static ByteTerm CheckInput(Checked<unsigned> count)
ba379fdc
A
207 {
208 ByteTerm term(TypeCheckInput);
1df5f87f 209 term.checkInputCount = count.unsafeGet();
ba379fdc
A
210 return term;
211 }
212
1df5f87f 213 static ByteTerm UncheckInput(Checked<unsigned> count)
14957cd0
A
214 {
215 ByteTerm term(TypeUncheckInput);
1df5f87f 216 term.checkInputCount = count.unsafeGet();
14957cd0
A
217 return term;
218 }
219
ba379fdc
A
220 static ByteTerm EOL(int inputPos)
221 {
222 ByteTerm term(TypeAssertionEOL);
223 term.inputPosition = inputPos;
224 return term;
225 }
226
227 static ByteTerm WordBoundary(bool invert, int inputPos)
228 {
229 ByteTerm term(TypeAssertionWordBoundary, invert);
230 term.inputPosition = inputPos;
231 return term;
232 }
233
234 static ByteTerm BackReference(unsigned subpatternId, int inputPos)
235 {
14957cd0 236 return ByteTerm(TypeBackReference, subpatternId, false, false, inputPos);
ba379fdc
A
237 }
238
14957cd0 239 static ByteTerm BodyAlternativeBegin(bool onceThrough)
ba379fdc
A
240 {
241 ByteTerm term(TypeBodyAlternativeBegin);
242 term.alternative.next = 0;
243 term.alternative.end = 0;
14957cd0 244 term.alternative.onceThrough = onceThrough;
ba379fdc
A
245 return term;
246 }
247
14957cd0 248 static ByteTerm BodyAlternativeDisjunction(bool onceThrough)
ba379fdc
A
249 {
250 ByteTerm term(TypeBodyAlternativeDisjunction);
251 term.alternative.next = 0;
252 term.alternative.end = 0;
14957cd0 253 term.alternative.onceThrough = onceThrough;
ba379fdc
A
254 return term;
255 }
256
257 static ByteTerm BodyAlternativeEnd()
258 {
259 ByteTerm term(TypeBodyAlternativeEnd);
260 term.alternative.next = 0;
261 term.alternative.end = 0;
14957cd0 262 term.alternative.onceThrough = false;
ba379fdc
A
263 return term;
264 }
265
266 static ByteTerm AlternativeBegin()
267 {
268 ByteTerm term(TypeAlternativeBegin);
269 term.alternative.next = 0;
270 term.alternative.end = 0;
14957cd0 271 term.alternative.onceThrough = false;
ba379fdc
A
272 return term;
273 }
274
275 static ByteTerm AlternativeDisjunction()
276 {
277 ByteTerm term(TypeAlternativeDisjunction);
278 term.alternative.next = 0;
279 term.alternative.end = 0;
14957cd0 280 term.alternative.onceThrough = false;
ba379fdc
A
281 return term;
282 }
283
284 static ByteTerm AlternativeEnd()
285 {
286 ByteTerm term(TypeAlternativeEnd);
287 term.alternative.next = 0;
288 term.alternative.end = 0;
14957cd0 289 term.alternative.onceThrough = false;
ba379fdc
A
290 return term;
291 }
292
293 static ByteTerm SubpatternBegin()
294 {
295 return ByteTerm(TypeSubpatternBegin);
296 }
297
298 static ByteTerm SubpatternEnd()
299 {
300 return ByteTerm(TypeSubpatternEnd);
301 }
14957cd0
A
302
303 static ByteTerm DotStarEnclosure(bool bolAnchor, bool eolAnchor)
304 {
305 ByteTerm term(TypeDotStarEnclosure);
306 term.anchors.m_bol = bolAnchor;
307 term.anchors.m_eol = eolAnchor;
308 return term;
309 }
ba379fdc
A
310
311 bool invert()
312 {
14957cd0 313 return m_invert;
ba379fdc
A
314 }
315
316 bool capture()
317 {
14957cd0 318 return m_capture;
ba379fdc
A
319 }
320};
321
14957cd0
A
322class ByteDisjunction {
323 WTF_MAKE_FAST_ALLOCATED;
ba379fdc
A
324public:
325 ByteDisjunction(unsigned numSubpatterns, unsigned frameSize)
326 : m_numSubpatterns(numSubpatterns)
327 , m_frameSize(frameSize)
328 {
329 }
330
331 Vector<ByteTerm> terms;
332 unsigned m_numSubpatterns;
333 unsigned m_frameSize;
334};
335
14957cd0
A
336struct BytecodePattern {
337 WTF_MAKE_FAST_ALLOCATED;
338public:
81345200 339 BytecodePattern(PassOwnPtr<ByteDisjunction> body, Vector<OwnPtr<ByteDisjunction>>& parenthesesInfoToAdopt, YarrPattern& pattern, BumpPointerAllocator* allocator)
ba379fdc
A
340 : m_body(body)
341 , m_ignoreCase(pattern.m_ignoreCase)
342 , m_multiline(pattern.m_multiline)
14957cd0 343 , m_allocator(allocator)
ba379fdc 344 {
93a37866
A
345 m_body->terms.shrinkToFit();
346
ba379fdc
A
347 newlineCharacterClass = pattern.newlineCharacterClass();
348 wordcharCharacterClass = pattern.wordcharCharacterClass();
349
93a37866
A
350 m_allParenthesesInfo.swap(parenthesesInfoToAdopt);
351 m_allParenthesesInfo.shrinkToFit();
ba379fdc 352
93a37866
A
353 m_userCharacterClasses.swap(pattern.m_userCharacterClasses);
354 m_userCharacterClasses.shrinkToFit();
ba379fdc
A
355 }
356
357 OwnPtr<ByteDisjunction> m_body;
358 bool m_ignoreCase;
359 bool m_multiline;
14957cd0 360 // Each BytecodePattern is associated with a RegExp, each RegExp is associated
93a37866 361 // with a VM. Cache a pointer to out VM's m_regExpAllocator.
14957cd0
A
362 BumpPointerAllocator* m_allocator;
363
ba379fdc
A
364 CharacterClass* newlineCharacterClass;
365 CharacterClass* wordcharCharacterClass;
14957cd0 366
ba379fdc 367private:
81345200
A
368 Vector<OwnPtr<ByteDisjunction>> m_allParenthesesInfo;
369 Vector<OwnPtr<CharacterClass>> m_userCharacterClasses;
ba379fdc
A
370};
371
6fe7ccc8 372JS_EXPORT_PRIVATE PassOwnPtr<BytecodePattern> byteCompile(YarrPattern&, BumpPointerAllocator*);
93a37866 373JS_EXPORT_PRIVATE unsigned interpret(BytecodePattern*, const String& input, unsigned start, unsigned* output);
6fe7ccc8
A
374unsigned interpret(BytecodePattern*, const LChar* input, unsigned length, unsigned start, unsigned* output);
375unsigned interpret(BytecodePattern*, const UChar* input, unsigned length, unsigned start, unsigned* output);
376
ba379fdc
A
377} } // namespace JSC::Yarr
378
14957cd0 379#endif // YarrInterpreter_h