]> git.saurik.com Git - apple/javascriptcore.git/blame - yarr/YarrInterpreter.h
JavaScriptCore-1097.13.tar.gz
[apple/javascriptcore.git] / yarr / YarrInterpreter.h
CommitLineData
ba379fdc 1/*
14957cd0 2 * Copyright (C) 2009, 2010 Apple Inc. All rights reserved.
ba379fdc
A
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
14957cd0
A
26#ifndef YarrInterpreter_h
27#define YarrInterpreter_h
ba379fdc 28
14957cd0
A
29#include "YarrPattern.h"
30#include <wtf/PassOwnPtr.h>
4e4e5a6f 31#include <wtf/unicode/Unicode.h>
ba379fdc 32
14957cd0
A
33namespace WTF {
34class BumpPointerAllocator;
35}
36using WTF::BumpPointerAllocator;
37
ba379fdc
A
38namespace JSC { namespace Yarr {
39
40class ByteDisjunction;
41
42struct ByteTerm {
43 enum Type {
44 TypeBodyAlternativeBegin,
45 TypeBodyAlternativeDisjunction,
46 TypeBodyAlternativeEnd,
47 TypeAlternativeBegin,
48 TypeAlternativeDisjunction,
49 TypeAlternativeEnd,
50 TypeSubpatternBegin,
51 TypeSubpatternEnd,
52 TypeAssertionBOL,
53 TypeAssertionEOL,
54 TypeAssertionWordBoundary,
55 TypePatternCharacterOnce,
56 TypePatternCharacterFixed,
57 TypePatternCharacterGreedy,
58 TypePatternCharacterNonGreedy,
59 TypePatternCasedCharacterOnce,
60 TypePatternCasedCharacterFixed,
61 TypePatternCasedCharacterGreedy,
62 TypePatternCasedCharacterNonGreedy,
63 TypeCharacterClass,
64 TypeBackReference,
65 TypeParenthesesSubpattern,
66 TypeParenthesesSubpatternOnceBegin,
67 TypeParenthesesSubpatternOnceEnd,
14957cd0
A
68 TypeParenthesesSubpatternTerminalBegin,
69 TypeParenthesesSubpatternTerminalEnd,
ba379fdc
A
70 TypeParentheticalAssertionBegin,
71 TypeParentheticalAssertionEnd,
72 TypeCheckInput,
14957cd0
A
73 TypeUncheckInput,
74 TypeDotStarEnclosure,
ba379fdc 75 } type;
ba379fdc
A
76 union {
77 struct {
78 union {
79 UChar patternCharacter;
80 struct {
81 UChar lo;
82 UChar hi;
83 } casedCharacter;
84 CharacterClass* characterClass;
85 unsigned subpatternId;
86 };
87 union {
88 ByteDisjunction* parenthesesDisjunction;
89 unsigned parenthesesWidth;
90 };
91 QuantifierType quantityType;
92 unsigned quantityCount;
93 } atom;
94 struct {
95 int next;
96 int end;
14957cd0 97 bool onceThrough;
ba379fdc 98 } alternative;
14957cd0
A
99 struct {
100 bool m_bol : 1;
101 bool m_eol : 1;
102 } anchors;
ba379fdc
A
103 unsigned checkInputCount;
104 };
105 unsigned frameLocation;
14957cd0
A
106 bool m_capture : 1;
107 bool m_invert : 1;
6fe7ccc8 108 unsigned inputPosition;
ba379fdc 109
1df5f87f 110 ByteTerm(UChar ch, int inputPos, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
ba379fdc 111 : frameLocation(frameLocation)
14957cd0
A
112 , m_capture(false)
113 , m_invert(false)
ba379fdc
A
114 {
115 switch (quantityType) {
116 case QuantifierFixedCount:
117 type = (quantityCount == 1) ? ByteTerm::TypePatternCharacterOnce : ByteTerm::TypePatternCharacterFixed;
118 break;
119 case QuantifierGreedy:
120 type = ByteTerm::TypePatternCharacterGreedy;
121 break;
122 case QuantifierNonGreedy:
123 type = ByteTerm::TypePatternCharacterNonGreedy;
124 break;
125 }
126
127 atom.patternCharacter = ch;
128 atom.quantityType = quantityType;
1df5f87f 129 atom.quantityCount = quantityCount.unsafeGet();
ba379fdc
A
130 inputPosition = inputPos;
131 }
132
1df5f87f 133 ByteTerm(UChar lo, UChar hi, int inputPos, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
ba379fdc 134 : frameLocation(frameLocation)
14957cd0
A
135 , m_capture(false)
136 , m_invert(false)
ba379fdc
A
137 {
138 switch (quantityType) {
139 case QuantifierFixedCount:
140 type = (quantityCount == 1) ? ByteTerm::TypePatternCasedCharacterOnce : ByteTerm::TypePatternCasedCharacterFixed;
141 break;
142 case QuantifierGreedy:
143 type = ByteTerm::TypePatternCasedCharacterGreedy;
144 break;
145 case QuantifierNonGreedy:
146 type = ByteTerm::TypePatternCasedCharacterNonGreedy;
147 break;
148 }
149
150 atom.casedCharacter.lo = lo;
151 atom.casedCharacter.hi = hi;
152 atom.quantityType = quantityType;
1df5f87f 153 atom.quantityCount = quantityCount.unsafeGet();
ba379fdc
A
154 inputPosition = inputPos;
155 }
156
157 ByteTerm(CharacterClass* characterClass, bool invert, int inputPos)
158 : type(ByteTerm::TypeCharacterClass)
14957cd0
A
159 , m_capture(false)
160 , m_invert(invert)
ba379fdc
A
161 {
162 atom.characterClass = characterClass;
163 atom.quantityType = QuantifierFixedCount;
164 atom.quantityCount = 1;
165 inputPosition = inputPos;
166 }
167
14957cd0 168 ByteTerm(Type type, unsigned subpatternId, ByteDisjunction* parenthesesInfo, bool capture, int inputPos)
ba379fdc 169 : type(type)
14957cd0
A
170 , m_capture(capture)
171 , m_invert(false)
ba379fdc
A
172 {
173 atom.subpatternId = subpatternId;
174 atom.parenthesesDisjunction = parenthesesInfo;
175 atom.quantityType = QuantifierFixedCount;
176 atom.quantityCount = 1;
177 inputPosition = inputPos;
178 }
179
180 ByteTerm(Type type, bool invert = false)
181 : type(type)
14957cd0
A
182 , m_capture(false)
183 , m_invert(invert)
ba379fdc
A
184 {
185 atom.quantityType = QuantifierFixedCount;
186 atom.quantityCount = 1;
187 }
188
14957cd0 189 ByteTerm(Type type, unsigned subpatternId, bool capture, bool invert, int inputPos)
ba379fdc 190 : type(type)
14957cd0
A
191 , m_capture(capture)
192 , m_invert(invert)
ba379fdc
A
193 {
194 atom.subpatternId = subpatternId;
195 atom.quantityType = QuantifierFixedCount;
196 atom.quantityCount = 1;
197 inputPosition = inputPos;
198 }
199
200 static ByteTerm BOL(int inputPos)
201 {
202 ByteTerm term(TypeAssertionBOL);
203 term.inputPosition = inputPos;
204 return term;
205 }
206
1df5f87f 207 static ByteTerm CheckInput(Checked<unsigned> count)
ba379fdc
A
208 {
209 ByteTerm term(TypeCheckInput);
1df5f87f 210 term.checkInputCount = count.unsafeGet();
ba379fdc
A
211 return term;
212 }
213
1df5f87f 214 static ByteTerm UncheckInput(Checked<unsigned> count)
14957cd0
A
215 {
216 ByteTerm term(TypeUncheckInput);
1df5f87f 217 term.checkInputCount = count.unsafeGet();
14957cd0
A
218 return term;
219 }
220
ba379fdc
A
221 static ByteTerm EOL(int inputPos)
222 {
223 ByteTerm term(TypeAssertionEOL);
224 term.inputPosition = inputPos;
225 return term;
226 }
227
228 static ByteTerm WordBoundary(bool invert, int inputPos)
229 {
230 ByteTerm term(TypeAssertionWordBoundary, invert);
231 term.inputPosition = inputPos;
232 return term;
233 }
234
235 static ByteTerm BackReference(unsigned subpatternId, int inputPos)
236 {
14957cd0 237 return ByteTerm(TypeBackReference, subpatternId, false, false, inputPos);
ba379fdc
A
238 }
239
14957cd0 240 static ByteTerm BodyAlternativeBegin(bool onceThrough)
ba379fdc
A
241 {
242 ByteTerm term(TypeBodyAlternativeBegin);
243 term.alternative.next = 0;
244 term.alternative.end = 0;
14957cd0 245 term.alternative.onceThrough = onceThrough;
ba379fdc
A
246 return term;
247 }
248
14957cd0 249 static ByteTerm BodyAlternativeDisjunction(bool onceThrough)
ba379fdc
A
250 {
251 ByteTerm term(TypeBodyAlternativeDisjunction);
252 term.alternative.next = 0;
253 term.alternative.end = 0;
14957cd0 254 term.alternative.onceThrough = onceThrough;
ba379fdc
A
255 return term;
256 }
257
258 static ByteTerm BodyAlternativeEnd()
259 {
260 ByteTerm term(TypeBodyAlternativeEnd);
261 term.alternative.next = 0;
262 term.alternative.end = 0;
14957cd0 263 term.alternative.onceThrough = false;
ba379fdc
A
264 return term;
265 }
266
267 static ByteTerm AlternativeBegin()
268 {
269 ByteTerm term(TypeAlternativeBegin);
270 term.alternative.next = 0;
271 term.alternative.end = 0;
14957cd0 272 term.alternative.onceThrough = false;
ba379fdc
A
273 return term;
274 }
275
276 static ByteTerm AlternativeDisjunction()
277 {
278 ByteTerm term(TypeAlternativeDisjunction);
279 term.alternative.next = 0;
280 term.alternative.end = 0;
14957cd0 281 term.alternative.onceThrough = false;
ba379fdc
A
282 return term;
283 }
284
285 static ByteTerm AlternativeEnd()
286 {
287 ByteTerm term(TypeAlternativeEnd);
288 term.alternative.next = 0;
289 term.alternative.end = 0;
14957cd0 290 term.alternative.onceThrough = false;
ba379fdc
A
291 return term;
292 }
293
294 static ByteTerm SubpatternBegin()
295 {
296 return ByteTerm(TypeSubpatternBegin);
297 }
298
299 static ByteTerm SubpatternEnd()
300 {
301 return ByteTerm(TypeSubpatternEnd);
302 }
14957cd0
A
303
304 static ByteTerm DotStarEnclosure(bool bolAnchor, bool eolAnchor)
305 {
306 ByteTerm term(TypeDotStarEnclosure);
307 term.anchors.m_bol = bolAnchor;
308 term.anchors.m_eol = eolAnchor;
309 return term;
310 }
ba379fdc
A
311
312 bool invert()
313 {
14957cd0 314 return m_invert;
ba379fdc
A
315 }
316
317 bool capture()
318 {
14957cd0 319 return m_capture;
ba379fdc
A
320 }
321};
322
14957cd0
A
323class ByteDisjunction {
324 WTF_MAKE_FAST_ALLOCATED;
ba379fdc
A
325public:
326 ByteDisjunction(unsigned numSubpatterns, unsigned frameSize)
327 : m_numSubpatterns(numSubpatterns)
328 , m_frameSize(frameSize)
329 {
330 }
331
332 Vector<ByteTerm> terms;
333 unsigned m_numSubpatterns;
334 unsigned m_frameSize;
335};
336
14957cd0
A
337struct BytecodePattern {
338 WTF_MAKE_FAST_ALLOCATED;
339public:
340 BytecodePattern(PassOwnPtr<ByteDisjunction> body, Vector<ByteDisjunction*> allParenthesesInfo, YarrPattern& pattern, BumpPointerAllocator* allocator)
ba379fdc
A
341 : m_body(body)
342 , m_ignoreCase(pattern.m_ignoreCase)
343 , m_multiline(pattern.m_multiline)
14957cd0 344 , m_allocator(allocator)
ba379fdc
A
345 {
346 newlineCharacterClass = pattern.newlineCharacterClass();
347 wordcharCharacterClass = pattern.wordcharCharacterClass();
348
349 m_allParenthesesInfo.append(allParenthesesInfo);
350 m_userCharacterClasses.append(pattern.m_userCharacterClasses);
14957cd0 351 // 'Steal' the YarrPattern's CharacterClasses! We clear its
ba379fdc
A
352 // array, so that it won't delete them on destruction. We'll
353 // take responsibility for that.
354 pattern.m_userCharacterClasses.clear();
355 }
356
357 ~BytecodePattern()
358 {
359 deleteAllValues(m_allParenthesesInfo);
360 deleteAllValues(m_userCharacterClasses);
361 }
362
363 OwnPtr<ByteDisjunction> m_body;
364 bool m_ignoreCase;
365 bool m_multiline;
14957cd0
A
366 // Each BytecodePattern is associated with a RegExp, each RegExp is associated
367 // with a JSGlobalData. Cache a pointer to out JSGlobalData's m_regExpAllocator.
368 BumpPointerAllocator* m_allocator;
369
ba379fdc
A
370 CharacterClass* newlineCharacterClass;
371 CharacterClass* wordcharCharacterClass;
14957cd0 372
ba379fdc
A
373private:
374 Vector<ByteDisjunction*> m_allParenthesesInfo;
375 Vector<CharacterClass*> m_userCharacterClasses;
376};
377
6fe7ccc8
A
378JS_EXPORT_PRIVATE PassOwnPtr<BytecodePattern> byteCompile(YarrPattern&, BumpPointerAllocator*);
379JS_EXPORT_PRIVATE unsigned interpret(BytecodePattern*, const UString& input, unsigned start, unsigned* output);
380unsigned interpret(BytecodePattern*, const LChar* input, unsigned length, unsigned start, unsigned* output);
381unsigned interpret(BytecodePattern*, const UChar* input, unsigned length, unsigned start, unsigned* output);
382
ba379fdc
A
383} } // namespace JSC::Yarr
384
14957cd0 385#endif // YarrInterpreter_h