]> git.saurik.com Git - apple/javascriptcore.git/blob - yarr/YarrInterpreter.h
JavaScriptCore-7601.1.46.3.tar.gz
[apple/javascriptcore.git] / yarr / YarrInterpreter.h
1 /*
2 * Copyright (C) 2009, 2010 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26 #ifndef YarrInterpreter_h
27 #define YarrInterpreter_h
28
29 #include "YarrPattern.h"
30
31 namespace WTF {
32 class BumpPointerAllocator;
33 }
34 using WTF::BumpPointerAllocator;
35
36 namespace JSC { namespace Yarr {
37
38 class ByteDisjunction;
39
40 struct ByteTerm {
41 enum Type {
42 TypeBodyAlternativeBegin,
43 TypeBodyAlternativeDisjunction,
44 TypeBodyAlternativeEnd,
45 TypeAlternativeBegin,
46 TypeAlternativeDisjunction,
47 TypeAlternativeEnd,
48 TypeSubpatternBegin,
49 TypeSubpatternEnd,
50 TypeAssertionBOL,
51 TypeAssertionEOL,
52 TypeAssertionWordBoundary,
53 TypePatternCharacterOnce,
54 TypePatternCharacterFixed,
55 TypePatternCharacterGreedy,
56 TypePatternCharacterNonGreedy,
57 TypePatternCasedCharacterOnce,
58 TypePatternCasedCharacterFixed,
59 TypePatternCasedCharacterGreedy,
60 TypePatternCasedCharacterNonGreedy,
61 TypeCharacterClass,
62 TypeBackReference,
63 TypeParenthesesSubpattern,
64 TypeParenthesesSubpatternOnceBegin,
65 TypeParenthesesSubpatternOnceEnd,
66 TypeParenthesesSubpatternTerminalBegin,
67 TypeParenthesesSubpatternTerminalEnd,
68 TypeParentheticalAssertionBegin,
69 TypeParentheticalAssertionEnd,
70 TypeCheckInput,
71 TypeUncheckInput,
72 TypeDotStarEnclosure,
73 } type;
74 union {
75 struct {
76 union {
77 UChar patternCharacter;
78 struct {
79 UChar lo;
80 UChar hi;
81 } casedCharacter;
82 CharacterClass* characterClass;
83 unsigned subpatternId;
84 };
85 union {
86 ByteDisjunction* parenthesesDisjunction;
87 unsigned parenthesesWidth;
88 };
89 QuantifierType quantityType;
90 unsigned quantityCount;
91 } atom;
92 struct {
93 int next;
94 int end;
95 bool onceThrough;
96 } alternative;
97 struct {
98 bool m_bol : 1;
99 bool m_eol : 1;
100 } anchors;
101 unsigned checkInputCount;
102 };
103 unsigned frameLocation;
104 bool m_capture : 1;
105 bool m_invert : 1;
106 unsigned inputPosition;
107
108 ByteTerm(UChar ch, int inputPos, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
109 : frameLocation(frameLocation)
110 , m_capture(false)
111 , m_invert(false)
112 {
113 switch (quantityType) {
114 case QuantifierFixedCount:
115 type = (quantityCount == 1) ? ByteTerm::TypePatternCharacterOnce : ByteTerm::TypePatternCharacterFixed;
116 break;
117 case QuantifierGreedy:
118 type = ByteTerm::TypePatternCharacterGreedy;
119 break;
120 case QuantifierNonGreedy:
121 type = ByteTerm::TypePatternCharacterNonGreedy;
122 break;
123 }
124
125 atom.patternCharacter = ch;
126 atom.quantityType = quantityType;
127 atom.quantityCount = quantityCount.unsafeGet();
128 inputPosition = inputPos;
129 }
130
131 ByteTerm(UChar lo, UChar hi, int inputPos, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
132 : frameLocation(frameLocation)
133 , m_capture(false)
134 , m_invert(false)
135 {
136 switch (quantityType) {
137 case QuantifierFixedCount:
138 type = (quantityCount == 1) ? ByteTerm::TypePatternCasedCharacterOnce : ByteTerm::TypePatternCasedCharacterFixed;
139 break;
140 case QuantifierGreedy:
141 type = ByteTerm::TypePatternCasedCharacterGreedy;
142 break;
143 case QuantifierNonGreedy:
144 type = ByteTerm::TypePatternCasedCharacterNonGreedy;
145 break;
146 }
147
148 atom.casedCharacter.lo = lo;
149 atom.casedCharacter.hi = hi;
150 atom.quantityType = quantityType;
151 atom.quantityCount = quantityCount.unsafeGet();
152 inputPosition = inputPos;
153 }
154
155 ByteTerm(CharacterClass* characterClass, bool invert, int inputPos)
156 : type(ByteTerm::TypeCharacterClass)
157 , m_capture(false)
158 , m_invert(invert)
159 {
160 atom.characterClass = characterClass;
161 atom.quantityType = QuantifierFixedCount;
162 atom.quantityCount = 1;
163 inputPosition = inputPos;
164 }
165
166 ByteTerm(Type type, unsigned subpatternId, ByteDisjunction* parenthesesInfo, bool capture, int inputPos)
167 : type(type)
168 , m_capture(capture)
169 , m_invert(false)
170 {
171 atom.subpatternId = subpatternId;
172 atom.parenthesesDisjunction = parenthesesInfo;
173 atom.quantityType = QuantifierFixedCount;
174 atom.quantityCount = 1;
175 inputPosition = inputPos;
176 }
177
178 ByteTerm(Type type, bool invert = false)
179 : type(type)
180 , m_capture(false)
181 , m_invert(invert)
182 {
183 atom.quantityType = QuantifierFixedCount;
184 atom.quantityCount = 1;
185 }
186
187 ByteTerm(Type type, unsigned subpatternId, bool capture, bool invert, int inputPos)
188 : type(type)
189 , m_capture(capture)
190 , m_invert(invert)
191 {
192 atom.subpatternId = subpatternId;
193 atom.quantityType = QuantifierFixedCount;
194 atom.quantityCount = 1;
195 inputPosition = inputPos;
196 }
197
198 static ByteTerm BOL(int inputPos)
199 {
200 ByteTerm term(TypeAssertionBOL);
201 term.inputPosition = inputPos;
202 return term;
203 }
204
205 static ByteTerm CheckInput(Checked<unsigned> count)
206 {
207 ByteTerm term(TypeCheckInput);
208 term.checkInputCount = count.unsafeGet();
209 return term;
210 }
211
212 static ByteTerm UncheckInput(Checked<unsigned> count)
213 {
214 ByteTerm term(TypeUncheckInput);
215 term.checkInputCount = count.unsafeGet();
216 return term;
217 }
218
219 static ByteTerm EOL(int inputPos)
220 {
221 ByteTerm term(TypeAssertionEOL);
222 term.inputPosition = inputPos;
223 return term;
224 }
225
226 static ByteTerm WordBoundary(bool invert, int inputPos)
227 {
228 ByteTerm term(TypeAssertionWordBoundary, invert);
229 term.inputPosition = inputPos;
230 return term;
231 }
232
233 static ByteTerm BackReference(unsigned subpatternId, int inputPos)
234 {
235 return ByteTerm(TypeBackReference, subpatternId, false, false, inputPos);
236 }
237
238 static ByteTerm BodyAlternativeBegin(bool onceThrough)
239 {
240 ByteTerm term(TypeBodyAlternativeBegin);
241 term.alternative.next = 0;
242 term.alternative.end = 0;
243 term.alternative.onceThrough = onceThrough;
244 return term;
245 }
246
247 static ByteTerm BodyAlternativeDisjunction(bool onceThrough)
248 {
249 ByteTerm term(TypeBodyAlternativeDisjunction);
250 term.alternative.next = 0;
251 term.alternative.end = 0;
252 term.alternative.onceThrough = onceThrough;
253 return term;
254 }
255
256 static ByteTerm BodyAlternativeEnd()
257 {
258 ByteTerm term(TypeBodyAlternativeEnd);
259 term.alternative.next = 0;
260 term.alternative.end = 0;
261 term.alternative.onceThrough = false;
262 return term;
263 }
264
265 static ByteTerm AlternativeBegin()
266 {
267 ByteTerm term(TypeAlternativeBegin);
268 term.alternative.next = 0;
269 term.alternative.end = 0;
270 term.alternative.onceThrough = false;
271 return term;
272 }
273
274 static ByteTerm AlternativeDisjunction()
275 {
276 ByteTerm term(TypeAlternativeDisjunction);
277 term.alternative.next = 0;
278 term.alternative.end = 0;
279 term.alternative.onceThrough = false;
280 return term;
281 }
282
283 static ByteTerm AlternativeEnd()
284 {
285 ByteTerm term(TypeAlternativeEnd);
286 term.alternative.next = 0;
287 term.alternative.end = 0;
288 term.alternative.onceThrough = false;
289 return term;
290 }
291
292 static ByteTerm SubpatternBegin()
293 {
294 return ByteTerm(TypeSubpatternBegin);
295 }
296
297 static ByteTerm SubpatternEnd()
298 {
299 return ByteTerm(TypeSubpatternEnd);
300 }
301
302 static ByteTerm DotStarEnclosure(bool bolAnchor, bool eolAnchor)
303 {
304 ByteTerm term(TypeDotStarEnclosure);
305 term.anchors.m_bol = bolAnchor;
306 term.anchors.m_eol = eolAnchor;
307 return term;
308 }
309
310 bool invert()
311 {
312 return m_invert;
313 }
314
315 bool capture()
316 {
317 return m_capture;
318 }
319 };
320
321 class ByteDisjunction {
322 WTF_MAKE_FAST_ALLOCATED;
323 public:
324 ByteDisjunction(unsigned numSubpatterns, unsigned frameSize)
325 : m_numSubpatterns(numSubpatterns)
326 , m_frameSize(frameSize)
327 {
328 }
329
330 Vector<ByteTerm> terms;
331 unsigned m_numSubpatterns;
332 unsigned m_frameSize;
333 };
334
335 struct BytecodePattern {
336 WTF_MAKE_FAST_ALLOCATED;
337 public:
338 BytecodePattern(std::unique_ptr<ByteDisjunction> body, Vector<std::unique_ptr<ByteDisjunction>>& parenthesesInfoToAdopt, YarrPattern& pattern, BumpPointerAllocator* allocator)
339 : m_body(WTF::move(body))
340 , m_ignoreCase(pattern.m_ignoreCase)
341 , m_multiline(pattern.m_multiline)
342 , m_allocator(allocator)
343 {
344 m_body->terms.shrinkToFit();
345
346 newlineCharacterClass = pattern.newlineCharacterClass();
347 wordcharCharacterClass = pattern.wordcharCharacterClass();
348
349 m_allParenthesesInfo.swap(parenthesesInfoToAdopt);
350 m_allParenthesesInfo.shrinkToFit();
351
352 m_userCharacterClasses.swap(pattern.m_userCharacterClasses);
353 m_userCharacterClasses.shrinkToFit();
354 }
355
356 std::unique_ptr<ByteDisjunction> m_body;
357 bool m_ignoreCase;
358 bool m_multiline;
359 // Each BytecodePattern is associated with a RegExp, each RegExp is associated
360 // with a VM. Cache a pointer to out VM's m_regExpAllocator.
361 BumpPointerAllocator* m_allocator;
362
363 CharacterClass* newlineCharacterClass;
364 CharacterClass* wordcharCharacterClass;
365
366 private:
367 Vector<std::unique_ptr<ByteDisjunction>> m_allParenthesesInfo;
368 Vector<std::unique_ptr<CharacterClass>> m_userCharacterClasses;
369 };
370
371 JS_EXPORT_PRIVATE std::unique_ptr<BytecodePattern> byteCompile(YarrPattern&, BumpPointerAllocator*);
372 JS_EXPORT_PRIVATE unsigned interpret(BytecodePattern*, const String& input, unsigned start, unsigned* output);
373 unsigned interpret(BytecodePattern*, const LChar* input, unsigned length, unsigned start, unsigned* output);
374 unsigned interpret(BytecodePattern*, const UChar* input, unsigned length, unsigned start, unsigned* output);
375
376 } } // namespace JSC::Yarr
377
378 #endif // YarrInterpreter_h