]> git.saurik.com Git - apple/javascriptcore.git/blame - yarr/YarrInterpreter.h
JavaScriptCore-7601.1.46.3.tar.gz
[apple/javascriptcore.git] / yarr / YarrInterpreter.h
CommitLineData
ba379fdc 1/*
14957cd0 2 * Copyright (C) 2009, 2010 Apple Inc. All rights reserved.
ba379fdc
A
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
14957cd0
A
26#ifndef YarrInterpreter_h
27#define YarrInterpreter_h
ba379fdc 28
14957cd0 29#include "YarrPattern.h"
ba379fdc 30
14957cd0
A
31namespace WTF {
32class BumpPointerAllocator;
33}
34using WTF::BumpPointerAllocator;
35
ba379fdc
A
36namespace JSC { namespace Yarr {
37
38class ByteDisjunction;
39
40struct ByteTerm {
41 enum Type {
42 TypeBodyAlternativeBegin,
43 TypeBodyAlternativeDisjunction,
44 TypeBodyAlternativeEnd,
45 TypeAlternativeBegin,
46 TypeAlternativeDisjunction,
47 TypeAlternativeEnd,
48 TypeSubpatternBegin,
49 TypeSubpatternEnd,
50 TypeAssertionBOL,
51 TypeAssertionEOL,
52 TypeAssertionWordBoundary,
53 TypePatternCharacterOnce,
54 TypePatternCharacterFixed,
55 TypePatternCharacterGreedy,
56 TypePatternCharacterNonGreedy,
57 TypePatternCasedCharacterOnce,
58 TypePatternCasedCharacterFixed,
59 TypePatternCasedCharacterGreedy,
60 TypePatternCasedCharacterNonGreedy,
61 TypeCharacterClass,
62 TypeBackReference,
63 TypeParenthesesSubpattern,
64 TypeParenthesesSubpatternOnceBegin,
65 TypeParenthesesSubpatternOnceEnd,
14957cd0
A
66 TypeParenthesesSubpatternTerminalBegin,
67 TypeParenthesesSubpatternTerminalEnd,
ba379fdc
A
68 TypeParentheticalAssertionBegin,
69 TypeParentheticalAssertionEnd,
70 TypeCheckInput,
14957cd0
A
71 TypeUncheckInput,
72 TypeDotStarEnclosure,
ba379fdc 73 } type;
ba379fdc
A
74 union {
75 struct {
76 union {
77 UChar patternCharacter;
78 struct {
79 UChar lo;
80 UChar hi;
81 } casedCharacter;
82 CharacterClass* characterClass;
83 unsigned subpatternId;
84 };
85 union {
86 ByteDisjunction* parenthesesDisjunction;
87 unsigned parenthesesWidth;
88 };
89 QuantifierType quantityType;
90 unsigned quantityCount;
91 } atom;
92 struct {
93 int next;
94 int end;
14957cd0 95 bool onceThrough;
ba379fdc 96 } alternative;
14957cd0
A
97 struct {
98 bool m_bol : 1;
99 bool m_eol : 1;
100 } anchors;
ba379fdc
A
101 unsigned checkInputCount;
102 };
103 unsigned frameLocation;
14957cd0
A
104 bool m_capture : 1;
105 bool m_invert : 1;
6fe7ccc8 106 unsigned inputPosition;
ba379fdc 107
1df5f87f 108 ByteTerm(UChar ch, int inputPos, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
ba379fdc 109 : frameLocation(frameLocation)
14957cd0
A
110 , m_capture(false)
111 , m_invert(false)
ba379fdc
A
112 {
113 switch (quantityType) {
114 case QuantifierFixedCount:
115 type = (quantityCount == 1) ? ByteTerm::TypePatternCharacterOnce : ByteTerm::TypePatternCharacterFixed;
116 break;
117 case QuantifierGreedy:
118 type = ByteTerm::TypePatternCharacterGreedy;
119 break;
120 case QuantifierNonGreedy:
121 type = ByteTerm::TypePatternCharacterNonGreedy;
122 break;
123 }
124
125 atom.patternCharacter = ch;
126 atom.quantityType = quantityType;
1df5f87f 127 atom.quantityCount = quantityCount.unsafeGet();
ba379fdc
A
128 inputPosition = inputPos;
129 }
130
1df5f87f 131 ByteTerm(UChar lo, UChar hi, int inputPos, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
ba379fdc 132 : frameLocation(frameLocation)
14957cd0
A
133 , m_capture(false)
134 , m_invert(false)
ba379fdc
A
135 {
136 switch (quantityType) {
137 case QuantifierFixedCount:
138 type = (quantityCount == 1) ? ByteTerm::TypePatternCasedCharacterOnce : ByteTerm::TypePatternCasedCharacterFixed;
139 break;
140 case QuantifierGreedy:
141 type = ByteTerm::TypePatternCasedCharacterGreedy;
142 break;
143 case QuantifierNonGreedy:
144 type = ByteTerm::TypePatternCasedCharacterNonGreedy;
145 break;
146 }
147
148 atom.casedCharacter.lo = lo;
149 atom.casedCharacter.hi = hi;
150 atom.quantityType = quantityType;
1df5f87f 151 atom.quantityCount = quantityCount.unsafeGet();
ba379fdc
A
152 inputPosition = inputPos;
153 }
154
155 ByteTerm(CharacterClass* characterClass, bool invert, int inputPos)
156 : type(ByteTerm::TypeCharacterClass)
14957cd0
A
157 , m_capture(false)
158 , m_invert(invert)
ba379fdc
A
159 {
160 atom.characterClass = characterClass;
161 atom.quantityType = QuantifierFixedCount;
162 atom.quantityCount = 1;
163 inputPosition = inputPos;
164 }
165
14957cd0 166 ByteTerm(Type type, unsigned subpatternId, ByteDisjunction* parenthesesInfo, bool capture, int inputPos)
ba379fdc 167 : type(type)
14957cd0
A
168 , m_capture(capture)
169 , m_invert(false)
ba379fdc
A
170 {
171 atom.subpatternId = subpatternId;
172 atom.parenthesesDisjunction = parenthesesInfo;
173 atom.quantityType = QuantifierFixedCount;
174 atom.quantityCount = 1;
175 inputPosition = inputPos;
176 }
177
178 ByteTerm(Type type, bool invert = false)
179 : type(type)
14957cd0
A
180 , m_capture(false)
181 , m_invert(invert)
ba379fdc
A
182 {
183 atom.quantityType = QuantifierFixedCount;
184 atom.quantityCount = 1;
185 }
186
14957cd0 187 ByteTerm(Type type, unsigned subpatternId, bool capture, bool invert, int inputPos)
ba379fdc 188 : type(type)
14957cd0
A
189 , m_capture(capture)
190 , m_invert(invert)
ba379fdc
A
191 {
192 atom.subpatternId = subpatternId;
193 atom.quantityType = QuantifierFixedCount;
194 atom.quantityCount = 1;
195 inputPosition = inputPos;
196 }
197
198 static ByteTerm BOL(int inputPos)
199 {
200 ByteTerm term(TypeAssertionBOL);
201 term.inputPosition = inputPos;
202 return term;
203 }
204
1df5f87f 205 static ByteTerm CheckInput(Checked<unsigned> count)
ba379fdc
A
206 {
207 ByteTerm term(TypeCheckInput);
1df5f87f 208 term.checkInputCount = count.unsafeGet();
ba379fdc
A
209 return term;
210 }
211
1df5f87f 212 static ByteTerm UncheckInput(Checked<unsigned> count)
14957cd0
A
213 {
214 ByteTerm term(TypeUncheckInput);
1df5f87f 215 term.checkInputCount = count.unsafeGet();
14957cd0
A
216 return term;
217 }
218
ba379fdc
A
219 static ByteTerm EOL(int inputPos)
220 {
221 ByteTerm term(TypeAssertionEOL);
222 term.inputPosition = inputPos;
223 return term;
224 }
225
226 static ByteTerm WordBoundary(bool invert, int inputPos)
227 {
228 ByteTerm term(TypeAssertionWordBoundary, invert);
229 term.inputPosition = inputPos;
230 return term;
231 }
232
233 static ByteTerm BackReference(unsigned subpatternId, int inputPos)
234 {
14957cd0 235 return ByteTerm(TypeBackReference, subpatternId, false, false, inputPos);
ba379fdc
A
236 }
237
14957cd0 238 static ByteTerm BodyAlternativeBegin(bool onceThrough)
ba379fdc
A
239 {
240 ByteTerm term(TypeBodyAlternativeBegin);
241 term.alternative.next = 0;
242 term.alternative.end = 0;
14957cd0 243 term.alternative.onceThrough = onceThrough;
ba379fdc
A
244 return term;
245 }
246
14957cd0 247 static ByteTerm BodyAlternativeDisjunction(bool onceThrough)
ba379fdc
A
248 {
249 ByteTerm term(TypeBodyAlternativeDisjunction);
250 term.alternative.next = 0;
251 term.alternative.end = 0;
14957cd0 252 term.alternative.onceThrough = onceThrough;
ba379fdc
A
253 return term;
254 }
255
256 static ByteTerm BodyAlternativeEnd()
257 {
258 ByteTerm term(TypeBodyAlternativeEnd);
259 term.alternative.next = 0;
260 term.alternative.end = 0;
14957cd0 261 term.alternative.onceThrough = false;
ba379fdc
A
262 return term;
263 }
264
265 static ByteTerm AlternativeBegin()
266 {
267 ByteTerm term(TypeAlternativeBegin);
268 term.alternative.next = 0;
269 term.alternative.end = 0;
14957cd0 270 term.alternative.onceThrough = false;
ba379fdc
A
271 return term;
272 }
273
274 static ByteTerm AlternativeDisjunction()
275 {
276 ByteTerm term(TypeAlternativeDisjunction);
277 term.alternative.next = 0;
278 term.alternative.end = 0;
14957cd0 279 term.alternative.onceThrough = false;
ba379fdc
A
280 return term;
281 }
282
283 static ByteTerm AlternativeEnd()
284 {
285 ByteTerm term(TypeAlternativeEnd);
286 term.alternative.next = 0;
287 term.alternative.end = 0;
14957cd0 288 term.alternative.onceThrough = false;
ba379fdc
A
289 return term;
290 }
291
292 static ByteTerm SubpatternBegin()
293 {
294 return ByteTerm(TypeSubpatternBegin);
295 }
296
297 static ByteTerm SubpatternEnd()
298 {
299 return ByteTerm(TypeSubpatternEnd);
300 }
14957cd0
A
301
302 static ByteTerm DotStarEnclosure(bool bolAnchor, bool eolAnchor)
303 {
304 ByteTerm term(TypeDotStarEnclosure);
305 term.anchors.m_bol = bolAnchor;
306 term.anchors.m_eol = eolAnchor;
307 return term;
308 }
ba379fdc
A
309
310 bool invert()
311 {
14957cd0 312 return m_invert;
ba379fdc
A
313 }
314
315 bool capture()
316 {
14957cd0 317 return m_capture;
ba379fdc
A
318 }
319};
320
14957cd0
A
321class ByteDisjunction {
322 WTF_MAKE_FAST_ALLOCATED;
ba379fdc
A
323public:
324 ByteDisjunction(unsigned numSubpatterns, unsigned frameSize)
325 : m_numSubpatterns(numSubpatterns)
326 , m_frameSize(frameSize)
327 {
328 }
329
330 Vector<ByteTerm> terms;
331 unsigned m_numSubpatterns;
332 unsigned m_frameSize;
333};
334
14957cd0
A
335struct BytecodePattern {
336 WTF_MAKE_FAST_ALLOCATED;
337public:
ed1e77d3
A
338 BytecodePattern(std::unique_ptr<ByteDisjunction> body, Vector<std::unique_ptr<ByteDisjunction>>& parenthesesInfoToAdopt, YarrPattern& pattern, BumpPointerAllocator* allocator)
339 : m_body(WTF::move(body))
ba379fdc
A
340 , m_ignoreCase(pattern.m_ignoreCase)
341 , m_multiline(pattern.m_multiline)
14957cd0 342 , m_allocator(allocator)
ba379fdc 343 {
93a37866
A
344 m_body->terms.shrinkToFit();
345
ba379fdc
A
346 newlineCharacterClass = pattern.newlineCharacterClass();
347 wordcharCharacterClass = pattern.wordcharCharacterClass();
348
93a37866
A
349 m_allParenthesesInfo.swap(parenthesesInfoToAdopt);
350 m_allParenthesesInfo.shrinkToFit();
ba379fdc 351
93a37866
A
352 m_userCharacterClasses.swap(pattern.m_userCharacterClasses);
353 m_userCharacterClasses.shrinkToFit();
ba379fdc
A
354 }
355
ed1e77d3 356 std::unique_ptr<ByteDisjunction> m_body;
ba379fdc
A
357 bool m_ignoreCase;
358 bool m_multiline;
14957cd0 359 // Each BytecodePattern is associated with a RegExp, each RegExp is associated
93a37866 360 // with a VM. Cache a pointer to out VM's m_regExpAllocator.
14957cd0
A
361 BumpPointerAllocator* m_allocator;
362
ba379fdc
A
363 CharacterClass* newlineCharacterClass;
364 CharacterClass* wordcharCharacterClass;
14957cd0 365
ba379fdc 366private:
ed1e77d3
A
367 Vector<std::unique_ptr<ByteDisjunction>> m_allParenthesesInfo;
368 Vector<std::unique_ptr<CharacterClass>> m_userCharacterClasses;
ba379fdc
A
369};
370
// Entry points of the byte-code interpreter. Only the declarations live in
// this header; the definitions are elsewhere.
//
// byteCompile: produces a heap-allocated BytecodePattern from a YarrPattern.
// NOTE(review): the BumpPointerAllocator* is presumably the one that ends up
// in BytecodePattern::m_allocator - confirm in the implementation.
ed1e77d3 371JS_EXPORT_PRIVATE std::unique_ptr<BytecodePattern> byteCompile(YarrPattern&, BumpPointerAllocator*);
// interpret: three overloads that differ only in how the input is supplied
// (a String, or an LChar / UChar buffer with an explicit length). The meaning
// of the unsigned return value and the layout written to `output` are not
// visible from this header - TODO confirm against the implementation.
93a37866 372JS_EXPORT_PRIVATE unsigned interpret(BytecodePattern*, const String& input, unsigned start, unsigned* output);
6fe7ccc8
A
373unsigned interpret(BytecodePattern*, const LChar* input, unsigned length, unsigned start, unsigned* output);
374unsigned interpret(BytecodePattern*, const UChar* input, unsigned length, unsigned start, unsigned* output);
375
ba379fdc
A
376} } // namespace JSC::Yarr
377
14957cd0 378#endif // YarrInterpreter_h