Commit | Line | Data |
---|---|---|
ba379fdc | 1 | /* |
14957cd0 | 2 | * Copyright (C) 2009, 2010 Apple Inc. All rights reserved. |
ba379fdc A |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without | |
5 | * modification, are permitted provided that the following conditions | |
6 | * are met: | |
7 | * 1. Redistributions of source code must retain the above copyright | |
8 | * notice, this list of conditions and the following disclaimer. | |
9 | * 2. Redistributions in binary form must reproduce the above copyright | |
10 | * notice, this list of conditions and the following disclaimer in the | |
11 | * documentation and/or other materials provided with the distribution. | |
12 | * | |
13 | * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY | |
14 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
15 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
16 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR | |
17 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
18 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
19 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
20 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
21 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
22 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
23 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
24 | */ | |
25 | ||
14957cd0 A |
26 | #ifndef YarrInterpreter_h |
27 | #define YarrInterpreter_h | |
ba379fdc | 28 | |
14957cd0 | 29 | #include "YarrPattern.h" |
ba379fdc | 30 | |
14957cd0 A |
31 | namespace WTF { |
32 | class BumpPointerAllocator; | |
33 | } | |
34 | using WTF::BumpPointerAllocator; | |
35 | ||
ba379fdc A |
36 | namespace JSC { namespace Yarr { |
37 | ||
38 | class ByteDisjunction; | |
39 | ||
40 | struct ByteTerm { | |
41 | enum Type { | |
42 | TypeBodyAlternativeBegin, | |
43 | TypeBodyAlternativeDisjunction, | |
44 | TypeBodyAlternativeEnd, | |
45 | TypeAlternativeBegin, | |
46 | TypeAlternativeDisjunction, | |
47 | TypeAlternativeEnd, | |
48 | TypeSubpatternBegin, | |
49 | TypeSubpatternEnd, | |
50 | TypeAssertionBOL, | |
51 | TypeAssertionEOL, | |
52 | TypeAssertionWordBoundary, | |
53 | TypePatternCharacterOnce, | |
54 | TypePatternCharacterFixed, | |
55 | TypePatternCharacterGreedy, | |
56 | TypePatternCharacterNonGreedy, | |
57 | TypePatternCasedCharacterOnce, | |
58 | TypePatternCasedCharacterFixed, | |
59 | TypePatternCasedCharacterGreedy, | |
60 | TypePatternCasedCharacterNonGreedy, | |
61 | TypeCharacterClass, | |
62 | TypeBackReference, | |
63 | TypeParenthesesSubpattern, | |
64 | TypeParenthesesSubpatternOnceBegin, | |
65 | TypeParenthesesSubpatternOnceEnd, | |
14957cd0 A |
66 | TypeParenthesesSubpatternTerminalBegin, |
67 | TypeParenthesesSubpatternTerminalEnd, | |
ba379fdc A |
68 | TypeParentheticalAssertionBegin, |
69 | TypeParentheticalAssertionEnd, | |
70 | TypeCheckInput, | |
14957cd0 A |
71 | TypeUncheckInput, |
72 | TypeDotStarEnclosure, | |
ba379fdc | 73 | } type; |
ba379fdc A |
74 | union { |
75 | struct { | |
76 | union { | |
77 | UChar patternCharacter; | |
78 | struct { | |
79 | UChar lo; | |
80 | UChar hi; | |
81 | } casedCharacter; | |
82 | CharacterClass* characterClass; | |
83 | unsigned subpatternId; | |
84 | }; | |
85 | union { | |
86 | ByteDisjunction* parenthesesDisjunction; | |
87 | unsigned parenthesesWidth; | |
88 | }; | |
89 | QuantifierType quantityType; | |
90 | unsigned quantityCount; | |
91 | } atom; | |
92 | struct { | |
93 | int next; | |
94 | int end; | |
14957cd0 | 95 | bool onceThrough; |
ba379fdc | 96 | } alternative; |
14957cd0 A |
97 | struct { |
98 | bool m_bol : 1; | |
99 | bool m_eol : 1; | |
100 | } anchors; | |
ba379fdc A |
101 | unsigned checkInputCount; |
102 | }; | |
103 | unsigned frameLocation; | |
14957cd0 A |
104 | bool m_capture : 1; |
105 | bool m_invert : 1; | |
6fe7ccc8 | 106 | unsigned inputPosition; |
ba379fdc | 107 | |
1df5f87f | 108 | ByteTerm(UChar ch, int inputPos, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType) |
ba379fdc | 109 | : frameLocation(frameLocation) |
14957cd0 A |
110 | , m_capture(false) |
111 | , m_invert(false) | |
ba379fdc A |
112 | { |
113 | switch (quantityType) { | |
114 | case QuantifierFixedCount: | |
115 | type = (quantityCount == 1) ? ByteTerm::TypePatternCharacterOnce : ByteTerm::TypePatternCharacterFixed; | |
116 | break; | |
117 | case QuantifierGreedy: | |
118 | type = ByteTerm::TypePatternCharacterGreedy; | |
119 | break; | |
120 | case QuantifierNonGreedy: | |
121 | type = ByteTerm::TypePatternCharacterNonGreedy; | |
122 | break; | |
123 | } | |
124 | ||
125 | atom.patternCharacter = ch; | |
126 | atom.quantityType = quantityType; | |
1df5f87f | 127 | atom.quantityCount = quantityCount.unsafeGet(); |
ba379fdc A |
128 | inputPosition = inputPos; |
129 | } | |
130 | ||
1df5f87f | 131 | ByteTerm(UChar lo, UChar hi, int inputPos, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType) |
ba379fdc | 132 | : frameLocation(frameLocation) |
14957cd0 A |
133 | , m_capture(false) |
134 | , m_invert(false) | |
ba379fdc A |
135 | { |
136 | switch (quantityType) { | |
137 | case QuantifierFixedCount: | |
138 | type = (quantityCount == 1) ? ByteTerm::TypePatternCasedCharacterOnce : ByteTerm::TypePatternCasedCharacterFixed; | |
139 | break; | |
140 | case QuantifierGreedy: | |
141 | type = ByteTerm::TypePatternCasedCharacterGreedy; | |
142 | break; | |
143 | case QuantifierNonGreedy: | |
144 | type = ByteTerm::TypePatternCasedCharacterNonGreedy; | |
145 | break; | |
146 | } | |
147 | ||
148 | atom.casedCharacter.lo = lo; | |
149 | atom.casedCharacter.hi = hi; | |
150 | atom.quantityType = quantityType; | |
1df5f87f | 151 | atom.quantityCount = quantityCount.unsafeGet(); |
ba379fdc A |
152 | inputPosition = inputPos; |
153 | } | |
154 | ||
155 | ByteTerm(CharacterClass* characterClass, bool invert, int inputPos) | |
156 | : type(ByteTerm::TypeCharacterClass) | |
14957cd0 A |
157 | , m_capture(false) |
158 | , m_invert(invert) | |
ba379fdc A |
159 | { |
160 | atom.characterClass = characterClass; | |
161 | atom.quantityType = QuantifierFixedCount; | |
162 | atom.quantityCount = 1; | |
163 | inputPosition = inputPos; | |
164 | } | |
165 | ||
14957cd0 | 166 | ByteTerm(Type type, unsigned subpatternId, ByteDisjunction* parenthesesInfo, bool capture, int inputPos) |
ba379fdc | 167 | : type(type) |
14957cd0 A |
168 | , m_capture(capture) |
169 | , m_invert(false) | |
ba379fdc A |
170 | { |
171 | atom.subpatternId = subpatternId; | |
172 | atom.parenthesesDisjunction = parenthesesInfo; | |
173 | atom.quantityType = QuantifierFixedCount; | |
174 | atom.quantityCount = 1; | |
175 | inputPosition = inputPos; | |
176 | } | |
177 | ||
178 | ByteTerm(Type type, bool invert = false) | |
179 | : type(type) | |
14957cd0 A |
180 | , m_capture(false) |
181 | , m_invert(invert) | |
ba379fdc A |
182 | { |
183 | atom.quantityType = QuantifierFixedCount; | |
184 | atom.quantityCount = 1; | |
185 | } | |
186 | ||
14957cd0 | 187 | ByteTerm(Type type, unsigned subpatternId, bool capture, bool invert, int inputPos) |
ba379fdc | 188 | : type(type) |
14957cd0 A |
189 | , m_capture(capture) |
190 | , m_invert(invert) | |
ba379fdc A |
191 | { |
192 | atom.subpatternId = subpatternId; | |
193 | atom.quantityType = QuantifierFixedCount; | |
194 | atom.quantityCount = 1; | |
195 | inputPosition = inputPos; | |
196 | } | |
197 | ||
198 | static ByteTerm BOL(int inputPos) | |
199 | { | |
200 | ByteTerm term(TypeAssertionBOL); | |
201 | term.inputPosition = inputPos; | |
202 | return term; | |
203 | } | |
204 | ||
1df5f87f | 205 | static ByteTerm CheckInput(Checked<unsigned> count) |
ba379fdc A |
206 | { |
207 | ByteTerm term(TypeCheckInput); | |
1df5f87f | 208 | term.checkInputCount = count.unsafeGet(); |
ba379fdc A |
209 | return term; |
210 | } | |
211 | ||
1df5f87f | 212 | static ByteTerm UncheckInput(Checked<unsigned> count) |
14957cd0 A |
213 | { |
214 | ByteTerm term(TypeUncheckInput); | |
1df5f87f | 215 | term.checkInputCount = count.unsafeGet(); |
14957cd0 A |
216 | return term; |
217 | } | |
218 | ||
ba379fdc A |
219 | static ByteTerm EOL(int inputPos) |
220 | { | |
221 | ByteTerm term(TypeAssertionEOL); | |
222 | term.inputPosition = inputPos; | |
223 | return term; | |
224 | } | |
225 | ||
226 | static ByteTerm WordBoundary(bool invert, int inputPos) | |
227 | { | |
228 | ByteTerm term(TypeAssertionWordBoundary, invert); | |
229 | term.inputPosition = inputPos; | |
230 | return term; | |
231 | } | |
232 | ||
233 | static ByteTerm BackReference(unsigned subpatternId, int inputPos) | |
234 | { | |
14957cd0 | 235 | return ByteTerm(TypeBackReference, subpatternId, false, false, inputPos); |
ba379fdc A |
236 | } |
237 | ||
14957cd0 | 238 | static ByteTerm BodyAlternativeBegin(bool onceThrough) |
ba379fdc A |
239 | { |
240 | ByteTerm term(TypeBodyAlternativeBegin); | |
241 | term.alternative.next = 0; | |
242 | term.alternative.end = 0; | |
14957cd0 | 243 | term.alternative.onceThrough = onceThrough; |
ba379fdc A |
244 | return term; |
245 | } | |
246 | ||
14957cd0 | 247 | static ByteTerm BodyAlternativeDisjunction(bool onceThrough) |
ba379fdc A |
248 | { |
249 | ByteTerm term(TypeBodyAlternativeDisjunction); | |
250 | term.alternative.next = 0; | |
251 | term.alternative.end = 0; | |
14957cd0 | 252 | term.alternative.onceThrough = onceThrough; |
ba379fdc A |
253 | return term; |
254 | } | |
255 | ||
256 | static ByteTerm BodyAlternativeEnd() | |
257 | { | |
258 | ByteTerm term(TypeBodyAlternativeEnd); | |
259 | term.alternative.next = 0; | |
260 | term.alternative.end = 0; | |
14957cd0 | 261 | term.alternative.onceThrough = false; |
ba379fdc A |
262 | return term; |
263 | } | |
264 | ||
265 | static ByteTerm AlternativeBegin() | |
266 | { | |
267 | ByteTerm term(TypeAlternativeBegin); | |
268 | term.alternative.next = 0; | |
269 | term.alternative.end = 0; | |
14957cd0 | 270 | term.alternative.onceThrough = false; |
ba379fdc A |
271 | return term; |
272 | } | |
273 | ||
274 | static ByteTerm AlternativeDisjunction() | |
275 | { | |
276 | ByteTerm term(TypeAlternativeDisjunction); | |
277 | term.alternative.next = 0; | |
278 | term.alternative.end = 0; | |
14957cd0 | 279 | term.alternative.onceThrough = false; |
ba379fdc A |
280 | return term; |
281 | } | |
282 | ||
283 | static ByteTerm AlternativeEnd() | |
284 | { | |
285 | ByteTerm term(TypeAlternativeEnd); | |
286 | term.alternative.next = 0; | |
287 | term.alternative.end = 0; | |
14957cd0 | 288 | term.alternative.onceThrough = false; |
ba379fdc A |
289 | return term; |
290 | } | |
291 | ||
292 | static ByteTerm SubpatternBegin() | |
293 | { | |
294 | return ByteTerm(TypeSubpatternBegin); | |
295 | } | |
296 | ||
297 | static ByteTerm SubpatternEnd() | |
298 | { | |
299 | return ByteTerm(TypeSubpatternEnd); | |
300 | } | |
14957cd0 A |
301 | |
302 | static ByteTerm DotStarEnclosure(bool bolAnchor, bool eolAnchor) | |
303 | { | |
304 | ByteTerm term(TypeDotStarEnclosure); | |
305 | term.anchors.m_bol = bolAnchor; | |
306 | term.anchors.m_eol = eolAnchor; | |
307 | return term; | |
308 | } | |
ba379fdc A |
309 | |
310 | bool invert() | |
311 | { | |
14957cd0 | 312 | return m_invert; |
ba379fdc A |
313 | } |
314 | ||
315 | bool capture() | |
316 | { | |
14957cd0 | 317 | return m_capture; |
ba379fdc A |
318 | } |
319 | }; | |
320 | ||
14957cd0 A |
321 | class ByteDisjunction { |
322 | WTF_MAKE_FAST_ALLOCATED; | |
ba379fdc A |
323 | public: |
324 | ByteDisjunction(unsigned numSubpatterns, unsigned frameSize) | |
325 | : m_numSubpatterns(numSubpatterns) | |
326 | , m_frameSize(frameSize) | |
327 | { | |
328 | } | |
329 | ||
330 | Vector<ByteTerm> terms; | |
331 | unsigned m_numSubpatterns; | |
332 | unsigned m_frameSize; | |
333 | }; | |
334 | ||
14957cd0 A |
335 | struct BytecodePattern { |
336 | WTF_MAKE_FAST_ALLOCATED; | |
337 | public: | |
ed1e77d3 A |
338 | BytecodePattern(std::unique_ptr<ByteDisjunction> body, Vector<std::unique_ptr<ByteDisjunction>>& parenthesesInfoToAdopt, YarrPattern& pattern, BumpPointerAllocator* allocator) |
339 | : m_body(WTF::move(body)) | |
ba379fdc A |
340 | , m_ignoreCase(pattern.m_ignoreCase) |
341 | , m_multiline(pattern.m_multiline) | |
14957cd0 | 342 | , m_allocator(allocator) |
ba379fdc | 343 | { |
93a37866 A |
344 | m_body->terms.shrinkToFit(); |
345 | ||
ba379fdc A |
346 | newlineCharacterClass = pattern.newlineCharacterClass(); |
347 | wordcharCharacterClass = pattern.wordcharCharacterClass(); | |
348 | ||
93a37866 A |
349 | m_allParenthesesInfo.swap(parenthesesInfoToAdopt); |
350 | m_allParenthesesInfo.shrinkToFit(); | |
ba379fdc | 351 | |
93a37866 A |
352 | m_userCharacterClasses.swap(pattern.m_userCharacterClasses); |
353 | m_userCharacterClasses.shrinkToFit(); | |
ba379fdc A |
354 | } |
355 | ||
ed1e77d3 | 356 | std::unique_ptr<ByteDisjunction> m_body; |
ba379fdc A |
357 | bool m_ignoreCase; |
358 | bool m_multiline; | |
14957cd0 | 359 | // Each BytecodePattern is associated with a RegExp, each RegExp is associated |
93a37866 | 360 | // with a VM. Cache a pointer to our VM's m_regExpAllocator.
14957cd0 A |
361 | BumpPointerAllocator* m_allocator; |
362 | ||
ba379fdc A |
363 | CharacterClass* newlineCharacterClass; |
364 | CharacterClass* wordcharCharacterClass; | |
14957cd0 | 365 | |
ba379fdc | 366 | private: |
ed1e77d3 A |
367 | Vector<std::unique_ptr<ByteDisjunction>> m_allParenthesesInfo; |
368 | Vector<std::unique_ptr<CharacterClass>> m_userCharacterClasses; | |
ba379fdc A |
369 | }; |
370 | ||
ed1e77d3 | 371 | JS_EXPORT_PRIVATE std::unique_ptr<BytecodePattern> byteCompile(YarrPattern&, BumpPointerAllocator*); |
93a37866 | 372 | JS_EXPORT_PRIVATE unsigned interpret(BytecodePattern*, const String& input, unsigned start, unsigned* output); |
6fe7ccc8 A |
373 | unsigned interpret(BytecodePattern*, const LChar* input, unsigned length, unsigned start, unsigned* output); |
374 | unsigned interpret(BytecodePattern*, const UChar* input, unsigned length, unsigned start, unsigned* output); | |
375 | ||
ba379fdc A |
376 | } } // namespace JSC::Yarr |
377 | ||
14957cd0 | 378 | #endif // YarrInterpreter_h |