]>
Commit | Line | Data |
---|---|---|
14957cd0 A |
1 | /* |
2 | * Copyright (C) 2009 Apple Inc. All rights reserved. | |
3 | * | |
4 | * Redistribution and use in source and binary forms, with or without | |
5 | * modification, are permitted provided that the following conditions | |
6 | * are met: | |
7 | * 1. Redistributions of source code must retain the above copyright | |
8 | * notice, this list of conditions and the following disclaimer. | |
9 | * 2. Redistributions in binary form must reproduce the above copyright | |
10 | * notice, this list of conditions and the following disclaimer in the | |
11 | * documentation and/or other materials provided with the distribution. | |
12 | * | |
13 | * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY | |
14 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
15 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
16 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR | |
17 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
18 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
19 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
20 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
21 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
22 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
23 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
24 | */ | |
25 | ||
26 | #include "config.h" | |
27 | #include "YarrJIT.h" | |
28 | ||
6fe7ccc8 | 29 | #include <wtf/ASCIICType.h> |
14957cd0 A |
30 | #include "LinkBuffer.h" |
31 | #include "Yarr.h" | |
6fe7ccc8 | 32 | #include "YarrCanonicalizeUCS2.h" |
14957cd0 A |
33 | |
34 | #if ENABLE(YARR_JIT) | |
35 | ||
36 | using namespace WTF; | |
37 | ||
38 | namespace JSC { namespace Yarr { | |
39 | ||
6fe7ccc8 | 40 | template<YarrJITCompileMode compileMode> |
14957cd0 A |
41 | class YarrGenerator : private MacroAssembler { |
42 | friend void jitCompile(JSGlobalData*, YarrCodeBlock& jitObject, const UString& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline); | |
43 | ||
44 | #if CPU(ARM) | |
45 | static const RegisterID input = ARMRegisters::r0; | |
46 | static const RegisterID index = ARMRegisters::r1; | |
47 | static const RegisterID length = ARMRegisters::r2; | |
48 | static const RegisterID output = ARMRegisters::r4; | |
49 | ||
50 | static const RegisterID regT0 = ARMRegisters::r5; | |
51 | static const RegisterID regT1 = ARMRegisters::r6; | |
52 | ||
53 | static const RegisterID returnRegister = ARMRegisters::r0; | |
6fe7ccc8 | 54 | static const RegisterID returnRegister2 = ARMRegisters::r1; |
14957cd0 A |
55 | #elif CPU(MIPS) |
56 | static const RegisterID input = MIPSRegisters::a0; | |
57 | static const RegisterID index = MIPSRegisters::a1; | |
58 | static const RegisterID length = MIPSRegisters::a2; | |
59 | static const RegisterID output = MIPSRegisters::a3; | |
60 | ||
61 | static const RegisterID regT0 = MIPSRegisters::t4; | |
62 | static const RegisterID regT1 = MIPSRegisters::t5; | |
63 | ||
64 | static const RegisterID returnRegister = MIPSRegisters::v0; | |
6fe7ccc8 | 65 | static const RegisterID returnRegister2 = MIPSRegisters::v1; |
14957cd0 A |
66 | #elif CPU(SH4) |
67 | static const RegisterID input = SH4Registers::r4; | |
68 | static const RegisterID index = SH4Registers::r5; | |
69 | static const RegisterID length = SH4Registers::r6; | |
70 | static const RegisterID output = SH4Registers::r7; | |
71 | ||
72 | static const RegisterID regT0 = SH4Registers::r0; | |
73 | static const RegisterID regT1 = SH4Registers::r1; | |
74 | ||
75 | static const RegisterID returnRegister = SH4Registers::r0; | |
6fe7ccc8 | 76 | static const RegisterID returnRegister2 = SH4Registers::r1; |
14957cd0 A |
77 | #elif CPU(X86) |
78 | static const RegisterID input = X86Registers::eax; | |
79 | static const RegisterID index = X86Registers::edx; | |
80 | static const RegisterID length = X86Registers::ecx; | |
81 | static const RegisterID output = X86Registers::edi; | |
82 | ||
83 | static const RegisterID regT0 = X86Registers::ebx; | |
84 | static const RegisterID regT1 = X86Registers::esi; | |
85 | ||
86 | static const RegisterID returnRegister = X86Registers::eax; | |
6fe7ccc8 | 87 | static const RegisterID returnRegister2 = X86Registers::edx; |
14957cd0 A |
88 | #elif CPU(X86_64) |
89 | static const RegisterID input = X86Registers::edi; | |
90 | static const RegisterID index = X86Registers::esi; | |
91 | static const RegisterID length = X86Registers::edx; | |
92 | static const RegisterID output = X86Registers::ecx; | |
93 | ||
94 | static const RegisterID regT0 = X86Registers::eax; | |
95 | static const RegisterID regT1 = X86Registers::ebx; | |
96 | ||
97 | static const RegisterID returnRegister = X86Registers::eax; | |
6fe7ccc8 | 98 | static const RegisterID returnRegister2 = X86Registers::edx; |
14957cd0 A |
99 | #endif |
100 | ||
101 | void optimizeAlternative(PatternAlternative* alternative) | |
102 | { | |
103 | if (!alternative->m_terms.size()) | |
104 | return; | |
105 | ||
106 | for (unsigned i = 0; i < alternative->m_terms.size() - 1; ++i) { | |
107 | PatternTerm& term = alternative->m_terms[i]; | |
108 | PatternTerm& nextTerm = alternative->m_terms[i + 1]; | |
109 | ||
110 | if ((term.type == PatternTerm::TypeCharacterClass) | |
111 | && (term.quantityType == QuantifierFixedCount) | |
112 | && (nextTerm.type == PatternTerm::TypePatternCharacter) | |
113 | && (nextTerm.quantityType == QuantifierFixedCount)) { | |
114 | PatternTerm termCopy = term; | |
115 | alternative->m_terms[i] = nextTerm; | |
116 | alternative->m_terms[i + 1] = termCopy; | |
117 | } | |
118 | } | |
119 | } | |
120 | ||
121 | void matchCharacterClassRange(RegisterID character, JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar* matches, unsigned matchCount) | |
122 | { | |
123 | do { | |
124 | // pick which range we're going to generate | |
125 | int which = count >> 1; | |
126 | char lo = ranges[which].begin; | |
127 | char hi = ranges[which].end; | |
128 | ||
129 | // check if there are any ranges or matches below lo. If not, just jl to failure - | |
130 | // if there is anything else to check, check that first, if it falls through jmp to failure. | |
131 | if ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) { | |
132 | Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo)); | |
133 | ||
134 | // generate code for all ranges before this one | |
135 | if (which) | |
136 | matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount); | |
137 | ||
138 | while ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) { | |
139 | matchDest.append(branch32(Equal, character, Imm32((unsigned short)matches[*matchIndex]))); | |
140 | ++*matchIndex; | |
141 | } | |
142 | failures.append(jump()); | |
143 | ||
144 | loOrAbove.link(this); | |
145 | } else if (which) { | |
146 | Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo)); | |
147 | ||
148 | matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount); | |
149 | failures.append(jump()); | |
150 | ||
151 | loOrAbove.link(this); | |
152 | } else | |
153 | failures.append(branch32(LessThan, character, Imm32((unsigned short)lo))); | |
154 | ||
155 | while ((*matchIndex < matchCount) && (matches[*matchIndex] <= hi)) | |
156 | ++*matchIndex; | |
157 | ||
158 | matchDest.append(branch32(LessThanOrEqual, character, Imm32((unsigned short)hi))); | |
159 | // fall through to here, the value is above hi. | |
160 | ||
161 | // shuffle along & loop around if there are any more matches to handle. | |
162 | unsigned next = which + 1; | |
163 | ranges += next; | |
164 | count -= next; | |
165 | } while (count); | |
166 | } | |
167 | ||
168 | void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass) | |
169 | { | |
170 | if (charClass->m_table) { | |
171 | ExtendedAddress tableEntry(character, reinterpret_cast<intptr_t>(charClass->m_table->m_table)); | |
172 | matchDest.append(branchTest8(charClass->m_table->m_inverted ? Zero : NonZero, tableEntry)); | |
173 | return; | |
174 | } | |
175 | Jump unicodeFail; | |
176 | if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) { | |
177 | Jump isAscii = branch32(LessThanOrEqual, character, TrustedImm32(0x7f)); | |
178 | ||
179 | if (charClass->m_matchesUnicode.size()) { | |
180 | for (unsigned i = 0; i < charClass->m_matchesUnicode.size(); ++i) { | |
181 | UChar ch = charClass->m_matchesUnicode[i]; | |
182 | matchDest.append(branch32(Equal, character, Imm32(ch))); | |
183 | } | |
184 | } | |
185 | ||
186 | if (charClass->m_rangesUnicode.size()) { | |
187 | for (unsigned i = 0; i < charClass->m_rangesUnicode.size(); ++i) { | |
188 | UChar lo = charClass->m_rangesUnicode[i].begin; | |
189 | UChar hi = charClass->m_rangesUnicode[i].end; | |
190 | ||
191 | Jump below = branch32(LessThan, character, Imm32(lo)); | |
192 | matchDest.append(branch32(LessThanOrEqual, character, Imm32(hi))); | |
193 | below.link(this); | |
194 | } | |
195 | } | |
196 | ||
197 | unicodeFail = jump(); | |
198 | isAscii.link(this); | |
199 | } | |
200 | ||
201 | if (charClass->m_ranges.size()) { | |
202 | unsigned matchIndex = 0; | |
203 | JumpList failures; | |
204 | matchCharacterClassRange(character, failures, matchDest, charClass->m_ranges.begin(), charClass->m_ranges.size(), &matchIndex, charClass->m_matches.begin(), charClass->m_matches.size()); | |
205 | while (matchIndex < charClass->m_matches.size()) | |
206 | matchDest.append(branch32(Equal, character, Imm32((unsigned short)charClass->m_matches[matchIndex++]))); | |
207 | ||
208 | failures.link(this); | |
209 | } else if (charClass->m_matches.size()) { | |
210 | // optimization: gather 'a','A' etc back together, can mask & test once. | |
211 | Vector<char> matchesAZaz; | |
212 | ||
213 | for (unsigned i = 0; i < charClass->m_matches.size(); ++i) { | |
214 | char ch = charClass->m_matches[i]; | |
215 | if (m_pattern.m_ignoreCase) { | |
216 | if (isASCIILower(ch)) { | |
217 | matchesAZaz.append(ch); | |
218 | continue; | |
219 | } | |
220 | if (isASCIIUpper(ch)) | |
221 | continue; | |
222 | } | |
223 | matchDest.append(branch32(Equal, character, Imm32((unsigned short)ch))); | |
224 | } | |
225 | ||
226 | if (unsigned countAZaz = matchesAZaz.size()) { | |
227 | or32(TrustedImm32(32), character); | |
228 | for (unsigned i = 0; i < countAZaz; ++i) | |
229 | matchDest.append(branch32(Equal, character, TrustedImm32(matchesAZaz[i]))); | |
230 | } | |
231 | } | |
232 | ||
233 | if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) | |
234 | unicodeFail.link(this); | |
235 | } | |
236 | ||
237 | // Jumps if input not available; will have (incorrectly) incremented already! | |
238 | Jump jumpIfNoAvailableInput(unsigned countToCheck = 0) | |
239 | { | |
240 | if (countToCheck) | |
241 | add32(Imm32(countToCheck), index); | |
242 | return branch32(Above, index, length); | |
243 | } | |
244 | ||
245 | Jump jumpIfAvailableInput(unsigned countToCheck) | |
246 | { | |
247 | add32(Imm32(countToCheck), index); | |
248 | return branch32(BelowOrEqual, index, length); | |
249 | } | |
250 | ||
251 | Jump checkInput() | |
252 | { | |
253 | return branch32(BelowOrEqual, index, length); | |
254 | } | |
255 | ||
256 | Jump atEndOfInput() | |
257 | { | |
258 | return branch32(Equal, index, length); | |
259 | } | |
260 | ||
261 | Jump notAtEndOfInput() | |
262 | { | |
263 | return branch32(NotEqual, index, length); | |
264 | } | |
265 | ||
6fe7ccc8 | 266 | Jump jumpIfCharNotEquals(UChar ch, int inputPosition, RegisterID character) |
14957cd0 | 267 | { |
6fe7ccc8 | 268 | readCharacter(inputPosition, character); |
14957cd0 | 269 | |
6fe7ccc8 A |
270 | // For case-insesitive compares, non-ascii characters that have different |
271 | // upper & lower case representations are converted to a character class. | |
272 | ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(ch) || isCanonicallyUnique(ch)); | |
273 | if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) { | |
274 | or32(TrustedImm32(0x20), character); | |
275 | ch |= 0x20; | |
276 | } | |
277 | ||
278 | return branch32(NotEqual, character, Imm32(ch)); | |
14957cd0 A |
279 | } |
280 | ||
281 | void readCharacter(int inputPosition, RegisterID reg) | |
282 | { | |
6fe7ccc8 A |
283 | if (m_charSize == Char8) |
284 | load8(BaseIndex(input, index, TimesOne, inputPosition * sizeof(char)), reg); | |
285 | else | |
286 | load16(BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), reg); | |
14957cd0 A |
287 | } |
288 | ||
289 | void storeToFrame(RegisterID reg, unsigned frameLocation) | |
290 | { | |
291 | poke(reg, frameLocation); | |
292 | } | |
293 | ||
294 | void storeToFrame(TrustedImm32 imm, unsigned frameLocation) | |
295 | { | |
296 | poke(imm, frameLocation); | |
297 | } | |
298 | ||
299 | DataLabelPtr storeToFrameWithPatch(unsigned frameLocation) | |
300 | { | |
301 | return storePtrWithPatch(TrustedImmPtr(0), Address(stackPointerRegister, frameLocation * sizeof(void*))); | |
302 | } | |
303 | ||
304 | void loadFromFrame(unsigned frameLocation, RegisterID reg) | |
305 | { | |
306 | peek(reg, frameLocation); | |
307 | } | |
308 | ||
309 | void loadFromFrameAndJump(unsigned frameLocation) | |
310 | { | |
311 | jump(Address(stackPointerRegister, frameLocation * sizeof(void*))); | |
312 | } | |
313 | ||
6fe7ccc8 A |
314 | void initCallFrame() |
315 | { | |
316 | unsigned callFrameSize = m_pattern.m_body->m_callFrameSize; | |
317 | if (callFrameSize) | |
318 | subPtr(Imm32(callFrameSize * sizeof(void*)), stackPointerRegister); | |
319 | } | |
320 | void removeCallFrame() | |
321 | { | |
322 | unsigned callFrameSize = m_pattern.m_body->m_callFrameSize; | |
323 | if (callFrameSize) | |
324 | addPtr(Imm32(callFrameSize * sizeof(void*)), stackPointerRegister); | |
325 | } | |
326 | ||
327 | // Used to record subpatters, should only be called if compileMode is IncludeSubpatterns. | |
328 | void setSubpatternStart(RegisterID reg, unsigned subpattern) | |
329 | { | |
330 | ASSERT(subpattern); | |
331 | // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-( | |
332 | store32(reg, Address(output, (subpattern << 1) * sizeof(int))); | |
333 | } | |
334 | void setSubpatternEnd(RegisterID reg, unsigned subpattern) | |
335 | { | |
336 | ASSERT(subpattern); | |
337 | // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-( | |
338 | store32(reg, Address(output, ((subpattern << 1) + 1) * sizeof(int))); | |
339 | } | |
340 | void clearSubpatternStart(unsigned subpattern) | |
341 | { | |
342 | ASSERT(subpattern); | |
343 | // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-( | |
344 | store32(TrustedImm32(-1), Address(output, (subpattern << 1) * sizeof(int))); | |
345 | } | |
346 | ||
347 | // We use one of three different strategies to track the start of the current match, | |
348 | // while matching. | |
349 | // 1) If the pattern has a fixed size, do nothing! - we calculate the value lazily | |
350 | // at the end of matching. This is irrespective of compileMode, and in this case | |
351 | // these methods should never be called. | |
352 | // 2) If we're compiling IncludeSubpatterns, 'output' contains a pointer to an output | |
353 | // vector, store the match start in the output vector. | |
354 | // 3) If we're compiling MatchOnly, 'output' is unused, store the match start directly | |
355 | // in this register. | |
356 | void setMatchStart(RegisterID reg) | |
357 | { | |
358 | ASSERT(!m_pattern.m_body->m_hasFixedSize); | |
359 | if (compileMode == IncludeSubpatterns) | |
360 | store32(reg, output); | |
361 | else | |
362 | move(reg, output); | |
363 | } | |
364 | void getMatchStart(RegisterID reg) | |
365 | { | |
366 | ASSERT(!m_pattern.m_body->m_hasFixedSize); | |
367 | if (compileMode == IncludeSubpatterns) | |
368 | load32(output, reg); | |
369 | else | |
370 | move(output, reg); | |
371 | } | |
372 | ||
14957cd0 A |
373 | enum YarrOpCode { |
374 | // These nodes wrap body alternatives - those in the main disjunction, | |
375 | // rather than subpatterns or assertions. These are chained together in | |
376 | // a doubly linked list, with a 'begin' node for the first alternative, | |
377 | // a 'next' node for each subsequent alternative, and an 'end' node at | |
378 | // the end. In the case of repeating alternatives, the 'end' node also | |
379 | // has a reference back to 'begin'. | |
380 | OpBodyAlternativeBegin, | |
381 | OpBodyAlternativeNext, | |
382 | OpBodyAlternativeEnd, | |
383 | // Similar to the body alternatives, but used for subpatterns with two | |
384 | // or more alternatives. | |
385 | OpNestedAlternativeBegin, | |
386 | OpNestedAlternativeNext, | |
387 | OpNestedAlternativeEnd, | |
388 | // Used for alternatives in subpatterns where there is only a single | |
389 | // alternative (backtrackingis easier in these cases), or for alternatives | |
390 | // which never need to be backtracked (those in parenthetical assertions, | |
391 | // terminal subpatterns). | |
392 | OpSimpleNestedAlternativeBegin, | |
393 | OpSimpleNestedAlternativeNext, | |
394 | OpSimpleNestedAlternativeEnd, | |
395 | // Used to wrap 'Once' subpattern matches (quantityCount == 1). | |
396 | OpParenthesesSubpatternOnceBegin, | |
397 | OpParenthesesSubpatternOnceEnd, | |
398 | // Used to wrap 'Terminal' subpattern matches (at the end of the regexp). | |
399 | OpParenthesesSubpatternTerminalBegin, | |
400 | OpParenthesesSubpatternTerminalEnd, | |
401 | // Used to wrap parenthetical assertions. | |
402 | OpParentheticalAssertionBegin, | |
403 | OpParentheticalAssertionEnd, | |
404 | // Wraps all simple terms (pattern characters, character classes). | |
405 | OpTerm, | |
406 | // Where an expression contains only 'once through' body alternatives | |
407 | // and no repeating ones, this op is used to return match failure. | |
408 | OpMatchFailed | |
409 | }; | |
410 | ||
411 | // This structure is used to hold the compiled opcode information, | |
412 | // including reference back to the original PatternTerm/PatternAlternatives, | |
413 | // and JIT compilation data structures. | |
414 | struct YarrOp { | |
415 | explicit YarrOp(PatternTerm* term) | |
416 | : m_op(OpTerm) | |
417 | , m_term(term) | |
418 | , m_isDeadCode(false) | |
419 | { | |
420 | } | |
421 | ||
422 | explicit YarrOp(YarrOpCode op) | |
423 | : m_op(op) | |
424 | , m_isDeadCode(false) | |
425 | { | |
426 | } | |
427 | ||
428 | // The operation, as a YarrOpCode, and also a reference to the PatternTerm. | |
429 | YarrOpCode m_op; | |
430 | PatternTerm* m_term; | |
431 | ||
432 | // For alternatives, this holds the PatternAlternative and doubly linked | |
433 | // references to this alternative's siblings. In the case of the | |
434 | // OpBodyAlternativeEnd node at the end of a section of repeating nodes, | |
435 | // m_nextOp will reference the OpBodyAlternativeBegin node of the first | |
436 | // repeating alternative. | |
437 | PatternAlternative* m_alternative; | |
438 | size_t m_previousOp; | |
439 | size_t m_nextOp; | |
440 | ||
441 | // Used to record a set of Jumps out of the generated code, typically | |
442 | // used for jumps out to backtracking code, and a single reentry back | |
443 | // into the code for a node (likely where a backtrack will trigger | |
444 | // rematching). | |
445 | Label m_reentry; | |
446 | JumpList m_jumps; | |
447 | ||
6fe7ccc8 A |
448 | // Used for backtracking when the prior alternative did not consume any |
449 | // characters but matched. | |
450 | Jump m_zeroLengthMatch; | |
451 | ||
14957cd0 A |
452 | // This flag is used to null out the second pattern character, when |
453 | // two are fused to match a pair together. | |
454 | bool m_isDeadCode; | |
455 | ||
456 | // Currently used in the case of some of the more complex management of | |
457 | // 'm_checked', to cache the offset used in this alternative, to avoid | |
458 | // recalculating it. | |
459 | int m_checkAdjust; | |
460 | ||
461 | // Used by OpNestedAlternativeNext/End to hold the pointer to the | |
462 | // value that will be pushed into the pattern's frame to return to, | |
463 | // upon backtracking back into the disjunction. | |
464 | DataLabelPtr m_returnAddress; | |
465 | }; | |
466 | ||
467 | // BacktrackingState | |
468 | // This class encapsulates information about the state of code generation | |
469 | // whilst generating the code for backtracking, when a term fails to match. | |
470 | // Upon entry to code generation of the backtracking code for a given node, | |
471 | // the Backtracking state will hold references to all control flow sources | |
472 | // that are outputs in need of further backtracking from the prior node | |
473 | // generated (which is the subsequent operation in the regular expression, | |
474 | // and in the m_ops Vector, since we generated backtracking backwards). | |
475 | // These references to control flow take the form of: | |
476 | // - A jump list of jumps, to be linked to code that will backtrack them | |
477 | // further. | |
478 | // - A set of DataLabelPtr values, to be populated with values to be | |
479 | // treated effectively as return addresses backtracking into complex | |
480 | // subpatterns. | |
481 | // - A flag indicating that the current sequence of generated code up to | |
482 | // this point requires backtracking. | |
483 | class BacktrackingState { | |
484 | public: | |
485 | BacktrackingState() | |
486 | : m_pendingFallthrough(false) | |
487 | { | |
488 | } | |
489 | ||
490 | // Add a jump or jumps, a return address, or set the flag indicating | |
491 | // that the current 'fallthrough' control flow requires backtracking. | |
492 | void append(const Jump& jump) | |
493 | { | |
494 | m_laterFailures.append(jump); | |
495 | } | |
496 | void append(JumpList& jumpList) | |
497 | { | |
498 | m_laterFailures.append(jumpList); | |
499 | } | |
500 | void append(const DataLabelPtr& returnAddress) | |
501 | { | |
502 | m_pendingReturns.append(returnAddress); | |
503 | } | |
504 | void fallthrough() | |
505 | { | |
506 | ASSERT(!m_pendingFallthrough); | |
507 | m_pendingFallthrough = true; | |
508 | } | |
509 | ||
510 | // These methods clear the backtracking state, either linking to the | |
511 | // current location, a provided label, or copying the backtracking out | |
512 | // to a JumpList. All actions may require code generation to take place, | |
513 | // and as such are passed a pointer to the assembler. | |
514 | void link(MacroAssembler* assembler) | |
515 | { | |
516 | if (m_pendingReturns.size()) { | |
517 | Label here(assembler); | |
518 | for (unsigned i = 0; i < m_pendingReturns.size(); ++i) | |
519 | m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], here)); | |
520 | m_pendingReturns.clear(); | |
521 | } | |
522 | m_laterFailures.link(assembler); | |
523 | m_laterFailures.clear(); | |
524 | m_pendingFallthrough = false; | |
525 | } | |
526 | void linkTo(Label label, MacroAssembler* assembler) | |
527 | { | |
528 | if (m_pendingReturns.size()) { | |
529 | for (unsigned i = 0; i < m_pendingReturns.size(); ++i) | |
530 | m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], label)); | |
531 | m_pendingReturns.clear(); | |
532 | } | |
533 | if (m_pendingFallthrough) | |
534 | assembler->jump(label); | |
535 | m_laterFailures.linkTo(label, assembler); | |
536 | m_laterFailures.clear(); | |
537 | m_pendingFallthrough = false; | |
538 | } | |
539 | void takeBacktracksToJumpList(JumpList& jumpList, MacroAssembler* assembler) | |
540 | { | |
541 | if (m_pendingReturns.size()) { | |
542 | Label here(assembler); | |
543 | for (unsigned i = 0; i < m_pendingReturns.size(); ++i) | |
544 | m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], here)); | |
545 | m_pendingReturns.clear(); | |
546 | m_pendingFallthrough = true; | |
547 | } | |
548 | if (m_pendingFallthrough) | |
549 | jumpList.append(assembler->jump()); | |
550 | jumpList.append(m_laterFailures); | |
551 | m_laterFailures.clear(); | |
552 | m_pendingFallthrough = false; | |
553 | } | |
554 | ||
555 | bool isEmpty() | |
556 | { | |
557 | return m_laterFailures.empty() && m_pendingReturns.isEmpty() && !m_pendingFallthrough; | |
558 | } | |
559 | ||
560 | // Called at the end of code generation to link all return addresses. | |
561 | void linkDataLabels(LinkBuffer& linkBuffer) | |
562 | { | |
563 | ASSERT(isEmpty()); | |
564 | for (unsigned i = 0; i < m_backtrackRecords.size(); ++i) | |
565 | linkBuffer.patch(m_backtrackRecords[i].m_dataLabel, linkBuffer.locationOf(m_backtrackRecords[i].m_backtrackLocation)); | |
566 | } | |
567 | ||
568 | private: | |
569 | struct ReturnAddressRecord { | |
570 | ReturnAddressRecord(DataLabelPtr dataLabel, Label backtrackLocation) | |
571 | : m_dataLabel(dataLabel) | |
572 | , m_backtrackLocation(backtrackLocation) | |
573 | { | |
574 | } | |
575 | ||
576 | DataLabelPtr m_dataLabel; | |
577 | Label m_backtrackLocation; | |
578 | }; | |
579 | ||
580 | JumpList m_laterFailures; | |
581 | bool m_pendingFallthrough; | |
582 | Vector<DataLabelPtr, 4> m_pendingReturns; | |
583 | Vector<ReturnAddressRecord, 4> m_backtrackRecords; | |
584 | }; | |
585 | ||
586 | // Generation methods: | |
587 | // =================== | |
588 | ||
589 | // This method provides a default implementation of backtracking common | |
590 | // to many terms; terms commonly jump out of the forwards matching path | |
591 | // on any failed conditions, and add these jumps to the m_jumps list. If | |
592 | // no special handling is required we can often just backtrack to m_jumps. | |
593 | void backtrackTermDefault(size_t opIndex) | |
594 | { | |
595 | YarrOp& op = m_ops[opIndex]; | |
596 | m_backtrackingState.append(op.m_jumps); | |
597 | } | |
598 | ||
599 | void generateAssertionBOL(size_t opIndex) | |
600 | { | |
601 | YarrOp& op = m_ops[opIndex]; | |
602 | PatternTerm* term = op.m_term; | |
603 | ||
604 | if (m_pattern.m_multiline) { | |
605 | const RegisterID character = regT0; | |
606 | ||
607 | JumpList matchDest; | |
608 | if (!term->inputPosition) | |
609 | matchDest.append(branch32(Equal, index, Imm32(m_checked))); | |
610 | ||
611 | readCharacter((term->inputPosition - m_checked) - 1, character); | |
612 | matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass()); | |
613 | op.m_jumps.append(jump()); | |
614 | ||
615 | matchDest.link(this); | |
616 | } else { | |
617 | // Erk, really should poison out these alternatives early. :-/ | |
618 | if (term->inputPosition) | |
619 | op.m_jumps.append(jump()); | |
620 | else | |
621 | op.m_jumps.append(branch32(NotEqual, index, Imm32(m_checked))); | |
622 | } | |
623 | } | |
624 | void backtrackAssertionBOL(size_t opIndex) | |
625 | { | |
626 | backtrackTermDefault(opIndex); | |
627 | } | |
628 | ||
629 | void generateAssertionEOL(size_t opIndex) | |
630 | { | |
631 | YarrOp& op = m_ops[opIndex]; | |
632 | PatternTerm* term = op.m_term; | |
633 | ||
634 | if (m_pattern.m_multiline) { | |
635 | const RegisterID character = regT0; | |
636 | ||
637 | JumpList matchDest; | |
638 | if (term->inputPosition == m_checked) | |
639 | matchDest.append(atEndOfInput()); | |
640 | ||
1df5f87f | 641 | readCharacter(term->inputPosition - m_checked, character); |
14957cd0 A |
642 | matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass()); |
643 | op.m_jumps.append(jump()); | |
644 | ||
645 | matchDest.link(this); | |
646 | } else { | |
647 | if (term->inputPosition == m_checked) | |
648 | op.m_jumps.append(notAtEndOfInput()); | |
649 | // Erk, really should poison out these alternatives early. :-/ | |
650 | else | |
651 | op.m_jumps.append(jump()); | |
652 | } | |
653 | } | |
654 | void backtrackAssertionEOL(size_t opIndex) | |
655 | { | |
656 | backtrackTermDefault(opIndex); | |
657 | } | |
658 | ||
659 | // Also falls though on nextIsNotWordChar. | |
660 | void matchAssertionWordchar(size_t opIndex, JumpList& nextIsWordChar, JumpList& nextIsNotWordChar) | |
661 | { | |
662 | YarrOp& op = m_ops[opIndex]; | |
663 | PatternTerm* term = op.m_term; | |
664 | ||
665 | const RegisterID character = regT0; | |
666 | ||
667 | if (term->inputPosition == m_checked) | |
668 | nextIsNotWordChar.append(atEndOfInput()); | |
669 | ||
670 | readCharacter((term->inputPosition - m_checked), character); | |
671 | matchCharacterClass(character, nextIsWordChar, m_pattern.wordcharCharacterClass()); | |
672 | } | |
673 | ||
674 | void generateAssertionWordBoundary(size_t opIndex) | |
675 | { | |
676 | YarrOp& op = m_ops[opIndex]; | |
677 | PatternTerm* term = op.m_term; | |
678 | ||
679 | const RegisterID character = regT0; | |
680 | ||
681 | Jump atBegin; | |
682 | JumpList matchDest; | |
683 | if (!term->inputPosition) | |
684 | atBegin = branch32(Equal, index, Imm32(m_checked)); | |
685 | readCharacter((term->inputPosition - m_checked) - 1, character); | |
686 | matchCharacterClass(character, matchDest, m_pattern.wordcharCharacterClass()); | |
687 | if (!term->inputPosition) | |
688 | atBegin.link(this); | |
689 | ||
690 | // We fall through to here if the last character was not a wordchar. | |
691 | JumpList nonWordCharThenWordChar; | |
692 | JumpList nonWordCharThenNonWordChar; | |
693 | if (term->invert()) { | |
694 | matchAssertionWordchar(opIndex, nonWordCharThenNonWordChar, nonWordCharThenWordChar); | |
695 | nonWordCharThenWordChar.append(jump()); | |
696 | } else { | |
697 | matchAssertionWordchar(opIndex, nonWordCharThenWordChar, nonWordCharThenNonWordChar); | |
698 | nonWordCharThenNonWordChar.append(jump()); | |
699 | } | |
700 | op.m_jumps.append(nonWordCharThenNonWordChar); | |
701 | ||
702 | // We jump here if the last character was a wordchar. | |
703 | matchDest.link(this); | |
704 | JumpList wordCharThenWordChar; | |
705 | JumpList wordCharThenNonWordChar; | |
706 | if (term->invert()) { | |
707 | matchAssertionWordchar(opIndex, wordCharThenNonWordChar, wordCharThenWordChar); | |
708 | wordCharThenWordChar.append(jump()); | |
709 | } else { | |
710 | matchAssertionWordchar(opIndex, wordCharThenWordChar, wordCharThenNonWordChar); | |
711 | // This can fall-though! | |
712 | } | |
713 | ||
714 | op.m_jumps.append(wordCharThenWordChar); | |
715 | ||
716 | nonWordCharThenWordChar.link(this); | |
717 | wordCharThenNonWordChar.link(this); | |
718 | } | |
719 | void backtrackAssertionWordBoundary(size_t opIndex) | |
720 | { | |
721 | backtrackTermDefault(opIndex); | |
722 | } | |
723 | ||
724 | void generatePatternCharacterOnce(size_t opIndex) | |
725 | { | |
726 | YarrOp& op = m_ops[opIndex]; | |
727 | ||
6fe7ccc8 A |
728 | if (op.m_isDeadCode) |
729 | return; | |
730 | ||
14957cd0 A |
731 | // m_ops always ends with a OpBodyAlternativeEnd or OpMatchFailed |
732 | // node, so there must always be at least one more node. | |
733 | ASSERT(opIndex + 1 < m_ops.size()); | |
6fe7ccc8 | 734 | YarrOp* nextOp = &m_ops[opIndex + 1]; |
14957cd0 A |
735 | |
736 | PatternTerm* term = op.m_term; | |
737 | UChar ch = term->patternCharacter; | |
738 | ||
6fe7ccc8 A |
739 | if ((ch > 0xff) && (m_charSize == Char8)) { |
740 | // Have a 16 bit pattern character and an 8 bit string - short circuit | |
741 | op.m_jumps.append(jump()); | |
742 | return; | |
743 | } | |
744 | ||
14957cd0 | 745 | const RegisterID character = regT0; |
6fe7ccc8 A |
746 | int maxCharactersAtOnce = m_charSize == Char8 ? 4 : 2; |
747 | unsigned ignoreCaseMask = 0; | |
748 | int allCharacters = ch; | |
749 | int numberCharacters; | |
750 | int startTermPosition = term->inputPosition; | |
14957cd0 | 751 | |
6fe7ccc8 A |
752 | // For case-insesitive compares, non-ascii characters that have different |
753 | // upper & lower case representations are converted to a character class. | |
754 | ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(ch) || isCanonicallyUnique(ch)); | |
14957cd0 | 755 | |
6fe7ccc8 A |
756 | if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) |
757 | ignoreCaseMask |= 32; | |
14957cd0 | 758 | |
6fe7ccc8 A |
759 | for (numberCharacters = 1; numberCharacters < maxCharactersAtOnce && nextOp->m_op == OpTerm; ++numberCharacters, nextOp = &m_ops[opIndex + numberCharacters]) { |
760 | PatternTerm* nextTerm = nextOp->m_term; | |
761 | ||
762 | if (nextTerm->type != PatternTerm::TypePatternCharacter | |
763 | || nextTerm->quantityType != QuantifierFixedCount | |
764 | || nextTerm->quantityCount != 1 | |
765 | || nextTerm->inputPosition != (startTermPosition + numberCharacters)) | |
766 | break; | |
14957cd0 | 767 | |
6fe7ccc8 | 768 | nextOp->m_isDeadCode = true; |
14957cd0 | 769 | |
6fe7ccc8 | 770 | int shiftAmount = (m_charSize == Char8 ? 8 : 16) * numberCharacters; |
14957cd0 | 771 | |
6fe7ccc8 A |
772 | UChar currentCharacter = nextTerm->patternCharacter; |
773 | ||
774 | if ((currentCharacter > 0xff) && (m_charSize == Char8)) { | |
775 | // Have a 16 bit pattern character and an 8 bit string - short circuit | |
776 | op.m_jumps.append(jump()); | |
14957cd0 A |
777 | return; |
778 | } | |
6fe7ccc8 A |
779 | |
780 | // For case-insesitive compares, non-ascii characters that have different | |
781 | // upper & lower case representations are converted to a character class. | |
782 | ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(currentCharacter) || isCanonicallyUnique(currentCharacter)); | |
783 | ||
784 | allCharacters |= (currentCharacter << shiftAmount); | |
785 | ||
786 | if ((m_pattern.m_ignoreCase) && (isASCIIAlpha(currentCharacter))) | |
787 | ignoreCaseMask |= 32 << shiftAmount; | |
14957cd0 A |
788 | } |
789 | ||
6fe7ccc8 A |
790 | if (m_charSize == Char8) { |
791 | switch (numberCharacters) { | |
792 | case 1: | |
793 | op.m_jumps.append(jumpIfCharNotEquals(ch, startTermPosition - m_checked, character)); | |
794 | return; | |
795 | case 2: { | |
796 | BaseIndex address(input, index, TimesOne, (startTermPosition - m_checked) * sizeof(LChar)); | |
797 | load16Unaligned(address, character); | |
798 | break; | |
799 | } | |
800 | case 3: { | |
801 | BaseIndex highAddress(input, index, TimesOne, (startTermPosition - m_checked) * sizeof(LChar)); | |
802 | load16Unaligned(highAddress, character); | |
803 | if (ignoreCaseMask) | |
804 | or32(Imm32(ignoreCaseMask), character); | |
805 | op.m_jumps.append(branch32(NotEqual, character, Imm32((allCharacters & 0xffff) | ignoreCaseMask))); | |
806 | op.m_jumps.append(jumpIfCharNotEquals(allCharacters >> 16, startTermPosition + 2 - m_checked, character)); | |
807 | return; | |
808 | } | |
809 | case 4: { | |
810 | BaseIndex address(input, index, TimesOne, (startTermPosition - m_checked) * sizeof(LChar)); | |
811 | load32WithUnalignedHalfWords(address, character); | |
812 | break; | |
813 | } | |
814 | } | |
14957cd0 | 815 | } else { |
6fe7ccc8 A |
816 | switch (numberCharacters) { |
817 | case 1: | |
818 | op.m_jumps.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked, character)); | |
819 | return; | |
820 | case 2: | |
821 | BaseIndex address(input, index, TimesTwo, (term->inputPosition - m_checked) * sizeof(UChar)); | |
822 | load32WithUnalignedHalfWords(address, character); | |
823 | break; | |
824 | } | |
14957cd0 | 825 | } |
6fe7ccc8 A |
826 | |
827 | if (ignoreCaseMask) | |
828 | or32(Imm32(ignoreCaseMask), character); | |
829 | op.m_jumps.append(branch32(NotEqual, character, Imm32(allCharacters | ignoreCaseMask))); | |
830 | return; | |
14957cd0 A |
831 | } |
832 | void backtrackPatternCharacterOnce(size_t opIndex) | |
833 | { | |
834 | backtrackTermDefault(opIndex); | |
835 | } | |
836 | ||
837 | void generatePatternCharacterFixed(size_t opIndex) | |
838 | { | |
839 | YarrOp& op = m_ops[opIndex]; | |
840 | PatternTerm* term = op.m_term; | |
841 | UChar ch = term->patternCharacter; | |
842 | ||
843 | const RegisterID character = regT0; | |
844 | const RegisterID countRegister = regT1; | |
845 | ||
846 | move(index, countRegister); | |
1df5f87f | 847 | sub32(Imm32(term->quantityCount.unsafeGet()), countRegister); |
14957cd0 A |
848 | |
849 | Label loop(this); | |
6fe7ccc8 | 850 | BaseIndex address(input, countRegister, m_charScale, (Checked<int>(term->inputPosition - m_checked + Checked<int64_t>(term->quantityCount)) * static_cast<int>(m_charSize == Char8 ? sizeof(char) : sizeof(UChar))).unsafeGet()); |
14957cd0 | 851 | |
6fe7ccc8 A |
852 | if (m_charSize == Char8) |
853 | load8(address, character); | |
854 | else | |
14957cd0 | 855 | load16(address, character); |
6fe7ccc8 A |
856 | |
857 | // For case-insesitive compares, non-ascii characters that have different | |
858 | // upper & lower case representations are converted to a character class. | |
859 | ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(ch) || isCanonicallyUnique(ch)); | |
860 | if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) { | |
861 | or32(TrustedImm32(0x20), character); | |
862 | ch |= 0x20; | |
14957cd0 | 863 | } |
6fe7ccc8 A |
864 | |
865 | op.m_jumps.append(branch32(NotEqual, character, Imm32(ch))); | |
14957cd0 A |
866 | add32(TrustedImm32(1), countRegister); |
867 | branch32(NotEqual, countRegister, index).linkTo(loop, this); | |
868 | } | |
869 | void backtrackPatternCharacterFixed(size_t opIndex) | |
870 | { | |
871 | backtrackTermDefault(opIndex); | |
872 | } | |
873 | ||
874 | void generatePatternCharacterGreedy(size_t opIndex) | |
875 | { | |
876 | YarrOp& op = m_ops[opIndex]; | |
877 | PatternTerm* term = op.m_term; | |
878 | UChar ch = term->patternCharacter; | |
879 | ||
880 | const RegisterID character = regT0; | |
881 | const RegisterID countRegister = regT1; | |
882 | ||
883 | move(TrustedImm32(0), countRegister); | |
884 | ||
6fe7ccc8 A |
885 | // Unless have a 16 bit pattern character and an 8 bit string - short circuit |
886 | if (!((ch > 0xff) && (m_charSize == Char8))) { | |
887 | JumpList failures; | |
888 | Label loop(this); | |
889 | failures.append(atEndOfInput()); | |
890 | failures.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked, character)); | |
891 | ||
892 | add32(TrustedImm32(1), countRegister); | |
893 | add32(TrustedImm32(1), index); | |
894 | if (term->quantityCount == quantifyInfinite) | |
895 | jump(loop); | |
896 | else | |
897 | branch32(NotEqual, countRegister, Imm32(term->quantityCount.unsafeGet())).linkTo(loop, this); | |
14957cd0 | 898 | |
6fe7ccc8 A |
899 | failures.link(this); |
900 | } | |
14957cd0 A |
901 | op.m_reentry = label(); |
902 | ||
903 | storeToFrame(countRegister, term->frameLocation); | |
14957cd0 A |
904 | } |
905 | void backtrackPatternCharacterGreedy(size_t opIndex) | |
906 | { | |
907 | YarrOp& op = m_ops[opIndex]; | |
908 | PatternTerm* term = op.m_term; | |
909 | ||
910 | const RegisterID countRegister = regT1; | |
911 | ||
912 | m_backtrackingState.link(this); | |
913 | ||
914 | loadFromFrame(term->frameLocation, countRegister); | |
915 | m_backtrackingState.append(branchTest32(Zero, countRegister)); | |
916 | sub32(TrustedImm32(1), countRegister); | |
917 | sub32(TrustedImm32(1), index); | |
918 | jump(op.m_reentry); | |
919 | } | |
920 | ||
921 | void generatePatternCharacterNonGreedy(size_t opIndex) | |
922 | { | |
923 | YarrOp& op = m_ops[opIndex]; | |
924 | PatternTerm* term = op.m_term; | |
925 | ||
926 | const RegisterID countRegister = regT1; | |
927 | ||
928 | move(TrustedImm32(0), countRegister); | |
929 | op.m_reentry = label(); | |
930 | storeToFrame(countRegister, term->frameLocation); | |
931 | } | |
932 | void backtrackPatternCharacterNonGreedy(size_t opIndex) | |
933 | { | |
934 | YarrOp& op = m_ops[opIndex]; | |
935 | PatternTerm* term = op.m_term; | |
936 | UChar ch = term->patternCharacter; | |
937 | ||
938 | const RegisterID character = regT0; | |
939 | const RegisterID countRegister = regT1; | |
940 | ||
14957cd0 A |
941 | m_backtrackingState.link(this); |
942 | ||
943 | loadFromFrame(term->frameLocation, countRegister); | |
944 | ||
6fe7ccc8 A |
945 | // Unless have a 16 bit pattern character and an 8 bit string - short circuit |
946 | if (!((ch > 0xff) && (m_charSize == Char8))) { | |
947 | JumpList nonGreedyFailures; | |
948 | nonGreedyFailures.append(atEndOfInput()); | |
949 | if (term->quantityCount != quantifyInfinite) | |
950 | nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityCount.unsafeGet()))); | |
951 | nonGreedyFailures.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked, character)); | |
14957cd0 | 952 | |
6fe7ccc8 A |
953 | add32(TrustedImm32(1), countRegister); |
954 | add32(TrustedImm32(1), index); | |
14957cd0 | 955 | |
6fe7ccc8 A |
956 | jump(op.m_reentry); |
957 | nonGreedyFailures.link(this); | |
958 | } | |
14957cd0 | 959 | |
14957cd0 A |
960 | sub32(countRegister, index); |
961 | m_backtrackingState.fallthrough(); | |
962 | } | |
963 | ||
964 | void generateCharacterClassOnce(size_t opIndex) | |
965 | { | |
966 | YarrOp& op = m_ops[opIndex]; | |
967 | PatternTerm* term = op.m_term; | |
968 | ||
969 | const RegisterID character = regT0; | |
970 | ||
971 | JumpList matchDest; | |
1df5f87f | 972 | readCharacter(term->inputPosition - m_checked, character); |
14957cd0 A |
973 | matchCharacterClass(character, matchDest, term->characterClass); |
974 | ||
975 | if (term->invert()) | |
976 | op.m_jumps.append(matchDest); | |
977 | else { | |
978 | op.m_jumps.append(jump()); | |
979 | matchDest.link(this); | |
980 | } | |
981 | } | |
982 | void backtrackCharacterClassOnce(size_t opIndex) | |
983 | { | |
984 | backtrackTermDefault(opIndex); | |
985 | } | |
986 | ||
987 | void generateCharacterClassFixed(size_t opIndex) | |
988 | { | |
989 | YarrOp& op = m_ops[opIndex]; | |
990 | PatternTerm* term = op.m_term; | |
991 | ||
992 | const RegisterID character = regT0; | |
993 | const RegisterID countRegister = regT1; | |
994 | ||
995 | move(index, countRegister); | |
1df5f87f | 996 | sub32(Imm32(term->quantityCount.unsafeGet()), countRegister); |
14957cd0 A |
997 | |
998 | Label loop(this); | |
999 | JumpList matchDest; | |
6fe7ccc8 A |
1000 | if (m_charSize == Char8) |
1001 | load8(BaseIndex(input, countRegister, TimesOne, (Checked<int>(term->inputPosition - m_checked + Checked<int64_t>(term->quantityCount)) * static_cast<int>(sizeof(char))).unsafeGet()), character); | |
1002 | else | |
1003 | load16(BaseIndex(input, countRegister, TimesTwo, (Checked<int>(term->inputPosition - m_checked + Checked<int64_t>(term->quantityCount)) * static_cast<int>(sizeof(UChar))).unsafeGet()), character); | |
14957cd0 A |
1004 | matchCharacterClass(character, matchDest, term->characterClass); |
1005 | ||
1006 | if (term->invert()) | |
1007 | op.m_jumps.append(matchDest); | |
1008 | else { | |
1009 | op.m_jumps.append(jump()); | |
1010 | matchDest.link(this); | |
1011 | } | |
1012 | ||
1013 | add32(TrustedImm32(1), countRegister); | |
1014 | branch32(NotEqual, countRegister, index).linkTo(loop, this); | |
1015 | } | |
1016 | void backtrackCharacterClassFixed(size_t opIndex) | |
1017 | { | |
1018 | backtrackTermDefault(opIndex); | |
1019 | } | |
1020 | ||
1021 | void generateCharacterClassGreedy(size_t opIndex) | |
1022 | { | |
1023 | YarrOp& op = m_ops[opIndex]; | |
1024 | PatternTerm* term = op.m_term; | |
1025 | ||
1026 | const RegisterID character = regT0; | |
1027 | const RegisterID countRegister = regT1; | |
1028 | ||
1029 | move(TrustedImm32(0), countRegister); | |
1030 | ||
1031 | JumpList failures; | |
1032 | Label loop(this); | |
1033 | failures.append(atEndOfInput()); | |
1034 | ||
1035 | if (term->invert()) { | |
1036 | readCharacter(term->inputPosition - m_checked, character); | |
1037 | matchCharacterClass(character, failures, term->characterClass); | |
1038 | } else { | |
1039 | JumpList matchDest; | |
1040 | readCharacter(term->inputPosition - m_checked, character); | |
1041 | matchCharacterClass(character, matchDest, term->characterClass); | |
1042 | failures.append(jump()); | |
1043 | matchDest.link(this); | |
1044 | } | |
1045 | ||
1046 | add32(TrustedImm32(1), countRegister); | |
1047 | add32(TrustedImm32(1), index); | |
1048 | if (term->quantityCount != quantifyInfinite) { | |
1df5f87f | 1049 | branch32(NotEqual, countRegister, Imm32(term->quantityCount.unsafeGet())).linkTo(loop, this); |
14957cd0 A |
1050 | failures.append(jump()); |
1051 | } else | |
1052 | jump(loop); | |
1053 | ||
1054 | failures.link(this); | |
1055 | op.m_reentry = label(); | |
1056 | ||
1057 | storeToFrame(countRegister, term->frameLocation); | |
1058 | } | |
1059 | void backtrackCharacterClassGreedy(size_t opIndex) | |
1060 | { | |
1061 | YarrOp& op = m_ops[opIndex]; | |
1062 | PatternTerm* term = op.m_term; | |
1063 | ||
1064 | const RegisterID countRegister = regT1; | |
1065 | ||
1066 | m_backtrackingState.link(this); | |
1067 | ||
1068 | loadFromFrame(term->frameLocation, countRegister); | |
1069 | m_backtrackingState.append(branchTest32(Zero, countRegister)); | |
1070 | sub32(TrustedImm32(1), countRegister); | |
1071 | sub32(TrustedImm32(1), index); | |
1072 | jump(op.m_reentry); | |
1073 | } | |
1074 | ||
1075 | void generateCharacterClassNonGreedy(size_t opIndex) | |
1076 | { | |
1077 | YarrOp& op = m_ops[opIndex]; | |
1078 | PatternTerm* term = op.m_term; | |
1079 | ||
1080 | const RegisterID countRegister = regT1; | |
1081 | ||
1082 | move(TrustedImm32(0), countRegister); | |
1083 | op.m_reentry = label(); | |
1084 | storeToFrame(countRegister, term->frameLocation); | |
1085 | } | |
1086 | void backtrackCharacterClassNonGreedy(size_t opIndex) | |
1087 | { | |
1088 | YarrOp& op = m_ops[opIndex]; | |
1089 | PatternTerm* term = op.m_term; | |
1090 | ||
1091 | const RegisterID character = regT0; | |
1092 | const RegisterID countRegister = regT1; | |
1093 | ||
1094 | JumpList nonGreedyFailures; | |
1095 | ||
1096 | m_backtrackingState.link(this); | |
1097 | ||
14957cd0 A |
1098 | loadFromFrame(term->frameLocation, countRegister); |
1099 | ||
1100 | nonGreedyFailures.append(atEndOfInput()); | |
1df5f87f | 1101 | nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityCount.unsafeGet()))); |
14957cd0 A |
1102 | |
1103 | JumpList matchDest; | |
1104 | readCharacter(term->inputPosition - m_checked, character); | |
1105 | matchCharacterClass(character, matchDest, term->characterClass); | |
1106 | ||
1107 | if (term->invert()) | |
1108 | nonGreedyFailures.append(matchDest); | |
1109 | else { | |
1110 | nonGreedyFailures.append(jump()); | |
1111 | matchDest.link(this); | |
1112 | } | |
1113 | ||
1114 | add32(TrustedImm32(1), countRegister); | |
1115 | add32(TrustedImm32(1), index); | |
1116 | ||
1117 | jump(op.m_reentry); | |
1118 | ||
1119 | nonGreedyFailures.link(this); | |
1120 | sub32(countRegister, index); | |
1121 | m_backtrackingState.fallthrough(); | |
1122 | } | |
1123 | ||
1124 | void generateDotStarEnclosure(size_t opIndex) | |
1125 | { | |
1126 | YarrOp& op = m_ops[opIndex]; | |
1127 | PatternTerm* term = op.m_term; | |
1128 | ||
1129 | const RegisterID character = regT0; | |
1130 | const RegisterID matchPos = regT1; | |
1131 | ||
1132 | JumpList foundBeginningNewLine; | |
1133 | JumpList saveStartIndex; | |
1134 | JumpList foundEndingNewLine; | |
1135 | ||
6fe7ccc8 A |
1136 | ASSERT(!m_pattern.m_body->m_hasFixedSize); |
1137 | getMatchStart(matchPos); | |
14957cd0 A |
1138 | |
1139 | saveStartIndex.append(branchTest32(Zero, matchPos)); | |
1140 | Label findBOLLoop(this); | |
1141 | sub32(TrustedImm32(1), matchPos); | |
6fe7ccc8 A |
1142 | if (m_charSize == Char8) |
1143 | load8(BaseIndex(input, matchPos, TimesOne, 0), character); | |
1144 | else | |
1145 | load16(BaseIndex(input, matchPos, TimesTwo, 0), character); | |
14957cd0 A |
1146 | matchCharacterClass(character, foundBeginningNewLine, m_pattern.newlineCharacterClass()); |
1147 | branchTest32(NonZero, matchPos).linkTo(findBOLLoop, this); | |
1148 | saveStartIndex.append(jump()); | |
1149 | ||
1150 | foundBeginningNewLine.link(this); | |
1151 | add32(TrustedImm32(1), matchPos); // Advance past newline | |
1152 | saveStartIndex.link(this); | |
1153 | ||
1154 | if (!m_pattern.m_multiline && term->anchors.bolAnchor) | |
1155 | op.m_jumps.append(branchTest32(NonZero, matchPos)); | |
1156 | ||
6fe7ccc8 A |
1157 | ASSERT(!m_pattern.m_body->m_hasFixedSize); |
1158 | setMatchStart(matchPos); | |
14957cd0 A |
1159 | |
1160 | move(index, matchPos); | |
1161 | ||
1162 | Label findEOLLoop(this); | |
1163 | foundEndingNewLine.append(branch32(Equal, matchPos, length)); | |
6fe7ccc8 A |
1164 | if (m_charSize == Char8) |
1165 | load8(BaseIndex(input, matchPos, TimesOne, 0), character); | |
1166 | else | |
1167 | load16(BaseIndex(input, matchPos, TimesTwo, 0), character); | |
14957cd0 A |
1168 | matchCharacterClass(character, foundEndingNewLine, m_pattern.newlineCharacterClass()); |
1169 | add32(TrustedImm32(1), matchPos); | |
1170 | jump(findEOLLoop); | |
1171 | ||
1172 | foundEndingNewLine.link(this); | |
1173 | ||
1174 | if (!m_pattern.m_multiline && term->anchors.eolAnchor) | |
1175 | op.m_jumps.append(branch32(NotEqual, matchPos, length)); | |
1176 | ||
1177 | move(matchPos, index); | |
1178 | } | |
1179 | ||
1180 | void backtrackDotStarEnclosure(size_t opIndex) | |
1181 | { | |
1182 | backtrackTermDefault(opIndex); | |
1183 | } | |
1184 | ||
1185 | // Code generation/backtracking for simple terms | |
1186 | // (pattern characters, character classes, and assertions). | |
1187 | // These methods farm out work to the set of functions above. | |
1188 | void generateTerm(size_t opIndex) | |
1189 | { | |
1190 | YarrOp& op = m_ops[opIndex]; | |
1191 | PatternTerm* term = op.m_term; | |
1192 | ||
1193 | switch (term->type) { | |
1194 | case PatternTerm::TypePatternCharacter: | |
1195 | switch (term->quantityType) { | |
1196 | case QuantifierFixedCount: | |
1197 | if (term->quantityCount == 1) | |
1198 | generatePatternCharacterOnce(opIndex); | |
1199 | else | |
1200 | generatePatternCharacterFixed(opIndex); | |
1201 | break; | |
1202 | case QuantifierGreedy: | |
1203 | generatePatternCharacterGreedy(opIndex); | |
1204 | break; | |
1205 | case QuantifierNonGreedy: | |
1206 | generatePatternCharacterNonGreedy(opIndex); | |
1207 | break; | |
1208 | } | |
1209 | break; | |
1210 | ||
1211 | case PatternTerm::TypeCharacterClass: | |
1212 | switch (term->quantityType) { | |
1213 | case QuantifierFixedCount: | |
1214 | if (term->quantityCount == 1) | |
1215 | generateCharacterClassOnce(opIndex); | |
1216 | else | |
1217 | generateCharacterClassFixed(opIndex); | |
1218 | break; | |
1219 | case QuantifierGreedy: | |
1220 | generateCharacterClassGreedy(opIndex); | |
1221 | break; | |
1222 | case QuantifierNonGreedy: | |
1223 | generateCharacterClassNonGreedy(opIndex); | |
1224 | break; | |
1225 | } | |
1226 | break; | |
1227 | ||
1228 | case PatternTerm::TypeAssertionBOL: | |
1229 | generateAssertionBOL(opIndex); | |
1230 | break; | |
1231 | ||
1232 | case PatternTerm::TypeAssertionEOL: | |
1233 | generateAssertionEOL(opIndex); | |
1234 | break; | |
1235 | ||
1236 | case PatternTerm::TypeAssertionWordBoundary: | |
1237 | generateAssertionWordBoundary(opIndex); | |
1238 | break; | |
1239 | ||
1240 | case PatternTerm::TypeForwardReference: | |
1241 | break; | |
1242 | ||
1243 | case PatternTerm::TypeParenthesesSubpattern: | |
1244 | case PatternTerm::TypeParentheticalAssertion: | |
1245 | ASSERT_NOT_REACHED(); | |
1246 | case PatternTerm::TypeBackReference: | |
1247 | m_shouldFallBack = true; | |
1248 | break; | |
1249 | case PatternTerm::TypeDotStarEnclosure: | |
1250 | generateDotStarEnclosure(opIndex); | |
1251 | break; | |
1252 | } | |
1253 | } | |
1254 | void backtrackTerm(size_t opIndex) | |
1255 | { | |
1256 | YarrOp& op = m_ops[opIndex]; | |
1257 | PatternTerm* term = op.m_term; | |
1258 | ||
1259 | switch (term->type) { | |
1260 | case PatternTerm::TypePatternCharacter: | |
1261 | switch (term->quantityType) { | |
1262 | case QuantifierFixedCount: | |
1263 | if (term->quantityCount == 1) | |
1264 | backtrackPatternCharacterOnce(opIndex); | |
1265 | else | |
1266 | backtrackPatternCharacterFixed(opIndex); | |
1267 | break; | |
1268 | case QuantifierGreedy: | |
1269 | backtrackPatternCharacterGreedy(opIndex); | |
1270 | break; | |
1271 | case QuantifierNonGreedy: | |
1272 | backtrackPatternCharacterNonGreedy(opIndex); | |
1273 | break; | |
1274 | } | |
1275 | break; | |
1276 | ||
1277 | case PatternTerm::TypeCharacterClass: | |
1278 | switch (term->quantityType) { | |
1279 | case QuantifierFixedCount: | |
1280 | if (term->quantityCount == 1) | |
1281 | backtrackCharacterClassOnce(opIndex); | |
1282 | else | |
1283 | backtrackCharacterClassFixed(opIndex); | |
1284 | break; | |
1285 | case QuantifierGreedy: | |
1286 | backtrackCharacterClassGreedy(opIndex); | |
1287 | break; | |
1288 | case QuantifierNonGreedy: | |
1289 | backtrackCharacterClassNonGreedy(opIndex); | |
1290 | break; | |
1291 | } | |
1292 | break; | |
1293 | ||
1294 | case PatternTerm::TypeAssertionBOL: | |
1295 | backtrackAssertionBOL(opIndex); | |
1296 | break; | |
1297 | ||
1298 | case PatternTerm::TypeAssertionEOL: | |
1299 | backtrackAssertionEOL(opIndex); | |
1300 | break; | |
1301 | ||
1302 | case PatternTerm::TypeAssertionWordBoundary: | |
1303 | backtrackAssertionWordBoundary(opIndex); | |
1304 | break; | |
1305 | ||
1306 | case PatternTerm::TypeForwardReference: | |
1307 | break; | |
1308 | ||
1309 | case PatternTerm::TypeParenthesesSubpattern: | |
1310 | case PatternTerm::TypeParentheticalAssertion: | |
1311 | ASSERT_NOT_REACHED(); | |
1312 | ||
1313 | case PatternTerm::TypeDotStarEnclosure: | |
1314 | backtrackDotStarEnclosure(opIndex); | |
1315 | break; | |
1316 | ||
1317 | case PatternTerm::TypeBackReference: | |
1318 | m_shouldFallBack = true; | |
1319 | break; | |
1320 | } | |
1321 | } | |
1322 | ||
1323 | void generate() | |
1324 | { | |
1325 | // Forwards generate the matching code. | |
1326 | ASSERT(m_ops.size()); | |
1327 | size_t opIndex = 0; | |
1328 | ||
1329 | do { | |
1330 | YarrOp& op = m_ops[opIndex]; | |
1331 | switch (op.m_op) { | |
1332 | ||
1333 | case OpTerm: | |
1334 | generateTerm(opIndex); | |
1335 | break; | |
1336 | ||
1337 | // OpBodyAlternativeBegin/Next/End | |
1338 | // | |
1339 | // These nodes wrap the set of alternatives in the body of the regular expression. | |
1340 | // There may be either one or two chains of OpBodyAlternative nodes, one representing | |
1341 | // the 'once through' sequence of alternatives (if any exist), and one representing | |
1342 | // the repeating alternatives (again, if any exist). | |
1343 | // | |
1344 | // Upon normal entry to the Begin alternative, we will check that input is available. | |
1345 | // Reentry to the Begin alternative will take place after the check has taken place, | |
1346 | // and will assume that the input position has already been progressed as appropriate. | |
1347 | // | |
1348 | // Entry to subsequent Next/End alternatives occurs when the prior alternative has | |
1349 | // successfully completed a match - return a success state from JIT code. | |
1350 | // | |
1351 | // Next alternatives allow for reentry optimized to suit backtracking from its | |
1352 | // preceding alternative. It expects the input position to still be set to a position | |
1353 | // appropriate to its predecessor, and it will only perform an input check if the | |
1354 | // predecessor had a minimum size less than its own. | |
1355 | // | |
1356 | // In the case 'once through' expressions, the End node will also have a reentry | |
1357 | // point to jump to when the last alternative fails. Again, this expects the input | |
1358 | // position to still reflect that expected by the prior alternative. | |
1359 | case OpBodyAlternativeBegin: { | |
1360 | PatternAlternative* alternative = op.m_alternative; | |
1361 | ||
1362 | // Upon entry at the head of the set of alternatives, check if input is available | |
1363 | // to run the first alternative. (This progresses the input position). | |
1364 | op.m_jumps.append(jumpIfNoAvailableInput(alternative->m_minimumSize)); | |
1365 | // We will reenter after the check, and assume the input position to have been | |
1366 | // set as appropriate to this alternative. | |
1367 | op.m_reentry = label(); | |
1368 | ||
1369 | m_checked += alternative->m_minimumSize; | |
1370 | break; | |
1371 | } | |
1372 | case OpBodyAlternativeNext: | |
1373 | case OpBodyAlternativeEnd: { | |
1374 | PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative; | |
1375 | PatternAlternative* alternative = op.m_alternative; | |
1376 | ||
1377 | // If we get here, the prior alternative matched - return success. | |
1378 | ||
1379 | // Adjust the stack pointer to remove the pattern's frame. | |
6fe7ccc8 | 1380 | removeCallFrame(); |
14957cd0 A |
1381 | |
1382 | // Load appropriate values into the return register and the first output | |
1383 | // slot, and return. In the case of pattern with a fixed size, we will | |
1384 | // not have yet set the value in the first | |
1385 | ASSERT(index != returnRegister); | |
1386 | if (m_pattern.m_body->m_hasFixedSize) { | |
1387 | move(index, returnRegister); | |
1388 | if (priorAlternative->m_minimumSize) | |
1389 | sub32(Imm32(priorAlternative->m_minimumSize), returnRegister); | |
6fe7ccc8 A |
1390 | if (compileMode == IncludeSubpatterns) |
1391 | store32(returnRegister, output); | |
14957cd0 | 1392 | } else |
6fe7ccc8 A |
1393 | getMatchStart(returnRegister); |
1394 | if (compileMode == IncludeSubpatterns) | |
1395 | store32(index, Address(output, 4)); | |
1396 | move(index, returnRegister2); | |
1397 | ||
14957cd0 A |
1398 | generateReturn(); |
1399 | ||
1400 | // This is the divide between the tail of the prior alternative, above, and | |
1401 | // the head of the subsequent alternative, below. | |
1402 | ||
1403 | if (op.m_op == OpBodyAlternativeNext) { | |
1404 | // This is the reentry point for the Next alternative. We expect any code | |
1405 | // that jumps here to do so with the input position matching that of the | |
1406 | // PRIOR alteranative, and we will only check input availability if we | |
1407 | // need to progress it forwards. | |
1408 | op.m_reentry = label(); | |
1409 | if (alternative->m_minimumSize > priorAlternative->m_minimumSize) { | |
1410 | add32(Imm32(alternative->m_minimumSize - priorAlternative->m_minimumSize), index); | |
1411 | op.m_jumps.append(jumpIfNoAvailableInput()); | |
1412 | } else if (priorAlternative->m_minimumSize > alternative->m_minimumSize) | |
1413 | sub32(Imm32(priorAlternative->m_minimumSize - alternative->m_minimumSize), index); | |
1414 | } else if (op.m_nextOp == notFound) { | |
1415 | // This is the reentry point for the End of 'once through' alternatives, | |
6fe7ccc8 | 1416 | // jumped to when the last alternative fails to match. |
14957cd0 A |
1417 | op.m_reentry = label(); |
1418 | sub32(Imm32(priorAlternative->m_minimumSize), index); | |
1419 | } | |
1420 | ||
1421 | if (op.m_op == OpBodyAlternativeNext) | |
1422 | m_checked += alternative->m_minimumSize; | |
1423 | m_checked -= priorAlternative->m_minimumSize; | |
1424 | break; | |
1425 | } | |
1426 | ||
1427 | // OpSimpleNestedAlternativeBegin/Next/End | |
1428 | // OpNestedAlternativeBegin/Next/End | |
1429 | // | |
1430 | // These nodes are used to handle sets of alternatives that are nested within | |
1431 | // subpatterns and parenthetical assertions. The 'simple' forms are used where | |
1432 | // we do not need to be able to backtrack back into any alternative other than | |
1433 | // the last, the normal forms allow backtracking into any alternative. | |
1434 | // | |
1435 | // Each Begin/Next node is responsible for planting an input check to ensure | |
1436 | // sufficient input is available on entry. Next nodes additionally need to | |
1437 | // jump to the end - Next nodes use the End node's m_jumps list to hold this | |
1438 | // set of jumps. | |
1439 | // | |
1440 | // In the non-simple forms, successful alternative matches must store a | |
1441 | // 'return address' using a DataLabelPtr, used to store the address to jump | |
1442 | // to when backtracking, to get to the code for the appropriate alternative. | |
1443 | case OpSimpleNestedAlternativeBegin: | |
1444 | case OpNestedAlternativeBegin: { | |
1445 | PatternTerm* term = op.m_term; | |
1446 | PatternAlternative* alternative = op.m_alternative; | |
1447 | PatternDisjunction* disjunction = term->parentheses.disjunction; | |
1448 | ||
1449 | // Calculate how much input we need to check for, and if non-zero check. | |
1450 | op.m_checkAdjust = alternative->m_minimumSize; | |
1451 | if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion)) | |
1452 | op.m_checkAdjust -= disjunction->m_minimumSize; | |
1453 | if (op.m_checkAdjust) | |
1454 | op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust)); | |
6fe7ccc8 | 1455 | |
14957cd0 A |
1456 | m_checked += op.m_checkAdjust; |
1457 | break; | |
1458 | } | |
1459 | case OpSimpleNestedAlternativeNext: | |
1460 | case OpNestedAlternativeNext: { | |
1461 | PatternTerm* term = op.m_term; | |
1462 | PatternAlternative* alternative = op.m_alternative; | |
1463 | PatternDisjunction* disjunction = term->parentheses.disjunction; | |
1464 | ||
1465 | // In the non-simple case, store a 'return address' so we can backtrack correctly. | |
1466 | if (op.m_op == OpNestedAlternativeNext) { | |
1467 | unsigned parenthesesFrameLocation = term->frameLocation; | |
1468 | unsigned alternativeFrameLocation = parenthesesFrameLocation; | |
1469 | if (term->quantityType != QuantifierFixedCount) | |
1470 | alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce; | |
1471 | op.m_returnAddress = storeToFrameWithPatch(alternativeFrameLocation); | |
1472 | } | |
1473 | ||
6fe7ccc8 A |
1474 | if (term->quantityType != QuantifierFixedCount && !m_ops[op.m_previousOp].m_alternative->m_minimumSize) { |
1475 | // If the previous alternative matched without consuming characters then | |
1476 | // backtrack to try to match while consumming some input. | |
1477 | op.m_zeroLengthMatch = branch32(Equal, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*))); | |
1478 | } | |
1479 | ||
14957cd0 A |
1480 | // If we reach here then the last alternative has matched - jump to the |
1481 | // End node, to skip over any further alternatives. | |
1482 | // | |
1483 | // FIXME: this is logically O(N^2) (though N can be expected to be very | |
1484 | // small). We could avoid this either by adding an extra jump to the JIT | |
1485 | // data structures, or by making backtracking code that jumps to Next | |
1486 | // alternatives are responsible for checking that input is available (if | |
1487 | // we didn't need to plant the input checks, then m_jumps would be free). | |
1488 | YarrOp* endOp = &m_ops[op.m_nextOp]; | |
1489 | while (endOp->m_nextOp != notFound) { | |
1490 | ASSERT(endOp->m_op == OpSimpleNestedAlternativeNext || endOp->m_op == OpNestedAlternativeNext); | |
1491 | endOp = &m_ops[endOp->m_nextOp]; | |
1492 | } | |
1493 | ASSERT(endOp->m_op == OpSimpleNestedAlternativeEnd || endOp->m_op == OpNestedAlternativeEnd); | |
1494 | endOp->m_jumps.append(jump()); | |
1495 | ||
1496 | // This is the entry point for the next alternative. | |
1497 | op.m_reentry = label(); | |
1498 | ||
1499 | // Calculate how much input we need to check for, and if non-zero check. | |
1500 | op.m_checkAdjust = alternative->m_minimumSize; | |
1501 | if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion)) | |
1502 | op.m_checkAdjust -= disjunction->m_minimumSize; | |
1503 | if (op.m_checkAdjust) | |
1504 | op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust)); | |
1505 | ||
1506 | YarrOp& lastOp = m_ops[op.m_previousOp]; | |
1507 | m_checked -= lastOp.m_checkAdjust; | |
1508 | m_checked += op.m_checkAdjust; | |
1509 | break; | |
1510 | } | |
1511 | case OpSimpleNestedAlternativeEnd: | |
1512 | case OpNestedAlternativeEnd: { | |
1513 | PatternTerm* term = op.m_term; | |
1514 | ||
1515 | // In the non-simple case, store a 'return address' so we can backtrack correctly. | |
1516 | if (op.m_op == OpNestedAlternativeEnd) { | |
1517 | unsigned parenthesesFrameLocation = term->frameLocation; | |
1518 | unsigned alternativeFrameLocation = parenthesesFrameLocation; | |
1519 | if (term->quantityType != QuantifierFixedCount) | |
1520 | alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce; | |
1521 | op.m_returnAddress = storeToFrameWithPatch(alternativeFrameLocation); | |
1522 | } | |
1523 | ||
6fe7ccc8 A |
1524 | if (term->quantityType != QuantifierFixedCount && !m_ops[op.m_previousOp].m_alternative->m_minimumSize) { |
1525 | // If the previous alternative matched without consuming characters then | |
1526 | // backtrack to try to match while consumming some input. | |
1527 | op.m_zeroLengthMatch = branch32(Equal, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*))); | |
1528 | } | |
1529 | ||
14957cd0 A |
1530 | // If this set of alternatives contains more than one alternative, |
1531 | // then the Next nodes will have planted jumps to the End, and added | |
1532 | // them to this node's m_jumps list. | |
1533 | op.m_jumps.link(this); | |
1534 | op.m_jumps.clear(); | |
1535 | ||
1536 | YarrOp& lastOp = m_ops[op.m_previousOp]; | |
1537 | m_checked -= lastOp.m_checkAdjust; | |
1538 | break; | |
1539 | } | |
1540 | ||
1541 | // OpParenthesesSubpatternOnceBegin/End | |
1542 | // | |
1543 | // These nodes support (optionally) capturing subpatterns, that have a | |
1544 | // quantity count of 1 (this covers fixed once, and ?/?? quantifiers). | |
1545 | case OpParenthesesSubpatternOnceBegin: { | |
1546 | PatternTerm* term = op.m_term; | |
1547 | unsigned parenthesesFrameLocation = term->frameLocation; | |
1548 | const RegisterID indexTemporary = regT0; | |
1549 | ASSERT(term->quantityCount == 1); | |
1550 | ||
1551 | // Upon entry to a Greedy quantified set of parenthese store the index. | |
1552 | // We'll use this for two purposes: | |
1553 | // - To indicate which iteration we are on of mathing the remainder of | |
1554 | // the expression after the parentheses - the first, including the | |
1555 | // match within the parentheses, or the second having skipped over them. | |
1556 | // - To check for empty matches, which must be rejected. | |
1557 | // | |
1558 | // At the head of a NonGreedy set of parentheses we'll immediately set the | |
1559 | // value on the stack to -1 (indicating a match skipping the subpattern), | |
1560 | // and plant a jump to the end. We'll also plant a label to backtrack to | |
1561 | // to reenter the subpattern later, with a store to set up index on the | |
1562 | // second iteration. | |
1563 | // | |
1564 | // FIXME: for capturing parens, could use the index in the capture array? | |
1565 | if (term->quantityType == QuantifierGreedy) | |
1566 | storeToFrame(index, parenthesesFrameLocation); | |
1567 | else if (term->quantityType == QuantifierNonGreedy) { | |
1568 | storeToFrame(TrustedImm32(-1), parenthesesFrameLocation); | |
1569 | op.m_jumps.append(jump()); | |
1570 | op.m_reentry = label(); | |
1571 | storeToFrame(index, parenthesesFrameLocation); | |
1572 | } | |
1573 | ||
1574 | // If the parenthese are capturing, store the starting index value to the | |
1575 | // captures array, offsetting as necessary. | |
1576 | // | |
1577 | // FIXME: could avoid offsetting this value in JIT code, apply | |
1578 | // offsets only afterwards, at the point the results array is | |
1579 | // being accessed. | |
6fe7ccc8 | 1580 | if (term->capture() && compileMode == IncludeSubpatterns) { |
14957cd0 A |
1581 | int inputOffset = term->inputPosition - m_checked; |
1582 | if (term->quantityType == QuantifierFixedCount) | |
1583 | inputOffset -= term->parentheses.disjunction->m_minimumSize; | |
1584 | if (inputOffset) { | |
1585 | move(index, indexTemporary); | |
1586 | add32(Imm32(inputOffset), indexTemporary); | |
6fe7ccc8 | 1587 | setSubpatternStart(indexTemporary, term->parentheses.subpatternId); |
14957cd0 | 1588 | } else |
6fe7ccc8 | 1589 | setSubpatternStart(index, term->parentheses.subpatternId); |
14957cd0 A |
1590 | } |
1591 | break; | |
1592 | } | |
1593 | case OpParenthesesSubpatternOnceEnd: { | |
1594 | PatternTerm* term = op.m_term; | |
14957cd0 A |
1595 | const RegisterID indexTemporary = regT0; |
1596 | ASSERT(term->quantityCount == 1); | |
1597 | ||
6fe7ccc8 A |
1598 | #ifndef NDEBUG |
1599 | // Runtime ASSERT to make sure that the nested alternative handled the | |
1600 | // "no input consumed" check. | |
1601 | if (term->quantityType != QuantifierFixedCount && !term->parentheses.disjunction->m_minimumSize) { | |
1602 | Jump pastBreakpoint; | |
1603 | pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*))); | |
1604 | breakpoint(); | |
1605 | pastBreakpoint.link(this); | |
1606 | } | |
1607 | #endif | |
14957cd0 A |
1608 | |
1609 | // If the parenthese are capturing, store the ending index value to the | |
1610 | // captures array, offsetting as necessary. | |
1611 | // | |
1612 | // FIXME: could avoid offsetting this value in JIT code, apply | |
1613 | // offsets only afterwards, at the point the results array is | |
1614 | // being accessed. | |
6fe7ccc8 | 1615 | if (term->capture() && compileMode == IncludeSubpatterns) { |
14957cd0 A |
1616 | int inputOffset = term->inputPosition - m_checked; |
1617 | if (inputOffset) { | |
1618 | move(index, indexTemporary); | |
1619 | add32(Imm32(inputOffset), indexTemporary); | |
6fe7ccc8 | 1620 | setSubpatternEnd(indexTemporary, term->parentheses.subpatternId); |
14957cd0 | 1621 | } else |
6fe7ccc8 | 1622 | setSubpatternEnd(index, term->parentheses.subpatternId); |
14957cd0 A |
1623 | } |
1624 | ||
1625 | // If the parentheses are quantified Greedy then add a label to jump back | |
1626 | // to if get a failed match from after the parentheses. For NonGreedy | |
1627 | // parentheses, link the jump from before the subpattern to here. | |
1628 | if (term->quantityType == QuantifierGreedy) | |
1629 | op.m_reentry = label(); | |
1630 | else if (term->quantityType == QuantifierNonGreedy) { | |
1631 | YarrOp& beginOp = m_ops[op.m_previousOp]; | |
1632 | beginOp.m_jumps.link(this); | |
1633 | } | |
1634 | break; | |
1635 | } | |
1636 | ||
1637 | // OpParenthesesSubpatternTerminalBegin/End | |
1638 | case OpParenthesesSubpatternTerminalBegin: { | |
1639 | PatternTerm* term = op.m_term; | |
1640 | ASSERT(term->quantityType == QuantifierGreedy); | |
1641 | ASSERT(term->quantityCount == quantifyInfinite); | |
1642 | ASSERT(!term->capture()); | |
1643 | ||
1644 | // Upon entry set a label to loop back to. | |
1645 | op.m_reentry = label(); | |
1646 | ||
1647 | // Store the start index of the current match; we need to reject zero | |
1648 | // length matches. | |
1649 | storeToFrame(index, term->frameLocation); | |
1650 | break; | |
1651 | } | |
1652 | case OpParenthesesSubpatternTerminalEnd: { | |
6fe7ccc8 A |
1653 | YarrOp& beginOp = m_ops[op.m_previousOp]; |
1654 | #ifndef NDEBUG | |
14957cd0 A |
1655 | PatternTerm* term = op.m_term; |
1656 | ||
6fe7ccc8 A |
1657 | // Runtime ASSERT to make sure that the nested alternative handled the |
1658 | // "no input consumed" check. | |
1659 | Jump pastBreakpoint; | |
1660 | pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*))); | |
1661 | breakpoint(); | |
1662 | pastBreakpoint.link(this); | |
1663 | #endif | |
14957cd0 | 1664 | |
6fe7ccc8 A |
1665 | // We know that the match is non-zero, we can accept it and |
1666 | // loop back up to the head of the subpattern. | |
1667 | jump(beginOp.m_reentry); | |
14957cd0 A |
1668 | |
1669 | // This is the entry point to jump to when we stop matching - we will | |
1670 | // do so once the subpattern cannot match any more. | |
1671 | op.m_reentry = label(); | |
1672 | break; | |
1673 | } | |
1674 | ||
1675 | // OpParentheticalAssertionBegin/End | |
1676 | case OpParentheticalAssertionBegin: { | |
1677 | PatternTerm* term = op.m_term; | |
1678 | ||
1679 | // Store the current index - assertions should not update index, so | |
1680 | // we will need to restore it upon a successful match. | |
1681 | unsigned parenthesesFrameLocation = term->frameLocation; | |
1682 | storeToFrame(index, parenthesesFrameLocation); | |
1683 | ||
1684 | // Check | |
1685 | op.m_checkAdjust = m_checked - term->inputPosition; | |
1686 | if (op.m_checkAdjust) | |
1687 | sub32(Imm32(op.m_checkAdjust), index); | |
1688 | ||
1689 | m_checked -= op.m_checkAdjust; | |
1690 | break; | |
1691 | } | |
1692 | case OpParentheticalAssertionEnd: { | |
1693 | PatternTerm* term = op.m_term; | |
1694 | ||
1695 | // Restore the input index value. | |
1696 | unsigned parenthesesFrameLocation = term->frameLocation; | |
1697 | loadFromFrame(parenthesesFrameLocation, index); | |
1698 | ||
1699 | // If inverted, a successful match of the assertion must be treated | |
1700 | // as a failure, so jump to backtracking. | |
1701 | if (term->invert()) { | |
1702 | op.m_jumps.append(jump()); | |
1703 | op.m_reentry = label(); | |
1704 | } | |
1705 | ||
1706 | YarrOp& lastOp = m_ops[op.m_previousOp]; | |
1707 | m_checked += lastOp.m_checkAdjust; | |
1708 | break; | |
1709 | } | |
1710 | ||
1711 | case OpMatchFailed: | |
6fe7ccc8 A |
1712 | removeCallFrame(); |
1713 | move(TrustedImmPtr((void*)WTF::notFound), returnRegister); | |
1714 | move(TrustedImm32(0), returnRegister2); | |
14957cd0 A |
1715 | generateReturn(); |
1716 | break; | |
1717 | } | |
1718 | ||
1719 | ++opIndex; | |
1720 | } while (opIndex < m_ops.size()); | |
1721 | } | |
1722 | ||
1723 | void backtrack() | |
1724 | { | |
1725 | // Backwards generate the backtracking code. | |
1726 | size_t opIndex = m_ops.size(); | |
1727 | ASSERT(opIndex); | |
1728 | ||
1729 | do { | |
1730 | --opIndex; | |
1731 | YarrOp& op = m_ops[opIndex]; | |
1732 | switch (op.m_op) { | |
1733 | ||
1734 | case OpTerm: | |
1735 | backtrackTerm(opIndex); | |
1736 | break; | |
1737 | ||
1738 | // OpBodyAlternativeBegin/Next/End | |
1739 | // | |
1740 | // For each Begin/Next node representing an alternative, we need to decide what to do | |
1741 | // in two circumstances: | |
1742 | // - If we backtrack back into this node, from within the alternative. | |
1743 | // - If the input check at the head of the alternative fails (if this exists). | |
1744 | // | |
1745 | // We treat these two cases differently since in the former case we have slightly | |
1746 | // more information - since we are backtracking out of a prior alternative we know | |
1747 | // that at least enough input was available to run it. For example, given the regular | |
1748 | // expression /a|b/, if we backtrack out of the first alternative (a failed pattern | |
1749 | // character match of 'a'), then we need not perform an additional input availability | |
1750 | // check before running the second alternative. | |
1751 | // | |
1752 | // Backtracking required differs for the last alternative, which in the case of the | |
1753 | // repeating set of alternatives must loop. The code generated for the last alternative | |
1754 | // will also be used to handle all input check failures from any prior alternatives - | |
1755 | // these require similar functionality, in seeking the next available alternative for | |
1756 | // which there is sufficient input. | |
1757 | // | |
1758 | // Since backtracking of all other alternatives simply requires us to link backtracks | |
1759 | // to the reentry point for the subsequent alternative, we will only be generating any | |
1760 | // code when backtracking the last alternative. | |
1761 | case OpBodyAlternativeBegin: | |
1762 | case OpBodyAlternativeNext: { | |
1763 | PatternAlternative* alternative = op.m_alternative; | |
1764 | ||
1765 | if (op.m_op == OpBodyAlternativeNext) { | |
1766 | PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative; | |
1767 | m_checked += priorAlternative->m_minimumSize; | |
1768 | } | |
1769 | m_checked -= alternative->m_minimumSize; | |
1770 | ||
1771 | // Is this the last alternative? If not, then if we backtrack to this point we just | |
1772 | // need to jump to try to match the next alternative. | |
1773 | if (m_ops[op.m_nextOp].m_op != OpBodyAlternativeEnd) { | |
1774 | m_backtrackingState.linkTo(m_ops[op.m_nextOp].m_reentry, this); | |
1775 | break; | |
1776 | } | |
1777 | YarrOp& endOp = m_ops[op.m_nextOp]; | |
1778 | ||
1779 | YarrOp* beginOp = &op; | |
1780 | while (beginOp->m_op != OpBodyAlternativeBegin) { | |
1781 | ASSERT(beginOp->m_op == OpBodyAlternativeNext); | |
1782 | beginOp = &m_ops[beginOp->m_previousOp]; | |
1783 | } | |
1784 | ||
1785 | bool onceThrough = endOp.m_nextOp == notFound; | |
1786 | ||
1787 | // First, generate code to handle cases where we backtrack out of an attempted match | |
1788 | // of the last alternative. If this is a 'once through' set of alternatives then we | |
1789 | // have nothing to do - link this straight through to the End. | |
1790 | if (onceThrough) | |
1791 | m_backtrackingState.linkTo(endOp.m_reentry, this); | |
1792 | else { | |
1793 | // If we don't need to move the input poistion, and the pattern has a fixed size | |
1794 | // (in which case we omit the store of the start index until the pattern has matched) | |
1795 | // then we can just link the backtrack out of the last alternative straight to the | |
1796 | // head of the first alternative. | |
1797 | if (m_pattern.m_body->m_hasFixedSize | |
1798 | && (alternative->m_minimumSize > beginOp->m_alternative->m_minimumSize) | |
1799 | && (alternative->m_minimumSize - beginOp->m_alternative->m_minimumSize == 1)) | |
1800 | m_backtrackingState.linkTo(beginOp->m_reentry, this); | |
1801 | else { | |
1802 | // We need to generate a trampoline of code to execute before looping back | |
1803 | // around to the first alternative. | |
1804 | m_backtrackingState.link(this); | |
1805 | ||
1806 | // If the pattern size is not fixed, then store the start index, for use if we match. | |
1807 | if (!m_pattern.m_body->m_hasFixedSize) { | |
1808 | if (alternative->m_minimumSize == 1) | |
6fe7ccc8 | 1809 | setMatchStart(index); |
14957cd0 A |
1810 | else { |
1811 | move(index, regT0); | |
1812 | if (alternative->m_minimumSize) | |
1813 | sub32(Imm32(alternative->m_minimumSize - 1), regT0); | |
1814 | else | |
6fe7ccc8 A |
1815 | add32(TrustedImm32(1), regT0); |
1816 | setMatchStart(regT0); | |
14957cd0 A |
1817 | } |
1818 | } | |
1819 | ||
1820 | // Generate code to loop. Check whether the last alternative is longer than the | |
1821 | // first (e.g. /a|xy/ or /a|xyz/). | |
1822 | if (alternative->m_minimumSize > beginOp->m_alternative->m_minimumSize) { | |
1823 | // We want to loop, and increment input position. If the delta is 1, it is | |
1824 | // already correctly incremented, if more than one then decrement as appropriate. | |
1825 | unsigned delta = alternative->m_minimumSize - beginOp->m_alternative->m_minimumSize; | |
1826 | ASSERT(delta); | |
1827 | if (delta != 1) | |
1828 | sub32(Imm32(delta - 1), index); | |
1829 | jump(beginOp->m_reentry); | |
1830 | } else { | |
1831 | // If the first alternative has minimum size 0xFFFFFFFFu, then there cannot | |
1832 | // be sufficent input available to handle this, so just fall through. | |
1833 | unsigned delta = beginOp->m_alternative->m_minimumSize - alternative->m_minimumSize; | |
1834 | if (delta != 0xFFFFFFFFu) { | |
1835 | // We need to check input because we are incrementing the input. | |
1836 | add32(Imm32(delta + 1), index); | |
1837 | checkInput().linkTo(beginOp->m_reentry, this); | |
1838 | } | |
1839 | } | |
1840 | } | |
1841 | } | |
1842 | ||
1843 | // We can reach this point in the code in two ways: | |
1844 | // - Fallthrough from the code above (a repeating alternative backtracked out of its | |
1845 | // last alternative, and did not have sufficent input to run the first). | |
1846 | // - We will loop back up to the following label when a releating alternative loops, | |
1847 | // following a failed input check. | |
1848 | // | |
1849 | // Either way, we have just failed the input check for the first alternative. | |
1850 | Label firstInputCheckFailed(this); | |
1851 | ||
1852 | // Generate code to handle input check failures from alternatives except the last. | |
1853 | // prevOp is the alternative we're handling a bail out from (initially Begin), and | |
1854 | // nextOp is the alternative we will be attempting to reenter into. | |
1855 | // | |
1856 | // We will link input check failures from the forwards matching path back to the code | |
1857 | // that can handle them. | |
1858 | YarrOp* prevOp = beginOp; | |
1859 | YarrOp* nextOp = &m_ops[beginOp->m_nextOp]; | |
1860 | while (nextOp->m_op != OpBodyAlternativeEnd) { | |
1861 | prevOp->m_jumps.link(this); | |
1862 | ||
1863 | // We only get here if an input check fails, it is only worth checking again | |
1864 | // if the next alternative has a minimum size less than the last. | |
1865 | if (prevOp->m_alternative->m_minimumSize > nextOp->m_alternative->m_minimumSize) { | |
1866 | // FIXME: if we added an extra label to YarrOp, we could avoid needing to | |
1867 | // subtract delta back out, and reduce this code. Should performance test | |
1868 | // the benefit of this. | |
1869 | unsigned delta = prevOp->m_alternative->m_minimumSize - nextOp->m_alternative->m_minimumSize; | |
1870 | sub32(Imm32(delta), index); | |
1871 | Jump fail = jumpIfNoAvailableInput(); | |
1872 | add32(Imm32(delta), index); | |
1873 | jump(nextOp->m_reentry); | |
1874 | fail.link(this); | |
1875 | } else if (prevOp->m_alternative->m_minimumSize < nextOp->m_alternative->m_minimumSize) | |
1876 | add32(Imm32(nextOp->m_alternative->m_minimumSize - prevOp->m_alternative->m_minimumSize), index); | |
1877 | prevOp = nextOp; | |
1878 | nextOp = &m_ops[nextOp->m_nextOp]; | |
1879 | } | |
1880 | ||
1881 | // We fall through to here if there is insufficient input to run the last alternative. | |
1882 | ||
1883 | // If there is insufficient input to run the last alternative, then for 'once through' | |
1884 | // alternatives we are done - just jump back up into the forwards matching path at the End. | |
1885 | if (onceThrough) { | |
1886 | op.m_jumps.linkTo(endOp.m_reentry, this); | |
1887 | jump(endOp.m_reentry); | |
1888 | break; | |
1889 | } | |
1890 | ||
1891 | // For repeating alternatives, link any input check failure from the last alternative to | |
1892 | // this point. | |
1893 | op.m_jumps.link(this); | |
1894 | ||
1895 | bool needsToUpdateMatchStart = !m_pattern.m_body->m_hasFixedSize; | |
1896 | ||
1897 | // Check for cases where input position is already incremented by 1 for the last | |
1898 | // alternative (this is particularly useful where the minimum size of the body | |
1899 | // disjunction is 0, e.g. /a*|b/). | |
1900 | if (needsToUpdateMatchStart && alternative->m_minimumSize == 1) { | |
1901 | // index is already incremented by 1, so just store it now! | |
6fe7ccc8 | 1902 | setMatchStart(index); |
14957cd0 A |
1903 | needsToUpdateMatchStart = false; |
1904 | } | |
1905 | ||
1906 | // Check whether there is sufficient input to loop. Increment the input position by | |
1907 | // one, and check. Also add in the minimum disjunction size before checking - there | |
1908 | // is no point in looping if we're just going to fail all the input checks around | |
1909 | // the next iteration. | |
1910 | ASSERT(alternative->m_minimumSize >= m_pattern.m_body->m_minimumSize); | |
1911 | if (alternative->m_minimumSize == m_pattern.m_body->m_minimumSize) { | |
1912 | // If the last alternative had the same minimum size as the disjunction, | |
1913 | // just simply increment input pos by 1, no adjustment based on minimum size. | |
6fe7ccc8 | 1914 | add32(TrustedImm32(1), index); |
14957cd0 A |
1915 | } else { |
1916 | // If the minumum for the last alternative was one greater than than that | |
1917 | // for the disjunction, we're already progressed by 1, nothing to do! | |
1918 | unsigned delta = (alternative->m_minimumSize - m_pattern.m_body->m_minimumSize) - 1; | |
1919 | if (delta) | |
1920 | sub32(Imm32(delta), index); | |
1921 | } | |
1922 | Jump matchFailed = jumpIfNoAvailableInput(); | |
1923 | ||
1924 | if (needsToUpdateMatchStart) { | |
1925 | if (!m_pattern.m_body->m_minimumSize) | |
6fe7ccc8 | 1926 | setMatchStart(index); |
14957cd0 A |
1927 | else { |
1928 | move(index, regT0); | |
1929 | sub32(Imm32(m_pattern.m_body->m_minimumSize), regT0); | |
6fe7ccc8 | 1930 | setMatchStart(regT0); |
14957cd0 A |
1931 | } |
1932 | } | |
1933 | ||
1934 | // Calculate how much more input the first alternative requires than the minimum | |
1935 | // for the body as a whole. If no more is needed then we dont need an additional | |
1936 | // input check here - jump straight back up to the start of the first alternative. | |
1937 | if (beginOp->m_alternative->m_minimumSize == m_pattern.m_body->m_minimumSize) | |
1938 | jump(beginOp->m_reentry); | |
1939 | else { | |
1940 | if (beginOp->m_alternative->m_minimumSize > m_pattern.m_body->m_minimumSize) | |
1941 | add32(Imm32(beginOp->m_alternative->m_minimumSize - m_pattern.m_body->m_minimumSize), index); | |
1942 | else | |
1943 | sub32(Imm32(m_pattern.m_body->m_minimumSize - beginOp->m_alternative->m_minimumSize), index); | |
1944 | checkInput().linkTo(beginOp->m_reentry, this); | |
1945 | jump(firstInputCheckFailed); | |
1946 | } | |
1947 | ||
1948 | // We jump to here if we iterate to the point that there is insufficient input to | |
1949 | // run any matches, and need to return a failure state from JIT code. | |
1950 | matchFailed.link(this); | |
1951 | ||
6fe7ccc8 A |
1952 | removeCallFrame(); |
1953 | move(TrustedImmPtr((void*)WTF::notFound), returnRegister); | |
1954 | move(TrustedImm32(0), returnRegister2); | |
14957cd0 A |
1955 | generateReturn(); |
1956 | break; | |
1957 | } | |
1958 | case OpBodyAlternativeEnd: { | |
1959 | // We should never backtrack back into a body disjunction. | |
1960 | ASSERT(m_backtrackingState.isEmpty()); | |
1961 | ||
1962 | PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative; | |
1963 | m_checked += priorAlternative->m_minimumSize; | |
1964 | break; | |
1965 | } | |
1966 | ||
1967 | // OpSimpleNestedAlternativeBegin/Next/End | |
1968 | // OpNestedAlternativeBegin/Next/End | |
1969 | // | |
1970 | // Generate code for when we backtrack back out of an alternative into | |
1971 | // a Begin or Next node, or when the entry input count check fails. If | |
1972 | // there are more alternatives we need to jump to the next alternative, | |
1973 | // if not we backtrack back out of the current set of parentheses. | |
1974 | // | |
1975 | // In the case of non-simple nested assertions we need to also link the | |
1976 | // 'return address' appropriately to backtrack back out into the correct | |
1977 | // alternative. | |
1978 | case OpSimpleNestedAlternativeBegin: | |
1979 | case OpSimpleNestedAlternativeNext: | |
1980 | case OpNestedAlternativeBegin: | |
1981 | case OpNestedAlternativeNext: { | |
1982 | YarrOp& nextOp = m_ops[op.m_nextOp]; | |
1983 | bool isBegin = op.m_previousOp == notFound; | |
1984 | bool isLastAlternative = nextOp.m_nextOp == notFound; | |
1985 | ASSERT(isBegin == (op.m_op == OpSimpleNestedAlternativeBegin || op.m_op == OpNestedAlternativeBegin)); | |
1986 | ASSERT(isLastAlternative == (nextOp.m_op == OpSimpleNestedAlternativeEnd || nextOp.m_op == OpNestedAlternativeEnd)); | |
1987 | ||
1988 | // Treat an input check failure the same as a failed match. | |
1989 | m_backtrackingState.append(op.m_jumps); | |
1990 | ||
1991 | // Set the backtracks to jump to the appropriate place. We may need | |
1992 | // to link the backtracks in one of three different way depending on | |
1993 | // the type of alternative we are dealing with: | |
1994 | // - A single alternative, with no simplings. | |
1995 | // - The last alternative of a set of two or more. | |
1996 | // - An alternative other than the last of a set of two or more. | |
1997 | // | |
1998 | // In the case of a single alternative on its own, we don't need to | |
1999 | // jump anywhere - if the alternative fails to match we can just | |
2000 | // continue to backtrack out of the parentheses without jumping. | |
2001 | // | |
2002 | // In the case of the last alternative in a set of more than one, we | |
2003 | // need to jump to return back out to the beginning. We'll do so by | |
2004 | // adding a jump to the End node's m_jumps list, and linking this | |
2005 | // when we come to generate the Begin node. For alternatives other | |
2006 | // than the last, we need to jump to the next alternative. | |
2007 | // | |
2008 | // If the alternative had adjusted the input position we must link | |
2009 | // backtracking to here, correct, and then jump on. If not we can | |
2010 | // link the backtracks directly to their destination. | |
2011 | if (op.m_checkAdjust) { | |
2012 | // Handle the cases where we need to link the backtracks here. | |
2013 | m_backtrackingState.link(this); | |
2014 | sub32(Imm32(op.m_checkAdjust), index); | |
2015 | if (!isLastAlternative) { | |
2016 | // An alternative that is not the last should jump to its successor. | |
2017 | jump(nextOp.m_reentry); | |
2018 | } else if (!isBegin) { | |
6fe7ccc8 | 2019 | // The last of more than one alternatives must jump back to the beginning. |
14957cd0 A |
2020 | nextOp.m_jumps.append(jump()); |
2021 | } else { | |
2022 | // A single alternative on its own can fall through. | |
2023 | m_backtrackingState.fallthrough(); | |
2024 | } | |
2025 | } else { | |
2026 | // Handle the cases where we can link the backtracks directly to their destinations. | |
2027 | if (!isLastAlternative) { | |
2028 | // An alternative that is not the last should jump to its successor. | |
2029 | m_backtrackingState.linkTo(nextOp.m_reentry, this); | |
2030 | } else if (!isBegin) { | |
6fe7ccc8 | 2031 | // The last of more than one alternatives must jump back to the beginning. |
14957cd0 A |
2032 | m_backtrackingState.takeBacktracksToJumpList(nextOp.m_jumps, this); |
2033 | } | |
2034 | // In the case of a single alternative on its own do nothing - it can fall through. | |
2035 | } | |
2036 | ||
6fe7ccc8 A |
2037 | // If there is a backtrack jump from a zero length match link it here. |
2038 | if (op.m_zeroLengthMatch.isSet()) | |
2039 | m_backtrackingState.append(op.m_zeroLengthMatch); | |
2040 | ||
14957cd0 A |
2041 | // At this point we've handled the backtracking back into this node. |
2042 | // Now link any backtracks that need to jump to here. | |
2043 | ||
2044 | // For non-simple alternatives, link the alternative's 'return address' | |
2045 | // so that we backtrack back out into the previous alternative. | |
2046 | if (op.m_op == OpNestedAlternativeNext) | |
2047 | m_backtrackingState.append(op.m_returnAddress); | |
2048 | ||
2049 | // If there is more than one alternative, then the last alternative will | |
2050 | // have planted a jump to be linked to the end. This jump was added to the | |
2051 | // End node's m_jumps list. If we are back at the beginning, link it here. | |
2052 | if (isBegin) { | |
2053 | YarrOp* endOp = &m_ops[op.m_nextOp]; | |
2054 | while (endOp->m_nextOp != notFound) { | |
2055 | ASSERT(endOp->m_op == OpSimpleNestedAlternativeNext || endOp->m_op == OpNestedAlternativeNext); | |
2056 | endOp = &m_ops[endOp->m_nextOp]; | |
2057 | } | |
2058 | ASSERT(endOp->m_op == OpSimpleNestedAlternativeEnd || endOp->m_op == OpNestedAlternativeEnd); | |
2059 | m_backtrackingState.append(endOp->m_jumps); | |
2060 | } | |
2061 | ||
2062 | if (!isBegin) { | |
2063 | YarrOp& lastOp = m_ops[op.m_previousOp]; | |
2064 | m_checked += lastOp.m_checkAdjust; | |
2065 | } | |
2066 | m_checked -= op.m_checkAdjust; | |
2067 | break; | |
2068 | } | |
2069 | case OpSimpleNestedAlternativeEnd: | |
2070 | case OpNestedAlternativeEnd: { | |
2071 | PatternTerm* term = op.m_term; | |
2072 | ||
6fe7ccc8 A |
2073 | // If there is a backtrack jump from a zero length match link it here. |
2074 | if (op.m_zeroLengthMatch.isSet()) | |
2075 | m_backtrackingState.append(op.m_zeroLengthMatch); | |
2076 | ||
14957cd0 A |
2077 | // If we backtrack into the end of a simple subpattern do nothing; |
2078 | // just continue through into the last alternative. If we backtrack | |
2079 | // into the end of a non-simple set of alterntives we need to jump | |
2080 | // to the backtracking return address set up during generation. | |
2081 | if (op.m_op == OpNestedAlternativeEnd) { | |
2082 | m_backtrackingState.link(this); | |
2083 | ||
2084 | // Plant a jump to the return address. | |
2085 | unsigned parenthesesFrameLocation = term->frameLocation; | |
2086 | unsigned alternativeFrameLocation = parenthesesFrameLocation; | |
2087 | if (term->quantityType != QuantifierFixedCount) | |
2088 | alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce; | |
2089 | loadFromFrameAndJump(alternativeFrameLocation); | |
2090 | ||
2091 | // Link the DataLabelPtr associated with the end of the last | |
2092 | // alternative to this point. | |
2093 | m_backtrackingState.append(op.m_returnAddress); | |
2094 | } | |
2095 | ||
2096 | YarrOp& lastOp = m_ops[op.m_previousOp]; | |
2097 | m_checked += lastOp.m_checkAdjust; | |
2098 | break; | |
2099 | } | |
2100 | ||
2101 | // OpParenthesesSubpatternOnceBegin/End | |
2102 | // | |
2103 | // When we are backtracking back out of a capturing subpattern we need | |
2104 | // to clear the start index in the matches output array, to record that | |
2105 | // this subpattern has not been captured. | |
2106 | // | |
2107 | // When backtracking back out of a Greedy quantified subpattern we need | |
2108 | // to catch this, and try running the remainder of the alternative after | |
2109 | // the subpattern again, skipping the parentheses. | |
2110 | // | |
2111 | // Upon backtracking back into a quantified set of parentheses we need to | |
2112 | // check whether we were currently skipping the subpattern. If not, we | |
2113 | // can backtrack into them, if we were we need to either backtrack back | |
2114 | // out of the start of the parentheses, or jump back to the forwards | |
2115 | // matching start, depending of whether the match is Greedy or NonGreedy. | |
2116 | case OpParenthesesSubpatternOnceBegin: { | |
2117 | PatternTerm* term = op.m_term; | |
2118 | ASSERT(term->quantityCount == 1); | |
2119 | ||
2120 | // We only need to backtrack to thispoint if capturing or greedy. | |
6fe7ccc8 | 2121 | if ((term->capture() && compileMode == IncludeSubpatterns) || term->quantityType == QuantifierGreedy) { |
14957cd0 A |
2122 | m_backtrackingState.link(this); |
2123 | ||
2124 | // If capturing, clear the capture (we only need to reset start). | |
6fe7ccc8 A |
2125 | if (term->capture() && compileMode == IncludeSubpatterns) |
2126 | clearSubpatternStart(term->parentheses.subpatternId); | |
14957cd0 A |
2127 | |
2128 | // If Greedy, jump to the end. | |
2129 | if (term->quantityType == QuantifierGreedy) { | |
2130 | // Clear the flag in the stackframe indicating we ran through the subpattern. | |
2131 | unsigned parenthesesFrameLocation = term->frameLocation; | |
2132 | storeToFrame(TrustedImm32(-1), parenthesesFrameLocation); | |
2133 | // Jump to after the parentheses, skipping the subpattern. | |
2134 | jump(m_ops[op.m_nextOp].m_reentry); | |
2135 | // A backtrack from after the parentheses, when skipping the subpattern, | |
2136 | // will jump back to here. | |
2137 | op.m_jumps.link(this); | |
2138 | } | |
2139 | ||
2140 | m_backtrackingState.fallthrough(); | |
2141 | } | |
2142 | break; | |
2143 | } | |
2144 | case OpParenthesesSubpatternOnceEnd: { | |
2145 | PatternTerm* term = op.m_term; | |
2146 | ||
2147 | if (term->quantityType != QuantifierFixedCount) { | |
2148 | m_backtrackingState.link(this); | |
2149 | ||
2150 | // Check whether we should backtrack back into the parentheses, or if we | |
2151 | // are currently in a state where we had skipped over the subpattern | |
2152 | // (in which case the flag value on the stack will be -1). | |
2153 | unsigned parenthesesFrameLocation = term->frameLocation; | |
2154 | Jump hadSkipped = branch32(Equal, Address(stackPointerRegister, parenthesesFrameLocation * sizeof(void*)), TrustedImm32(-1)); | |
2155 | ||
2156 | if (term->quantityType == QuantifierGreedy) { | |
2157 | // For Greedy parentheses, we skip after having already tried going | |
2158 | // through the subpattern, so if we get here we're done. | |
2159 | YarrOp& beginOp = m_ops[op.m_previousOp]; | |
2160 | beginOp.m_jumps.append(hadSkipped); | |
2161 | } else { | |
2162 | // For NonGreedy parentheses, we try skipping the subpattern first, | |
2163 | // so if we get here we need to try running through the subpattern | |
2164 | // next. Jump back to the start of the parentheses in the forwards | |
2165 | // matching path. | |
2166 | ASSERT(term->quantityType == QuantifierNonGreedy); | |
2167 | YarrOp& beginOp = m_ops[op.m_previousOp]; | |
2168 | hadSkipped.linkTo(beginOp.m_reentry, this); | |
2169 | } | |
2170 | ||
2171 | m_backtrackingState.fallthrough(); | |
2172 | } | |
2173 | ||
2174 | m_backtrackingState.append(op.m_jumps); | |
2175 | break; | |
2176 | } | |
2177 | ||
2178 | // OpParenthesesSubpatternTerminalBegin/End | |
2179 | // | |
2180 | // Terminal subpatterns will always match - there is nothing after them to | |
2181 | // force a backtrack, and they have a minimum count of 0, and as such will | |
2182 | // always produce an acceptable result. | |
2183 | case OpParenthesesSubpatternTerminalBegin: { | |
2184 | // We will backtrack to this point once the subpattern cannot match any | |
2185 | // more. Since no match is accepted as a successful match (we are Greedy | |
2186 | // quantified with a minimum of zero) jump back to the forwards matching | |
2187 | // path at the end. | |
2188 | YarrOp& endOp = m_ops[op.m_nextOp]; | |
2189 | m_backtrackingState.linkTo(endOp.m_reentry, this); | |
2190 | break; | |
2191 | } | |
2192 | case OpParenthesesSubpatternTerminalEnd: | |
2193 | // We should never be backtracking to here (hence the 'terminal' in the name). | |
2194 | ASSERT(m_backtrackingState.isEmpty()); | |
2195 | m_backtrackingState.append(op.m_jumps); | |
2196 | break; | |
2197 | ||
2198 | // OpParentheticalAssertionBegin/End | |
2199 | case OpParentheticalAssertionBegin: { | |
2200 | PatternTerm* term = op.m_term; | |
2201 | YarrOp& endOp = m_ops[op.m_nextOp]; | |
2202 | ||
2203 | // We need to handle the backtracks upon backtracking back out | |
2204 | // of a parenthetical assertion if either we need to correct | |
2205 | // the input index, or the assertion was inverted. | |
2206 | if (op.m_checkAdjust || term->invert()) { | |
2207 | m_backtrackingState.link(this); | |
2208 | ||
2209 | if (op.m_checkAdjust) | |
2210 | add32(Imm32(op.m_checkAdjust), index); | |
2211 | ||
2212 | // In an inverted assertion failure to match the subpattern | |
2213 | // is treated as a successful match - jump to the end of the | |
2214 | // subpattern. We already have adjusted the input position | |
2215 | // back to that before the assertion, which is correct. | |
2216 | if (term->invert()) | |
2217 | jump(endOp.m_reentry); | |
2218 | ||
2219 | m_backtrackingState.fallthrough(); | |
2220 | } | |
2221 | ||
2222 | // The End node's jump list will contain any backtracks into | |
2223 | // the end of the assertion. Also, if inverted, we will have | |
2224 | // added the failure caused by a successful match to this. | |
2225 | m_backtrackingState.append(endOp.m_jumps); | |
2226 | ||
2227 | m_checked += op.m_checkAdjust; | |
2228 | break; | |
2229 | } | |
2230 | case OpParentheticalAssertionEnd: { | |
2231 | // FIXME: We should really be clearing any nested subpattern | |
2232 | // matches on bailing out from after the pattern. Firefox has | |
2233 | // this bug too (presumably because they use YARR!) | |
2234 | ||
2235 | // Never backtrack into an assertion; later failures bail to before the begin. | |
2236 | m_backtrackingState.takeBacktracksToJumpList(op.m_jumps, this); | |
2237 | ||
2238 | YarrOp& lastOp = m_ops[op.m_previousOp]; | |
2239 | m_checked -= lastOp.m_checkAdjust; | |
2240 | break; | |
2241 | } | |
2242 | ||
2243 | case OpMatchFailed: | |
2244 | break; | |
2245 | } | |
2246 | ||
2247 | } while (opIndex); | |
2248 | } | |
2249 | ||
2250 | // Compilation methods: | |
2251 | // ==================== | |
2252 | ||
2253 | // opCompileParenthesesSubpattern | |
2254 | // Emits ops for a subpattern (set of parentheses). These consist | |
2255 | // of a set of alternatives wrapped in an outer set of nodes for | |
2256 | // the parentheses. | |
2257 | // Supported types of parentheses are 'Once' (quantityCount == 1) | |
2258 | // and 'Terminal' (non-capturing parentheses quantified as greedy | |
2259 | // and infinite). | |
2260 | // Alternatives will use the 'Simple' set of ops if either the | |
2261 | // subpattern is terminal (in which case we will never need to | |
2262 | // backtrack), or if the subpattern only contains one alternative. | |
2263 | void opCompileParenthesesSubpattern(PatternTerm* term) | |
2264 | { | |
2265 | YarrOpCode parenthesesBeginOpCode; | |
2266 | YarrOpCode parenthesesEndOpCode; | |
2267 | YarrOpCode alternativeBeginOpCode = OpSimpleNestedAlternativeBegin; | |
2268 | YarrOpCode alternativeNextOpCode = OpSimpleNestedAlternativeNext; | |
2269 | YarrOpCode alternativeEndOpCode = OpSimpleNestedAlternativeEnd; | |
2270 | ||
2271 | // We can currently only compile quantity 1 subpatterns that are | |
2272 | // not copies. We generate a copy in the case of a range quantifier, | |
2273 | // e.g. /(?:x){3,9}/, or /(?:x)+/ (These are effectively expanded to | |
2274 | // /(?:x){3,3}(?:x){0,6}/ and /(?:x)(?:x)*/ repectively). The problem | |
2275 | // comes where the subpattern is capturing, in which case we would | |
2276 | // need to restore the capture from the first subpattern upon a | |
2277 | // failure in the second. | |
2278 | if (term->quantityCount == 1 && !term->parentheses.isCopy) { | |
2279 | // Select the 'Once' nodes. | |
2280 | parenthesesBeginOpCode = OpParenthesesSubpatternOnceBegin; | |
2281 | parenthesesEndOpCode = OpParenthesesSubpatternOnceEnd; | |
2282 | ||
2283 | // If there is more than one alternative we cannot use the 'simple' nodes. | |
2284 | if (term->parentheses.disjunction->m_alternatives.size() != 1) { | |
2285 | alternativeBeginOpCode = OpNestedAlternativeBegin; | |
2286 | alternativeNextOpCode = OpNestedAlternativeNext; | |
2287 | alternativeEndOpCode = OpNestedAlternativeEnd; | |
2288 | } | |
2289 | } else if (term->parentheses.isTerminal) { | |
2290 | // Select the 'Terminal' nodes. | |
2291 | parenthesesBeginOpCode = OpParenthesesSubpatternTerminalBegin; | |
2292 | parenthesesEndOpCode = OpParenthesesSubpatternTerminalEnd; | |
2293 | } else { | |
2294 | // This subpattern is not supported by the JIT. | |
2295 | m_shouldFallBack = true; | |
2296 | return; | |
2297 | } | |
2298 | ||
2299 | size_t parenBegin = m_ops.size(); | |
2300 | m_ops.append(parenthesesBeginOpCode); | |
2301 | ||
2302 | m_ops.append(alternativeBeginOpCode); | |
2303 | m_ops.last().m_previousOp = notFound; | |
2304 | m_ops.last().m_term = term; | |
2305 | Vector<PatternAlternative*>& alternatives = term->parentheses.disjunction->m_alternatives; | |
2306 | for (unsigned i = 0; i < alternatives.size(); ++i) { | |
2307 | size_t lastOpIndex = m_ops.size() - 1; | |
2308 | ||
2309 | PatternAlternative* nestedAlternative = alternatives[i]; | |
2310 | opCompileAlternative(nestedAlternative); | |
2311 | ||
2312 | size_t thisOpIndex = m_ops.size(); | |
2313 | m_ops.append(YarrOp(alternativeNextOpCode)); | |
2314 | ||
2315 | YarrOp& lastOp = m_ops[lastOpIndex]; | |
2316 | YarrOp& thisOp = m_ops[thisOpIndex]; | |
2317 | ||
2318 | lastOp.m_alternative = nestedAlternative; | |
2319 | lastOp.m_nextOp = thisOpIndex; | |
2320 | thisOp.m_previousOp = lastOpIndex; | |
2321 | thisOp.m_term = term; | |
2322 | } | |
2323 | YarrOp& lastOp = m_ops.last(); | |
2324 | ASSERT(lastOp.m_op == alternativeNextOpCode); | |
2325 | lastOp.m_op = alternativeEndOpCode; | |
2326 | lastOp.m_alternative = 0; | |
2327 | lastOp.m_nextOp = notFound; | |
2328 | ||
2329 | size_t parenEnd = m_ops.size(); | |
2330 | m_ops.append(parenthesesEndOpCode); | |
2331 | ||
2332 | m_ops[parenBegin].m_term = term; | |
2333 | m_ops[parenBegin].m_previousOp = notFound; | |
2334 | m_ops[parenBegin].m_nextOp = parenEnd; | |
2335 | m_ops[parenEnd].m_term = term; | |
2336 | m_ops[parenEnd].m_previousOp = parenBegin; | |
2337 | m_ops[parenEnd].m_nextOp = notFound; | |
2338 | } | |
2339 | ||
2340 | // opCompileParentheticalAssertion | |
2341 | // Emits ops for a parenthetical assertion. These consist of an | |
2342 | // OpSimpleNestedAlternativeBegin/Next/End set of nodes wrapping | |
2343 | // the alternatives, with these wrapped by an outer pair of | |
2344 | // OpParentheticalAssertionBegin/End nodes. | |
2345 | // We can always use the OpSimpleNestedAlternative nodes in the | |
2346 | // case of parenthetical assertions since these only ever match | |
2347 | // once, and will never backtrack back into the assertion. | |
2348 | void opCompileParentheticalAssertion(PatternTerm* term) | |
2349 | { | |
2350 | size_t parenBegin = m_ops.size(); | |
2351 | m_ops.append(OpParentheticalAssertionBegin); | |
2352 | ||
2353 | m_ops.append(OpSimpleNestedAlternativeBegin); | |
2354 | m_ops.last().m_previousOp = notFound; | |
2355 | m_ops.last().m_term = term; | |
2356 | Vector<PatternAlternative*>& alternatives = term->parentheses.disjunction->m_alternatives; | |
2357 | for (unsigned i = 0; i < alternatives.size(); ++i) { | |
2358 | size_t lastOpIndex = m_ops.size() - 1; | |
2359 | ||
2360 | PatternAlternative* nestedAlternative = alternatives[i]; | |
2361 | opCompileAlternative(nestedAlternative); | |
2362 | ||
2363 | size_t thisOpIndex = m_ops.size(); | |
2364 | m_ops.append(YarrOp(OpSimpleNestedAlternativeNext)); | |
2365 | ||
2366 | YarrOp& lastOp = m_ops[lastOpIndex]; | |
2367 | YarrOp& thisOp = m_ops[thisOpIndex]; | |
2368 | ||
2369 | lastOp.m_alternative = nestedAlternative; | |
2370 | lastOp.m_nextOp = thisOpIndex; | |
2371 | thisOp.m_previousOp = lastOpIndex; | |
2372 | thisOp.m_term = term; | |
2373 | } | |
2374 | YarrOp& lastOp = m_ops.last(); | |
2375 | ASSERT(lastOp.m_op == OpSimpleNestedAlternativeNext); | |
2376 | lastOp.m_op = OpSimpleNestedAlternativeEnd; | |
2377 | lastOp.m_alternative = 0; | |
2378 | lastOp.m_nextOp = notFound; | |
2379 | ||
2380 | size_t parenEnd = m_ops.size(); | |
2381 | m_ops.append(OpParentheticalAssertionEnd); | |
2382 | ||
2383 | m_ops[parenBegin].m_term = term; | |
2384 | m_ops[parenBegin].m_previousOp = notFound; | |
2385 | m_ops[parenBegin].m_nextOp = parenEnd; | |
2386 | m_ops[parenEnd].m_term = term; | |
2387 | m_ops[parenEnd].m_previousOp = parenBegin; | |
2388 | m_ops[parenEnd].m_nextOp = notFound; | |
2389 | } | |
2390 | ||
2391 | // opCompileAlternative | |
2392 | // Called to emit nodes for all terms in an alternative. | |
2393 | void opCompileAlternative(PatternAlternative* alternative) | |
2394 | { | |
2395 | optimizeAlternative(alternative); | |
2396 | ||
2397 | for (unsigned i = 0; i < alternative->m_terms.size(); ++i) { | |
2398 | PatternTerm* term = &alternative->m_terms[i]; | |
2399 | ||
2400 | switch (term->type) { | |
2401 | case PatternTerm::TypeParenthesesSubpattern: | |
2402 | opCompileParenthesesSubpattern(term); | |
2403 | break; | |
2404 | ||
2405 | case PatternTerm::TypeParentheticalAssertion: | |
2406 | opCompileParentheticalAssertion(term); | |
2407 | break; | |
2408 | ||
2409 | default: | |
2410 | m_ops.append(term); | |
2411 | } | |
2412 | } | |
2413 | } | |
2414 | ||
2415 | // opCompileBody | |
2416 | // This method compiles the body disjunction of the regular expression. | |
2417 | // The body consists of two sets of alternatives - zero or more 'once | |
2418 | // through' (BOL anchored) alternatives, followed by zero or more | |
2419 | // repeated alternatives. | |
2420 | // For each of these two sets of alteratives, if not empty they will be | |
2421 | // wrapped in a set of OpBodyAlternativeBegin/Next/End nodes (with the | |
2422 | // 'begin' node referencing the first alternative, and 'next' nodes | |
2423 | // referencing any further alternatives. The begin/next/end nodes are | |
2424 | // linked together in a doubly linked list. In the case of repeating | |
2425 | // alternatives, the end node is also linked back to the beginning. | |
2426 | // If no repeating alternatives exist, then a OpMatchFailed node exists | |
2427 | // to return the failing result. | |
2428 | void opCompileBody(PatternDisjunction* disjunction) | |
2429 | { | |
2430 | Vector<PatternAlternative*>& alternatives = disjunction->m_alternatives; | |
2431 | size_t currentAlternativeIndex = 0; | |
2432 | ||
2433 | // Emit the 'once through' alternatives. | |
2434 | if (alternatives.size() && alternatives[0]->onceThrough()) { | |
2435 | m_ops.append(YarrOp(OpBodyAlternativeBegin)); | |
2436 | m_ops.last().m_previousOp = notFound; | |
2437 | ||
2438 | do { | |
2439 | size_t lastOpIndex = m_ops.size() - 1; | |
2440 | PatternAlternative* alternative = alternatives[currentAlternativeIndex]; | |
2441 | opCompileAlternative(alternative); | |
2442 | ||
2443 | size_t thisOpIndex = m_ops.size(); | |
2444 | m_ops.append(YarrOp(OpBodyAlternativeNext)); | |
2445 | ||
2446 | YarrOp& lastOp = m_ops[lastOpIndex]; | |
2447 | YarrOp& thisOp = m_ops[thisOpIndex]; | |
2448 | ||
2449 | lastOp.m_alternative = alternative; | |
2450 | lastOp.m_nextOp = thisOpIndex; | |
2451 | thisOp.m_previousOp = lastOpIndex; | |
2452 | ||
2453 | ++currentAlternativeIndex; | |
2454 | } while (currentAlternativeIndex < alternatives.size() && alternatives[currentAlternativeIndex]->onceThrough()); | |
2455 | ||
2456 | YarrOp& lastOp = m_ops.last(); | |
2457 | ||
2458 | ASSERT(lastOp.m_op == OpBodyAlternativeNext); | |
2459 | lastOp.m_op = OpBodyAlternativeEnd; | |
2460 | lastOp.m_alternative = 0; | |
2461 | lastOp.m_nextOp = notFound; | |
2462 | } | |
2463 | ||
2464 | if (currentAlternativeIndex == alternatives.size()) { | |
2465 | m_ops.append(YarrOp(OpMatchFailed)); | |
2466 | return; | |
2467 | } | |
2468 | ||
2469 | // Emit the repeated alternatives. | |
2470 | size_t repeatLoop = m_ops.size(); | |
2471 | m_ops.append(YarrOp(OpBodyAlternativeBegin)); | |
2472 | m_ops.last().m_previousOp = notFound; | |
2473 | do { | |
2474 | size_t lastOpIndex = m_ops.size() - 1; | |
2475 | PatternAlternative* alternative = alternatives[currentAlternativeIndex]; | |
2476 | ASSERT(!alternative->onceThrough()); | |
2477 | opCompileAlternative(alternative); | |
2478 | ||
2479 | size_t thisOpIndex = m_ops.size(); | |
2480 | m_ops.append(YarrOp(OpBodyAlternativeNext)); | |
2481 | ||
2482 | YarrOp& lastOp = m_ops[lastOpIndex]; | |
2483 | YarrOp& thisOp = m_ops[thisOpIndex]; | |
2484 | ||
2485 | lastOp.m_alternative = alternative; | |
2486 | lastOp.m_nextOp = thisOpIndex; | |
2487 | thisOp.m_previousOp = lastOpIndex; | |
2488 | ||
2489 | ++currentAlternativeIndex; | |
2490 | } while (currentAlternativeIndex < alternatives.size()); | |
2491 | YarrOp& lastOp = m_ops.last(); | |
2492 | ASSERT(lastOp.m_op == OpBodyAlternativeNext); | |
2493 | lastOp.m_op = OpBodyAlternativeEnd; | |
2494 | lastOp.m_alternative = 0; | |
2495 | lastOp.m_nextOp = repeatLoop; | |
2496 | } | |
2497 | ||
2498 | void generateEnter() | |
2499 | { | |
2500 | #if CPU(X86_64) | |
2501 | push(X86Registers::ebp); | |
2502 | move(stackPointerRegister, X86Registers::ebp); | |
2503 | push(X86Registers::ebx); | |
2504 | #elif CPU(X86) | |
2505 | push(X86Registers::ebp); | |
2506 | move(stackPointerRegister, X86Registers::ebp); | |
2507 | // TODO: do we need spill registers to fill the output pointer if there are no sub captures? | |
2508 | push(X86Registers::ebx); | |
2509 | push(X86Registers::edi); | |
2510 | push(X86Registers::esi); | |
2511 | // load output into edi (2 = saved ebp + return address). | |
2512 | #if COMPILER(MSVC) | |
2513 | loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), input); | |
2514 | loadPtr(Address(X86Registers::ebp, 3 * sizeof(void*)), index); | |
2515 | loadPtr(Address(X86Registers::ebp, 4 * sizeof(void*)), length); | |
6fe7ccc8 A |
2516 | if (compileMode == IncludeSubpatterns) |
2517 | loadPtr(Address(X86Registers::ebp, 5 * sizeof(void*)), output); | |
14957cd0 | 2518 | #else |
6fe7ccc8 A |
2519 | if (compileMode == IncludeSubpatterns) |
2520 | loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), output); | |
14957cd0 A |
2521 | #endif |
2522 | #elif CPU(ARM) | |
2523 | push(ARMRegisters::r4); | |
2524 | push(ARMRegisters::r5); | |
2525 | push(ARMRegisters::r6); | |
2526 | #if CPU(ARM_TRADITIONAL) | |
2527 | push(ARMRegisters::r8); // scratch register | |
2528 | #endif | |
6fe7ccc8 A |
2529 | if (compileMode == IncludeSubpatterns) |
2530 | move(ARMRegisters::r3, output); | |
14957cd0 A |
2531 | #elif CPU(SH4) |
2532 | push(SH4Registers::r11); | |
2533 | push(SH4Registers::r13); | |
2534 | #elif CPU(MIPS) | |
2535 | // Do nothing. | |
2536 | #endif | |
2537 | } | |
2538 | ||
2539 | void generateReturn() | |
2540 | { | |
2541 | #if CPU(X86_64) | |
2542 | pop(X86Registers::ebx); | |
2543 | pop(X86Registers::ebp); | |
2544 | #elif CPU(X86) | |
2545 | pop(X86Registers::esi); | |
2546 | pop(X86Registers::edi); | |
2547 | pop(X86Registers::ebx); | |
2548 | pop(X86Registers::ebp); | |
2549 | #elif CPU(ARM) | |
2550 | #if CPU(ARM_TRADITIONAL) | |
2551 | pop(ARMRegisters::r8); // scratch register | |
2552 | #endif | |
2553 | pop(ARMRegisters::r6); | |
2554 | pop(ARMRegisters::r5); | |
2555 | pop(ARMRegisters::r4); | |
2556 | #elif CPU(SH4) | |
2557 | pop(SH4Registers::r13); | |
2558 | pop(SH4Registers::r11); | |
2559 | #elif CPU(MIPS) | |
2560 | // Do nothing | |
2561 | #endif | |
2562 | ret(); | |
2563 | } | |
2564 | ||
2565 | public: | |
6fe7ccc8 | 2566 | YarrGenerator(YarrPattern& pattern, YarrCharSize charSize) |
14957cd0 | 2567 | : m_pattern(pattern) |
6fe7ccc8 A |
2568 | , m_charSize(charSize) |
2569 | , m_charScale(m_charSize == Char8 ? TimesOne: TimesTwo) | |
14957cd0 A |
2570 | , m_shouldFallBack(false) |
2571 | , m_checked(0) | |
2572 | { | |
2573 | } | |
2574 | ||
2575 | void compile(JSGlobalData* globalData, YarrCodeBlock& jitObject) | |
2576 | { | |
2577 | generateEnter(); | |
2578 | ||
6fe7ccc8 A |
2579 | Jump hasInput = checkInput(); |
2580 | move(TrustedImmPtr((void*)WTF::notFound), returnRegister); | |
2581 | move(TrustedImm32(0), returnRegister2); | |
2582 | generateReturn(); | |
2583 | hasInput.link(this); | |
2584 | ||
2585 | if (compileMode == IncludeSubpatterns) { | |
2586 | for (unsigned i = 0; i < m_pattern.m_numSubpatterns + 1; ++i) | |
2587 | store32(TrustedImm32(-1), Address(output, (i << 1) * sizeof(int))); | |
2588 | } | |
2589 | ||
14957cd0 | 2590 | if (!m_pattern.m_body->m_hasFixedSize) |
6fe7ccc8 | 2591 | setMatchStart(index); |
14957cd0 | 2592 | |
6fe7ccc8 | 2593 | initCallFrame(); |
14957cd0 A |
2594 | |
2595 | // Compile the pattern to the internal 'YarrOp' representation. | |
2596 | opCompileBody(m_pattern.m_body); | |
2597 | ||
2598 | // If we encountered anything we can't handle in the JIT code | |
2599 | // (e.g. backreferences) then return early. | |
2600 | if (m_shouldFallBack) { | |
2601 | jitObject.setFallBack(true); | |
2602 | return; | |
2603 | } | |
2604 | ||
2605 | generate(); | |
2606 | backtrack(); | |
2607 | ||
2608 | // Link & finalize the code. | |
6fe7ccc8 | 2609 | LinkBuffer linkBuffer(*globalData, this, REGEXP_CODE_ID); |
14957cd0 | 2610 | m_backtrackingState.linkDataLabels(linkBuffer); |
6fe7ccc8 A |
2611 | |
2612 | if (compileMode == MatchOnly) { | |
2613 | if (m_charSize == Char8) | |
2614 | jitObject.set8BitCodeMatchOnly(linkBuffer.finalizeCode()); | |
2615 | else | |
2616 | jitObject.set16BitCodeMatchOnly(linkBuffer.finalizeCode()); | |
2617 | } else { | |
2618 | if (m_charSize == Char8) | |
2619 | jitObject.set8BitCode(linkBuffer.finalizeCode()); | |
2620 | else | |
2621 | jitObject.set16BitCode(linkBuffer.finalizeCode()); | |
2622 | } | |
14957cd0 A |
2623 | jitObject.setFallBack(m_shouldFallBack); |
2624 | } | |
2625 | ||
2626 | private: | |
2627 | YarrPattern& m_pattern; | |
2628 | ||
6fe7ccc8 A |
2629 | YarrCharSize m_charSize; |
2630 | ||
2631 | Scale m_charScale; | |
2632 | ||
14957cd0 A |
2633 | // Used to detect regular expression constructs that are not currently |
2634 | // supported in the JIT; fall back to the interpreter when this is detected. | |
2635 | bool m_shouldFallBack; | |
2636 | ||
2637 | // The regular expression expressed as a linear sequence of operations. | |
2638 | Vector<YarrOp, 128> m_ops; | |
2639 | ||
2640 | // This records the current input offset being applied due to the current | |
2641 | // set of alternatives we are nested within. E.g. when matching the | |
2642 | // character 'b' within the regular expression /abc/, we will know that | |
2643 | // the minimum size for the alternative is 3, checked upon entry to the | |
2644 | // alternative, and that 'b' is at offset 1 from the start, and as such | |
2645 | // when matching 'b' we need to apply an offset of -2 to the load. | |
2646 | // | |
2647 | // FIXME: This should go away. Rather than tracking this value throughout | |
2648 | // code generation, we should gather this information up front & store it | |
2649 | // on the YarrOp structure. | |
2650 | int m_checked; | |
2651 | ||
2652 | // This class records state whilst generating the backtracking path of code. | |
2653 | BacktrackingState m_backtrackingState; | |
2654 | }; | |
2655 | ||
6fe7ccc8 | 2656 | void jitCompile(YarrPattern& pattern, YarrCharSize charSize, JSGlobalData* globalData, YarrCodeBlock& jitObject, YarrJITCompileMode mode) |
14957cd0 | 2657 | { |
6fe7ccc8 A |
2658 | if (mode == MatchOnly) |
2659 | YarrGenerator<MatchOnly>(pattern, charSize).compile(globalData, jitObject); | |
2660 | else | |
2661 | YarrGenerator<IncludeSubpatterns>(pattern, charSize).compile(globalData, jitObject); | |
14957cd0 A |
2662 | } |
2663 | ||
2664 | }} | |
2665 | ||
2666 | #endif |