]>
Commit | Line | Data |
---|---|---|
14957cd0 | 1 | /* |
93a37866 | 2 | * Copyright (C) 2009, 2013 Apple Inc. All rights reserved. |
14957cd0 A |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without | |
5 | * modification, are permitted provided that the following conditions | |
6 | * are met: | |
7 | * 1. Redistributions of source code must retain the above copyright | |
8 | * notice, this list of conditions and the following disclaimer. | |
9 | * 2. Redistributions in binary form must reproduce the above copyright | |
10 | * notice, this list of conditions and the following disclaimer in the | |
11 | * documentation and/or other materials provided with the distribution. | |
12 | * | |
13 | * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY | |
14 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
15 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
16 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR | |
17 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
18 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
19 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
20 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
21 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
22 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
23 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
24 | */ | |
25 | ||
26 | #include "config.h" | |
27 | #include "YarrJIT.h" | |
28 | ||
6fe7ccc8 | 29 | #include <wtf/ASCIICType.h> |
14957cd0 | 30 | #include "LinkBuffer.h" |
93a37866 | 31 | #include "Options.h" |
14957cd0 | 32 | #include "Yarr.h" |
6fe7ccc8 | 33 | #include "YarrCanonicalizeUCS2.h" |
14957cd0 A |
34 | |
35 | #if ENABLE(YARR_JIT) | |
36 | ||
37 | using namespace WTF; | |
38 | ||
39 | namespace JSC { namespace Yarr { | |
40 | ||
6fe7ccc8 | 41 | template<YarrJITCompileMode compileMode> |
14957cd0 | 42 | class YarrGenerator : private MacroAssembler { |
93a37866 | 43 | friend void jitCompile(VM*, YarrCodeBlock& jitObject, const String& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline); |
14957cd0 A |
44 | |
45 | #if CPU(ARM) | |
46 | static const RegisterID input = ARMRegisters::r0; | |
47 | static const RegisterID index = ARMRegisters::r1; | |
48 | static const RegisterID length = ARMRegisters::r2; | |
81345200 | 49 | static const RegisterID output = ARMRegisters::r3; |
14957cd0 | 50 | |
81345200 A |
51 | static const RegisterID regT0 = ARMRegisters::r4; |
52 | static const RegisterID regT1 = ARMRegisters::r5; | |
14957cd0 A |
53 | |
54 | static const RegisterID returnRegister = ARMRegisters::r0; | |
6fe7ccc8 | 55 | static const RegisterID returnRegister2 = ARMRegisters::r1; |
93a37866 A |
56 | #elif CPU(ARM64) |
57 | static const RegisterID input = ARM64Registers::x0; | |
58 | static const RegisterID index = ARM64Registers::x1; | |
59 | static const RegisterID length = ARM64Registers::x2; | |
60 | static const RegisterID output = ARM64Registers::x3; | |
61 | ||
62 | static const RegisterID regT0 = ARM64Registers::x4; | |
63 | static const RegisterID regT1 = ARM64Registers::x5; | |
64 | ||
65 | static const RegisterID returnRegister = ARM64Registers::x0; | |
66 | static const RegisterID returnRegister2 = ARM64Registers::x1; | |
14957cd0 A |
67 | #elif CPU(MIPS) |
68 | static const RegisterID input = MIPSRegisters::a0; | |
69 | static const RegisterID index = MIPSRegisters::a1; | |
70 | static const RegisterID length = MIPSRegisters::a2; | |
71 | static const RegisterID output = MIPSRegisters::a3; | |
72 | ||
73 | static const RegisterID regT0 = MIPSRegisters::t4; | |
74 | static const RegisterID regT1 = MIPSRegisters::t5; | |
75 | ||
76 | static const RegisterID returnRegister = MIPSRegisters::v0; | |
6fe7ccc8 | 77 | static const RegisterID returnRegister2 = MIPSRegisters::v1; |
14957cd0 A |
78 | #elif CPU(SH4) |
79 | static const RegisterID input = SH4Registers::r4; | |
80 | static const RegisterID index = SH4Registers::r5; | |
81 | static const RegisterID length = SH4Registers::r6; | |
82 | static const RegisterID output = SH4Registers::r7; | |
83 | ||
84 | static const RegisterID regT0 = SH4Registers::r0; | |
85 | static const RegisterID regT1 = SH4Registers::r1; | |
86 | ||
87 | static const RegisterID returnRegister = SH4Registers::r0; | |
6fe7ccc8 | 88 | static const RegisterID returnRegister2 = SH4Registers::r1; |
14957cd0 A |
89 | #elif CPU(X86) |
90 | static const RegisterID input = X86Registers::eax; | |
91 | static const RegisterID index = X86Registers::edx; | |
92 | static const RegisterID length = X86Registers::ecx; | |
93 | static const RegisterID output = X86Registers::edi; | |
94 | ||
95 | static const RegisterID regT0 = X86Registers::ebx; | |
96 | static const RegisterID regT1 = X86Registers::esi; | |
97 | ||
98 | static const RegisterID returnRegister = X86Registers::eax; | |
6fe7ccc8 | 99 | static const RegisterID returnRegister2 = X86Registers::edx; |
14957cd0 | 100 | #elif CPU(X86_64) |
93a37866 | 101 | #if !OS(WINDOWS) |
14957cd0 A |
102 | static const RegisterID input = X86Registers::edi; |
103 | static const RegisterID index = X86Registers::esi; | |
104 | static const RegisterID length = X86Registers::edx; | |
105 | static const RegisterID output = X86Registers::ecx; | |
93a37866 A |
106 | #else |
107 | // If the return value doesn't fit in 64bits, its destination is pointed by rcx and the parameters are shifted. | |
108 | // http://msdn.microsoft.com/en-us/library/7572ztz4.aspx | |
109 | COMPILE_ASSERT(sizeof(MatchResult) > sizeof(void*), MatchResult_does_not_fit_in_64bits); | |
110 | static const RegisterID input = X86Registers::edx; | |
111 | static const RegisterID index = X86Registers::r8; | |
112 | static const RegisterID length = X86Registers::r9; | |
113 | static const RegisterID output = X86Registers::r10; | |
114 | #endif | |
14957cd0 A |
115 | |
116 | static const RegisterID regT0 = X86Registers::eax; | |
117 | static const RegisterID regT1 = X86Registers::ebx; | |
118 | ||
119 | static const RegisterID returnRegister = X86Registers::eax; | |
6fe7ccc8 | 120 | static const RegisterID returnRegister2 = X86Registers::edx; |
14957cd0 A |
121 | #endif |
122 | ||
123 | void optimizeAlternative(PatternAlternative* alternative) | |
124 | { | |
125 | if (!alternative->m_terms.size()) | |
126 | return; | |
127 | ||
128 | for (unsigned i = 0; i < alternative->m_terms.size() - 1; ++i) { | |
129 | PatternTerm& term = alternative->m_terms[i]; | |
130 | PatternTerm& nextTerm = alternative->m_terms[i + 1]; | |
131 | ||
132 | if ((term.type == PatternTerm::TypeCharacterClass) | |
133 | && (term.quantityType == QuantifierFixedCount) | |
134 | && (nextTerm.type == PatternTerm::TypePatternCharacter) | |
135 | && (nextTerm.quantityType == QuantifierFixedCount)) { | |
136 | PatternTerm termCopy = term; | |
137 | alternative->m_terms[i] = nextTerm; | |
138 | alternative->m_terms[i + 1] = termCopy; | |
139 | } | |
140 | } | |
141 | } | |
142 | ||
143 | void matchCharacterClassRange(RegisterID character, JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar* matches, unsigned matchCount) | |
144 | { | |
145 | do { | |
146 | // pick which range we're going to generate | |
147 | int which = count >> 1; | |
148 | char lo = ranges[which].begin; | |
149 | char hi = ranges[which].end; | |
150 | ||
151 | // check if there are any ranges or matches below lo. If not, just jl to failure - | |
152 | // if there is anything else to check, check that first, if it falls through jmp to failure. | |
153 | if ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) { | |
154 | Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo)); | |
155 | ||
156 | // generate code for all ranges before this one | |
157 | if (which) | |
158 | matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount); | |
159 | ||
160 | while ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) { | |
161 | matchDest.append(branch32(Equal, character, Imm32((unsigned short)matches[*matchIndex]))); | |
162 | ++*matchIndex; | |
163 | } | |
164 | failures.append(jump()); | |
165 | ||
166 | loOrAbove.link(this); | |
167 | } else if (which) { | |
168 | Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo)); | |
169 | ||
170 | matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount); | |
171 | failures.append(jump()); | |
172 | ||
173 | loOrAbove.link(this); | |
174 | } else | |
175 | failures.append(branch32(LessThan, character, Imm32((unsigned short)lo))); | |
176 | ||
177 | while ((*matchIndex < matchCount) && (matches[*matchIndex] <= hi)) | |
178 | ++*matchIndex; | |
179 | ||
180 | matchDest.append(branch32(LessThanOrEqual, character, Imm32((unsigned short)hi))); | |
181 | // fall through to here, the value is above hi. | |
182 | ||
183 | // shuffle along & loop around if there are any more matches to handle. | |
184 | unsigned next = which + 1; | |
185 | ranges += next; | |
186 | count -= next; | |
187 | } while (count); | |
188 | } | |
189 | ||
190 | void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass) | |
191 | { | |
192 | if (charClass->m_table) { | |
93a37866 A |
193 | ExtendedAddress tableEntry(character, reinterpret_cast<intptr_t>(charClass->m_table)); |
194 | matchDest.append(branchTest8(charClass->m_tableInverted ? Zero : NonZero, tableEntry)); | |
14957cd0 A |
195 | return; |
196 | } | |
197 | Jump unicodeFail; | |
198 | if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) { | |
199 | Jump isAscii = branch32(LessThanOrEqual, character, TrustedImm32(0x7f)); | |
200 | ||
201 | if (charClass->m_matchesUnicode.size()) { | |
202 | for (unsigned i = 0; i < charClass->m_matchesUnicode.size(); ++i) { | |
203 | UChar ch = charClass->m_matchesUnicode[i]; | |
204 | matchDest.append(branch32(Equal, character, Imm32(ch))); | |
205 | } | |
206 | } | |
207 | ||
208 | if (charClass->m_rangesUnicode.size()) { | |
209 | for (unsigned i = 0; i < charClass->m_rangesUnicode.size(); ++i) { | |
210 | UChar lo = charClass->m_rangesUnicode[i].begin; | |
211 | UChar hi = charClass->m_rangesUnicode[i].end; | |
212 | ||
213 | Jump below = branch32(LessThan, character, Imm32(lo)); | |
214 | matchDest.append(branch32(LessThanOrEqual, character, Imm32(hi))); | |
215 | below.link(this); | |
216 | } | |
217 | } | |
218 | ||
219 | unicodeFail = jump(); | |
220 | isAscii.link(this); | |
221 | } | |
222 | ||
223 | if (charClass->m_ranges.size()) { | |
224 | unsigned matchIndex = 0; | |
225 | JumpList failures; | |
226 | matchCharacterClassRange(character, failures, matchDest, charClass->m_ranges.begin(), charClass->m_ranges.size(), &matchIndex, charClass->m_matches.begin(), charClass->m_matches.size()); | |
227 | while (matchIndex < charClass->m_matches.size()) | |
228 | matchDest.append(branch32(Equal, character, Imm32((unsigned short)charClass->m_matches[matchIndex++]))); | |
229 | ||
230 | failures.link(this); | |
231 | } else if (charClass->m_matches.size()) { | |
232 | // optimization: gather 'a','A' etc back together, can mask & test once. | |
233 | Vector<char> matchesAZaz; | |
234 | ||
235 | for (unsigned i = 0; i < charClass->m_matches.size(); ++i) { | |
236 | char ch = charClass->m_matches[i]; | |
237 | if (m_pattern.m_ignoreCase) { | |
238 | if (isASCIILower(ch)) { | |
239 | matchesAZaz.append(ch); | |
240 | continue; | |
241 | } | |
242 | if (isASCIIUpper(ch)) | |
243 | continue; | |
244 | } | |
245 | matchDest.append(branch32(Equal, character, Imm32((unsigned short)ch))); | |
246 | } | |
247 | ||
248 | if (unsigned countAZaz = matchesAZaz.size()) { | |
249 | or32(TrustedImm32(32), character); | |
250 | for (unsigned i = 0; i < countAZaz; ++i) | |
251 | matchDest.append(branch32(Equal, character, TrustedImm32(matchesAZaz[i]))); | |
252 | } | |
253 | } | |
254 | ||
255 | if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) | |
256 | unicodeFail.link(this); | |
257 | } | |
258 | ||
259 | // Jumps if input not available; will have (incorrectly) incremented already! | |
260 | Jump jumpIfNoAvailableInput(unsigned countToCheck = 0) | |
261 | { | |
262 | if (countToCheck) | |
263 | add32(Imm32(countToCheck), index); | |
264 | return branch32(Above, index, length); | |
265 | } | |
266 | ||
267 | Jump jumpIfAvailableInput(unsigned countToCheck) | |
268 | { | |
269 | add32(Imm32(countToCheck), index); | |
270 | return branch32(BelowOrEqual, index, length); | |
271 | } | |
272 | ||
273 | Jump checkInput() | |
274 | { | |
275 | return branch32(BelowOrEqual, index, length); | |
276 | } | |
277 | ||
278 | Jump atEndOfInput() | |
279 | { | |
280 | return branch32(Equal, index, length); | |
281 | } | |
282 | ||
283 | Jump notAtEndOfInput() | |
284 | { | |
285 | return branch32(NotEqual, index, length); | |
286 | } | |
287 | ||
6fe7ccc8 | 288 | Jump jumpIfCharNotEquals(UChar ch, int inputPosition, RegisterID character) |
14957cd0 | 289 | { |
6fe7ccc8 | 290 | readCharacter(inputPosition, character); |
14957cd0 | 291 | |
6fe7ccc8 A |
292 | // For case-insesitive compares, non-ascii characters that have different |
293 | // upper & lower case representations are converted to a character class. | |
294 | ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(ch) || isCanonicallyUnique(ch)); | |
295 | if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) { | |
296 | or32(TrustedImm32(0x20), character); | |
297 | ch |= 0x20; | |
298 | } | |
299 | ||
300 | return branch32(NotEqual, character, Imm32(ch)); | |
14957cd0 A |
301 | } |
302 | ||
303 | void readCharacter(int inputPosition, RegisterID reg) | |
304 | { | |
6fe7ccc8 A |
305 | if (m_charSize == Char8) |
306 | load8(BaseIndex(input, index, TimesOne, inputPosition * sizeof(char)), reg); | |
307 | else | |
308 | load16(BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), reg); | |
14957cd0 A |
309 | } |
310 | ||
311 | void storeToFrame(RegisterID reg, unsigned frameLocation) | |
312 | { | |
313 | poke(reg, frameLocation); | |
314 | } | |
315 | ||
316 | void storeToFrame(TrustedImm32 imm, unsigned frameLocation) | |
317 | { | |
318 | poke(imm, frameLocation); | |
319 | } | |
320 | ||
321 | DataLabelPtr storeToFrameWithPatch(unsigned frameLocation) | |
322 | { | |
323 | return storePtrWithPatch(TrustedImmPtr(0), Address(stackPointerRegister, frameLocation * sizeof(void*))); | |
324 | } | |
325 | ||
326 | void loadFromFrame(unsigned frameLocation, RegisterID reg) | |
327 | { | |
328 | peek(reg, frameLocation); | |
329 | } | |
330 | ||
331 | void loadFromFrameAndJump(unsigned frameLocation) | |
332 | { | |
333 | jump(Address(stackPointerRegister, frameLocation * sizeof(void*))); | |
334 | } | |
335 | ||
93a37866 A |
336 | unsigned alignCallFrameSizeInBytes(unsigned callFrameSize) |
337 | { | |
338 | callFrameSize *= sizeof(void*); | |
339 | if (callFrameSize / sizeof(void*) != m_pattern.m_body->m_callFrameSize) | |
340 | CRASH(); | |
341 | callFrameSize = (callFrameSize + 0x3f) & ~0x3f; | |
342 | if (!callFrameSize) | |
343 | CRASH(); | |
344 | return callFrameSize; | |
345 | } | |
6fe7ccc8 A |
346 | void initCallFrame() |
347 | { | |
348 | unsigned callFrameSize = m_pattern.m_body->m_callFrameSize; | |
349 | if (callFrameSize) | |
93a37866 | 350 | subPtr(Imm32(alignCallFrameSizeInBytes(callFrameSize)), stackPointerRegister); |
6fe7ccc8 A |
351 | } |
352 | void removeCallFrame() | |
353 | { | |
354 | unsigned callFrameSize = m_pattern.m_body->m_callFrameSize; | |
355 | if (callFrameSize) | |
93a37866 | 356 | addPtr(Imm32(alignCallFrameSizeInBytes(callFrameSize)), stackPointerRegister); |
6fe7ccc8 A |
357 | } |
358 | ||
359 | // Used to record subpatters, should only be called if compileMode is IncludeSubpatterns. | |
360 | void setSubpatternStart(RegisterID reg, unsigned subpattern) | |
361 | { | |
362 | ASSERT(subpattern); | |
363 | // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-( | |
364 | store32(reg, Address(output, (subpattern << 1) * sizeof(int))); | |
365 | } | |
366 | void setSubpatternEnd(RegisterID reg, unsigned subpattern) | |
367 | { | |
368 | ASSERT(subpattern); | |
369 | // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-( | |
370 | store32(reg, Address(output, ((subpattern << 1) + 1) * sizeof(int))); | |
371 | } | |
372 | void clearSubpatternStart(unsigned subpattern) | |
373 | { | |
374 | ASSERT(subpattern); | |
375 | // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-( | |
376 | store32(TrustedImm32(-1), Address(output, (subpattern << 1) * sizeof(int))); | |
377 | } | |
378 | ||
379 | // We use one of three different strategies to track the start of the current match, | |
380 | // while matching. | |
381 | // 1) If the pattern has a fixed size, do nothing! - we calculate the value lazily | |
382 | // at the end of matching. This is irrespective of compileMode, and in this case | |
383 | // these methods should never be called. | |
384 | // 2) If we're compiling IncludeSubpatterns, 'output' contains a pointer to an output | |
385 | // vector, store the match start in the output vector. | |
386 | // 3) If we're compiling MatchOnly, 'output' is unused, store the match start directly | |
387 | // in this register. | |
388 | void setMatchStart(RegisterID reg) | |
389 | { | |
390 | ASSERT(!m_pattern.m_body->m_hasFixedSize); | |
391 | if (compileMode == IncludeSubpatterns) | |
392 | store32(reg, output); | |
393 | else | |
394 | move(reg, output); | |
395 | } | |
396 | void getMatchStart(RegisterID reg) | |
397 | { | |
398 | ASSERT(!m_pattern.m_body->m_hasFixedSize); | |
399 | if (compileMode == IncludeSubpatterns) | |
400 | load32(output, reg); | |
401 | else | |
402 | move(output, reg); | |
403 | } | |
404 | ||
14957cd0 A |
405 | enum YarrOpCode { |
406 | // These nodes wrap body alternatives - those in the main disjunction, | |
407 | // rather than subpatterns or assertions. These are chained together in | |
408 | // a doubly linked list, with a 'begin' node for the first alternative, | |
409 | // a 'next' node for each subsequent alternative, and an 'end' node at | |
410 | // the end. In the case of repeating alternatives, the 'end' node also | |
411 | // has a reference back to 'begin'. | |
412 | OpBodyAlternativeBegin, | |
413 | OpBodyAlternativeNext, | |
414 | OpBodyAlternativeEnd, | |
415 | // Similar to the body alternatives, but used for subpatterns with two | |
416 | // or more alternatives. | |
417 | OpNestedAlternativeBegin, | |
418 | OpNestedAlternativeNext, | |
419 | OpNestedAlternativeEnd, | |
420 | // Used for alternatives in subpatterns where there is only a single | |
421 | // alternative (backtrackingis easier in these cases), or for alternatives | |
422 | // which never need to be backtracked (those in parenthetical assertions, | |
423 | // terminal subpatterns). | |
424 | OpSimpleNestedAlternativeBegin, | |
425 | OpSimpleNestedAlternativeNext, | |
426 | OpSimpleNestedAlternativeEnd, | |
427 | // Used to wrap 'Once' subpattern matches (quantityCount == 1). | |
428 | OpParenthesesSubpatternOnceBegin, | |
429 | OpParenthesesSubpatternOnceEnd, | |
430 | // Used to wrap 'Terminal' subpattern matches (at the end of the regexp). | |
431 | OpParenthesesSubpatternTerminalBegin, | |
432 | OpParenthesesSubpatternTerminalEnd, | |
433 | // Used to wrap parenthetical assertions. | |
434 | OpParentheticalAssertionBegin, | |
435 | OpParentheticalAssertionEnd, | |
436 | // Wraps all simple terms (pattern characters, character classes). | |
437 | OpTerm, | |
438 | // Where an expression contains only 'once through' body alternatives | |
439 | // and no repeating ones, this op is used to return match failure. | |
440 | OpMatchFailed | |
441 | }; | |
442 | ||
443 | // This structure is used to hold the compiled opcode information, | |
444 | // including reference back to the original PatternTerm/PatternAlternatives, | |
445 | // and JIT compilation data structures. | |
446 | struct YarrOp { | |
447 | explicit YarrOp(PatternTerm* term) | |
448 | : m_op(OpTerm) | |
449 | , m_term(term) | |
450 | , m_isDeadCode(false) | |
451 | { | |
452 | } | |
453 | ||
454 | explicit YarrOp(YarrOpCode op) | |
455 | : m_op(op) | |
456 | , m_isDeadCode(false) | |
457 | { | |
458 | } | |
459 | ||
460 | // The operation, as a YarrOpCode, and also a reference to the PatternTerm. | |
461 | YarrOpCode m_op; | |
462 | PatternTerm* m_term; | |
463 | ||
464 | // For alternatives, this holds the PatternAlternative and doubly linked | |
465 | // references to this alternative's siblings. In the case of the | |
466 | // OpBodyAlternativeEnd node at the end of a section of repeating nodes, | |
467 | // m_nextOp will reference the OpBodyAlternativeBegin node of the first | |
468 | // repeating alternative. | |
469 | PatternAlternative* m_alternative; | |
470 | size_t m_previousOp; | |
471 | size_t m_nextOp; | |
472 | ||
473 | // Used to record a set of Jumps out of the generated code, typically | |
474 | // used for jumps out to backtracking code, and a single reentry back | |
475 | // into the code for a node (likely where a backtrack will trigger | |
476 | // rematching). | |
477 | Label m_reentry; | |
478 | JumpList m_jumps; | |
479 | ||
6fe7ccc8 A |
480 | // Used for backtracking when the prior alternative did not consume any |
481 | // characters but matched. | |
482 | Jump m_zeroLengthMatch; | |
483 | ||
14957cd0 A |
484 | // This flag is used to null out the second pattern character, when |
485 | // two are fused to match a pair together. | |
486 | bool m_isDeadCode; | |
487 | ||
488 | // Currently used in the case of some of the more complex management of | |
489 | // 'm_checked', to cache the offset used in this alternative, to avoid | |
490 | // recalculating it. | |
491 | int m_checkAdjust; | |
492 | ||
493 | // Used by OpNestedAlternativeNext/End to hold the pointer to the | |
494 | // value that will be pushed into the pattern's frame to return to, | |
495 | // upon backtracking back into the disjunction. | |
496 | DataLabelPtr m_returnAddress; | |
497 | }; | |
498 | ||
499 | // BacktrackingState | |
500 | // This class encapsulates information about the state of code generation | |
501 | // whilst generating the code for backtracking, when a term fails to match. | |
502 | // Upon entry to code generation of the backtracking code for a given node, | |
503 | // the Backtracking state will hold references to all control flow sources | |
504 | // that are outputs in need of further backtracking from the prior node | |
505 | // generated (which is the subsequent operation in the regular expression, | |
506 | // and in the m_ops Vector, since we generated backtracking backwards). | |
507 | // These references to control flow take the form of: | |
508 | // - A jump list of jumps, to be linked to code that will backtrack them | |
509 | // further. | |
510 | // - A set of DataLabelPtr values, to be populated with values to be | |
511 | // treated effectively as return addresses backtracking into complex | |
512 | // subpatterns. | |
513 | // - A flag indicating that the current sequence of generated code up to | |
514 | // this point requires backtracking. | |
515 | class BacktrackingState { | |
516 | public: | |
517 | BacktrackingState() | |
518 | : m_pendingFallthrough(false) | |
519 | { | |
520 | } | |
521 | ||
522 | // Add a jump or jumps, a return address, or set the flag indicating | |
523 | // that the current 'fallthrough' control flow requires backtracking. | |
524 | void append(const Jump& jump) | |
525 | { | |
526 | m_laterFailures.append(jump); | |
527 | } | |
528 | void append(JumpList& jumpList) | |
529 | { | |
530 | m_laterFailures.append(jumpList); | |
531 | } | |
532 | void append(const DataLabelPtr& returnAddress) | |
533 | { | |
534 | m_pendingReturns.append(returnAddress); | |
535 | } | |
536 | void fallthrough() | |
537 | { | |
538 | ASSERT(!m_pendingFallthrough); | |
539 | m_pendingFallthrough = true; | |
540 | } | |
541 | ||
542 | // These methods clear the backtracking state, either linking to the | |
543 | // current location, a provided label, or copying the backtracking out | |
544 | // to a JumpList. All actions may require code generation to take place, | |
545 | // and as such are passed a pointer to the assembler. | |
546 | void link(MacroAssembler* assembler) | |
547 | { | |
548 | if (m_pendingReturns.size()) { | |
549 | Label here(assembler); | |
550 | for (unsigned i = 0; i < m_pendingReturns.size(); ++i) | |
551 | m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], here)); | |
552 | m_pendingReturns.clear(); | |
553 | } | |
554 | m_laterFailures.link(assembler); | |
555 | m_laterFailures.clear(); | |
556 | m_pendingFallthrough = false; | |
557 | } | |
558 | void linkTo(Label label, MacroAssembler* assembler) | |
559 | { | |
560 | if (m_pendingReturns.size()) { | |
561 | for (unsigned i = 0; i < m_pendingReturns.size(); ++i) | |
562 | m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], label)); | |
563 | m_pendingReturns.clear(); | |
564 | } | |
565 | if (m_pendingFallthrough) | |
566 | assembler->jump(label); | |
567 | m_laterFailures.linkTo(label, assembler); | |
568 | m_laterFailures.clear(); | |
569 | m_pendingFallthrough = false; | |
570 | } | |
571 | void takeBacktracksToJumpList(JumpList& jumpList, MacroAssembler* assembler) | |
572 | { | |
573 | if (m_pendingReturns.size()) { | |
574 | Label here(assembler); | |
575 | for (unsigned i = 0; i < m_pendingReturns.size(); ++i) | |
576 | m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], here)); | |
577 | m_pendingReturns.clear(); | |
578 | m_pendingFallthrough = true; | |
579 | } | |
580 | if (m_pendingFallthrough) | |
581 | jumpList.append(assembler->jump()); | |
582 | jumpList.append(m_laterFailures); | |
583 | m_laterFailures.clear(); | |
584 | m_pendingFallthrough = false; | |
585 | } | |
586 | ||
587 | bool isEmpty() | |
588 | { | |
589 | return m_laterFailures.empty() && m_pendingReturns.isEmpty() && !m_pendingFallthrough; | |
590 | } | |
591 | ||
592 | // Called at the end of code generation to link all return addresses. | |
593 | void linkDataLabels(LinkBuffer& linkBuffer) | |
594 | { | |
595 | ASSERT(isEmpty()); | |
596 | for (unsigned i = 0; i < m_backtrackRecords.size(); ++i) | |
597 | linkBuffer.patch(m_backtrackRecords[i].m_dataLabel, linkBuffer.locationOf(m_backtrackRecords[i].m_backtrackLocation)); | |
598 | } | |
599 | ||
600 | private: | |
601 | struct ReturnAddressRecord { | |
602 | ReturnAddressRecord(DataLabelPtr dataLabel, Label backtrackLocation) | |
603 | : m_dataLabel(dataLabel) | |
604 | , m_backtrackLocation(backtrackLocation) | |
605 | { | |
606 | } | |
607 | ||
608 | DataLabelPtr m_dataLabel; | |
609 | Label m_backtrackLocation; | |
610 | }; | |
611 | ||
612 | JumpList m_laterFailures; | |
613 | bool m_pendingFallthrough; | |
614 | Vector<DataLabelPtr, 4> m_pendingReturns; | |
615 | Vector<ReturnAddressRecord, 4> m_backtrackRecords; | |
616 | }; | |
617 | ||
618 | // Generation methods: | |
619 | // =================== | |
620 | ||
621 | // This method provides a default implementation of backtracking common | |
622 | // to many terms; terms commonly jump out of the forwards matching path | |
623 | // on any failed conditions, and add these jumps to the m_jumps list. If | |
624 | // no special handling is required we can often just backtrack to m_jumps. | |
625 | void backtrackTermDefault(size_t opIndex) | |
626 | { | |
627 | YarrOp& op = m_ops[opIndex]; | |
628 | m_backtrackingState.append(op.m_jumps); | |
629 | } | |
630 | ||
631 | void generateAssertionBOL(size_t opIndex) | |
632 | { | |
633 | YarrOp& op = m_ops[opIndex]; | |
634 | PatternTerm* term = op.m_term; | |
635 | ||
636 | if (m_pattern.m_multiline) { | |
637 | const RegisterID character = regT0; | |
638 | ||
639 | JumpList matchDest; | |
640 | if (!term->inputPosition) | |
641 | matchDest.append(branch32(Equal, index, Imm32(m_checked))); | |
642 | ||
643 | readCharacter((term->inputPosition - m_checked) - 1, character); | |
644 | matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass()); | |
645 | op.m_jumps.append(jump()); | |
646 | ||
647 | matchDest.link(this); | |
648 | } else { | |
649 | // Erk, really should poison out these alternatives early. :-/ | |
650 | if (term->inputPosition) | |
651 | op.m_jumps.append(jump()); | |
652 | else | |
653 | op.m_jumps.append(branch32(NotEqual, index, Imm32(m_checked))); | |
654 | } | |
655 | } | |
656 | void backtrackAssertionBOL(size_t opIndex) | |
657 | { | |
658 | backtrackTermDefault(opIndex); | |
659 | } | |
660 | ||
661 | void generateAssertionEOL(size_t opIndex) | |
662 | { | |
663 | YarrOp& op = m_ops[opIndex]; | |
664 | PatternTerm* term = op.m_term; | |
665 | ||
666 | if (m_pattern.m_multiline) { | |
667 | const RegisterID character = regT0; | |
668 | ||
669 | JumpList matchDest; | |
670 | if (term->inputPosition == m_checked) | |
671 | matchDest.append(atEndOfInput()); | |
672 | ||
1df5f87f | 673 | readCharacter(term->inputPosition - m_checked, character); |
14957cd0 A |
674 | matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass()); |
675 | op.m_jumps.append(jump()); | |
676 | ||
677 | matchDest.link(this); | |
678 | } else { | |
679 | if (term->inputPosition == m_checked) | |
680 | op.m_jumps.append(notAtEndOfInput()); | |
681 | // Erk, really should poison out these alternatives early. :-/ | |
682 | else | |
683 | op.m_jumps.append(jump()); | |
684 | } | |
685 | } | |
686 | void backtrackAssertionEOL(size_t opIndex) | |
687 | { | |
688 | backtrackTermDefault(opIndex); | |
689 | } | |
690 | ||
691 | // Also falls though on nextIsNotWordChar. | |
692 | void matchAssertionWordchar(size_t opIndex, JumpList& nextIsWordChar, JumpList& nextIsNotWordChar) | |
693 | { | |
694 | YarrOp& op = m_ops[opIndex]; | |
695 | PatternTerm* term = op.m_term; | |
696 | ||
697 | const RegisterID character = regT0; | |
698 | ||
699 | if (term->inputPosition == m_checked) | |
700 | nextIsNotWordChar.append(atEndOfInput()); | |
701 | ||
702 | readCharacter((term->inputPosition - m_checked), character); | |
703 | matchCharacterClass(character, nextIsWordChar, m_pattern.wordcharCharacterClass()); | |
704 | } | |
705 | ||
706 | void generateAssertionWordBoundary(size_t opIndex) | |
707 | { | |
708 | YarrOp& op = m_ops[opIndex]; | |
709 | PatternTerm* term = op.m_term; | |
710 | ||
711 | const RegisterID character = regT0; | |
712 | ||
713 | Jump atBegin; | |
714 | JumpList matchDest; | |
715 | if (!term->inputPosition) | |
716 | atBegin = branch32(Equal, index, Imm32(m_checked)); | |
717 | readCharacter((term->inputPosition - m_checked) - 1, character); | |
718 | matchCharacterClass(character, matchDest, m_pattern.wordcharCharacterClass()); | |
719 | if (!term->inputPosition) | |
720 | atBegin.link(this); | |
721 | ||
722 | // We fall through to here if the last character was not a wordchar. | |
723 | JumpList nonWordCharThenWordChar; | |
724 | JumpList nonWordCharThenNonWordChar; | |
725 | if (term->invert()) { | |
726 | matchAssertionWordchar(opIndex, nonWordCharThenNonWordChar, nonWordCharThenWordChar); | |
727 | nonWordCharThenWordChar.append(jump()); | |
728 | } else { | |
729 | matchAssertionWordchar(opIndex, nonWordCharThenWordChar, nonWordCharThenNonWordChar); | |
730 | nonWordCharThenNonWordChar.append(jump()); | |
731 | } | |
732 | op.m_jumps.append(nonWordCharThenNonWordChar); | |
733 | ||
734 | // We jump here if the last character was a wordchar. | |
735 | matchDest.link(this); | |
736 | JumpList wordCharThenWordChar; | |
737 | JumpList wordCharThenNonWordChar; | |
738 | if (term->invert()) { | |
739 | matchAssertionWordchar(opIndex, wordCharThenNonWordChar, wordCharThenWordChar); | |
740 | wordCharThenWordChar.append(jump()); | |
741 | } else { | |
742 | matchAssertionWordchar(opIndex, wordCharThenWordChar, wordCharThenNonWordChar); | |
743 | // This can fall-though! | |
744 | } | |
745 | ||
746 | op.m_jumps.append(wordCharThenWordChar); | |
747 | ||
748 | nonWordCharThenWordChar.link(this); | |
749 | wordCharThenNonWordChar.link(this); | |
750 | } | |
751 | void backtrackAssertionWordBoundary(size_t opIndex) | |
752 | { | |
753 | backtrackTermDefault(opIndex); | |
754 | } | |
755 | ||
756 | void generatePatternCharacterOnce(size_t opIndex) | |
757 | { | |
758 | YarrOp& op = m_ops[opIndex]; | |
759 | ||
6fe7ccc8 A |
760 | if (op.m_isDeadCode) |
761 | return; | |
762 | ||
14957cd0 A |
763 | // m_ops always ends with a OpBodyAlternativeEnd or OpMatchFailed |
764 | // node, so there must always be at least one more node. | |
765 | ASSERT(opIndex + 1 < m_ops.size()); | |
6fe7ccc8 | 766 | YarrOp* nextOp = &m_ops[opIndex + 1]; |
14957cd0 A |
767 | |
768 | PatternTerm* term = op.m_term; | |
769 | UChar ch = term->patternCharacter; | |
770 | ||
6fe7ccc8 A |
771 | if ((ch > 0xff) && (m_charSize == Char8)) { |
772 | // Have a 16 bit pattern character and an 8 bit string - short circuit | |
773 | op.m_jumps.append(jump()); | |
774 | return; | |
775 | } | |
776 | ||
14957cd0 | 777 | const RegisterID character = regT0; |
6fe7ccc8 A |
778 | int maxCharactersAtOnce = m_charSize == Char8 ? 4 : 2; |
779 | unsigned ignoreCaseMask = 0; | |
93a37866 A |
780 | #if CPU(BIG_ENDIAN) |
781 | int allCharacters = ch << (m_charSize == Char8 ? 24 : 16); | |
782 | #else | |
6fe7ccc8 | 783 | int allCharacters = ch; |
93a37866 | 784 | #endif |
6fe7ccc8 A |
785 | int numberCharacters; |
786 | int startTermPosition = term->inputPosition; | |
14957cd0 | 787 | |
6fe7ccc8 A |
788 | // For case-insesitive compares, non-ascii characters that have different |
789 | // upper & lower case representations are converted to a character class. | |
790 | ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(ch) || isCanonicallyUnique(ch)); | |
14957cd0 | 791 | |
6fe7ccc8 | 792 | if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) |
93a37866 A |
793 | #if CPU(BIG_ENDIAN) |
794 | ignoreCaseMask |= 32 << (m_charSize == Char8 ? 24 : 16); | |
795 | #else | |
6fe7ccc8 | 796 | ignoreCaseMask |= 32; |
93a37866 | 797 | #endif |
14957cd0 | 798 | |
6fe7ccc8 A |
799 | for (numberCharacters = 1; numberCharacters < maxCharactersAtOnce && nextOp->m_op == OpTerm; ++numberCharacters, nextOp = &m_ops[opIndex + numberCharacters]) { |
800 | PatternTerm* nextTerm = nextOp->m_term; | |
801 | ||
802 | if (nextTerm->type != PatternTerm::TypePatternCharacter | |
803 | || nextTerm->quantityType != QuantifierFixedCount | |
804 | || nextTerm->quantityCount != 1 | |
805 | || nextTerm->inputPosition != (startTermPosition + numberCharacters)) | |
806 | break; | |
14957cd0 | 807 | |
6fe7ccc8 | 808 | nextOp->m_isDeadCode = true; |
14957cd0 | 809 | |
93a37866 A |
810 | #if CPU(BIG_ENDIAN) |
811 | int shiftAmount = (m_charSize == Char8 ? 24 : 16) - ((m_charSize == Char8 ? 8 : 16) * numberCharacters); | |
812 | #else | |
6fe7ccc8 | 813 | int shiftAmount = (m_charSize == Char8 ? 8 : 16) * numberCharacters; |
93a37866 | 814 | #endif |
14957cd0 | 815 | |
6fe7ccc8 A |
816 | UChar currentCharacter = nextTerm->patternCharacter; |
817 | ||
818 | if ((currentCharacter > 0xff) && (m_charSize == Char8)) { | |
819 | // Have a 16 bit pattern character and an 8 bit string - short circuit | |
820 | op.m_jumps.append(jump()); | |
14957cd0 A |
821 | return; |
822 | } | |
6fe7ccc8 A |
823 | |
824 | // For case-insesitive compares, non-ascii characters that have different | |
825 | // upper & lower case representations are converted to a character class. | |
826 | ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(currentCharacter) || isCanonicallyUnique(currentCharacter)); | |
827 | ||
828 | allCharacters |= (currentCharacter << shiftAmount); | |
829 | ||
830 | if ((m_pattern.m_ignoreCase) && (isASCIIAlpha(currentCharacter))) | |
831 | ignoreCaseMask |= 32 << shiftAmount; | |
14957cd0 A |
832 | } |
833 | ||
6fe7ccc8 A |
834 | if (m_charSize == Char8) { |
835 | switch (numberCharacters) { | |
836 | case 1: | |
837 | op.m_jumps.append(jumpIfCharNotEquals(ch, startTermPosition - m_checked, character)); | |
838 | return; | |
839 | case 2: { | |
840 | BaseIndex address(input, index, TimesOne, (startTermPosition - m_checked) * sizeof(LChar)); | |
841 | load16Unaligned(address, character); | |
842 | break; | |
843 | } | |
844 | case 3: { | |
845 | BaseIndex highAddress(input, index, TimesOne, (startTermPosition - m_checked) * sizeof(LChar)); | |
846 | load16Unaligned(highAddress, character); | |
847 | if (ignoreCaseMask) | |
848 | or32(Imm32(ignoreCaseMask), character); | |
849 | op.m_jumps.append(branch32(NotEqual, character, Imm32((allCharacters & 0xffff) | ignoreCaseMask))); | |
850 | op.m_jumps.append(jumpIfCharNotEquals(allCharacters >> 16, startTermPosition + 2 - m_checked, character)); | |
851 | return; | |
852 | } | |
853 | case 4: { | |
854 | BaseIndex address(input, index, TimesOne, (startTermPosition - m_checked) * sizeof(LChar)); | |
855 | load32WithUnalignedHalfWords(address, character); | |
856 | break; | |
857 | } | |
858 | } | |
14957cd0 | 859 | } else { |
6fe7ccc8 A |
860 | switch (numberCharacters) { |
861 | case 1: | |
862 | op.m_jumps.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked, character)); | |
863 | return; | |
864 | case 2: | |
865 | BaseIndex address(input, index, TimesTwo, (term->inputPosition - m_checked) * sizeof(UChar)); | |
866 | load32WithUnalignedHalfWords(address, character); | |
867 | break; | |
868 | } | |
14957cd0 | 869 | } |
6fe7ccc8 A |
870 | |
871 | if (ignoreCaseMask) | |
872 | or32(Imm32(ignoreCaseMask), character); | |
873 | op.m_jumps.append(branch32(NotEqual, character, Imm32(allCharacters | ignoreCaseMask))); | |
874 | return; | |
14957cd0 A |
875 | } |
876 | void backtrackPatternCharacterOnce(size_t opIndex) | |
877 | { | |
878 | backtrackTermDefault(opIndex); | |
879 | } | |
880 | ||
881 | void generatePatternCharacterFixed(size_t opIndex) | |
882 | { | |
883 | YarrOp& op = m_ops[opIndex]; | |
884 | PatternTerm* term = op.m_term; | |
885 | UChar ch = term->patternCharacter; | |
886 | ||
887 | const RegisterID character = regT0; | |
888 | const RegisterID countRegister = regT1; | |
889 | ||
890 | move(index, countRegister); | |
1df5f87f | 891 | sub32(Imm32(term->quantityCount.unsafeGet()), countRegister); |
14957cd0 A |
892 | |
893 | Label loop(this); | |
6fe7ccc8 | 894 | BaseIndex address(input, countRegister, m_charScale, (Checked<int>(term->inputPosition - m_checked + Checked<int64_t>(term->quantityCount)) * static_cast<int>(m_charSize == Char8 ? sizeof(char) : sizeof(UChar))).unsafeGet()); |
14957cd0 | 895 | |
6fe7ccc8 A |
896 | if (m_charSize == Char8) |
897 | load8(address, character); | |
898 | else | |
14957cd0 | 899 | load16(address, character); |
6fe7ccc8 A |
900 | |
901 | // For case-insesitive compares, non-ascii characters that have different | |
902 | // upper & lower case representations are converted to a character class. | |
903 | ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(ch) || isCanonicallyUnique(ch)); | |
904 | if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) { | |
905 | or32(TrustedImm32(0x20), character); | |
906 | ch |= 0x20; | |
14957cd0 | 907 | } |
6fe7ccc8 A |
908 | |
909 | op.m_jumps.append(branch32(NotEqual, character, Imm32(ch))); | |
14957cd0 A |
910 | add32(TrustedImm32(1), countRegister); |
911 | branch32(NotEqual, countRegister, index).linkTo(loop, this); | |
912 | } | |
913 | void backtrackPatternCharacterFixed(size_t opIndex) | |
914 | { | |
915 | backtrackTermDefault(opIndex); | |
916 | } | |
917 | ||
918 | void generatePatternCharacterGreedy(size_t opIndex) | |
919 | { | |
920 | YarrOp& op = m_ops[opIndex]; | |
921 | PatternTerm* term = op.m_term; | |
922 | UChar ch = term->patternCharacter; | |
923 | ||
924 | const RegisterID character = regT0; | |
925 | const RegisterID countRegister = regT1; | |
926 | ||
927 | move(TrustedImm32(0), countRegister); | |
928 | ||
6fe7ccc8 A |
929 | // Unless have a 16 bit pattern character and an 8 bit string - short circuit |
930 | if (!((ch > 0xff) && (m_charSize == Char8))) { | |
931 | JumpList failures; | |
932 | Label loop(this); | |
933 | failures.append(atEndOfInput()); | |
934 | failures.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked, character)); | |
935 | ||
936 | add32(TrustedImm32(1), countRegister); | |
937 | add32(TrustedImm32(1), index); | |
938 | if (term->quantityCount == quantifyInfinite) | |
939 | jump(loop); | |
940 | else | |
941 | branch32(NotEqual, countRegister, Imm32(term->quantityCount.unsafeGet())).linkTo(loop, this); | |
14957cd0 | 942 | |
6fe7ccc8 A |
943 | failures.link(this); |
944 | } | |
14957cd0 A |
945 | op.m_reentry = label(); |
946 | ||
947 | storeToFrame(countRegister, term->frameLocation); | |
14957cd0 A |
948 | } |
949 | void backtrackPatternCharacterGreedy(size_t opIndex) | |
950 | { | |
951 | YarrOp& op = m_ops[opIndex]; | |
952 | PatternTerm* term = op.m_term; | |
953 | ||
954 | const RegisterID countRegister = regT1; | |
955 | ||
956 | m_backtrackingState.link(this); | |
957 | ||
958 | loadFromFrame(term->frameLocation, countRegister); | |
959 | m_backtrackingState.append(branchTest32(Zero, countRegister)); | |
960 | sub32(TrustedImm32(1), countRegister); | |
961 | sub32(TrustedImm32(1), index); | |
962 | jump(op.m_reentry); | |
963 | } | |
964 | ||
965 | void generatePatternCharacterNonGreedy(size_t opIndex) | |
966 | { | |
967 | YarrOp& op = m_ops[opIndex]; | |
968 | PatternTerm* term = op.m_term; | |
969 | ||
970 | const RegisterID countRegister = regT1; | |
971 | ||
972 | move(TrustedImm32(0), countRegister); | |
973 | op.m_reentry = label(); | |
974 | storeToFrame(countRegister, term->frameLocation); | |
975 | } | |
976 | void backtrackPatternCharacterNonGreedy(size_t opIndex) | |
977 | { | |
978 | YarrOp& op = m_ops[opIndex]; | |
979 | PatternTerm* term = op.m_term; | |
980 | UChar ch = term->patternCharacter; | |
981 | ||
982 | const RegisterID character = regT0; | |
983 | const RegisterID countRegister = regT1; | |
984 | ||
14957cd0 A |
985 | m_backtrackingState.link(this); |
986 | ||
987 | loadFromFrame(term->frameLocation, countRegister); | |
988 | ||
6fe7ccc8 A |
989 | // Unless have a 16 bit pattern character and an 8 bit string - short circuit |
990 | if (!((ch > 0xff) && (m_charSize == Char8))) { | |
991 | JumpList nonGreedyFailures; | |
992 | nonGreedyFailures.append(atEndOfInput()); | |
993 | if (term->quantityCount != quantifyInfinite) | |
994 | nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityCount.unsafeGet()))); | |
995 | nonGreedyFailures.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked, character)); | |
14957cd0 | 996 | |
6fe7ccc8 A |
997 | add32(TrustedImm32(1), countRegister); |
998 | add32(TrustedImm32(1), index); | |
14957cd0 | 999 | |
6fe7ccc8 A |
1000 | jump(op.m_reentry); |
1001 | nonGreedyFailures.link(this); | |
1002 | } | |
14957cd0 | 1003 | |
14957cd0 A |
1004 | sub32(countRegister, index); |
1005 | m_backtrackingState.fallthrough(); | |
1006 | } | |
1007 | ||
1008 | void generateCharacterClassOnce(size_t opIndex) | |
1009 | { | |
1010 | YarrOp& op = m_ops[opIndex]; | |
1011 | PatternTerm* term = op.m_term; | |
1012 | ||
1013 | const RegisterID character = regT0; | |
1014 | ||
1015 | JumpList matchDest; | |
1df5f87f | 1016 | readCharacter(term->inputPosition - m_checked, character); |
14957cd0 A |
1017 | matchCharacterClass(character, matchDest, term->characterClass); |
1018 | ||
1019 | if (term->invert()) | |
1020 | op.m_jumps.append(matchDest); | |
1021 | else { | |
1022 | op.m_jumps.append(jump()); | |
1023 | matchDest.link(this); | |
1024 | } | |
1025 | } | |
1026 | void backtrackCharacterClassOnce(size_t opIndex) | |
1027 | { | |
1028 | backtrackTermDefault(opIndex); | |
1029 | } | |
1030 | ||
1031 | void generateCharacterClassFixed(size_t opIndex) | |
1032 | { | |
1033 | YarrOp& op = m_ops[opIndex]; | |
1034 | PatternTerm* term = op.m_term; | |
1035 | ||
1036 | const RegisterID character = regT0; | |
1037 | const RegisterID countRegister = regT1; | |
1038 | ||
1039 | move(index, countRegister); | |
1df5f87f | 1040 | sub32(Imm32(term->quantityCount.unsafeGet()), countRegister); |
14957cd0 A |
1041 | |
1042 | Label loop(this); | |
1043 | JumpList matchDest; | |
6fe7ccc8 A |
1044 | if (m_charSize == Char8) |
1045 | load8(BaseIndex(input, countRegister, TimesOne, (Checked<int>(term->inputPosition - m_checked + Checked<int64_t>(term->quantityCount)) * static_cast<int>(sizeof(char))).unsafeGet()), character); | |
1046 | else | |
1047 | load16(BaseIndex(input, countRegister, TimesTwo, (Checked<int>(term->inputPosition - m_checked + Checked<int64_t>(term->quantityCount)) * static_cast<int>(sizeof(UChar))).unsafeGet()), character); | |
14957cd0 A |
1048 | matchCharacterClass(character, matchDest, term->characterClass); |
1049 | ||
1050 | if (term->invert()) | |
1051 | op.m_jumps.append(matchDest); | |
1052 | else { | |
1053 | op.m_jumps.append(jump()); | |
1054 | matchDest.link(this); | |
1055 | } | |
1056 | ||
1057 | add32(TrustedImm32(1), countRegister); | |
1058 | branch32(NotEqual, countRegister, index).linkTo(loop, this); | |
1059 | } | |
1060 | void backtrackCharacterClassFixed(size_t opIndex) | |
1061 | { | |
1062 | backtrackTermDefault(opIndex); | |
1063 | } | |
1064 | ||
1065 | void generateCharacterClassGreedy(size_t opIndex) | |
1066 | { | |
1067 | YarrOp& op = m_ops[opIndex]; | |
1068 | PatternTerm* term = op.m_term; | |
1069 | ||
1070 | const RegisterID character = regT0; | |
1071 | const RegisterID countRegister = regT1; | |
1072 | ||
1073 | move(TrustedImm32(0), countRegister); | |
1074 | ||
1075 | JumpList failures; | |
1076 | Label loop(this); | |
1077 | failures.append(atEndOfInput()); | |
1078 | ||
1079 | if (term->invert()) { | |
1080 | readCharacter(term->inputPosition - m_checked, character); | |
1081 | matchCharacterClass(character, failures, term->characterClass); | |
1082 | } else { | |
1083 | JumpList matchDest; | |
1084 | readCharacter(term->inputPosition - m_checked, character); | |
1085 | matchCharacterClass(character, matchDest, term->characterClass); | |
1086 | failures.append(jump()); | |
1087 | matchDest.link(this); | |
1088 | } | |
1089 | ||
1090 | add32(TrustedImm32(1), countRegister); | |
1091 | add32(TrustedImm32(1), index); | |
1092 | if (term->quantityCount != quantifyInfinite) { | |
1df5f87f | 1093 | branch32(NotEqual, countRegister, Imm32(term->quantityCount.unsafeGet())).linkTo(loop, this); |
14957cd0 A |
1094 | failures.append(jump()); |
1095 | } else | |
1096 | jump(loop); | |
1097 | ||
1098 | failures.link(this); | |
1099 | op.m_reentry = label(); | |
1100 | ||
1101 | storeToFrame(countRegister, term->frameLocation); | |
1102 | } | |
1103 | void backtrackCharacterClassGreedy(size_t opIndex) | |
1104 | { | |
1105 | YarrOp& op = m_ops[opIndex]; | |
1106 | PatternTerm* term = op.m_term; | |
1107 | ||
1108 | const RegisterID countRegister = regT1; | |
1109 | ||
1110 | m_backtrackingState.link(this); | |
1111 | ||
1112 | loadFromFrame(term->frameLocation, countRegister); | |
1113 | m_backtrackingState.append(branchTest32(Zero, countRegister)); | |
1114 | sub32(TrustedImm32(1), countRegister); | |
1115 | sub32(TrustedImm32(1), index); | |
1116 | jump(op.m_reentry); | |
1117 | } | |
1118 | ||
1119 | void generateCharacterClassNonGreedy(size_t opIndex) | |
1120 | { | |
1121 | YarrOp& op = m_ops[opIndex]; | |
1122 | PatternTerm* term = op.m_term; | |
1123 | ||
1124 | const RegisterID countRegister = regT1; | |
1125 | ||
1126 | move(TrustedImm32(0), countRegister); | |
1127 | op.m_reentry = label(); | |
1128 | storeToFrame(countRegister, term->frameLocation); | |
1129 | } | |
1130 | void backtrackCharacterClassNonGreedy(size_t opIndex) | |
1131 | { | |
1132 | YarrOp& op = m_ops[opIndex]; | |
1133 | PatternTerm* term = op.m_term; | |
1134 | ||
1135 | const RegisterID character = regT0; | |
1136 | const RegisterID countRegister = regT1; | |
1137 | ||
1138 | JumpList nonGreedyFailures; | |
1139 | ||
1140 | m_backtrackingState.link(this); | |
1141 | ||
14957cd0 A |
1142 | loadFromFrame(term->frameLocation, countRegister); |
1143 | ||
1144 | nonGreedyFailures.append(atEndOfInput()); | |
1df5f87f | 1145 | nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityCount.unsafeGet()))); |
14957cd0 A |
1146 | |
1147 | JumpList matchDest; | |
1148 | readCharacter(term->inputPosition - m_checked, character); | |
1149 | matchCharacterClass(character, matchDest, term->characterClass); | |
1150 | ||
1151 | if (term->invert()) | |
1152 | nonGreedyFailures.append(matchDest); | |
1153 | else { | |
1154 | nonGreedyFailures.append(jump()); | |
1155 | matchDest.link(this); | |
1156 | } | |
1157 | ||
1158 | add32(TrustedImm32(1), countRegister); | |
1159 | add32(TrustedImm32(1), index); | |
1160 | ||
1161 | jump(op.m_reentry); | |
1162 | ||
1163 | nonGreedyFailures.link(this); | |
1164 | sub32(countRegister, index); | |
1165 | m_backtrackingState.fallthrough(); | |
1166 | } | |
1167 | ||
1168 | void generateDotStarEnclosure(size_t opIndex) | |
1169 | { | |
1170 | YarrOp& op = m_ops[opIndex]; | |
1171 | PatternTerm* term = op.m_term; | |
1172 | ||
1173 | const RegisterID character = regT0; | |
1174 | const RegisterID matchPos = regT1; | |
1175 | ||
1176 | JumpList foundBeginningNewLine; | |
1177 | JumpList saveStartIndex; | |
1178 | JumpList foundEndingNewLine; | |
1179 | ||
6fe7ccc8 A |
1180 | ASSERT(!m_pattern.m_body->m_hasFixedSize); |
1181 | getMatchStart(matchPos); | |
14957cd0 A |
1182 | |
1183 | saveStartIndex.append(branchTest32(Zero, matchPos)); | |
1184 | Label findBOLLoop(this); | |
1185 | sub32(TrustedImm32(1), matchPos); | |
6fe7ccc8 A |
1186 | if (m_charSize == Char8) |
1187 | load8(BaseIndex(input, matchPos, TimesOne, 0), character); | |
1188 | else | |
1189 | load16(BaseIndex(input, matchPos, TimesTwo, 0), character); | |
14957cd0 A |
1190 | matchCharacterClass(character, foundBeginningNewLine, m_pattern.newlineCharacterClass()); |
1191 | branchTest32(NonZero, matchPos).linkTo(findBOLLoop, this); | |
1192 | saveStartIndex.append(jump()); | |
1193 | ||
1194 | foundBeginningNewLine.link(this); | |
1195 | add32(TrustedImm32(1), matchPos); // Advance past newline | |
1196 | saveStartIndex.link(this); | |
1197 | ||
1198 | if (!m_pattern.m_multiline && term->anchors.bolAnchor) | |
1199 | op.m_jumps.append(branchTest32(NonZero, matchPos)); | |
1200 | ||
6fe7ccc8 A |
1201 | ASSERT(!m_pattern.m_body->m_hasFixedSize); |
1202 | setMatchStart(matchPos); | |
14957cd0 A |
1203 | |
1204 | move(index, matchPos); | |
1205 | ||
1206 | Label findEOLLoop(this); | |
1207 | foundEndingNewLine.append(branch32(Equal, matchPos, length)); | |
6fe7ccc8 A |
1208 | if (m_charSize == Char8) |
1209 | load8(BaseIndex(input, matchPos, TimesOne, 0), character); | |
1210 | else | |
1211 | load16(BaseIndex(input, matchPos, TimesTwo, 0), character); | |
14957cd0 A |
1212 | matchCharacterClass(character, foundEndingNewLine, m_pattern.newlineCharacterClass()); |
1213 | add32(TrustedImm32(1), matchPos); | |
1214 | jump(findEOLLoop); | |
1215 | ||
1216 | foundEndingNewLine.link(this); | |
1217 | ||
1218 | if (!m_pattern.m_multiline && term->anchors.eolAnchor) | |
1219 | op.m_jumps.append(branch32(NotEqual, matchPos, length)); | |
1220 | ||
1221 | move(matchPos, index); | |
1222 | } | |
1223 | ||
1224 | void backtrackDotStarEnclosure(size_t opIndex) | |
1225 | { | |
1226 | backtrackTermDefault(opIndex); | |
1227 | } | |
1228 | ||
1229 | // Code generation/backtracking for simple terms | |
1230 | // (pattern characters, character classes, and assertions). | |
1231 | // These methods farm out work to the set of functions above. | |
1232 | void generateTerm(size_t opIndex) | |
1233 | { | |
1234 | YarrOp& op = m_ops[opIndex]; | |
1235 | PatternTerm* term = op.m_term; | |
1236 | ||
1237 | switch (term->type) { | |
1238 | case PatternTerm::TypePatternCharacter: | |
1239 | switch (term->quantityType) { | |
1240 | case QuantifierFixedCount: | |
1241 | if (term->quantityCount == 1) | |
1242 | generatePatternCharacterOnce(opIndex); | |
1243 | else | |
1244 | generatePatternCharacterFixed(opIndex); | |
1245 | break; | |
1246 | case QuantifierGreedy: | |
1247 | generatePatternCharacterGreedy(opIndex); | |
1248 | break; | |
1249 | case QuantifierNonGreedy: | |
1250 | generatePatternCharacterNonGreedy(opIndex); | |
1251 | break; | |
1252 | } | |
1253 | break; | |
1254 | ||
1255 | case PatternTerm::TypeCharacterClass: | |
1256 | switch (term->quantityType) { | |
1257 | case QuantifierFixedCount: | |
1258 | if (term->quantityCount == 1) | |
1259 | generateCharacterClassOnce(opIndex); | |
1260 | else | |
1261 | generateCharacterClassFixed(opIndex); | |
1262 | break; | |
1263 | case QuantifierGreedy: | |
1264 | generateCharacterClassGreedy(opIndex); | |
1265 | break; | |
1266 | case QuantifierNonGreedy: | |
1267 | generateCharacterClassNonGreedy(opIndex); | |
1268 | break; | |
1269 | } | |
1270 | break; | |
1271 | ||
1272 | case PatternTerm::TypeAssertionBOL: | |
1273 | generateAssertionBOL(opIndex); | |
1274 | break; | |
1275 | ||
1276 | case PatternTerm::TypeAssertionEOL: | |
1277 | generateAssertionEOL(opIndex); | |
1278 | break; | |
1279 | ||
1280 | case PatternTerm::TypeAssertionWordBoundary: | |
1281 | generateAssertionWordBoundary(opIndex); | |
1282 | break; | |
1283 | ||
1284 | case PatternTerm::TypeForwardReference: | |
1285 | break; | |
1286 | ||
1287 | case PatternTerm::TypeParenthesesSubpattern: | |
1288 | case PatternTerm::TypeParentheticalAssertion: | |
93a37866 | 1289 | RELEASE_ASSERT_NOT_REACHED(); |
14957cd0 A |
1290 | case PatternTerm::TypeBackReference: |
1291 | m_shouldFallBack = true; | |
1292 | break; | |
1293 | case PatternTerm::TypeDotStarEnclosure: | |
1294 | generateDotStarEnclosure(opIndex); | |
1295 | break; | |
1296 | } | |
1297 | } | |
1298 | void backtrackTerm(size_t opIndex) | |
1299 | { | |
1300 | YarrOp& op = m_ops[opIndex]; | |
1301 | PatternTerm* term = op.m_term; | |
1302 | ||
1303 | switch (term->type) { | |
1304 | case PatternTerm::TypePatternCharacter: | |
1305 | switch (term->quantityType) { | |
1306 | case QuantifierFixedCount: | |
1307 | if (term->quantityCount == 1) | |
1308 | backtrackPatternCharacterOnce(opIndex); | |
1309 | else | |
1310 | backtrackPatternCharacterFixed(opIndex); | |
1311 | break; | |
1312 | case QuantifierGreedy: | |
1313 | backtrackPatternCharacterGreedy(opIndex); | |
1314 | break; | |
1315 | case QuantifierNonGreedy: | |
1316 | backtrackPatternCharacterNonGreedy(opIndex); | |
1317 | break; | |
1318 | } | |
1319 | break; | |
1320 | ||
1321 | case PatternTerm::TypeCharacterClass: | |
1322 | switch (term->quantityType) { | |
1323 | case QuantifierFixedCount: | |
1324 | if (term->quantityCount == 1) | |
1325 | backtrackCharacterClassOnce(opIndex); | |
1326 | else | |
1327 | backtrackCharacterClassFixed(opIndex); | |
1328 | break; | |
1329 | case QuantifierGreedy: | |
1330 | backtrackCharacterClassGreedy(opIndex); | |
1331 | break; | |
1332 | case QuantifierNonGreedy: | |
1333 | backtrackCharacterClassNonGreedy(opIndex); | |
1334 | break; | |
1335 | } | |
1336 | break; | |
1337 | ||
1338 | case PatternTerm::TypeAssertionBOL: | |
1339 | backtrackAssertionBOL(opIndex); | |
1340 | break; | |
1341 | ||
1342 | case PatternTerm::TypeAssertionEOL: | |
1343 | backtrackAssertionEOL(opIndex); | |
1344 | break; | |
1345 | ||
1346 | case PatternTerm::TypeAssertionWordBoundary: | |
1347 | backtrackAssertionWordBoundary(opIndex); | |
1348 | break; | |
1349 | ||
1350 | case PatternTerm::TypeForwardReference: | |
1351 | break; | |
1352 | ||
1353 | case PatternTerm::TypeParenthesesSubpattern: | |
1354 | case PatternTerm::TypeParentheticalAssertion: | |
93a37866 | 1355 | RELEASE_ASSERT_NOT_REACHED(); |
14957cd0 A |
1356 | |
1357 | case PatternTerm::TypeDotStarEnclosure: | |
1358 | backtrackDotStarEnclosure(opIndex); | |
1359 | break; | |
1360 | ||
1361 | case PatternTerm::TypeBackReference: | |
1362 | m_shouldFallBack = true; | |
1363 | break; | |
1364 | } | |
1365 | } | |
1366 | ||
1367 | void generate() | |
1368 | { | |
1369 | // Forwards generate the matching code. | |
1370 | ASSERT(m_ops.size()); | |
1371 | size_t opIndex = 0; | |
1372 | ||
1373 | do { | |
1374 | YarrOp& op = m_ops[opIndex]; | |
1375 | switch (op.m_op) { | |
1376 | ||
1377 | case OpTerm: | |
1378 | generateTerm(opIndex); | |
1379 | break; | |
1380 | ||
1381 | // OpBodyAlternativeBegin/Next/End | |
1382 | // | |
1383 | // These nodes wrap the set of alternatives in the body of the regular expression. | |
1384 | // There may be either one or two chains of OpBodyAlternative nodes, one representing | |
1385 | // the 'once through' sequence of alternatives (if any exist), and one representing | |
1386 | // the repeating alternatives (again, if any exist). | |
1387 | // | |
1388 | // Upon normal entry to the Begin alternative, we will check that input is available. | |
1389 | // Reentry to the Begin alternative will take place after the check has taken place, | |
1390 | // and will assume that the input position has already been progressed as appropriate. | |
1391 | // | |
1392 | // Entry to subsequent Next/End alternatives occurs when the prior alternative has | |
1393 | // successfully completed a match - return a success state from JIT code. | |
1394 | // | |
1395 | // Next alternatives allow for reentry optimized to suit backtracking from its | |
1396 | // preceding alternative. It expects the input position to still be set to a position | |
1397 | // appropriate to its predecessor, and it will only perform an input check if the | |
1398 | // predecessor had a minimum size less than its own. | |
1399 | // | |
1400 | // In the case 'once through' expressions, the End node will also have a reentry | |
1401 | // point to jump to when the last alternative fails. Again, this expects the input | |
1402 | // position to still reflect that expected by the prior alternative. | |
1403 | case OpBodyAlternativeBegin: { | |
1404 | PatternAlternative* alternative = op.m_alternative; | |
1405 | ||
1406 | // Upon entry at the head of the set of alternatives, check if input is available | |
1407 | // to run the first alternative. (This progresses the input position). | |
1408 | op.m_jumps.append(jumpIfNoAvailableInput(alternative->m_minimumSize)); | |
1409 | // We will reenter after the check, and assume the input position to have been | |
1410 | // set as appropriate to this alternative. | |
1411 | op.m_reentry = label(); | |
1412 | ||
1413 | m_checked += alternative->m_minimumSize; | |
1414 | break; | |
1415 | } | |
1416 | case OpBodyAlternativeNext: | |
1417 | case OpBodyAlternativeEnd: { | |
1418 | PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative; | |
1419 | PatternAlternative* alternative = op.m_alternative; | |
1420 | ||
1421 | // If we get here, the prior alternative matched - return success. | |
1422 | ||
1423 | // Adjust the stack pointer to remove the pattern's frame. | |
6fe7ccc8 | 1424 | removeCallFrame(); |
14957cd0 A |
1425 | |
1426 | // Load appropriate values into the return register and the first output | |
1427 | // slot, and return. In the case of pattern with a fixed size, we will | |
1428 | // not have yet set the value in the first | |
1429 | ASSERT(index != returnRegister); | |
1430 | if (m_pattern.m_body->m_hasFixedSize) { | |
1431 | move(index, returnRegister); | |
1432 | if (priorAlternative->m_minimumSize) | |
1433 | sub32(Imm32(priorAlternative->m_minimumSize), returnRegister); | |
6fe7ccc8 A |
1434 | if (compileMode == IncludeSubpatterns) |
1435 | store32(returnRegister, output); | |
14957cd0 | 1436 | } else |
6fe7ccc8 A |
1437 | getMatchStart(returnRegister); |
1438 | if (compileMode == IncludeSubpatterns) | |
1439 | store32(index, Address(output, 4)); | |
1440 | move(index, returnRegister2); | |
1441 | ||
14957cd0 A |
1442 | generateReturn(); |
1443 | ||
1444 | // This is the divide between the tail of the prior alternative, above, and | |
1445 | // the head of the subsequent alternative, below. | |
1446 | ||
1447 | if (op.m_op == OpBodyAlternativeNext) { | |
1448 | // This is the reentry point for the Next alternative. We expect any code | |
1449 | // that jumps here to do so with the input position matching that of the | |
1450 | // PRIOR alteranative, and we will only check input availability if we | |
1451 | // need to progress it forwards. | |
1452 | op.m_reentry = label(); | |
1453 | if (alternative->m_minimumSize > priorAlternative->m_minimumSize) { | |
1454 | add32(Imm32(alternative->m_minimumSize - priorAlternative->m_minimumSize), index); | |
1455 | op.m_jumps.append(jumpIfNoAvailableInput()); | |
1456 | } else if (priorAlternative->m_minimumSize > alternative->m_minimumSize) | |
1457 | sub32(Imm32(priorAlternative->m_minimumSize - alternative->m_minimumSize), index); | |
1458 | } else if (op.m_nextOp == notFound) { | |
1459 | // This is the reentry point for the End of 'once through' alternatives, | |
6fe7ccc8 | 1460 | // jumped to when the last alternative fails to match. |
14957cd0 A |
1461 | op.m_reentry = label(); |
1462 | sub32(Imm32(priorAlternative->m_minimumSize), index); | |
1463 | } | |
1464 | ||
1465 | if (op.m_op == OpBodyAlternativeNext) | |
1466 | m_checked += alternative->m_minimumSize; | |
1467 | m_checked -= priorAlternative->m_minimumSize; | |
1468 | break; | |
1469 | } | |
1470 | ||
1471 | // OpSimpleNestedAlternativeBegin/Next/End | |
1472 | // OpNestedAlternativeBegin/Next/End | |
1473 | // | |
1474 | // These nodes are used to handle sets of alternatives that are nested within | |
1475 | // subpatterns and parenthetical assertions. The 'simple' forms are used where | |
1476 | // we do not need to be able to backtrack back into any alternative other than | |
1477 | // the last, the normal forms allow backtracking into any alternative. | |
1478 | // | |
1479 | // Each Begin/Next node is responsible for planting an input check to ensure | |
1480 | // sufficient input is available on entry. Next nodes additionally need to | |
1481 | // jump to the end - Next nodes use the End node's m_jumps list to hold this | |
1482 | // set of jumps. | |
1483 | // | |
1484 | // In the non-simple forms, successful alternative matches must store a | |
1485 | // 'return address' using a DataLabelPtr, used to store the address to jump | |
1486 | // to when backtracking, to get to the code for the appropriate alternative. | |
1487 | case OpSimpleNestedAlternativeBegin: | |
1488 | case OpNestedAlternativeBegin: { | |
1489 | PatternTerm* term = op.m_term; | |
1490 | PatternAlternative* alternative = op.m_alternative; | |
1491 | PatternDisjunction* disjunction = term->parentheses.disjunction; | |
1492 | ||
1493 | // Calculate how much input we need to check for, and if non-zero check. | |
1494 | op.m_checkAdjust = alternative->m_minimumSize; | |
1495 | if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion)) | |
1496 | op.m_checkAdjust -= disjunction->m_minimumSize; | |
1497 | if (op.m_checkAdjust) | |
1498 | op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust)); | |
6fe7ccc8 | 1499 | |
14957cd0 A |
1500 | m_checked += op.m_checkAdjust; |
1501 | break; | |
1502 | } | |
1503 | case OpSimpleNestedAlternativeNext: | |
1504 | case OpNestedAlternativeNext: { | |
1505 | PatternTerm* term = op.m_term; | |
1506 | PatternAlternative* alternative = op.m_alternative; | |
1507 | PatternDisjunction* disjunction = term->parentheses.disjunction; | |
1508 | ||
1509 | // In the non-simple case, store a 'return address' so we can backtrack correctly. | |
1510 | if (op.m_op == OpNestedAlternativeNext) { | |
1511 | unsigned parenthesesFrameLocation = term->frameLocation; | |
1512 | unsigned alternativeFrameLocation = parenthesesFrameLocation; | |
1513 | if (term->quantityType != QuantifierFixedCount) | |
1514 | alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce; | |
1515 | op.m_returnAddress = storeToFrameWithPatch(alternativeFrameLocation); | |
1516 | } | |
1517 | ||
6fe7ccc8 A |
1518 | if (term->quantityType != QuantifierFixedCount && !m_ops[op.m_previousOp].m_alternative->m_minimumSize) { |
1519 | // If the previous alternative matched without consuming characters then | |
1520 | // backtrack to try to match while consumming some input. | |
1521 | op.m_zeroLengthMatch = branch32(Equal, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*))); | |
1522 | } | |
1523 | ||
14957cd0 A |
1524 | // If we reach here then the last alternative has matched - jump to the |
1525 | // End node, to skip over any further alternatives. | |
1526 | // | |
1527 | // FIXME: this is logically O(N^2) (though N can be expected to be very | |
1528 | // small). We could avoid this either by adding an extra jump to the JIT | |
1529 | // data structures, or by making backtracking code that jumps to Next | |
1530 | // alternatives are responsible for checking that input is available (if | |
1531 | // we didn't need to plant the input checks, then m_jumps would be free). | |
1532 | YarrOp* endOp = &m_ops[op.m_nextOp]; | |
1533 | while (endOp->m_nextOp != notFound) { | |
1534 | ASSERT(endOp->m_op == OpSimpleNestedAlternativeNext || endOp->m_op == OpNestedAlternativeNext); | |
1535 | endOp = &m_ops[endOp->m_nextOp]; | |
1536 | } | |
1537 | ASSERT(endOp->m_op == OpSimpleNestedAlternativeEnd || endOp->m_op == OpNestedAlternativeEnd); | |
1538 | endOp->m_jumps.append(jump()); | |
1539 | ||
1540 | // This is the entry point for the next alternative. | |
1541 | op.m_reentry = label(); | |
1542 | ||
1543 | // Calculate how much input we need to check for, and if non-zero check. | |
1544 | op.m_checkAdjust = alternative->m_minimumSize; | |
1545 | if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion)) | |
1546 | op.m_checkAdjust -= disjunction->m_minimumSize; | |
1547 | if (op.m_checkAdjust) | |
1548 | op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust)); | |
1549 | ||
1550 | YarrOp& lastOp = m_ops[op.m_previousOp]; | |
1551 | m_checked -= lastOp.m_checkAdjust; | |
1552 | m_checked += op.m_checkAdjust; | |
1553 | break; | |
1554 | } | |
1555 | case OpSimpleNestedAlternativeEnd: | |
1556 | case OpNestedAlternativeEnd: { | |
1557 | PatternTerm* term = op.m_term; | |
1558 | ||
1559 | // In the non-simple case, store a 'return address' so we can backtrack correctly. | |
1560 | if (op.m_op == OpNestedAlternativeEnd) { | |
1561 | unsigned parenthesesFrameLocation = term->frameLocation; | |
1562 | unsigned alternativeFrameLocation = parenthesesFrameLocation; | |
1563 | if (term->quantityType != QuantifierFixedCount) | |
1564 | alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce; | |
1565 | op.m_returnAddress = storeToFrameWithPatch(alternativeFrameLocation); | |
1566 | } | |
1567 | ||
6fe7ccc8 A |
1568 | if (term->quantityType != QuantifierFixedCount && !m_ops[op.m_previousOp].m_alternative->m_minimumSize) { |
1569 | // If the previous alternative matched without consuming characters then | |
1570 | // backtrack to try to match while consumming some input. | |
1571 | op.m_zeroLengthMatch = branch32(Equal, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*))); | |
1572 | } | |
1573 | ||
14957cd0 A |
1574 | // If this set of alternatives contains more than one alternative, |
1575 | // then the Next nodes will have planted jumps to the End, and added | |
1576 | // them to this node's m_jumps list. | |
1577 | op.m_jumps.link(this); | |
1578 | op.m_jumps.clear(); | |
1579 | ||
1580 | YarrOp& lastOp = m_ops[op.m_previousOp]; | |
1581 | m_checked -= lastOp.m_checkAdjust; | |
1582 | break; | |
1583 | } | |
1584 | ||
1585 | // OpParenthesesSubpatternOnceBegin/End | |
1586 | // | |
1587 | // These nodes support (optionally) capturing subpatterns, that have a | |
1588 | // quantity count of 1 (this covers fixed once, and ?/?? quantifiers). | |
1589 | case OpParenthesesSubpatternOnceBegin: { | |
1590 | PatternTerm* term = op.m_term; | |
1591 | unsigned parenthesesFrameLocation = term->frameLocation; | |
1592 | const RegisterID indexTemporary = regT0; | |
1593 | ASSERT(term->quantityCount == 1); | |
1594 | ||
1595 | // Upon entry to a Greedy quantified set of parenthese store the index. | |
1596 | // We'll use this for two purposes: | |
1597 | // - To indicate which iteration we are on of mathing the remainder of | |
1598 | // the expression after the parentheses - the first, including the | |
1599 | // match within the parentheses, or the second having skipped over them. | |
1600 | // - To check for empty matches, which must be rejected. | |
1601 | // | |
1602 | // At the head of a NonGreedy set of parentheses we'll immediately set the | |
1603 | // value on the stack to -1 (indicating a match skipping the subpattern), | |
1604 | // and plant a jump to the end. We'll also plant a label to backtrack to | |
1605 | // to reenter the subpattern later, with a store to set up index on the | |
1606 | // second iteration. | |
1607 | // | |
1608 | // FIXME: for capturing parens, could use the index in the capture array? | |
1609 | if (term->quantityType == QuantifierGreedy) | |
1610 | storeToFrame(index, parenthesesFrameLocation); | |
1611 | else if (term->quantityType == QuantifierNonGreedy) { | |
1612 | storeToFrame(TrustedImm32(-1), parenthesesFrameLocation); | |
1613 | op.m_jumps.append(jump()); | |
1614 | op.m_reentry = label(); | |
1615 | storeToFrame(index, parenthesesFrameLocation); | |
1616 | } | |
1617 | ||
1618 | // If the parenthese are capturing, store the starting index value to the | |
1619 | // captures array, offsetting as necessary. | |
1620 | // | |
1621 | // FIXME: could avoid offsetting this value in JIT code, apply | |
1622 | // offsets only afterwards, at the point the results array is | |
1623 | // being accessed. | |
6fe7ccc8 | 1624 | if (term->capture() && compileMode == IncludeSubpatterns) { |
14957cd0 A |
1625 | int inputOffset = term->inputPosition - m_checked; |
1626 | if (term->quantityType == QuantifierFixedCount) | |
1627 | inputOffset -= term->parentheses.disjunction->m_minimumSize; | |
1628 | if (inputOffset) { | |
1629 | move(index, indexTemporary); | |
1630 | add32(Imm32(inputOffset), indexTemporary); | |
6fe7ccc8 | 1631 | setSubpatternStart(indexTemporary, term->parentheses.subpatternId); |
14957cd0 | 1632 | } else |
6fe7ccc8 | 1633 | setSubpatternStart(index, term->parentheses.subpatternId); |
14957cd0 A |
1634 | } |
1635 | break; | |
1636 | } | |
1637 | case OpParenthesesSubpatternOnceEnd: { | |
1638 | PatternTerm* term = op.m_term; | |
14957cd0 A |
1639 | const RegisterID indexTemporary = regT0; |
1640 | ASSERT(term->quantityCount == 1); | |
1641 | ||
6fe7ccc8 A |
1642 | // Runtime ASSERT to make sure that the nested alternative handled the |
1643 | // "no input consumed" check. | |
81345200 | 1644 | if (!ASSERT_DISABLED && term->quantityType != QuantifierFixedCount && !term->parentheses.disjunction->m_minimumSize) { |
6fe7ccc8 A |
1645 | Jump pastBreakpoint; |
1646 | pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*))); | |
81345200 | 1647 | abortWithReason(YARRNoInputConsumed); |
6fe7ccc8 A |
1648 | pastBreakpoint.link(this); |
1649 | } | |
14957cd0 A |
1650 | |
1651 | // If the parenthese are capturing, store the ending index value to the | |
1652 | // captures array, offsetting as necessary. | |
1653 | // | |
1654 | // FIXME: could avoid offsetting this value in JIT code, apply | |
1655 | // offsets only afterwards, at the point the results array is | |
1656 | // being accessed. | |
6fe7ccc8 | 1657 | if (term->capture() && compileMode == IncludeSubpatterns) { |
14957cd0 A |
1658 | int inputOffset = term->inputPosition - m_checked; |
1659 | if (inputOffset) { | |
1660 | move(index, indexTemporary); | |
1661 | add32(Imm32(inputOffset), indexTemporary); | |
6fe7ccc8 | 1662 | setSubpatternEnd(indexTemporary, term->parentheses.subpatternId); |
14957cd0 | 1663 | } else |
6fe7ccc8 | 1664 | setSubpatternEnd(index, term->parentheses.subpatternId); |
14957cd0 A |
1665 | } |
1666 | ||
1667 | // If the parentheses are quantified Greedy then add a label to jump back | |
1668 | // to if get a failed match from after the parentheses. For NonGreedy | |
1669 | // parentheses, link the jump from before the subpattern to here. | |
1670 | if (term->quantityType == QuantifierGreedy) | |
1671 | op.m_reentry = label(); | |
1672 | else if (term->quantityType == QuantifierNonGreedy) { | |
1673 | YarrOp& beginOp = m_ops[op.m_previousOp]; | |
1674 | beginOp.m_jumps.link(this); | |
1675 | } | |
1676 | break; | |
1677 | } | |
1678 | ||
1679 | // OpParenthesesSubpatternTerminalBegin/End | |
1680 | case OpParenthesesSubpatternTerminalBegin: { | |
1681 | PatternTerm* term = op.m_term; | |
1682 | ASSERT(term->quantityType == QuantifierGreedy); | |
1683 | ASSERT(term->quantityCount == quantifyInfinite); | |
1684 | ASSERT(!term->capture()); | |
1685 | ||
1686 | // Upon entry set a label to loop back to. | |
1687 | op.m_reentry = label(); | |
1688 | ||
1689 | // Store the start index of the current match; we need to reject zero | |
1690 | // length matches. | |
1691 | storeToFrame(index, term->frameLocation); | |
1692 | break; | |
1693 | } | |
1694 | case OpParenthesesSubpatternTerminalEnd: { | |
6fe7ccc8 | 1695 | YarrOp& beginOp = m_ops[op.m_previousOp]; |
81345200 A |
1696 | if (!ASSERT_DISABLED) { |
1697 | PatternTerm* term = op.m_term; | |
1698 | ||
1699 | // Runtime ASSERT to make sure that the nested alternative handled the | |
1700 | // "no input consumed" check. | |
1701 | Jump pastBreakpoint; | |
1702 | pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*))); | |
1703 | abortWithReason(YARRNoInputConsumed); | |
1704 | pastBreakpoint.link(this); | |
1705 | } | |
14957cd0 | 1706 | |
6fe7ccc8 A |
1707 | // We know that the match is non-zero, we can accept it and |
1708 | // loop back up to the head of the subpattern. | |
1709 | jump(beginOp.m_reentry); | |
14957cd0 A |
1710 | |
1711 | // This is the entry point to jump to when we stop matching - we will | |
1712 | // do so once the subpattern cannot match any more. | |
1713 | op.m_reentry = label(); | |
1714 | break; | |
1715 | } | |
1716 | ||
1717 | // OpParentheticalAssertionBegin/End | |
1718 | case OpParentheticalAssertionBegin: { | |
1719 | PatternTerm* term = op.m_term; | |
1720 | ||
1721 | // Store the current index - assertions should not update index, so | |
1722 | // we will need to restore it upon a successful match. | |
1723 | unsigned parenthesesFrameLocation = term->frameLocation; | |
1724 | storeToFrame(index, parenthesesFrameLocation); | |
1725 | ||
1726 | // Check | |
1727 | op.m_checkAdjust = m_checked - term->inputPosition; | |
1728 | if (op.m_checkAdjust) | |
1729 | sub32(Imm32(op.m_checkAdjust), index); | |
1730 | ||
1731 | m_checked -= op.m_checkAdjust; | |
1732 | break; | |
1733 | } | |
1734 | case OpParentheticalAssertionEnd: { | |
1735 | PatternTerm* term = op.m_term; | |
1736 | ||
1737 | // Restore the input index value. | |
1738 | unsigned parenthesesFrameLocation = term->frameLocation; | |
1739 | loadFromFrame(parenthesesFrameLocation, index); | |
1740 | ||
1741 | // If inverted, a successful match of the assertion must be treated | |
1742 | // as a failure, so jump to backtracking. | |
1743 | if (term->invert()) { | |
1744 | op.m_jumps.append(jump()); | |
1745 | op.m_reentry = label(); | |
1746 | } | |
1747 | ||
1748 | YarrOp& lastOp = m_ops[op.m_previousOp]; | |
1749 | m_checked += lastOp.m_checkAdjust; | |
1750 | break; | |
1751 | } | |
1752 | ||
1753 | case OpMatchFailed: | |
6fe7ccc8 A |
1754 | removeCallFrame(); |
1755 | move(TrustedImmPtr((void*)WTF::notFound), returnRegister); | |
1756 | move(TrustedImm32(0), returnRegister2); | |
14957cd0 A |
1757 | generateReturn(); |
1758 | break; | |
1759 | } | |
1760 | ||
1761 | ++opIndex; | |
1762 | } while (opIndex < m_ops.size()); | |
1763 | } | |
1764 | ||
1765 | void backtrack() | |
1766 | { | |
1767 | // Backwards generate the backtracking code. | |
1768 | size_t opIndex = m_ops.size(); | |
1769 | ASSERT(opIndex); | |
1770 | ||
1771 | do { | |
1772 | --opIndex; | |
1773 | YarrOp& op = m_ops[opIndex]; | |
1774 | switch (op.m_op) { | |
1775 | ||
1776 | case OpTerm: | |
1777 | backtrackTerm(opIndex); | |
1778 | break; | |
1779 | ||
1780 | // OpBodyAlternativeBegin/Next/End | |
1781 | // | |
1782 | // For each Begin/Next node representing an alternative, we need to decide what to do | |
1783 | // in two circumstances: | |
1784 | // - If we backtrack back into this node, from within the alternative. | |
1785 | // - If the input check at the head of the alternative fails (if this exists). | |
1786 | // | |
1787 | // We treat these two cases differently since in the former case we have slightly | |
1788 | // more information - since we are backtracking out of a prior alternative we know | |
1789 | // that at least enough input was available to run it. For example, given the regular | |
1790 | // expression /a|b/, if we backtrack out of the first alternative (a failed pattern | |
1791 | // character match of 'a'), then we need not perform an additional input availability | |
1792 | // check before running the second alternative. | |
1793 | // | |
1794 | // Backtracking required differs for the last alternative, which in the case of the | |
1795 | // repeating set of alternatives must loop. The code generated for the last alternative | |
1796 | // will also be used to handle all input check failures from any prior alternatives - | |
1797 | // these require similar functionality, in seeking the next available alternative for | |
1798 | // which there is sufficient input. | |
1799 | // | |
1800 | // Since backtracking of all other alternatives simply requires us to link backtracks | |
1801 | // to the reentry point for the subsequent alternative, we will only be generating any | |
1802 | // code when backtracking the last alternative. | |
1803 | case OpBodyAlternativeBegin: | |
1804 | case OpBodyAlternativeNext: { | |
1805 | PatternAlternative* alternative = op.m_alternative; | |
1806 | ||
1807 | if (op.m_op == OpBodyAlternativeNext) { | |
1808 | PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative; | |
1809 | m_checked += priorAlternative->m_minimumSize; | |
1810 | } | |
1811 | m_checked -= alternative->m_minimumSize; | |
1812 | ||
1813 | // Is this the last alternative? If not, then if we backtrack to this point we just | |
1814 | // need to jump to try to match the next alternative. | |
1815 | if (m_ops[op.m_nextOp].m_op != OpBodyAlternativeEnd) { | |
1816 | m_backtrackingState.linkTo(m_ops[op.m_nextOp].m_reentry, this); | |
1817 | break; | |
1818 | } | |
1819 | YarrOp& endOp = m_ops[op.m_nextOp]; | |
1820 | ||
1821 | YarrOp* beginOp = &op; | |
1822 | while (beginOp->m_op != OpBodyAlternativeBegin) { | |
1823 | ASSERT(beginOp->m_op == OpBodyAlternativeNext); | |
1824 | beginOp = &m_ops[beginOp->m_previousOp]; | |
1825 | } | |
1826 | ||
1827 | bool onceThrough = endOp.m_nextOp == notFound; | |
1828 | ||
1829 | // First, generate code to handle cases where we backtrack out of an attempted match | |
1830 | // of the last alternative. If this is a 'once through' set of alternatives then we | |
1831 | // have nothing to do - link this straight through to the End. | |
1832 | if (onceThrough) | |
1833 | m_backtrackingState.linkTo(endOp.m_reentry, this); | |
1834 | else { | |
1835 | // If we don't need to move the input poistion, and the pattern has a fixed size | |
1836 | // (in which case we omit the store of the start index until the pattern has matched) | |
1837 | // then we can just link the backtrack out of the last alternative straight to the | |
1838 | // head of the first alternative. | |
1839 | if (m_pattern.m_body->m_hasFixedSize | |
1840 | && (alternative->m_minimumSize > beginOp->m_alternative->m_minimumSize) | |
1841 | && (alternative->m_minimumSize - beginOp->m_alternative->m_minimumSize == 1)) | |
1842 | m_backtrackingState.linkTo(beginOp->m_reentry, this); | |
1843 | else { | |
1844 | // We need to generate a trampoline of code to execute before looping back | |
1845 | // around to the first alternative. | |
1846 | m_backtrackingState.link(this); | |
1847 | ||
1848 | // If the pattern size is not fixed, then store the start index, for use if we match. | |
1849 | if (!m_pattern.m_body->m_hasFixedSize) { | |
1850 | if (alternative->m_minimumSize == 1) | |
6fe7ccc8 | 1851 | setMatchStart(index); |
14957cd0 A |
1852 | else { |
1853 | move(index, regT0); | |
1854 | if (alternative->m_minimumSize) | |
1855 | sub32(Imm32(alternative->m_minimumSize - 1), regT0); | |
1856 | else | |
6fe7ccc8 A |
1857 | add32(TrustedImm32(1), regT0); |
1858 | setMatchStart(regT0); | |
14957cd0 A |
1859 | } |
1860 | } | |
1861 | ||
1862 | // Generate code to loop. Check whether the last alternative is longer than the | |
1863 | // first (e.g. /a|xy/ or /a|xyz/). | |
1864 | if (alternative->m_minimumSize > beginOp->m_alternative->m_minimumSize) { | |
1865 | // We want to loop, and increment input position. If the delta is 1, it is | |
1866 | // already correctly incremented, if more than one then decrement as appropriate. | |
1867 | unsigned delta = alternative->m_minimumSize - beginOp->m_alternative->m_minimumSize; | |
1868 | ASSERT(delta); | |
1869 | if (delta != 1) | |
1870 | sub32(Imm32(delta - 1), index); | |
1871 | jump(beginOp->m_reentry); | |
1872 | } else { | |
1873 | // If the first alternative has minimum size 0xFFFFFFFFu, then there cannot | |
1874 | // be sufficent input available to handle this, so just fall through. | |
1875 | unsigned delta = beginOp->m_alternative->m_minimumSize - alternative->m_minimumSize; | |
1876 | if (delta != 0xFFFFFFFFu) { | |
1877 | // We need to check input because we are incrementing the input. | |
1878 | add32(Imm32(delta + 1), index); | |
1879 | checkInput().linkTo(beginOp->m_reentry, this); | |
1880 | } | |
1881 | } | |
1882 | } | |
1883 | } | |
1884 | ||
1885 | // We can reach this point in the code in two ways: | |
1886 | // - Fallthrough from the code above (a repeating alternative backtracked out of its | |
1887 | // last alternative, and did not have sufficent input to run the first). | |
1888 | // - We will loop back up to the following label when a releating alternative loops, | |
1889 | // following a failed input check. | |
1890 | // | |
1891 | // Either way, we have just failed the input check for the first alternative. | |
1892 | Label firstInputCheckFailed(this); | |
1893 | ||
1894 | // Generate code to handle input check failures from alternatives except the last. | |
1895 | // prevOp is the alternative we're handling a bail out from (initially Begin), and | |
1896 | // nextOp is the alternative we will be attempting to reenter into. | |
1897 | // | |
1898 | // We will link input check failures from the forwards matching path back to the code | |
1899 | // that can handle them. | |
1900 | YarrOp* prevOp = beginOp; | |
1901 | YarrOp* nextOp = &m_ops[beginOp->m_nextOp]; | |
1902 | while (nextOp->m_op != OpBodyAlternativeEnd) { | |
1903 | prevOp->m_jumps.link(this); | |
1904 | ||
1905 | // We only get here if an input check fails, it is only worth checking again | |
1906 | // if the next alternative has a minimum size less than the last. | |
1907 | if (prevOp->m_alternative->m_minimumSize > nextOp->m_alternative->m_minimumSize) { | |
1908 | // FIXME: if we added an extra label to YarrOp, we could avoid needing to | |
1909 | // subtract delta back out, and reduce this code. Should performance test | |
1910 | // the benefit of this. | |
1911 | unsigned delta = prevOp->m_alternative->m_minimumSize - nextOp->m_alternative->m_minimumSize; | |
1912 | sub32(Imm32(delta), index); | |
1913 | Jump fail = jumpIfNoAvailableInput(); | |
1914 | add32(Imm32(delta), index); | |
1915 | jump(nextOp->m_reentry); | |
1916 | fail.link(this); | |
1917 | } else if (prevOp->m_alternative->m_minimumSize < nextOp->m_alternative->m_minimumSize) | |
1918 | add32(Imm32(nextOp->m_alternative->m_minimumSize - prevOp->m_alternative->m_minimumSize), index); | |
1919 | prevOp = nextOp; | |
1920 | nextOp = &m_ops[nextOp->m_nextOp]; | |
1921 | } | |
1922 | ||
1923 | // We fall through to here if there is insufficient input to run the last alternative. | |
1924 | ||
1925 | // If there is insufficient input to run the last alternative, then for 'once through' | |
1926 | // alternatives we are done - just jump back up into the forwards matching path at the End. | |
1927 | if (onceThrough) { | |
1928 | op.m_jumps.linkTo(endOp.m_reentry, this); | |
1929 | jump(endOp.m_reentry); | |
1930 | break; | |
1931 | } | |
1932 | ||
1933 | // For repeating alternatives, link any input check failure from the last alternative to | |
1934 | // this point. | |
1935 | op.m_jumps.link(this); | |
1936 | ||
1937 | bool needsToUpdateMatchStart = !m_pattern.m_body->m_hasFixedSize; | |
1938 | ||
1939 | // Check for cases where input position is already incremented by 1 for the last | |
1940 | // alternative (this is particularly useful where the minimum size of the body | |
1941 | // disjunction is 0, e.g. /a*|b/). | |
1942 | if (needsToUpdateMatchStart && alternative->m_minimumSize == 1) { | |
1943 | // index is already incremented by 1, so just store it now! | |
6fe7ccc8 | 1944 | setMatchStart(index); |
14957cd0 A |
1945 | needsToUpdateMatchStart = false; |
1946 | } | |
1947 | ||
1948 | // Check whether there is sufficient input to loop. Increment the input position by | |
1949 | // one, and check. Also add in the minimum disjunction size before checking - there | |
1950 | // is no point in looping if we're just going to fail all the input checks around | |
1951 | // the next iteration. | |
1952 | ASSERT(alternative->m_minimumSize >= m_pattern.m_body->m_minimumSize); | |
1953 | if (alternative->m_minimumSize == m_pattern.m_body->m_minimumSize) { | |
1954 | // If the last alternative had the same minimum size as the disjunction, | |
1955 | // just simply increment input pos by 1, no adjustment based on minimum size. | |
6fe7ccc8 | 1956 | add32(TrustedImm32(1), index); |
14957cd0 A |
1957 | } else { |
1958 | // If the minumum for the last alternative was one greater than than that | |
1959 | // for the disjunction, we're already progressed by 1, nothing to do! | |
1960 | unsigned delta = (alternative->m_minimumSize - m_pattern.m_body->m_minimumSize) - 1; | |
1961 | if (delta) | |
1962 | sub32(Imm32(delta), index); | |
1963 | } | |
1964 | Jump matchFailed = jumpIfNoAvailableInput(); | |
1965 | ||
1966 | if (needsToUpdateMatchStart) { | |
1967 | if (!m_pattern.m_body->m_minimumSize) | |
6fe7ccc8 | 1968 | setMatchStart(index); |
14957cd0 A |
1969 | else { |
1970 | move(index, regT0); | |
1971 | sub32(Imm32(m_pattern.m_body->m_minimumSize), regT0); | |
6fe7ccc8 | 1972 | setMatchStart(regT0); |
14957cd0 A |
1973 | } |
1974 | } | |
1975 | ||
1976 | // Calculate how much more input the first alternative requires than the minimum | |
1977 | // for the body as a whole. If no more is needed then we dont need an additional | |
1978 | // input check here - jump straight back up to the start of the first alternative. | |
1979 | if (beginOp->m_alternative->m_minimumSize == m_pattern.m_body->m_minimumSize) | |
1980 | jump(beginOp->m_reentry); | |
1981 | else { | |
1982 | if (beginOp->m_alternative->m_minimumSize > m_pattern.m_body->m_minimumSize) | |
1983 | add32(Imm32(beginOp->m_alternative->m_minimumSize - m_pattern.m_body->m_minimumSize), index); | |
1984 | else | |
1985 | sub32(Imm32(m_pattern.m_body->m_minimumSize - beginOp->m_alternative->m_minimumSize), index); | |
1986 | checkInput().linkTo(beginOp->m_reentry, this); | |
1987 | jump(firstInputCheckFailed); | |
1988 | } | |
1989 | ||
1990 | // We jump to here if we iterate to the point that there is insufficient input to | |
1991 | // run any matches, and need to return a failure state from JIT code. | |
1992 | matchFailed.link(this); | |
1993 | ||
6fe7ccc8 A |
1994 | removeCallFrame(); |
1995 | move(TrustedImmPtr((void*)WTF::notFound), returnRegister); | |
1996 | move(TrustedImm32(0), returnRegister2); | |
14957cd0 A |
1997 | generateReturn(); |
1998 | break; | |
1999 | } | |
2000 | case OpBodyAlternativeEnd: { | |
2001 | // We should never backtrack back into a body disjunction. | |
2002 | ASSERT(m_backtrackingState.isEmpty()); | |
2003 | ||
2004 | PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative; | |
2005 | m_checked += priorAlternative->m_minimumSize; | |
2006 | break; | |
2007 | } | |
2008 | ||
2009 | // OpSimpleNestedAlternativeBegin/Next/End | |
2010 | // OpNestedAlternativeBegin/Next/End | |
2011 | // | |
2012 | // Generate code for when we backtrack back out of an alternative into | |
2013 | // a Begin or Next node, or when the entry input count check fails. If | |
2014 | // there are more alternatives we need to jump to the next alternative, | |
2015 | // if not we backtrack back out of the current set of parentheses. | |
2016 | // | |
2017 | // In the case of non-simple nested assertions we need to also link the | |
2018 | // 'return address' appropriately to backtrack back out into the correct | |
2019 | // alternative. | |
2020 | case OpSimpleNestedAlternativeBegin: | |
2021 | case OpSimpleNestedAlternativeNext: | |
2022 | case OpNestedAlternativeBegin: | |
2023 | case OpNestedAlternativeNext: { | |
2024 | YarrOp& nextOp = m_ops[op.m_nextOp]; | |
2025 | bool isBegin = op.m_previousOp == notFound; | |
2026 | bool isLastAlternative = nextOp.m_nextOp == notFound; | |
2027 | ASSERT(isBegin == (op.m_op == OpSimpleNestedAlternativeBegin || op.m_op == OpNestedAlternativeBegin)); | |
2028 | ASSERT(isLastAlternative == (nextOp.m_op == OpSimpleNestedAlternativeEnd || nextOp.m_op == OpNestedAlternativeEnd)); | |
2029 | ||
2030 | // Treat an input check failure the same as a failed match. | |
2031 | m_backtrackingState.append(op.m_jumps); | |
2032 | ||
2033 | // Set the backtracks to jump to the appropriate place. We may need | |
2034 | // to link the backtracks in one of three different way depending on | |
2035 | // the type of alternative we are dealing with: | |
2036 | // - A single alternative, with no simplings. | |
2037 | // - The last alternative of a set of two or more. | |
2038 | // - An alternative other than the last of a set of two or more. | |
2039 | // | |
2040 | // In the case of a single alternative on its own, we don't need to | |
2041 | // jump anywhere - if the alternative fails to match we can just | |
2042 | // continue to backtrack out of the parentheses without jumping. | |
2043 | // | |
2044 | // In the case of the last alternative in a set of more than one, we | |
2045 | // need to jump to return back out to the beginning. We'll do so by | |
2046 | // adding a jump to the End node's m_jumps list, and linking this | |
2047 | // when we come to generate the Begin node. For alternatives other | |
2048 | // than the last, we need to jump to the next alternative. | |
2049 | // | |
2050 | // If the alternative had adjusted the input position we must link | |
2051 | // backtracking to here, correct, and then jump on. If not we can | |
2052 | // link the backtracks directly to their destination. | |
2053 | if (op.m_checkAdjust) { | |
2054 | // Handle the cases where we need to link the backtracks here. | |
2055 | m_backtrackingState.link(this); | |
2056 | sub32(Imm32(op.m_checkAdjust), index); | |
2057 | if (!isLastAlternative) { | |
2058 | // An alternative that is not the last should jump to its successor. | |
2059 | jump(nextOp.m_reentry); | |
2060 | } else if (!isBegin) { | |
6fe7ccc8 | 2061 | // The last of more than one alternatives must jump back to the beginning. |
14957cd0 A |
2062 | nextOp.m_jumps.append(jump()); |
2063 | } else { | |
2064 | // A single alternative on its own can fall through. | |
2065 | m_backtrackingState.fallthrough(); | |
2066 | } | |
2067 | } else { | |
2068 | // Handle the cases where we can link the backtracks directly to their destinations. | |
2069 | if (!isLastAlternative) { | |
2070 | // An alternative that is not the last should jump to its successor. | |
2071 | m_backtrackingState.linkTo(nextOp.m_reentry, this); | |
2072 | } else if (!isBegin) { | |
6fe7ccc8 | 2073 | // The last of more than one alternatives must jump back to the beginning. |
14957cd0 A |
2074 | m_backtrackingState.takeBacktracksToJumpList(nextOp.m_jumps, this); |
2075 | } | |
2076 | // In the case of a single alternative on its own do nothing - it can fall through. | |
2077 | } | |
2078 | ||
6fe7ccc8 A |
2079 | // If there is a backtrack jump from a zero length match link it here. |
2080 | if (op.m_zeroLengthMatch.isSet()) | |
2081 | m_backtrackingState.append(op.m_zeroLengthMatch); | |
2082 | ||
14957cd0 A |
2083 | // At this point we've handled the backtracking back into this node. |
2084 | // Now link any backtracks that need to jump to here. | |
2085 | ||
2086 | // For non-simple alternatives, link the alternative's 'return address' | |
2087 | // so that we backtrack back out into the previous alternative. | |
2088 | if (op.m_op == OpNestedAlternativeNext) | |
2089 | m_backtrackingState.append(op.m_returnAddress); | |
2090 | ||
2091 | // If there is more than one alternative, then the last alternative will | |
2092 | // have planted a jump to be linked to the end. This jump was added to the | |
2093 | // End node's m_jumps list. If we are back at the beginning, link it here. | |
2094 | if (isBegin) { | |
2095 | YarrOp* endOp = &m_ops[op.m_nextOp]; | |
2096 | while (endOp->m_nextOp != notFound) { | |
2097 | ASSERT(endOp->m_op == OpSimpleNestedAlternativeNext || endOp->m_op == OpNestedAlternativeNext); | |
2098 | endOp = &m_ops[endOp->m_nextOp]; | |
2099 | } | |
2100 | ASSERT(endOp->m_op == OpSimpleNestedAlternativeEnd || endOp->m_op == OpNestedAlternativeEnd); | |
2101 | m_backtrackingState.append(endOp->m_jumps); | |
2102 | } | |
2103 | ||
2104 | if (!isBegin) { | |
2105 | YarrOp& lastOp = m_ops[op.m_previousOp]; | |
2106 | m_checked += lastOp.m_checkAdjust; | |
2107 | } | |
2108 | m_checked -= op.m_checkAdjust; | |
2109 | break; | |
2110 | } | |
2111 | case OpSimpleNestedAlternativeEnd: | |
2112 | case OpNestedAlternativeEnd: { | |
2113 | PatternTerm* term = op.m_term; | |
2114 | ||
6fe7ccc8 A |
2115 | // If there is a backtrack jump from a zero length match link it here. |
2116 | if (op.m_zeroLengthMatch.isSet()) | |
2117 | m_backtrackingState.append(op.m_zeroLengthMatch); | |
2118 | ||
14957cd0 A |
2119 | // If we backtrack into the end of a simple subpattern do nothing; |
2120 | // just continue through into the last alternative. If we backtrack | |
2121 | // into the end of a non-simple set of alterntives we need to jump | |
2122 | // to the backtracking return address set up during generation. | |
2123 | if (op.m_op == OpNestedAlternativeEnd) { | |
2124 | m_backtrackingState.link(this); | |
2125 | ||
2126 | // Plant a jump to the return address. | |
2127 | unsigned parenthesesFrameLocation = term->frameLocation; | |
2128 | unsigned alternativeFrameLocation = parenthesesFrameLocation; | |
2129 | if (term->quantityType != QuantifierFixedCount) | |
2130 | alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce; | |
2131 | loadFromFrameAndJump(alternativeFrameLocation); | |
2132 | ||
2133 | // Link the DataLabelPtr associated with the end of the last | |
2134 | // alternative to this point. | |
2135 | m_backtrackingState.append(op.m_returnAddress); | |
2136 | } | |
2137 | ||
2138 | YarrOp& lastOp = m_ops[op.m_previousOp]; | |
2139 | m_checked += lastOp.m_checkAdjust; | |
2140 | break; | |
2141 | } | |
2142 | ||
2143 | // OpParenthesesSubpatternOnceBegin/End | |
2144 | // | |
2145 | // When we are backtracking back out of a capturing subpattern we need | |
2146 | // to clear the start index in the matches output array, to record that | |
2147 | // this subpattern has not been captured. | |
2148 | // | |
2149 | // When backtracking back out of a Greedy quantified subpattern we need | |
2150 | // to catch this, and try running the remainder of the alternative after | |
2151 | // the subpattern again, skipping the parentheses. | |
2152 | // | |
2153 | // Upon backtracking back into a quantified set of parentheses we need to | |
2154 | // check whether we were currently skipping the subpattern. If not, we | |
2155 | // can backtrack into them, if we were we need to either backtrack back | |
2156 | // out of the start of the parentheses, or jump back to the forwards | |
2157 | // matching start, depending of whether the match is Greedy or NonGreedy. | |
2158 | case OpParenthesesSubpatternOnceBegin: { | |
2159 | PatternTerm* term = op.m_term; | |
2160 | ASSERT(term->quantityCount == 1); | |
2161 | ||
2162 | // We only need to backtrack to thispoint if capturing or greedy. | |
6fe7ccc8 | 2163 | if ((term->capture() && compileMode == IncludeSubpatterns) || term->quantityType == QuantifierGreedy) { |
14957cd0 A |
2164 | m_backtrackingState.link(this); |
2165 | ||
2166 | // If capturing, clear the capture (we only need to reset start). | |
6fe7ccc8 A |
2167 | if (term->capture() && compileMode == IncludeSubpatterns) |
2168 | clearSubpatternStart(term->parentheses.subpatternId); | |
14957cd0 A |
2169 | |
2170 | // If Greedy, jump to the end. | |
2171 | if (term->quantityType == QuantifierGreedy) { | |
2172 | // Clear the flag in the stackframe indicating we ran through the subpattern. | |
2173 | unsigned parenthesesFrameLocation = term->frameLocation; | |
2174 | storeToFrame(TrustedImm32(-1), parenthesesFrameLocation); | |
2175 | // Jump to after the parentheses, skipping the subpattern. | |
2176 | jump(m_ops[op.m_nextOp].m_reentry); | |
2177 | // A backtrack from after the parentheses, when skipping the subpattern, | |
2178 | // will jump back to here. | |
2179 | op.m_jumps.link(this); | |
2180 | } | |
2181 | ||
2182 | m_backtrackingState.fallthrough(); | |
2183 | } | |
2184 | break; | |
2185 | } | |
2186 | case OpParenthesesSubpatternOnceEnd: { | |
2187 | PatternTerm* term = op.m_term; | |
2188 | ||
2189 | if (term->quantityType != QuantifierFixedCount) { | |
2190 | m_backtrackingState.link(this); | |
2191 | ||
2192 | // Check whether we should backtrack back into the parentheses, or if we | |
2193 | // are currently in a state where we had skipped over the subpattern | |
2194 | // (in which case the flag value on the stack will be -1). | |
2195 | unsigned parenthesesFrameLocation = term->frameLocation; | |
2196 | Jump hadSkipped = branch32(Equal, Address(stackPointerRegister, parenthesesFrameLocation * sizeof(void*)), TrustedImm32(-1)); | |
2197 | ||
2198 | if (term->quantityType == QuantifierGreedy) { | |
2199 | // For Greedy parentheses, we skip after having already tried going | |
2200 | // through the subpattern, so if we get here we're done. | |
2201 | YarrOp& beginOp = m_ops[op.m_previousOp]; | |
2202 | beginOp.m_jumps.append(hadSkipped); | |
2203 | } else { | |
2204 | // For NonGreedy parentheses, we try skipping the subpattern first, | |
2205 | // so if we get here we need to try running through the subpattern | |
2206 | // next. Jump back to the start of the parentheses in the forwards | |
2207 | // matching path. | |
2208 | ASSERT(term->quantityType == QuantifierNonGreedy); | |
2209 | YarrOp& beginOp = m_ops[op.m_previousOp]; | |
2210 | hadSkipped.linkTo(beginOp.m_reentry, this); | |
2211 | } | |
2212 | ||
2213 | m_backtrackingState.fallthrough(); | |
2214 | } | |
2215 | ||
2216 | m_backtrackingState.append(op.m_jumps); | |
2217 | break; | |
2218 | } | |
2219 | ||
2220 | // OpParenthesesSubpatternTerminalBegin/End | |
2221 | // | |
2222 | // Terminal subpatterns will always match - there is nothing after them to | |
2223 | // force a backtrack, and they have a minimum count of 0, and as such will | |
2224 | // always produce an acceptable result. | |
2225 | case OpParenthesesSubpatternTerminalBegin: { | |
2226 | // We will backtrack to this point once the subpattern cannot match any | |
2227 | // more. Since no match is accepted as a successful match (we are Greedy | |
2228 | // quantified with a minimum of zero) jump back to the forwards matching | |
2229 | // path at the end. | |
2230 | YarrOp& endOp = m_ops[op.m_nextOp]; | |
2231 | m_backtrackingState.linkTo(endOp.m_reentry, this); | |
2232 | break; | |
2233 | } | |
2234 | case OpParenthesesSubpatternTerminalEnd: | |
2235 | // We should never be backtracking to here (hence the 'terminal' in the name). | |
2236 | ASSERT(m_backtrackingState.isEmpty()); | |
2237 | m_backtrackingState.append(op.m_jumps); | |
2238 | break; | |
2239 | ||
2240 | // OpParentheticalAssertionBegin/End | |
2241 | case OpParentheticalAssertionBegin: { | |
2242 | PatternTerm* term = op.m_term; | |
2243 | YarrOp& endOp = m_ops[op.m_nextOp]; | |
2244 | ||
2245 | // We need to handle the backtracks upon backtracking back out | |
2246 | // of a parenthetical assertion if either we need to correct | |
2247 | // the input index, or the assertion was inverted. | |
2248 | if (op.m_checkAdjust || term->invert()) { | |
2249 | m_backtrackingState.link(this); | |
2250 | ||
2251 | if (op.m_checkAdjust) | |
2252 | add32(Imm32(op.m_checkAdjust), index); | |
2253 | ||
2254 | // In an inverted assertion failure to match the subpattern | |
2255 | // is treated as a successful match - jump to the end of the | |
2256 | // subpattern. We already have adjusted the input position | |
2257 | // back to that before the assertion, which is correct. | |
2258 | if (term->invert()) | |
2259 | jump(endOp.m_reentry); | |
2260 | ||
2261 | m_backtrackingState.fallthrough(); | |
2262 | } | |
2263 | ||
2264 | // The End node's jump list will contain any backtracks into | |
2265 | // the end of the assertion. Also, if inverted, we will have | |
2266 | // added the failure caused by a successful match to this. | |
2267 | m_backtrackingState.append(endOp.m_jumps); | |
2268 | ||
2269 | m_checked += op.m_checkAdjust; | |
2270 | break; | |
2271 | } | |
2272 | case OpParentheticalAssertionEnd: { | |
2273 | // FIXME: We should really be clearing any nested subpattern | |
2274 | // matches on bailing out from after the pattern. Firefox has | |
2275 | // this bug too (presumably because they use YARR!) | |
2276 | ||
2277 | // Never backtrack into an assertion; later failures bail to before the begin. | |
2278 | m_backtrackingState.takeBacktracksToJumpList(op.m_jumps, this); | |
2279 | ||
2280 | YarrOp& lastOp = m_ops[op.m_previousOp]; | |
2281 | m_checked -= lastOp.m_checkAdjust; | |
2282 | break; | |
2283 | } | |
2284 | ||
2285 | case OpMatchFailed: | |
2286 | break; | |
2287 | } | |
2288 | ||
2289 | } while (opIndex); | |
2290 | } | |
2291 | ||
2292 | // Compilation methods: | |
2293 | // ==================== | |
2294 | ||
2295 | // opCompileParenthesesSubpattern | |
2296 | // Emits ops for a subpattern (set of parentheses). These consist | |
2297 | // of a set of alternatives wrapped in an outer set of nodes for | |
2298 | // the parentheses. | |
2299 | // Supported types of parentheses are 'Once' (quantityCount == 1) | |
2300 | // and 'Terminal' (non-capturing parentheses quantified as greedy | |
2301 | // and infinite). | |
2302 | // Alternatives will use the 'Simple' set of ops if either the | |
2303 | // subpattern is terminal (in which case we will never need to | |
2304 | // backtrack), or if the subpattern only contains one alternative. | |
2305 | void opCompileParenthesesSubpattern(PatternTerm* term) | |
2306 | { | |
2307 | YarrOpCode parenthesesBeginOpCode; | |
2308 | YarrOpCode parenthesesEndOpCode; | |
2309 | YarrOpCode alternativeBeginOpCode = OpSimpleNestedAlternativeBegin; | |
2310 | YarrOpCode alternativeNextOpCode = OpSimpleNestedAlternativeNext; | |
2311 | YarrOpCode alternativeEndOpCode = OpSimpleNestedAlternativeEnd; | |
2312 | ||
2313 | // We can currently only compile quantity 1 subpatterns that are | |
2314 | // not copies. We generate a copy in the case of a range quantifier, | |
2315 | // e.g. /(?:x){3,9}/, or /(?:x)+/ (These are effectively expanded to | |
2316 | // /(?:x){3,3}(?:x){0,6}/ and /(?:x)(?:x)*/ repectively). The problem | |
2317 | // comes where the subpattern is capturing, in which case we would | |
2318 | // need to restore the capture from the first subpattern upon a | |
2319 | // failure in the second. | |
2320 | if (term->quantityCount == 1 && !term->parentheses.isCopy) { | |
2321 | // Select the 'Once' nodes. | |
2322 | parenthesesBeginOpCode = OpParenthesesSubpatternOnceBegin; | |
2323 | parenthesesEndOpCode = OpParenthesesSubpatternOnceEnd; | |
2324 | ||
2325 | // If there is more than one alternative we cannot use the 'simple' nodes. | |
2326 | if (term->parentheses.disjunction->m_alternatives.size() != 1) { | |
2327 | alternativeBeginOpCode = OpNestedAlternativeBegin; | |
2328 | alternativeNextOpCode = OpNestedAlternativeNext; | |
2329 | alternativeEndOpCode = OpNestedAlternativeEnd; | |
2330 | } | |
2331 | } else if (term->parentheses.isTerminal) { | |
2332 | // Select the 'Terminal' nodes. | |
2333 | parenthesesBeginOpCode = OpParenthesesSubpatternTerminalBegin; | |
2334 | parenthesesEndOpCode = OpParenthesesSubpatternTerminalEnd; | |
2335 | } else { | |
2336 | // This subpattern is not supported by the JIT. | |
2337 | m_shouldFallBack = true; | |
2338 | return; | |
2339 | } | |
2340 | ||
2341 | size_t parenBegin = m_ops.size(); | |
2342 | m_ops.append(parenthesesBeginOpCode); | |
2343 | ||
2344 | m_ops.append(alternativeBeginOpCode); | |
2345 | m_ops.last().m_previousOp = notFound; | |
2346 | m_ops.last().m_term = term; | |
ed1e77d3 | 2347 | Vector<std::unique_ptr<PatternAlternative>>& alternatives = term->parentheses.disjunction->m_alternatives; |
14957cd0 A |
2348 | for (unsigned i = 0; i < alternatives.size(); ++i) { |
2349 | size_t lastOpIndex = m_ops.size() - 1; | |
2350 | ||
93a37866 | 2351 | PatternAlternative* nestedAlternative = alternatives[i].get(); |
14957cd0 A |
2352 | opCompileAlternative(nestedAlternative); |
2353 | ||
2354 | size_t thisOpIndex = m_ops.size(); | |
2355 | m_ops.append(YarrOp(alternativeNextOpCode)); | |
2356 | ||
2357 | YarrOp& lastOp = m_ops[lastOpIndex]; | |
2358 | YarrOp& thisOp = m_ops[thisOpIndex]; | |
2359 | ||
2360 | lastOp.m_alternative = nestedAlternative; | |
2361 | lastOp.m_nextOp = thisOpIndex; | |
2362 | thisOp.m_previousOp = lastOpIndex; | |
2363 | thisOp.m_term = term; | |
2364 | } | |
2365 | YarrOp& lastOp = m_ops.last(); | |
2366 | ASSERT(lastOp.m_op == alternativeNextOpCode); | |
2367 | lastOp.m_op = alternativeEndOpCode; | |
2368 | lastOp.m_alternative = 0; | |
2369 | lastOp.m_nextOp = notFound; | |
2370 | ||
2371 | size_t parenEnd = m_ops.size(); | |
2372 | m_ops.append(parenthesesEndOpCode); | |
2373 | ||
2374 | m_ops[parenBegin].m_term = term; | |
2375 | m_ops[parenBegin].m_previousOp = notFound; | |
2376 | m_ops[parenBegin].m_nextOp = parenEnd; | |
2377 | m_ops[parenEnd].m_term = term; | |
2378 | m_ops[parenEnd].m_previousOp = parenBegin; | |
2379 | m_ops[parenEnd].m_nextOp = notFound; | |
2380 | } | |
2381 | ||
2382 | // opCompileParentheticalAssertion | |
2383 | // Emits ops for a parenthetical assertion. These consist of an | |
2384 | // OpSimpleNestedAlternativeBegin/Next/End set of nodes wrapping | |
2385 | // the alternatives, with these wrapped by an outer pair of | |
2386 | // OpParentheticalAssertionBegin/End nodes. | |
2387 | // We can always use the OpSimpleNestedAlternative nodes in the | |
2388 | // case of parenthetical assertions since these only ever match | |
2389 | // once, and will never backtrack back into the assertion. | |
2390 | void opCompileParentheticalAssertion(PatternTerm* term) | |
2391 | { | |
2392 | size_t parenBegin = m_ops.size(); | |
2393 | m_ops.append(OpParentheticalAssertionBegin); | |
2394 | ||
2395 | m_ops.append(OpSimpleNestedAlternativeBegin); | |
2396 | m_ops.last().m_previousOp = notFound; | |
2397 | m_ops.last().m_term = term; | |
ed1e77d3 | 2398 | Vector<std::unique_ptr<PatternAlternative>>& alternatives = term->parentheses.disjunction->m_alternatives; |
14957cd0 A |
2399 | for (unsigned i = 0; i < alternatives.size(); ++i) { |
2400 | size_t lastOpIndex = m_ops.size() - 1; | |
2401 | ||
93a37866 | 2402 | PatternAlternative* nestedAlternative = alternatives[i].get(); |
14957cd0 A |
2403 | opCompileAlternative(nestedAlternative); |
2404 | ||
2405 | size_t thisOpIndex = m_ops.size(); | |
2406 | m_ops.append(YarrOp(OpSimpleNestedAlternativeNext)); | |
2407 | ||
2408 | YarrOp& lastOp = m_ops[lastOpIndex]; | |
2409 | YarrOp& thisOp = m_ops[thisOpIndex]; | |
2410 | ||
2411 | lastOp.m_alternative = nestedAlternative; | |
2412 | lastOp.m_nextOp = thisOpIndex; | |
2413 | thisOp.m_previousOp = lastOpIndex; | |
2414 | thisOp.m_term = term; | |
2415 | } | |
2416 | YarrOp& lastOp = m_ops.last(); | |
2417 | ASSERT(lastOp.m_op == OpSimpleNestedAlternativeNext); | |
2418 | lastOp.m_op = OpSimpleNestedAlternativeEnd; | |
2419 | lastOp.m_alternative = 0; | |
2420 | lastOp.m_nextOp = notFound; | |
2421 | ||
2422 | size_t parenEnd = m_ops.size(); | |
2423 | m_ops.append(OpParentheticalAssertionEnd); | |
2424 | ||
2425 | m_ops[parenBegin].m_term = term; | |
2426 | m_ops[parenBegin].m_previousOp = notFound; | |
2427 | m_ops[parenBegin].m_nextOp = parenEnd; | |
2428 | m_ops[parenEnd].m_term = term; | |
2429 | m_ops[parenEnd].m_previousOp = parenBegin; | |
2430 | m_ops[parenEnd].m_nextOp = notFound; | |
2431 | } | |
2432 | ||
2433 | // opCompileAlternative | |
2434 | // Called to emit nodes for all terms in an alternative. | |
2435 | void opCompileAlternative(PatternAlternative* alternative) | |
2436 | { | |
2437 | optimizeAlternative(alternative); | |
2438 | ||
2439 | for (unsigned i = 0; i < alternative->m_terms.size(); ++i) { | |
2440 | PatternTerm* term = &alternative->m_terms[i]; | |
2441 | ||
2442 | switch (term->type) { | |
2443 | case PatternTerm::TypeParenthesesSubpattern: | |
2444 | opCompileParenthesesSubpattern(term); | |
2445 | break; | |
2446 | ||
2447 | case PatternTerm::TypeParentheticalAssertion: | |
2448 | opCompileParentheticalAssertion(term); | |
2449 | break; | |
2450 | ||
2451 | default: | |
2452 | m_ops.append(term); | |
2453 | } | |
2454 | } | |
2455 | } | |
2456 | ||
2457 | // opCompileBody | |
2458 | // This method compiles the body disjunction of the regular expression. | |
2459 | // The body consists of two sets of alternatives - zero or more 'once | |
2460 | // through' (BOL anchored) alternatives, followed by zero or more | |
2461 | // repeated alternatives. | |
2462 | // For each of these two sets of alteratives, if not empty they will be | |
2463 | // wrapped in a set of OpBodyAlternativeBegin/Next/End nodes (with the | |
2464 | // 'begin' node referencing the first alternative, and 'next' nodes | |
2465 | // referencing any further alternatives. The begin/next/end nodes are | |
2466 | // linked together in a doubly linked list. In the case of repeating | |
2467 | // alternatives, the end node is also linked back to the beginning. | |
2468 | // If no repeating alternatives exist, then a OpMatchFailed node exists | |
2469 | // to return the failing result. | |
2470 | void opCompileBody(PatternDisjunction* disjunction) | |
2471 | { | |
ed1e77d3 | 2472 | Vector<std::unique_ptr<PatternAlternative>>& alternatives = disjunction->m_alternatives; |
14957cd0 A |
2473 | size_t currentAlternativeIndex = 0; |
2474 | ||
2475 | // Emit the 'once through' alternatives. | |
2476 | if (alternatives.size() && alternatives[0]->onceThrough()) { | |
2477 | m_ops.append(YarrOp(OpBodyAlternativeBegin)); | |
2478 | m_ops.last().m_previousOp = notFound; | |
2479 | ||
2480 | do { | |
2481 | size_t lastOpIndex = m_ops.size() - 1; | |
93a37866 | 2482 | PatternAlternative* alternative = alternatives[currentAlternativeIndex].get(); |
14957cd0 A |
2483 | opCompileAlternative(alternative); |
2484 | ||
2485 | size_t thisOpIndex = m_ops.size(); | |
2486 | m_ops.append(YarrOp(OpBodyAlternativeNext)); | |
2487 | ||
2488 | YarrOp& lastOp = m_ops[lastOpIndex]; | |
2489 | YarrOp& thisOp = m_ops[thisOpIndex]; | |
2490 | ||
2491 | lastOp.m_alternative = alternative; | |
2492 | lastOp.m_nextOp = thisOpIndex; | |
2493 | thisOp.m_previousOp = lastOpIndex; | |
2494 | ||
2495 | ++currentAlternativeIndex; | |
2496 | } while (currentAlternativeIndex < alternatives.size() && alternatives[currentAlternativeIndex]->onceThrough()); | |
2497 | ||
2498 | YarrOp& lastOp = m_ops.last(); | |
2499 | ||
2500 | ASSERT(lastOp.m_op == OpBodyAlternativeNext); | |
2501 | lastOp.m_op = OpBodyAlternativeEnd; | |
2502 | lastOp.m_alternative = 0; | |
2503 | lastOp.m_nextOp = notFound; | |
2504 | } | |
2505 | ||
2506 | if (currentAlternativeIndex == alternatives.size()) { | |
2507 | m_ops.append(YarrOp(OpMatchFailed)); | |
2508 | return; | |
2509 | } | |
2510 | ||
2511 | // Emit the repeated alternatives. | |
2512 | size_t repeatLoop = m_ops.size(); | |
2513 | m_ops.append(YarrOp(OpBodyAlternativeBegin)); | |
2514 | m_ops.last().m_previousOp = notFound; | |
2515 | do { | |
2516 | size_t lastOpIndex = m_ops.size() - 1; | |
93a37866 | 2517 | PatternAlternative* alternative = alternatives[currentAlternativeIndex].get(); |
14957cd0 A |
2518 | ASSERT(!alternative->onceThrough()); |
2519 | opCompileAlternative(alternative); | |
2520 | ||
2521 | size_t thisOpIndex = m_ops.size(); | |
2522 | m_ops.append(YarrOp(OpBodyAlternativeNext)); | |
2523 | ||
2524 | YarrOp& lastOp = m_ops[lastOpIndex]; | |
2525 | YarrOp& thisOp = m_ops[thisOpIndex]; | |
2526 | ||
2527 | lastOp.m_alternative = alternative; | |
2528 | lastOp.m_nextOp = thisOpIndex; | |
2529 | thisOp.m_previousOp = lastOpIndex; | |
2530 | ||
2531 | ++currentAlternativeIndex; | |
2532 | } while (currentAlternativeIndex < alternatives.size()); | |
2533 | YarrOp& lastOp = m_ops.last(); | |
2534 | ASSERT(lastOp.m_op == OpBodyAlternativeNext); | |
2535 | lastOp.m_op = OpBodyAlternativeEnd; | |
2536 | lastOp.m_alternative = 0; | |
2537 | lastOp.m_nextOp = repeatLoop; | |
2538 | } | |
2539 | ||
2540 | void generateEnter() | |
2541 | { | |
2542 | #if CPU(X86_64) | |
2543 | push(X86Registers::ebp); | |
2544 | move(stackPointerRegister, X86Registers::ebp); | |
2545 | push(X86Registers::ebx); | |
93a37866 A |
2546 | // The ABI doesn't guarantee the upper bits are zero on unsigned arguments, so clear them ourselves. |
2547 | zeroExtend32ToPtr(index, index); | |
2548 | zeroExtend32ToPtr(length, length); | |
2549 | #if OS(WINDOWS) | |
2550 | if (compileMode == IncludeSubpatterns) | |
2551 | loadPtr(Address(X86Registers::ebp, 6 * sizeof(void*)), output); | |
2552 | #endif | |
14957cd0 A |
2553 | #elif CPU(X86) |
2554 | push(X86Registers::ebp); | |
2555 | move(stackPointerRegister, X86Registers::ebp); | |
2556 | // TODO: do we need spill registers to fill the output pointer if there are no sub captures? | |
2557 | push(X86Registers::ebx); | |
2558 | push(X86Registers::edi); | |
2559 | push(X86Registers::esi); | |
2560 | // load output into edi (2 = saved ebp + return address). | |
2561 | #if COMPILER(MSVC) | |
2562 | loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), input); | |
2563 | loadPtr(Address(X86Registers::ebp, 3 * sizeof(void*)), index); | |
2564 | loadPtr(Address(X86Registers::ebp, 4 * sizeof(void*)), length); | |
6fe7ccc8 A |
2565 | if (compileMode == IncludeSubpatterns) |
2566 | loadPtr(Address(X86Registers::ebp, 5 * sizeof(void*)), output); | |
14957cd0 | 2567 | #else |
6fe7ccc8 A |
2568 | if (compileMode == IncludeSubpatterns) |
2569 | loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), output); | |
14957cd0 | 2570 | #endif |
93a37866 A |
2571 | #elif CPU(ARM64) |
2572 | // The ABI doesn't guarantee the upper bits are zero on unsigned arguments, so clear them ourselves. | |
2573 | zeroExtend32ToPtr(index, index); | |
2574 | zeroExtend32ToPtr(length, length); | |
14957cd0 A |
2575 | #elif CPU(ARM) |
2576 | push(ARMRegisters::r4); | |
2577 | push(ARMRegisters::r5); | |
2578 | push(ARMRegisters::r6); | |
14957cd0 A |
2579 | #elif CPU(SH4) |
2580 | push(SH4Registers::r11); | |
2581 | push(SH4Registers::r13); | |
2582 | #elif CPU(MIPS) | |
2583 | // Do nothing. | |
2584 | #endif | |
2585 | } | |
2586 | ||
2587 | void generateReturn() | |
2588 | { | |
2589 | #if CPU(X86_64) | |
93a37866 A |
2590 | #if OS(WINDOWS) |
2591 | // Store the return value in the allocated space pointed by rcx. | |
2592 | store64(returnRegister, Address(X86Registers::ecx)); | |
2593 | store64(returnRegister2, Address(X86Registers::ecx, sizeof(void*))); | |
2594 | move(X86Registers::ecx, returnRegister); | |
2595 | #endif | |
14957cd0 A |
2596 | pop(X86Registers::ebx); |
2597 | pop(X86Registers::ebp); | |
2598 | #elif CPU(X86) | |
2599 | pop(X86Registers::esi); | |
2600 | pop(X86Registers::edi); | |
2601 | pop(X86Registers::ebx); | |
2602 | pop(X86Registers::ebp); | |
2603 | #elif CPU(ARM) | |
14957cd0 A |
2604 | pop(ARMRegisters::r6); |
2605 | pop(ARMRegisters::r5); | |
2606 | pop(ARMRegisters::r4); | |
2607 | #elif CPU(SH4) | |
2608 | pop(SH4Registers::r13); | |
2609 | pop(SH4Registers::r11); | |
2610 | #elif CPU(MIPS) | |
2611 | // Do nothing | |
2612 | #endif | |
2613 | ret(); | |
2614 | } | |
2615 | ||
2616 | public: | |
6fe7ccc8 | 2617 | YarrGenerator(YarrPattern& pattern, YarrCharSize charSize) |
14957cd0 | 2618 | : m_pattern(pattern) |
6fe7ccc8 A |
2619 | , m_charSize(charSize) |
2620 | , m_charScale(m_charSize == Char8 ? TimesOne: TimesTwo) | |
14957cd0 A |
2621 | , m_shouldFallBack(false) |
2622 | , m_checked(0) | |
2623 | { | |
2624 | } | |
2625 | ||
93a37866 | 2626 | void compile(VM* vm, YarrCodeBlock& jitObject) |
14957cd0 A |
2627 | { |
2628 | generateEnter(); | |
2629 | ||
6fe7ccc8 A |
2630 | Jump hasInput = checkInput(); |
2631 | move(TrustedImmPtr((void*)WTF::notFound), returnRegister); | |
2632 | move(TrustedImm32(0), returnRegister2); | |
2633 | generateReturn(); | |
2634 | hasInput.link(this); | |
2635 | ||
2636 | if (compileMode == IncludeSubpatterns) { | |
2637 | for (unsigned i = 0; i < m_pattern.m_numSubpatterns + 1; ++i) | |
2638 | store32(TrustedImm32(-1), Address(output, (i << 1) * sizeof(int))); | |
2639 | } | |
2640 | ||
14957cd0 | 2641 | if (!m_pattern.m_body->m_hasFixedSize) |
6fe7ccc8 | 2642 | setMatchStart(index); |
14957cd0 | 2643 | |
6fe7ccc8 | 2644 | initCallFrame(); |
14957cd0 | 2645 | |
14957cd0 A |
2646 | opCompileBody(m_pattern.m_body); |
2647 | ||
14957cd0 A |
2648 | if (m_shouldFallBack) { |
2649 | jitObject.setFallBack(true); | |
2650 | return; | |
2651 | } | |
2652 | ||
2653 | generate(); | |
2654 | backtrack(); | |
2655 | ||
ed1e77d3 A |
2656 | LinkBuffer linkBuffer(*vm, *this, REGEXP_CODE_ID, JITCompilationCanFail); |
2657 | if (linkBuffer.didFailToAllocate()) { | |
2658 | jitObject.setFallBack(true); | |
2659 | return; | |
2660 | } | |
2661 | ||
14957cd0 | 2662 | m_backtrackingState.linkDataLabels(linkBuffer); |
6fe7ccc8 A |
2663 | |
2664 | if (compileMode == MatchOnly) { | |
2665 | if (m_charSize == Char8) | |
93a37866 | 2666 | jitObject.set8BitCodeMatchOnly(FINALIZE_CODE(linkBuffer, ("Match-only 8-bit regular expression"))); |
6fe7ccc8 | 2667 | else |
93a37866 | 2668 | jitObject.set16BitCodeMatchOnly(FINALIZE_CODE(linkBuffer, ("Match-only 16-bit regular expression"))); |
6fe7ccc8 A |
2669 | } else { |
2670 | if (m_charSize == Char8) | |
93a37866 | 2671 | jitObject.set8BitCode(FINALIZE_CODE(linkBuffer, ("8-bit regular expression"))); |
6fe7ccc8 | 2672 | else |
93a37866 | 2673 | jitObject.set16BitCode(FINALIZE_CODE(linkBuffer, ("16-bit regular expression"))); |
6fe7ccc8 | 2674 | } |
14957cd0 A |
2675 | jitObject.setFallBack(m_shouldFallBack); |
2676 | } | |
2677 | ||
2678 | private: | |
2679 | YarrPattern& m_pattern; | |
2680 | ||
6fe7ccc8 A |
2681 | YarrCharSize m_charSize; |
2682 | ||
2683 | Scale m_charScale; | |
2684 | ||
14957cd0 A |
2685 | // Used to detect regular expression constructs that are not currently |
2686 | // supported in the JIT; fall back to the interpreter when this is detected. | |
2687 | bool m_shouldFallBack; | |
2688 | ||
2689 | // The regular expression expressed as a linear sequence of operations. | |
2690 | Vector<YarrOp, 128> m_ops; | |
2691 | ||
2692 | // This records the current input offset being applied due to the current | |
2693 | // set of alternatives we are nested within. E.g. when matching the | |
2694 | // character 'b' within the regular expression /abc/, we will know that | |
2695 | // the minimum size for the alternative is 3, checked upon entry to the | |
2696 | // alternative, and that 'b' is at offset 1 from the start, and as such | |
2697 | // when matching 'b' we need to apply an offset of -2 to the load. | |
2698 | // | |
2699 | // FIXME: This should go away. Rather than tracking this value throughout | |
2700 | // code generation, we should gather this information up front & store it | |
2701 | // on the YarrOp structure. | |
2702 | int m_checked; | |
2703 | ||
2704 | // This class records state whilst generating the backtracking path of code. | |
2705 | BacktrackingState m_backtrackingState; | |
2706 | }; | |
2707 | ||
93a37866 | 2708 | void jitCompile(YarrPattern& pattern, YarrCharSize charSize, VM* vm, YarrCodeBlock& jitObject, YarrJITCompileMode mode) |
14957cd0 | 2709 | { |
6fe7ccc8 | 2710 | if (mode == MatchOnly) |
93a37866 | 2711 | YarrGenerator<MatchOnly>(pattern, charSize).compile(vm, jitObject); |
6fe7ccc8 | 2712 | else |
93a37866 | 2713 | YarrGenerator<IncludeSubpatterns>(pattern, charSize).compile(vm, jitObject); |
14957cd0 A |
2714 | } |
2715 | ||
2716 | }} | |
2717 | ||
2718 | #endif |