]> git.saurik.com Git - apple/javascriptcore.git/blame - yarr/YarrJIT.cpp
JavaScriptCore-1097.13.tar.gz
[apple/javascriptcore.git] / yarr / YarrJIT.cpp
CommitLineData
14957cd0
A
1/*
2 * Copyright (C) 2009 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "config.h"
27#include "YarrJIT.h"
28
6fe7ccc8 29#include <wtf/ASCIICType.h>
14957cd0
A
30#include "LinkBuffer.h"
31#include "Yarr.h"
6fe7ccc8 32#include "YarrCanonicalizeUCS2.h"
14957cd0
A
33
34#if ENABLE(YARR_JIT)
35
36using namespace WTF;
37
38namespace JSC { namespace Yarr {
39
6fe7ccc8 40template<YarrJITCompileMode compileMode>
14957cd0
A
41class YarrGenerator : private MacroAssembler {
42 friend void jitCompile(JSGlobalData*, YarrCodeBlock& jitObject, const UString& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline);
43
#if CPU(ARM)
    // Fixed register assignment for the generated matcher. 'input' is the
    // subject string, 'index' the current position, 'length' the string
    // length, and 'output' the match-result vector (or, in MatchOnly mode,
    // the match start — see setMatchStart/getMatchStart below).
    static const RegisterID input = ARMRegisters::r0;
    static const RegisterID index = ARMRegisters::r1;
    static const RegisterID length = ARMRegisters::r2;
    static const RegisterID output = ARMRegisters::r4;

    // Scratch registers used by the generated code.
    static const RegisterID regT0 = ARMRegisters::r5;
    static const RegisterID regT1 = ARMRegisters::r6;

    static const RegisterID returnRegister = ARMRegisters::r0;
    static const RegisterID returnRegister2 = ARMRegisters::r1;
#elif CPU(MIPS)
    static const RegisterID input = MIPSRegisters::a0;
    static const RegisterID index = MIPSRegisters::a1;
    static const RegisterID length = MIPSRegisters::a2;
    static const RegisterID output = MIPSRegisters::a3;

    static const RegisterID regT0 = MIPSRegisters::t4;
    static const RegisterID regT1 = MIPSRegisters::t5;

    static const RegisterID returnRegister = MIPSRegisters::v0;
    static const RegisterID returnRegister2 = MIPSRegisters::v1;
#elif CPU(SH4)
    static const RegisterID input = SH4Registers::r4;
    static const RegisterID index = SH4Registers::r5;
    static const RegisterID length = SH4Registers::r6;
    static const RegisterID output = SH4Registers::r7;

    static const RegisterID regT0 = SH4Registers::r0;
    static const RegisterID regT1 = SH4Registers::r1;

    static const RegisterID returnRegister = SH4Registers::r0;
    static const RegisterID returnRegister2 = SH4Registers::r1;
#elif CPU(X86)
    static const RegisterID input = X86Registers::eax;
    static const RegisterID index = X86Registers::edx;
    static const RegisterID length = X86Registers::ecx;
    static const RegisterID output = X86Registers::edi;

    static const RegisterID regT0 = X86Registers::ebx;
    static const RegisterID regT1 = X86Registers::esi;

    static const RegisterID returnRegister = X86Registers::eax;
    static const RegisterID returnRegister2 = X86Registers::edx;
#elif CPU(X86_64)
    static const RegisterID input = X86Registers::edi;
    static const RegisterID index = X86Registers::esi;
    static const RegisterID length = X86Registers::edx;
    static const RegisterID output = X86Registers::ecx;

    static const RegisterID regT0 = X86Registers::eax;
    static const RegisterID regT1 = X86Registers::ebx;

    static const RegisterID returnRegister = X86Registers::eax;
    static const RegisterID returnRegister2 = X86Registers::edx;
#endif
100
101 void optimizeAlternative(PatternAlternative* alternative)
102 {
103 if (!alternative->m_terms.size())
104 return;
105
106 for (unsigned i = 0; i < alternative->m_terms.size() - 1; ++i) {
107 PatternTerm& term = alternative->m_terms[i];
108 PatternTerm& nextTerm = alternative->m_terms[i + 1];
109
110 if ((term.type == PatternTerm::TypeCharacterClass)
111 && (term.quantityType == QuantifierFixedCount)
112 && (nextTerm.type == PatternTerm::TypePatternCharacter)
113 && (nextTerm.quantityType == QuantifierFixedCount)) {
114 PatternTerm termCopy = term;
115 alternative->m_terms[i] = nextTerm;
116 alternative->m_terms[i + 1] = termCopy;
117 }
118 }
119 }
120
    // Emits a binary search over the sorted 'ranges', interleaving the sorted
    // singleton 'matches' that fall between ranges so both lists are consumed
    // in ascending order. Jumps to 'matchDest' on membership; failure exits are
    // collected in 'failures'. '*matchIndex' advances past every singleton
    // handled here (including those swallowed by a range).
    // NOTE(review): lo/hi are held in plain char — assumes these ranges are
    // ASCII-only (non-ASCII ranges live in m_rangesUnicode); confirm.
    void matchCharacterClassRange(RegisterID character, JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar* matches, unsigned matchCount)
    {
        do {
            // pick which range we're going to generate
            int which = count >> 1;
            char lo = ranges[which].begin;
            char hi = ranges[which].end;

            // check if there are any ranges or matches below lo. If not, just jl to failure -
            // if there is anything else to check, check that first, if it falls through jmp to failure.
            if ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
                Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));

                // generate code for all ranges before this one
                if (which)
                    matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);

                // Compare against every singleton below 'lo'.
                while ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
                    matchDest.append(branch32(Equal, character, Imm32((unsigned short)matches[*matchIndex])));
                    ++*matchIndex;
                }
                failures.append(jump());

                loOrAbove.link(this);
            } else if (which) {
                Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));

                matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
                failures.append(jump());

                loOrAbove.link(this);
            } else
                failures.append(branch32(LessThan, character, Imm32((unsigned short)lo)));

            // Singletons inside [lo, hi] are already covered by the range test.
            while ((*matchIndex < matchCount) && (matches[*matchIndex] <= hi))
                ++*matchIndex;

            matchDest.append(branch32(LessThanOrEqual, character, Imm32((unsigned short)hi)));
            // fall through to here, the value is above hi.

            // shuffle along & loop around if there are any more matches to handle.
            unsigned next = which + 1;
            ranges += next;
            count -= next;
        } while (count);
    }
167
    // Emit code jumping to 'matchDest' when 'character' is a member of
    // 'charClass'; falls through when it is not. May clobber 'character'
    // (the case-folding paths OR a case bit into the register).
    void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass)
    {
        // Fast path: the class has a precomputed membership table.
        if (charClass->m_table) {
            ExtendedAddress tableEntry(character, reinterpret_cast<intptr_t>(charClass->m_table->m_table));
            matchDest.append(branchTest8(charClass->m_table->m_inverted ? Zero : NonZero, tableEntry));
            return;
        }
        Jump unicodeFail;
        if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) {
            // Handle non-ASCII members first; ASCII characters skip to the code below.
            Jump isAscii = branch32(LessThanOrEqual, character, TrustedImm32(0x7f));

            if (charClass->m_matchesUnicode.size()) {
                for (unsigned i = 0; i < charClass->m_matchesUnicode.size(); ++i) {
                    UChar ch = charClass->m_matchesUnicode[i];
                    matchDest.append(branch32(Equal, character, Imm32(ch)));
                }
            }

            if (charClass->m_rangesUnicode.size()) {
                for (unsigned i = 0; i < charClass->m_rangesUnicode.size(); ++i) {
                    UChar lo = charClass->m_rangesUnicode[i].begin;
                    UChar hi = charClass->m_rangesUnicode[i].end;

                    // In range iff not below 'lo' and not above 'hi'.
                    Jump below = branch32(LessThan, character, Imm32(lo));
                    matchDest.append(branch32(LessThanOrEqual, character, Imm32(hi)));
                    below.link(this);
                }
            }

            unicodeFail = jump();
            isAscii.link(this);
        }

        if (charClass->m_ranges.size()) {
            // Binary-search the ASCII ranges, folding in singleton matches.
            unsigned matchIndex = 0;
            JumpList failures;
            matchCharacterClassRange(character, failures, matchDest, charClass->m_ranges.begin(), charClass->m_ranges.size(), &matchIndex, charClass->m_matches.begin(), charClass->m_matches.size());
            // Any singletons above the last range still need explicit compares.
            while (matchIndex < charClass->m_matches.size())
                matchDest.append(branch32(Equal, character, Imm32((unsigned short)charClass->m_matches[matchIndex++])));

            failures.link(this);
        } else if (charClass->m_matches.size()) {
            // optimization: gather 'a','A' etc back together, can mask & test once.
            Vector<char> matchesAZaz;

            for (unsigned i = 0; i < charClass->m_matches.size(); ++i) {
                char ch = charClass->m_matches[i];
                if (m_pattern.m_ignoreCase) {
                    if (isASCIILower(ch)) {
                        matchesAZaz.append(ch);
                        continue;
                    }
                    if (isASCIIUpper(ch))
                        continue;
                }
                matchDest.append(branch32(Equal, character, Imm32((unsigned short)ch)));
            }

            if (unsigned countAZaz = matchesAZaz.size()) {
                // Lower-case the register once (clobbers it), then compare
                // against the lower-case letters only.
                or32(TrustedImm32(32), character);
                for (unsigned i = 0; i < countAZaz; ++i)
                    matchDest.append(branch32(Equal, character, TrustedImm32(matchesAZaz[i])));
            }
        }

        if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size())
            unicodeFail.link(this);
    }
236
    // Advance 'index' by countToCheck and return a jump taken when that
    // position is past the end of the input.
    // Jumps if input not available; will have (incorrectly) incremented already!
    Jump jumpIfNoAvailableInput(unsigned countToCheck = 0)
    {
        if (countToCheck)
            add32(Imm32(countToCheck), index);
        return branch32(Above, index, length);
    }
244
245 Jump jumpIfAvailableInput(unsigned countToCheck)
246 {
247 add32(Imm32(countToCheck), index);
248 return branch32(BelowOrEqual, index, length);
249 }
250
251 Jump checkInput()
252 {
253 return branch32(BelowOrEqual, index, length);
254 }
255
256 Jump atEndOfInput()
257 {
258 return branch32(Equal, index, length);
259 }
260
261 Jump notAtEndOfInput()
262 {
263 return branch32(NotEqual, index, length);
264 }
265
    // Loads the character at 'inputPosition' into 'character' and returns a
    // jump taken when it does not equal 'ch'. For ASCII letters under
    // ignoreCase both sides are folded to lower case first (note: this
    // clobbers 'character' and the local copy of 'ch').
    Jump jumpIfCharNotEquals(UChar ch, int inputPosition, RegisterID character)
    {
        readCharacter(inputPosition, character);

        // For case-insensitive compares, non-ascii characters that have different
        // upper & lower case representations are converted to a character class.
        ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(ch) || isCanonicallyUnique(ch));
        if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
            or32(TrustedImm32(0x20), character);
            ch |= 0x20;
        }

        return branch32(NotEqual, character, Imm32(ch));
    }
280
281 void readCharacter(int inputPosition, RegisterID reg)
282 {
6fe7ccc8
A
283 if (m_charSize == Char8)
284 load8(BaseIndex(input, index, TimesOne, inputPosition * sizeof(char)), reg);
285 else
286 load16(BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), reg);
14957cd0
A
287 }
288
    // Spill 'reg' into the backtracking frame slot at 'frameLocation'
    // (frame slots are addressed relative to the stack pointer; see
    // storeToFrameWithPatch / initCallFrame).
    void storeToFrame(RegisterID reg, unsigned frameLocation)
    {
        poke(reg, frameLocation);
    }
293
    // Store an immediate into the backtracking frame slot at 'frameLocation'.
    void storeToFrame(TrustedImm32 imm, unsigned frameLocation)
    {
        poke(imm, frameLocation);
    }
298
    // Store a placeholder pointer (a backtrack return address) into the frame
    // slot; the returned label is patched with the real address at link time
    // (see BacktrackingState::linkDataLabels).
    DataLabelPtr storeToFrameWithPatch(unsigned frameLocation)
    {
        return storePtrWithPatch(TrustedImmPtr(0), Address(stackPointerRegister, frameLocation * sizeof(void*)));
    }
303
    // Reload 'reg' from the backtracking frame slot at 'frameLocation'.
    void loadFromFrame(unsigned frameLocation, RegisterID reg)
    {
        peek(reg, frameLocation);
    }
308
    // Jump to the (return) address stored in the given frame slot.
    void loadFromFrameAndJump(unsigned frameLocation)
    {
        jump(Address(stackPointerRegister, frameLocation * sizeof(void*)));
    }
313
6fe7ccc8
A
314 void initCallFrame()
315 {
316 unsigned callFrameSize = m_pattern.m_body->m_callFrameSize;
317 if (callFrameSize)
318 subPtr(Imm32(callFrameSize * sizeof(void*)), stackPointerRegister);
319 }
320 void removeCallFrame()
321 {
322 unsigned callFrameSize = m_pattern.m_body->m_callFrameSize;
323 if (callFrameSize)
324 addPtr(Imm32(callFrameSize * sizeof(void*)), stackPointerRegister);
325 }
326
327 // Used to record subpatters, should only be called if compileMode is IncludeSubpatterns.
328 void setSubpatternStart(RegisterID reg, unsigned subpattern)
329 {
330 ASSERT(subpattern);
331 // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-(
332 store32(reg, Address(output, (subpattern << 1) * sizeof(int)));
333 }
334 void setSubpatternEnd(RegisterID reg, unsigned subpattern)
335 {
336 ASSERT(subpattern);
337 // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-(
338 store32(reg, Address(output, ((subpattern << 1) + 1) * sizeof(int)));
339 }
340 void clearSubpatternStart(unsigned subpattern)
341 {
342 ASSERT(subpattern);
343 // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-(
344 store32(TrustedImm32(-1), Address(output, (subpattern << 1) * sizeof(int)));
345 }
346
347 // We use one of three different strategies to track the start of the current match,
348 // while matching.
349 // 1) If the pattern has a fixed size, do nothing! - we calculate the value lazily
350 // at the end of matching. This is irrespective of compileMode, and in this case
351 // these methods should never be called.
352 // 2) If we're compiling IncludeSubpatterns, 'output' contains a pointer to an output
353 // vector, store the match start in the output vector.
354 // 3) If we're compiling MatchOnly, 'output' is unused, store the match start directly
355 // in this register.
356 void setMatchStart(RegisterID reg)
357 {
358 ASSERT(!m_pattern.m_body->m_hasFixedSize);
359 if (compileMode == IncludeSubpatterns)
360 store32(reg, output);
361 else
362 move(reg, output);
363 }
364 void getMatchStart(RegisterID reg)
365 {
366 ASSERT(!m_pattern.m_body->m_hasFixedSize);
367 if (compileMode == IncludeSubpatterns)
368 load32(output, reg);
369 else
370 move(output, reg);
371 }
372
14957cd0
A
    enum YarrOpCode {
        // These nodes wrap body alternatives - those in the main disjunction,
        // rather than subpatterns or assertions. These are chained together in
        // a doubly linked list, with a 'begin' node for the first alternative,
        // a 'next' node for each subsequent alternative, and an 'end' node at
        // the end. In the case of repeating alternatives, the 'end' node also
        // has a reference back to 'begin'.
        OpBodyAlternativeBegin,
        OpBodyAlternativeNext,
        OpBodyAlternativeEnd,
        // Similar to the body alternatives, but used for subpatterns with two
        // or more alternatives.
        OpNestedAlternativeBegin,
        OpNestedAlternativeNext,
        OpNestedAlternativeEnd,
        // Used for alternatives in subpatterns where there is only a single
        // alternative (backtracking is easier in these cases), or for alternatives
        // which never need to be backtracked (those in parenthetical assertions,
        // terminal subpatterns).
        OpSimpleNestedAlternativeBegin,
        OpSimpleNestedAlternativeNext,
        OpSimpleNestedAlternativeEnd,
        // Used to wrap 'Once' subpattern matches (quantityCount == 1).
        OpParenthesesSubpatternOnceBegin,
        OpParenthesesSubpatternOnceEnd,
        // Used to wrap 'Terminal' subpattern matches (at the end of the regexp).
        OpParenthesesSubpatternTerminalBegin,
        OpParenthesesSubpatternTerminalEnd,
        // Used to wrap parenthetical assertions.
        OpParentheticalAssertionBegin,
        OpParentheticalAssertionEnd,
        // Wraps all simple terms (pattern characters, character classes).
        OpTerm,
        // Where an expression contains only 'once through' body alternatives
        // and no repeating ones, this op is used to return match failure.
        OpMatchFailed
    };
410
    // This structure is used to hold the compiled opcode information,
    // including reference back to the original PatternTerm/PatternAlternatives,
    // and JIT compilation data structures.
    struct YarrOp {
        explicit YarrOp(PatternTerm* term)
            : m_op(OpTerm)
            , m_term(term)
            , m_isDeadCode(false)
        {
        }

        // Note: this constructor leaves m_term uninitialized; intended for
        // ops that have no associated PatternTerm.
        explicit YarrOp(YarrOpCode op)
            : m_op(op)
            , m_isDeadCode(false)
        {
        }

        // The operation, as a YarrOpCode, and also a reference to the PatternTerm.
        YarrOpCode m_op;
        PatternTerm* m_term;

        // For alternatives, this holds the PatternAlternative and doubly linked
        // references to this alternative's siblings. In the case of the
        // OpBodyAlternativeEnd node at the end of a section of repeating nodes,
        // m_nextOp will reference the OpBodyAlternativeBegin node of the first
        // repeating alternative.
        PatternAlternative* m_alternative;
        size_t m_previousOp;
        size_t m_nextOp;

        // Used to record a set of Jumps out of the generated code, typically
        // used for jumps out to backtracking code, and a single reentry back
        // into the code for a node (likely where a backtrack will trigger
        // rematching).
        Label m_reentry;
        JumpList m_jumps;

        // Used for backtracking when the prior alternative did not consume any
        // characters but matched.
        Jump m_zeroLengthMatch;

        // This flag is used to null out the second pattern character, when
        // two are fused to match a pair together.
        bool m_isDeadCode;

        // Currently used in the case of some of the more complex management of
        // 'm_checked', to cache the offset used in this alternative, to avoid
        // recalculating it.
        int m_checkAdjust;

        // Used by OpNestedAlternativeNext/End to hold the pointer to the
        // value that will be pushed into the pattern's frame to return to,
        // upon backtracking back into the disjunction.
        DataLabelPtr m_returnAddress;
    };
466
    // BacktrackingState
    // This class encapsulates information about the state of code generation
    // whilst generating the code for backtracking, when a term fails to match.
    // Upon entry to code generation of the backtracking code for a given node,
    // the Backtracking state will hold references to all control flow sources
    // that are outputs in need of further backtracking from the prior node
    // generated (which is the subsequent operation in the regular expression,
    // and in the m_ops Vector, since we generated backtracking backwards).
    // These references to control flow take the form of:
    //  - A jump list of jumps, to be linked to code that will backtrack them
    //    further.
    //  - A set of DataLabelPtr values, to be populated with values to be
    //    treated effectively as return addresses backtracking into complex
    //    subpatterns.
    //  - A flag indicating that the current sequence of generated code up to
    //    this point requires backtracking.
    class BacktrackingState {
    public:
        BacktrackingState()
            : m_pendingFallthrough(false)
        {
        }

        // Add a jump or jumps, a return address, or set the flag indicating
        // that the current 'fallthrough' control flow requires backtracking.
        void append(const Jump& jump)
        {
            m_laterFailures.append(jump);
        }
        void append(JumpList& jumpList)
        {
            m_laterFailures.append(jumpList);
        }
        void append(const DataLabelPtr& returnAddress)
        {
            m_pendingReturns.append(returnAddress);
        }
        void fallthrough()
        {
            ASSERT(!m_pendingFallthrough);
            m_pendingFallthrough = true;
        }

        // These methods clear the backtracking state, either linking to the
        // current location, a provided label, or copying the backtracking out
        // to a JumpList. All actions may require code generation to take place,
        // and as such are passed a pointer to the assembler.
        void link(MacroAssembler* assembler)
        {
            // Pending return addresses are resolved to the current location
            // (actual patching is deferred to linkDataLabels).
            if (m_pendingReturns.size()) {
                Label here(assembler);
                for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
                    m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], here));
                m_pendingReturns.clear();
            }
            m_laterFailures.link(assembler);
            m_laterFailures.clear();
            m_pendingFallthrough = false;
        }
        void linkTo(Label label, MacroAssembler* assembler)
        {
            if (m_pendingReturns.size()) {
                for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
                    m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], label));
                m_pendingReturns.clear();
            }
            // A pending fallthrough needs an explicit jump to reach 'label'.
            if (m_pendingFallthrough)
                assembler->jump(label);
            m_laterFailures.linkTo(label, assembler);
            m_laterFailures.clear();
            m_pendingFallthrough = false;
        }
        void takeBacktracksToJumpList(JumpList& jumpList, MacroAssembler* assembler)
        {
            if (m_pendingReturns.size()) {
                Label here(assembler);
                for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
                    m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], here));
                m_pendingReturns.clear();
                // Force a jump below so the landing pad for the pending
                // returns is routed into jumpList too.
                m_pendingFallthrough = true;
            }
            if (m_pendingFallthrough)
                jumpList.append(assembler->jump());
            jumpList.append(m_laterFailures);
            m_laterFailures.clear();
            m_pendingFallthrough = false;
        }

        bool isEmpty()
        {
            return m_laterFailures.empty() && m_pendingReturns.isEmpty() && !m_pendingFallthrough;
        }

        // Called at the end of code generation to link all return addresses.
        void linkDataLabels(LinkBuffer& linkBuffer)
        {
            ASSERT(isEmpty());
            for (unsigned i = 0; i < m_backtrackRecords.size(); ++i)
                linkBuffer.patch(m_backtrackRecords[i].m_dataLabel, linkBuffer.locationOf(m_backtrackRecords[i].m_backtrackLocation));
        }

    private:
        // Pairs a patchable pointer store with the label it must point at.
        struct ReturnAddressRecord {
            ReturnAddressRecord(DataLabelPtr dataLabel, Label backtrackLocation)
                : m_dataLabel(dataLabel)
                , m_backtrackLocation(backtrackLocation)
            {
            }

            DataLabelPtr m_dataLabel;
            Label m_backtrackLocation;
        };

        JumpList m_laterFailures;
        bool m_pendingFallthrough;
        Vector<DataLabelPtr, 4> m_pendingReturns;
        Vector<ReturnAddressRecord, 4> m_backtrackRecords;
    };
585
586 // Generation methods:
587 // ===================
588
589 // This method provides a default implementation of backtracking common
590 // to many terms; terms commonly jump out of the forwards matching path
591 // on any failed conditions, and add these jumps to the m_jumps list. If
592 // no special handling is required we can often just backtrack to m_jumps.
593 void backtrackTermDefault(size_t opIndex)
594 {
595 YarrOp& op = m_ops[opIndex];
596 m_backtrackingState.append(op.m_jumps);
597 }
598
    // Generate matching code for a '^' (beginning of line) assertion.
    // In multiline mode also matches immediately after a newline character.
    void generateAssertionBOL(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        if (m_pattern.m_multiline) {
            const RegisterID character = regT0;

            JumpList matchDest;
            // Position 0 in the input trivially satisfies '^'.
            if (!term->inputPosition)
                matchDest.append(branch32(Equal, index, Imm32(m_checked)));

            // Otherwise the preceding character must be a newline.
            readCharacter((term->inputPosition - m_checked) - 1, character);
            matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
            op.m_jumps.append(jump());

            matchDest.link(this);
        } else {
            // Erk, really should poison out these alternatives early. :-/
            if (term->inputPosition)
                op.m_jumps.append(jump());
            else
                op.m_jumps.append(branch32(NotEqual, index, Imm32(m_checked)));
        }
    }
    // '^' needs no bespoke backtracking; route its failure jumps onward.
    void backtrackAssertionBOL(size_t opIndex)
    {
        backtrackTermDefault(opIndex);
    }
628
    // Generate matching code for a '$' (end of line) assertion.
    // In multiline mode also matches immediately before a newline character.
    void generateAssertionEOL(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        if (m_pattern.m_multiline) {
            const RegisterID character = regT0;

            JumpList matchDest;
            // The end of the input trivially satisfies '$'.
            if (term->inputPosition == m_checked)
                matchDest.append(atEndOfInput());

            // Otherwise the following character must be a newline.
            readCharacter(term->inputPosition - m_checked, character);
            matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
            op.m_jumps.append(jump());

            matchDest.link(this);
        } else {
            if (term->inputPosition == m_checked)
                op.m_jumps.append(notAtEndOfInput());
            // Erk, really should poison out these alternatives early. :-/
            else
                op.m_jumps.append(jump());
        }
    }
    // '$' needs no bespoke backtracking; route its failure jumps onward.
    void backtrackAssertionEOL(size_t opIndex)
    {
        backtrackTermDefault(opIndex);
    }
658
    // Tests whether the character at the assertion's input position is a word
    // character, jumping to 'nextIsWordChar' when it is.
    // Also falls through on nextIsNotWordChar.
    void matchAssertionWordchar(size_t opIndex, JumpList& nextIsWordChar, JumpList& nextIsNotWordChar)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID character = regT0;

        // At the end of the input there is no next character to classify.
        if (term->inputPosition == m_checked)
            nextIsNotWordChar.append(atEndOfInput());

        readCharacter((term->inputPosition - m_checked), character);
        matchCharacterClass(character, nextIsWordChar, m_pattern.wordcharCharacterClass());
    }
673
    // Generate code for '\b' / '\B' word-boundary assertions: classify the
    // previous and next characters as word/non-word, and combine the four
    // possible outcomes. For inverted assertions ('\B') the success/failure
    // jump lists passed to matchAssertionWordchar are swapped.
    void generateAssertionWordBoundary(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID character = regT0;

        Jump atBegin;
        JumpList matchDest;
        // At the very start of the input there is no previous character;
        // treat it as a non-wordchar (skip the read below).
        if (!term->inputPosition)
            atBegin = branch32(Equal, index, Imm32(m_checked));
        readCharacter((term->inputPosition - m_checked) - 1, character);
        matchCharacterClass(character, matchDest, m_pattern.wordcharCharacterClass());
        if (!term->inputPosition)
            atBegin.link(this);

        // We fall through to here if the last character was not a wordchar.
        JumpList nonWordCharThenWordChar;
        JumpList nonWordCharThenNonWordChar;
        if (term->invert()) {
            matchAssertionWordchar(opIndex, nonWordCharThenNonWordChar, nonWordCharThenWordChar);
            nonWordCharThenWordChar.append(jump());
        } else {
            matchAssertionWordchar(opIndex, nonWordCharThenWordChar, nonWordCharThenNonWordChar);
            nonWordCharThenNonWordChar.append(jump());
        }
        // This list holds the failing combination for this half of the test
        // (the lists were swapped above when inverted).
        op.m_jumps.append(nonWordCharThenNonWordChar);

        // We jump here if the last character was a wordchar.
        matchDest.link(this);
        JumpList wordCharThenWordChar;
        JumpList wordCharThenNonWordChar;
        if (term->invert()) {
            matchAssertionWordchar(opIndex, wordCharThenNonWordChar, wordCharThenWordChar);
            wordCharThenWordChar.append(jump());
        } else {
            matchAssertionWordchar(opIndex, wordCharThenWordChar, wordCharThenNonWordChar);
            // This can fall-though!
        }

        op.m_jumps.append(wordCharThenWordChar);

        // Both success paths converge here and fall through to the next term.
        nonWordCharThenWordChar.link(this);
        wordCharThenNonWordChar.link(this);
    }
    // Word boundaries need no bespoke backtracking; route failure jumps onward.
    void backtrackAssertionWordBoundary(size_t opIndex)
    {
        backtrackTermDefault(opIndex);
    }
723
    // Match a single fixed-count pattern character. As an optimization, up to
    // four (8-bit strings) or two (16-bit strings) adjacent single fixed
    // characters are fused into one wider load-and-compare; the fused ops are
    // flagged as dead code so they emit nothing when visited.
    void generatePatternCharacterOnce(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];

        // This op may already have been fused into a preceding compare.
        if (op.m_isDeadCode)
            return;

        // m_ops always ends with a OpBodyAlternativeEnd or OpMatchFailed
        // node, so there must always be at least one more node.
        ASSERT(opIndex + 1 < m_ops.size());
        YarrOp* nextOp = &m_ops[opIndex + 1];

        PatternTerm* term = op.m_term;
        UChar ch = term->patternCharacter;

        if ((ch > 0xff) && (m_charSize == Char8)) {
            // Have a 16 bit pattern character and an 8 bit string - short circuit
            op.m_jumps.append(jump());
            return;
        }

        const RegisterID character = regT0;
        int maxCharactersAtOnce = m_charSize == Char8 ? 4 : 2;
        unsigned ignoreCaseMask = 0;
        int allCharacters = ch;
        int numberCharacters;
        int startTermPosition = term->inputPosition;

        // For case-insensitive compares, non-ascii characters that have different
        // upper & lower case representations are converted to a character class.
        ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(ch) || isCanonicallyUnique(ch));

        if (m_pattern.m_ignoreCase && isASCIIAlpha(ch))
            ignoreCaseMask |= 32;

        // Fuse subsequent adjacent single fixed pattern characters into this
        // compare, packing each into the next 8/16 bits of 'allCharacters'.
        for (numberCharacters = 1; numberCharacters < maxCharactersAtOnce && nextOp->m_op == OpTerm; ++numberCharacters, nextOp = &m_ops[opIndex + numberCharacters]) {
            PatternTerm* nextTerm = nextOp->m_term;

            if (nextTerm->type != PatternTerm::TypePatternCharacter
                || nextTerm->quantityType != QuantifierFixedCount
                || nextTerm->quantityCount != 1
                || nextTerm->inputPosition != (startTermPosition + numberCharacters))
                break;

            nextOp->m_isDeadCode = true;

            int shiftAmount = (m_charSize == Char8 ? 8 : 16) * numberCharacters;

            UChar currentCharacter = nextTerm->patternCharacter;

            if ((currentCharacter > 0xff) && (m_charSize == Char8)) {
                // Have a 16 bit pattern character and an 8 bit string - short circuit
                op.m_jumps.append(jump());
                return;
            }

            // For case-insensitive compares, non-ascii characters that have different
            // upper & lower case representations are converted to a character class.
            ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(currentCharacter) || isCanonicallyUnique(currentCharacter));

            allCharacters |= (currentCharacter << shiftAmount);

            if ((m_pattern.m_ignoreCase) && (isASCIIAlpha(currentCharacter)))
                ignoreCaseMask |= 32 << shiftAmount;
        }

        if (m_charSize == Char8) {
            switch (numberCharacters) {
            case 1:
                op.m_jumps.append(jumpIfCharNotEquals(ch, startTermPosition - m_checked, character));
                return;
            case 2: {
                BaseIndex address(input, index, TimesOne, (startTermPosition - m_checked) * sizeof(LChar));
                load16Unaligned(address, character);
                break;
            }
            case 3: {
                // Compare the first two characters as one 16-bit load, then
                // the third on its own.
                BaseIndex highAddress(input, index, TimesOne, (startTermPosition - m_checked) * sizeof(LChar));
                load16Unaligned(highAddress, character);
                if (ignoreCaseMask)
                    or32(Imm32(ignoreCaseMask), character);
                op.m_jumps.append(branch32(NotEqual, character, Imm32((allCharacters & 0xffff) | ignoreCaseMask)));
                op.m_jumps.append(jumpIfCharNotEquals(allCharacters >> 16, startTermPosition + 2 - m_checked, character));
                return;
            }
            case 4: {
                BaseIndex address(input, index, TimesOne, (startTermPosition - m_checked) * sizeof(LChar));
                load32WithUnalignedHalfWords(address, character);
                break;
            }
            }
        } else {
            switch (numberCharacters) {
            case 1:
                op.m_jumps.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked, character));
                return;
            case 2:
                BaseIndex address(input, index, TimesTwo, (term->inputPosition - m_checked) * sizeof(UChar));
                load32WithUnalignedHalfWords(address, character);
                break;
            }
        }

        // Fold case (via the mask) and compare all fused characters at once.
        if (ignoreCaseMask)
            or32(Imm32(ignoreCaseMask), character);
        op.m_jumps.append(branch32(NotEqual, character, Imm32(allCharacters | ignoreCaseMask)));
        return;
    }
    // No bespoke backtracking needed; route the term's failure jumps onward.
    void backtrackPatternCharacterOnce(size_t opIndex)
    {
        backtrackTermDefault(opIndex);
    }
836
    // Match a pattern character with a fixed repeat count: loop countRegister
    // from (index - quantityCount) up to index, comparing one character per
    // iteration; any mismatch exits via op.m_jumps.
    void generatePatternCharacterFixed(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;
        UChar ch = term->patternCharacter;

        const RegisterID character = regT0;
        const RegisterID countRegister = regT1;

        move(index, countRegister);
        sub32(Imm32(term->quantityCount.unsafeGet()), countRegister);

        Label loop(this);
        // The offset folds in the term's position and the repeat count, with
        // overflow-checked arithmetic (Checked<>).
        BaseIndex address(input, countRegister, m_charScale, (Checked<int>(term->inputPosition - m_checked + Checked<int64_t>(term->quantityCount)) * static_cast<int>(m_charSize == Char8 ? sizeof(char) : sizeof(UChar))).unsafeGet());

        if (m_charSize == Char8)
            load8(address, character);
        else
            load16(address, character);

        // For case-insensitive compares, non-ascii characters that have different
        // upper & lower case representations are converted to a character class.
        ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(ch) || isCanonicallyUnique(ch));
        if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
            or32(TrustedImm32(0x20), character);
            ch |= 0x20;
        }

        op.m_jumps.append(branch32(NotEqual, character, Imm32(ch)));
        add32(TrustedImm32(1), countRegister);
        branch32(NotEqual, countRegister, index).linkTo(loop, this);
    }
    // Fixed-count character matches hold no backtracking state; use the default.
    void backtrackPatternCharacterFixed(size_t opIndex)
    {
        backtrackTermDefault(opIndex);
    }
873
    // Emit matching code for a greedily quantified pattern character (e.g. /a*/,
    // /a+/, /a{2,5}/). Consumes as many matching characters as possible (up to
    // quantityCount), then stores the match count to the frame for backtracking.
    void generatePatternCharacterGreedy(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;
        UChar ch = term->patternCharacter;

        const RegisterID character = regT0;
        const RegisterID countRegister = regT1;

        move(TrustedImm32(0), countRegister);

        // Unless have a 16 bit pattern character and an 8 bit string - short circuit
        // (such a character can never match, so zero iterations are consumed).
        if (!((ch > 0xff) && (m_charSize == Char8))) {
            JumpList failures;
            Label loop(this);
            failures.append(atEndOfInput());
            failures.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked, character));

            add32(TrustedImm32(1), countRegister);
            add32(TrustedImm32(1), index);
            if (term->quantityCount == quantifyInfinite)
                jump(loop);
            else
                branch32(NotEqual, countRegister, Imm32(term->quantityCount.unsafeGet())).linkTo(loop, this);

            failures.link(this);
        }
        // Backtracking re-enters here after giving back one character.
        op.m_reentry = label();

        storeToFrame(countRegister, term->frameLocation);
    }
    // Backtrack a greedy character match: give back one consumed character and
    // re-enter the match tail; fail through once the count reaches zero.
    void backtrackPatternCharacterGreedy(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID countRegister = regT1;

        m_backtrackingState.link(this);

        loadFromFrame(term->frameLocation, countRegister);
        // No characters left to give back - propagate the backtrack.
        m_backtrackingState.append(branchTest32(Zero, countRegister));
        sub32(TrustedImm32(1), countRegister);
        sub32(TrustedImm32(1), index);
        jump(op.m_reentry);
    }
920
    // Emit matching code for a non-greedy quantified pattern character (e.g.
    // /a*?/). Initially matches zero characters; actual consumption happens in
    // backtrackPatternCharacterNonGreedy, which jumps back to m_reentry.
    void generatePatternCharacterNonGreedy(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID countRegister = regT1;

        move(TrustedImm32(0), countRegister);
        op.m_reentry = label();
        storeToFrame(countRegister, term->frameLocation);
    }
    // Backtrack a non-greedy character match: try to consume one more matching
    // character and re-enter; on failure (end of input, count limit reached, or
    // character mismatch) rewind index and propagate the backtrack.
    void backtrackPatternCharacterNonGreedy(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;
        UChar ch = term->patternCharacter;

        const RegisterID character = regT0;
        const RegisterID countRegister = regT1;

        m_backtrackingState.link(this);

        loadFromFrame(term->frameLocation, countRegister);

        // Unless have a 16 bit pattern character and an 8 bit string - short circuit
        // (such a character can never match, so always fail straight through).
        if (!((ch > 0xff) && (m_charSize == Char8))) {
            JumpList nonGreedyFailures;
            nonGreedyFailures.append(atEndOfInput());
            if (term->quantityCount != quantifyInfinite)
                nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityCount.unsafeGet())));
            nonGreedyFailures.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked, character));

            add32(TrustedImm32(1), countRegister);
            add32(TrustedImm32(1), index);

            jump(op.m_reentry);
            nonGreedyFailures.link(this);
        }

        // Give back everything this term consumed before falling through.
        sub32(countRegister, index);
        m_backtrackingState.fallthrough();
    }
963
    // Emit matching code for a character class matched exactly once (e.g. /[a-z]/).
    // On a (possibly inverted) mismatch, jumps to op.m_jumps for backtracking.
    void generateCharacterClassOnce(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID character = regT0;

        JumpList matchDest;
        readCharacter(term->inputPosition - m_checked, character);
        matchCharacterClass(character, matchDest, term->characterClass);

        if (term->invert())
            op.m_jumps.append(matchDest); // Inverted class: a class match is a failure.
        else {
            op.m_jumps.append(jump());    // Fall-through means no class member matched.
            matchDest.link(this);
        }
    }
    // Single character-class matches keep no state; default backtracking suffices.
    void backtrackCharacterClassOnce(size_t opIndex)
    {
        backtrackTermDefault(opIndex);
    }
986
    // Emit matching code for a character class with a fixed quantity count > 1
    // (e.g. /[a-z]{4}/). Loops over quantityCount consecutive input characters,
    // testing each against the class; any failure jumps to op.m_jumps.
    void generateCharacterClassFixed(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID character = regT0;
        const RegisterID countRegister = regT1;

        // Count from (index - quantityCount) up to index, as in the fixed
        // pattern-character case.
        move(index, countRegister);
        sub32(Imm32(term->quantityCount.unsafeGet()), countRegister);

        Label loop(this);
        JumpList matchDest;
        if (m_charSize == Char8)
            load8(BaseIndex(input, countRegister, TimesOne, (Checked<int>(term->inputPosition - m_checked + Checked<int64_t>(term->quantityCount)) * static_cast<int>(sizeof(char))).unsafeGet()), character);
        else
            load16(BaseIndex(input, countRegister, TimesTwo, (Checked<int>(term->inputPosition - m_checked + Checked<int64_t>(term->quantityCount)) * static_cast<int>(sizeof(UChar))).unsafeGet()), character);
        matchCharacterClass(character, matchDest, term->characterClass);

        if (term->invert())
            op.m_jumps.append(matchDest);
        else {
            op.m_jumps.append(jump());
            matchDest.link(this);
        }

        add32(TrustedImm32(1), countRegister);
        branch32(NotEqual, countRegister, index).linkTo(loop, this);
    }
    // Fixed-count class matches keep no state; default backtracking suffices.
    void backtrackCharacterClassFixed(size_t opIndex)
    {
        backtrackTermDefault(opIndex);
    }
1020
    // Emit matching code for a greedily quantified character class (e.g.
    // /[a-z]*/). Consumes as many class-matching characters as possible (up to
    // quantityCount), then stores the count to the frame for backtracking.
    void generateCharacterClassGreedy(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID character = regT0;
        const RegisterID countRegister = regT1;

        move(TrustedImm32(0), countRegister);

        JumpList failures;
        Label loop(this);
        failures.append(atEndOfInput());

        if (term->invert()) {
            // Inverted: membership in the class terminates the greedy run.
            readCharacter(term->inputPosition - m_checked, character);
            matchCharacterClass(character, failures, term->characterClass);
        } else {
            JumpList matchDest;
            readCharacter(term->inputPosition - m_checked, character);
            matchCharacterClass(character, matchDest, term->characterClass);
            failures.append(jump());
            matchDest.link(this);
        }

        add32(TrustedImm32(1), countRegister);
        add32(TrustedImm32(1), index);
        if (term->quantityCount != quantifyInfinite) {
            branch32(NotEqual, countRegister, Imm32(term->quantityCount.unsafeGet())).linkTo(loop, this);
            failures.append(jump());
        } else
            jump(loop);

        failures.link(this);
        // Backtracking re-enters here after giving back one character.
        op.m_reentry = label();

        storeToFrame(countRegister, term->frameLocation);
    }
    // Backtrack a greedy class match: give back one consumed character and
    // re-enter the match tail; fail through once the count reaches zero.
    void backtrackCharacterClassGreedy(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID countRegister = regT1;

        m_backtrackingState.link(this);

        loadFromFrame(term->frameLocation, countRegister);
        // No characters left to give back - propagate the backtrack.
        m_backtrackingState.append(branchTest32(Zero, countRegister));
        sub32(TrustedImm32(1), countRegister);
        sub32(TrustedImm32(1), index);
        jump(op.m_reentry);
    }
1074
    // Emit matching code for a non-greedy quantified character class (e.g.
    // /[a-z]*?/). Initially matches zero characters; consumption happens in
    // backtrackCharacterClassNonGreedy, which jumps back to m_reentry.
    void generateCharacterClassNonGreedy(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID countRegister = regT1;

        move(TrustedImm32(0), countRegister);
        op.m_reentry = label();
        storeToFrame(countRegister, term->frameLocation);
    }
    // Backtrack a non-greedy class match: try to consume one more class-matching
    // character and re-enter; on failure (end of input, count limit, or class
    // mismatch) rewind index and propagate the backtrack.
    void backtrackCharacterClassNonGreedy(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID character = regT0;
        const RegisterID countRegister = regT1;

        JumpList nonGreedyFailures;

        m_backtrackingState.link(this);

        loadFromFrame(term->frameLocation, countRegister);

        nonGreedyFailures.append(atEndOfInput());
        nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityCount.unsafeGet())));

        JumpList matchDest;
        readCharacter(term->inputPosition - m_checked, character);
        matchCharacterClass(character, matchDest, term->characterClass);

        if (term->invert())
            nonGreedyFailures.append(matchDest);
        else {
            nonGreedyFailures.append(jump());
            matchDest.link(this);
        }

        add32(TrustedImm32(1), countRegister);
        add32(TrustedImm32(1), index);

        jump(op.m_reentry);

        nonGreedyFailures.link(this);
        // Give back everything this term consumed before falling through.
        sub32(countRegister, index);
        m_backtrackingState.fallthrough();
    }
1123
    // Emit matching code for a .*-enclosed body (patterns of the form /.*x.*/).
    // After the inner match, scans backwards from the match start to the previous
    // newline (or start of input) and forwards from the current index to the next
    // newline (or end of input), widening the reported match to the whole line.
    // BOL/EOL anchors in non-multiline mode instead require the match to touch
    // the input boundaries.
    void generateDotStarEnclosure(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID character = regT0;
        const RegisterID matchPos = regT1;

        JumpList foundBeginningNewLine;
        JumpList saveStartIndex;
        JumpList foundEndingNewLine;

        ASSERT(!m_pattern.m_body->m_hasFixedSize);
        getMatchStart(matchPos);

        // Scan backwards for a newline (or position 0) to find the match start.
        saveStartIndex.append(branchTest32(Zero, matchPos));
        Label findBOLLoop(this);
        sub32(TrustedImm32(1), matchPos);
        if (m_charSize == Char8)
            load8(BaseIndex(input, matchPos, TimesOne, 0), character);
        else
            load16(BaseIndex(input, matchPos, TimesTwo, 0), character);
        matchCharacterClass(character, foundBeginningNewLine, m_pattern.newlineCharacterClass());
        branchTest32(NonZero, matchPos).linkTo(findBOLLoop, this);
        saveStartIndex.append(jump());

        foundBeginningNewLine.link(this);
        add32(TrustedImm32(1), matchPos); // Advance past newline
        saveStartIndex.link(this);

        // A ^ anchor without multiline requires the match to start at position 0.
        if (!m_pattern.m_multiline && term->anchors.bolAnchor)
            op.m_jumps.append(branchTest32(NonZero, matchPos));

        ASSERT(!m_pattern.m_body->m_hasFixedSize);
        setMatchStart(matchPos);

        move(index, matchPos);

        // Scan forwards for a newline (or end of input) to find the match end.
        Label findEOLLoop(this);
        foundEndingNewLine.append(branch32(Equal, matchPos, length));
        if (m_charSize == Char8)
            load8(BaseIndex(input, matchPos, TimesOne, 0), character);
        else
            load16(BaseIndex(input, matchPos, TimesTwo, 0), character);
        matchCharacterClass(character, foundEndingNewLine, m_pattern.newlineCharacterClass());
        add32(TrustedImm32(1), matchPos);
        jump(findEOLLoop);

        foundEndingNewLine.link(this);

        // A $ anchor without multiline requires the match to end at the input end.
        if (!m_pattern.m_multiline && term->anchors.eolAnchor)
            op.m_jumps.append(branch32(NotEqual, matchPos, length));

        move(matchPos, index);
    }
1179
    // Dot-star enclosure keeps no backtracking state; use the default behavior.
    void backtrackDotStarEnclosure(size_t opIndex)
    {
        backtrackTermDefault(opIndex);
    }
1184
1185 // Code generation/backtracking for simple terms
1186 // (pattern characters, character classes, and assertions).
1187 // These methods farm out work to the set of functions above.
    // Code generation/backtracking for simple terms
    // (pattern characters, character classes, and assertions).
    // These methods farm out work to the set of functions above.
    void generateTerm(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        switch (term->type) {
        case PatternTerm::TypePatternCharacter:
            switch (term->quantityType) {
            case QuantifierFixedCount:
                if (term->quantityCount == 1)
                    generatePatternCharacterOnce(opIndex);
                else
                    generatePatternCharacterFixed(opIndex);
                break;
            case QuantifierGreedy:
                generatePatternCharacterGreedy(opIndex);
                break;
            case QuantifierNonGreedy:
                generatePatternCharacterNonGreedy(opIndex);
                break;
            }
            break;

        case PatternTerm::TypeCharacterClass:
            switch (term->quantityType) {
            case QuantifierFixedCount:
                if (term->quantityCount == 1)
                    generateCharacterClassOnce(opIndex);
                else
                    generateCharacterClassFixed(opIndex);
                break;
            case QuantifierGreedy:
                generateCharacterClassGreedy(opIndex);
                break;
            case QuantifierNonGreedy:
                generateCharacterClassNonGreedy(opIndex);
                break;
            }
            break;

        case PatternTerm::TypeAssertionBOL:
            generateAssertionBOL(opIndex);
            break;

        case PatternTerm::TypeAssertionEOL:
            generateAssertionEOL(opIndex);
            break;

        case PatternTerm::TypeAssertionWordBoundary:
            generateAssertionWordBoundary(opIndex);
            break;

        case PatternTerm::TypeForwardReference:
            break;

        // Parentheses are handled by dedicated ops, never as plain terms. In
        // release builds ASSERT_NOT_REACHED is a no-op and control falls
        // through to the back-reference case, triggering interpreter fallback.
        case PatternTerm::TypeParenthesesSubpattern:
        case PatternTerm::TypeParentheticalAssertion:
            ASSERT_NOT_REACHED();
        case PatternTerm::TypeBackReference:
            // Back references are not supported by the JIT; fall back to the
            // Yarr interpreter.
            m_shouldFallBack = true;
            break;
        case PatternTerm::TypeDotStarEnclosure:
            generateDotStarEnclosure(opIndex);
            break;
        }
    }
    // Dispatch backtracking code generation for a simple term, mirroring the
    // dispatch structure of generateTerm above.
    void backtrackTerm(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        switch (term->type) {
        case PatternTerm::TypePatternCharacter:
            switch (term->quantityType) {
            case QuantifierFixedCount:
                if (term->quantityCount == 1)
                    backtrackPatternCharacterOnce(opIndex);
                else
                    backtrackPatternCharacterFixed(opIndex);
                break;
            case QuantifierGreedy:
                backtrackPatternCharacterGreedy(opIndex);
                break;
            case QuantifierNonGreedy:
                backtrackPatternCharacterNonGreedy(opIndex);
                break;
            }
            break;

        case PatternTerm::TypeCharacterClass:
            switch (term->quantityType) {
            case QuantifierFixedCount:
                if (term->quantityCount == 1)
                    backtrackCharacterClassOnce(opIndex);
                else
                    backtrackCharacterClassFixed(opIndex);
                break;
            case QuantifierGreedy:
                backtrackCharacterClassGreedy(opIndex);
                break;
            case QuantifierNonGreedy:
                backtrackCharacterClassNonGreedy(opIndex);
                break;
            }
            break;

        case PatternTerm::TypeAssertionBOL:
            backtrackAssertionBOL(opIndex);
            break;

        case PatternTerm::TypeAssertionEOL:
            backtrackAssertionEOL(opIndex);
            break;

        case PatternTerm::TypeAssertionWordBoundary:
            backtrackAssertionWordBoundary(opIndex);
            break;

        case PatternTerm::TypeForwardReference:
            break;

        // Parentheses are handled by dedicated ops, never as plain terms.
        case PatternTerm::TypeParenthesesSubpattern:
        case PatternTerm::TypeParentheticalAssertion:
            ASSERT_NOT_REACHED();

        case PatternTerm::TypeDotStarEnclosure:
            backtrackDotStarEnclosure(opIndex);
            break;

        case PatternTerm::TypeBackReference:
            // Back references are not supported by the JIT; fall back to the
            // Yarr interpreter.
            m_shouldFallBack = true;
            break;
        }
    }
1322
    // Forwards pass of code generation: walk m_ops from first to last, emitting
    // the matching code for each node. (The companion backwards pass, backtrack(),
    // emits the backtracking code.) m_checked tracks how much input has been
    // verified available at each point so terms can address input relative to it.
    void generate()
    {
        // Forwards generate the matching code.
        ASSERT(m_ops.size());
        size_t opIndex = 0;

        do {
            YarrOp& op = m_ops[opIndex];
            switch (op.m_op) {

            case OpTerm:
                generateTerm(opIndex);
                break;

            // OpBodyAlternativeBegin/Next/End
            //
            // These nodes wrap the set of alternatives in the body of the regular expression.
            // There may be either one or two chains of OpBodyAlternative nodes, one representing
            // the 'once through' sequence of alternatives (if any exist), and one representing
            // the repeating alternatives (again, if any exist).
            //
            // Upon normal entry to the Begin alternative, we will check that input is available.
            // Reentry to the Begin alternative will take place after the check has taken place,
            // and will assume that the input position has already been progressed as appropriate.
            //
            // Entry to subsequent Next/End alternatives occurs when the prior alternative has
            // successfully completed a match - return a success state from JIT code.
            //
            // Next alternatives allow for reentry optimized to suit backtracking from its
            // preceding alternative. It expects the input position to still be set to a position
            // appropriate to its predecessor, and it will only perform an input check if the
            // predecessor had a minimum size less than its own.
            //
            // In the case 'once through' expressions, the End node will also have a reentry
            // point to jump to when the last alternative fails. Again, this expects the input
            // position to still reflect that expected by the prior alternative.
            case OpBodyAlternativeBegin: {
                PatternAlternative* alternative = op.m_alternative;

                // Upon entry at the head of the set of alternatives, check if input is available
                // to run the first alternative. (This progresses the input position).
                op.m_jumps.append(jumpIfNoAvailableInput(alternative->m_minimumSize));
                // We will reenter after the check, and assume the input position to have been
                // set as appropriate to this alternative.
                op.m_reentry = label();

                m_checked += alternative->m_minimumSize;
                break;
            }
            case OpBodyAlternativeNext:
            case OpBodyAlternativeEnd: {
                PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
                PatternAlternative* alternative = op.m_alternative;

                // If we get here, the prior alternative matched - return success.

                // Adjust the stack pointer to remove the pattern's frame.
                removeCallFrame();

                // Load appropriate values into the return register and the first output
                // slot, and return. In the case of pattern with a fixed size, we will
                // not have yet set the value in the first output slot.
                ASSERT(index != returnRegister);
                if (m_pattern.m_body->m_hasFixedSize) {
                    move(index, returnRegister);
                    if (priorAlternative->m_minimumSize)
                        sub32(Imm32(priorAlternative->m_minimumSize), returnRegister);
                    if (compileMode == IncludeSubpatterns)
                        store32(returnRegister, output);
                } else
                    getMatchStart(returnRegister);
                if (compileMode == IncludeSubpatterns)
                    store32(index, Address(output, 4));
                move(index, returnRegister2);

                generateReturn();

                // This is the divide between the tail of the prior alternative, above, and
                // the head of the subsequent alternative, below.

                if (op.m_op == OpBodyAlternativeNext) {
                    // This is the reentry point for the Next alternative. We expect any code
                    // that jumps here to do so with the input position matching that of the
                    // PRIOR alternative, and we will only check input availability if we
                    // need to progress it forwards.
                    op.m_reentry = label();
                    if (alternative->m_minimumSize > priorAlternative->m_minimumSize) {
                        add32(Imm32(alternative->m_minimumSize - priorAlternative->m_minimumSize), index);
                        op.m_jumps.append(jumpIfNoAvailableInput());
                    } else if (priorAlternative->m_minimumSize > alternative->m_minimumSize)
                        sub32(Imm32(priorAlternative->m_minimumSize - alternative->m_minimumSize), index);
                } else if (op.m_nextOp == notFound) {
                    // This is the reentry point for the End of 'once through' alternatives,
                    // jumped to when the last alternative fails to match.
                    op.m_reentry = label();
                    sub32(Imm32(priorAlternative->m_minimumSize), index);
                }

                if (op.m_op == OpBodyAlternativeNext)
                    m_checked += alternative->m_minimumSize;
                m_checked -= priorAlternative->m_minimumSize;
                break;
            }

            // OpSimpleNestedAlternativeBegin/Next/End
            // OpNestedAlternativeBegin/Next/End
            //
            // These nodes are used to handle sets of alternatives that are nested within
            // subpatterns and parenthetical assertions. The 'simple' forms are used where
            // we do not need to be able to backtrack back into any alternative other than
            // the last, the normal forms allow backtracking into any alternative.
            //
            // Each Begin/Next node is responsible for planting an input check to ensure
            // sufficient input is available on entry. Next nodes additionally need to
            // jump to the end - Next nodes use the End node's m_jumps list to hold this
            // set of jumps.
            //
            // In the non-simple forms, successful alternative matches must store a
            // 'return address' using a DataLabelPtr, used to store the address to jump
            // to when backtracking, to get to the code for the appropriate alternative.
            case OpSimpleNestedAlternativeBegin:
            case OpNestedAlternativeBegin: {
                PatternTerm* term = op.m_term;
                PatternAlternative* alternative = op.m_alternative;
                PatternDisjunction* disjunction = term->parentheses.disjunction;

                // Calculate how much input we need to check for, and if non-zero check.
                op.m_checkAdjust = alternative->m_minimumSize;
                if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion))
                    op.m_checkAdjust -= disjunction->m_minimumSize;
                if (op.m_checkAdjust)
                    op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust));

                m_checked += op.m_checkAdjust;
                break;
            }
            case OpSimpleNestedAlternativeNext:
            case OpNestedAlternativeNext: {
                PatternTerm* term = op.m_term;
                PatternAlternative* alternative = op.m_alternative;
                PatternDisjunction* disjunction = term->parentheses.disjunction;

                // In the non-simple case, store a 'return address' so we can backtrack correctly.
                if (op.m_op == OpNestedAlternativeNext) {
                    unsigned parenthesesFrameLocation = term->frameLocation;
                    unsigned alternativeFrameLocation = parenthesesFrameLocation;
                    if (term->quantityType != QuantifierFixedCount)
                        alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
                    op.m_returnAddress = storeToFrameWithPatch(alternativeFrameLocation);
                }

                if (term->quantityType != QuantifierFixedCount && !m_ops[op.m_previousOp].m_alternative->m_minimumSize) {
                    // If the previous alternative matched without consuming characters then
                    // backtrack to try to match while consuming some input.
                    op.m_zeroLengthMatch = branch32(Equal, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
                }

                // If we reach here then the last alternative has matched - jump to the
                // End node, to skip over any further alternatives.
                //
                // FIXME: this is logically O(N^2) (though N can be expected to be very
                // small). We could avoid this either by adding an extra jump to the JIT
                // data structures, or by making backtracking code that jumps to Next
                // alternatives are responsible for checking that input is available (if
                // we didn't need to plant the input checks, then m_jumps would be free).
                YarrOp* endOp = &m_ops[op.m_nextOp];
                while (endOp->m_nextOp != notFound) {
                    ASSERT(endOp->m_op == OpSimpleNestedAlternativeNext || endOp->m_op == OpNestedAlternativeNext);
                    endOp = &m_ops[endOp->m_nextOp];
                }
                ASSERT(endOp->m_op == OpSimpleNestedAlternativeEnd || endOp->m_op == OpNestedAlternativeEnd);
                endOp->m_jumps.append(jump());

                // This is the entry point for the next alternative.
                op.m_reentry = label();

                // Calculate how much input we need to check for, and if non-zero check.
                op.m_checkAdjust = alternative->m_minimumSize;
                if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion))
                    op.m_checkAdjust -= disjunction->m_minimumSize;
                if (op.m_checkAdjust)
                    op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust));

                YarrOp& lastOp = m_ops[op.m_previousOp];
                m_checked -= lastOp.m_checkAdjust;
                m_checked += op.m_checkAdjust;
                break;
            }
            case OpSimpleNestedAlternativeEnd:
            case OpNestedAlternativeEnd: {
                PatternTerm* term = op.m_term;

                // In the non-simple case, store a 'return address' so we can backtrack correctly.
                if (op.m_op == OpNestedAlternativeEnd) {
                    unsigned parenthesesFrameLocation = term->frameLocation;
                    unsigned alternativeFrameLocation = parenthesesFrameLocation;
                    if (term->quantityType != QuantifierFixedCount)
                        alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
                    op.m_returnAddress = storeToFrameWithPatch(alternativeFrameLocation);
                }

                if (term->quantityType != QuantifierFixedCount && !m_ops[op.m_previousOp].m_alternative->m_minimumSize) {
                    // If the previous alternative matched without consuming characters then
                    // backtrack to try to match while consuming some input.
                    op.m_zeroLengthMatch = branch32(Equal, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
                }

                // If this set of alternatives contains more than one alternative,
                // then the Next nodes will have planted jumps to the End, and added
                // them to this node's m_jumps list.
                op.m_jumps.link(this);
                op.m_jumps.clear();

                YarrOp& lastOp = m_ops[op.m_previousOp];
                m_checked -= lastOp.m_checkAdjust;
                break;
            }

            // OpParenthesesSubpatternOnceBegin/End
            //
            // These nodes support (optionally) capturing subpatterns, that have a
            // quantity count of 1 (this covers fixed once, and ?/?? quantifiers).
            case OpParenthesesSubpatternOnceBegin: {
                PatternTerm* term = op.m_term;
                unsigned parenthesesFrameLocation = term->frameLocation;
                const RegisterID indexTemporary = regT0;
                ASSERT(term->quantityCount == 1);

                // Upon entry to a Greedy quantified set of parentheses store the index.
                // We'll use this for two purposes:
                //  - To indicate which iteration we are on of matching the remainder of
                //    the expression after the parentheses - the first, including the
                //    match within the parentheses, or the second having skipped over them.
                //  - To check for empty matches, which must be rejected.
                //
                // At the head of a NonGreedy set of parentheses we'll immediately set the
                // value on the stack to -1 (indicating a match skipping the subpattern),
                // and plant a jump to the end. We'll also plant a label to backtrack to
                // to reenter the subpattern later, with a store to set up index on the
                // second iteration.
                //
                // FIXME: for capturing parens, could use the index in the capture array?
                if (term->quantityType == QuantifierGreedy)
                    storeToFrame(index, parenthesesFrameLocation);
                else if (term->quantityType == QuantifierNonGreedy) {
                    storeToFrame(TrustedImm32(-1), parenthesesFrameLocation);
                    op.m_jumps.append(jump());
                    op.m_reentry = label();
                    storeToFrame(index, parenthesesFrameLocation);
                }

                // If the parentheses are capturing, store the starting index value to the
                // captures array, offsetting as necessary.
                //
                // FIXME: could avoid offsetting this value in JIT code, apply
                // offsets only afterwards, at the point the results array is
                // being accessed.
                if (term->capture() && compileMode == IncludeSubpatterns) {
                    int inputOffset = term->inputPosition - m_checked;
                    if (term->quantityType == QuantifierFixedCount)
                        inputOffset -= term->parentheses.disjunction->m_minimumSize;
                    if (inputOffset) {
                        move(index, indexTemporary);
                        add32(Imm32(inputOffset), indexTemporary);
                        setSubpatternStart(indexTemporary, term->parentheses.subpatternId);
                    } else
                        setSubpatternStart(index, term->parentheses.subpatternId);
                }
                break;
            }
            case OpParenthesesSubpatternOnceEnd: {
                PatternTerm* term = op.m_term;
                const RegisterID indexTemporary = regT0;
                ASSERT(term->quantityCount == 1);

#ifndef NDEBUG
                // Runtime ASSERT to make sure that the nested alternative handled the
                // "no input consumed" check.
                if (term->quantityType != QuantifierFixedCount && !term->parentheses.disjunction->m_minimumSize) {
                    Jump pastBreakpoint;
                    pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
                    breakpoint();
                    pastBreakpoint.link(this);
                }
#endif

                // If the parentheses are capturing, store the ending index value to the
                // captures array, offsetting as necessary.
                //
                // FIXME: could avoid offsetting this value in JIT code, apply
                // offsets only afterwards, at the point the results array is
                // being accessed.
                if (term->capture() && compileMode == IncludeSubpatterns) {
                    int inputOffset = term->inputPosition - m_checked;
                    if (inputOffset) {
                        move(index, indexTemporary);
                        add32(Imm32(inputOffset), indexTemporary);
                        setSubpatternEnd(indexTemporary, term->parentheses.subpatternId);
                    } else
                        setSubpatternEnd(index, term->parentheses.subpatternId);
                }

                // If the parentheses are quantified Greedy then add a label to jump back
                // to if get a failed match from after the parentheses. For NonGreedy
                // parentheses, link the jump from before the subpattern to here.
                if (term->quantityType == QuantifierGreedy)
                    op.m_reentry = label();
                else if (term->quantityType == QuantifierNonGreedy) {
                    YarrOp& beginOp = m_ops[op.m_previousOp];
                    beginOp.m_jumps.link(this);
                }
                break;
            }

            // OpParenthesesSubpatternTerminalBegin/End
            case OpParenthesesSubpatternTerminalBegin: {
                PatternTerm* term = op.m_term;
                ASSERT(term->quantityType == QuantifierGreedy);
                ASSERT(term->quantityCount == quantifyInfinite);
                ASSERT(!term->capture());

                // Upon entry set a label to loop back to.
                op.m_reentry = label();

                // Store the start index of the current match; we need to reject zero
                // length matches.
                storeToFrame(index, term->frameLocation);
                break;
            }
            case OpParenthesesSubpatternTerminalEnd: {
                YarrOp& beginOp = m_ops[op.m_previousOp];
#ifndef NDEBUG
                PatternTerm* term = op.m_term;

                // Runtime ASSERT to make sure that the nested alternative handled the
                // "no input consumed" check.
                Jump pastBreakpoint;
                pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
                breakpoint();
                pastBreakpoint.link(this);
#endif

                // We know that the match is non-zero, we can accept it and
                // loop back up to the head of the subpattern.
                jump(beginOp.m_reentry);

                // This is the entry point to jump to when we stop matching - we will
                // do so once the subpattern cannot match any more.
                op.m_reentry = label();
                break;
            }

            // OpParentheticalAssertionBegin/End
            case OpParentheticalAssertionBegin: {
                PatternTerm* term = op.m_term;

                // Store the current index - assertions should not update index, so
                // we will need to restore it upon a successful match.
                unsigned parenthesesFrameLocation = term->frameLocation;
                storeToFrame(index, parenthesesFrameLocation);

                // Check
                op.m_checkAdjust = m_checked - term->inputPosition;
                if (op.m_checkAdjust)
                    sub32(Imm32(op.m_checkAdjust), index);

                m_checked -= op.m_checkAdjust;
                break;
            }
            case OpParentheticalAssertionEnd: {
                PatternTerm* term = op.m_term;

                // Restore the input index value.
                unsigned parenthesesFrameLocation = term->frameLocation;
                loadFromFrame(parenthesesFrameLocation, index);

                // If inverted, a successful match of the assertion must be treated
                // as a failure, so jump to backtracking.
                if (term->invert()) {
                    op.m_jumps.append(jump());
                    op.m_reentry = label();
                }

                YarrOp& lastOp = m_ops[op.m_previousOp];
                m_checked += lastOp.m_checkAdjust;
                break;
            }

            case OpMatchFailed:
                // Pattern cannot match: pop the frame and return WTF::notFound.
                removeCallFrame();
                move(TrustedImmPtr((void*)WTF::notFound), returnRegister);
                move(TrustedImm32(0), returnRegister2);
                generateReturn();
                break;
            }

            ++opIndex;
        } while (opIndex < m_ops.size());
    }
1722
    // backtrack
    // Generates the backtracking half of the compiled pattern by walking the
    // op list in reverse. Forwards generation (generate()) left pending
    // backtrack jumps in m_backtrackingState; each op here links those jumps
    // to its appropriate re-entry point or emits the trampoline code needed
    // to retry a later alternative. m_checked is unwound in mirror-image of
    // the adjustments made during forwards generation.
    void backtrack()
    {
        // Backwards generate the backtracking code.
        size_t opIndex = m_ops.size();
        ASSERT(opIndex);

        do {
            --opIndex;
            YarrOp& op = m_ops[opIndex];
            switch (op.m_op) {

            case OpTerm:
                backtrackTerm(opIndex);
                break;

            // OpBodyAlternativeBegin/Next/End
            //
            // For each Begin/Next node representing an alternative, we need to decide what to do
            // in two circumstances:
            // - If we backtrack back into this node, from within the alternative.
            // - If the input check at the head of the alternative fails (if this exists).
            //
            // We treat these two cases differently since in the former case we have slightly
            // more information - since we are backtracking out of a prior alternative we know
            // that at least enough input was available to run it. For example, given the regular
            // expression /a|b/, if we backtrack out of the first alternative (a failed pattern
            // character match of 'a'), then we need not perform an additional input availability
            // check before running the second alternative.
            //
            // Backtracking required differs for the last alternative, which in the case of the
            // repeating set of alternatives must loop. The code generated for the last alternative
            // will also be used to handle all input check failures from any prior alternatives -
            // these require similar functionality, in seeking the next available alternative for
            // which there is sufficient input.
            //
            // Since backtracking of all other alternatives simply requires us to link backtracks
            // to the reentry point for the subsequent alternative, we will only be generating any
            // code when backtracking the last alternative.
            case OpBodyAlternativeBegin:
            case OpBodyAlternativeNext: {
                PatternAlternative* alternative = op.m_alternative;

                if (op.m_op == OpBodyAlternativeNext) {
                    PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
                    m_checked += priorAlternative->m_minimumSize;
                }
                m_checked -= alternative->m_minimumSize;

                // Is this the last alternative? If not, then if we backtrack to this point we just
                // need to jump to try to match the next alternative.
                if (m_ops[op.m_nextOp].m_op != OpBodyAlternativeEnd) {
                    m_backtrackingState.linkTo(m_ops[op.m_nextOp].m_reentry, this);
                    break;
                }
                YarrOp& endOp = m_ops[op.m_nextOp];

                YarrOp* beginOp = &op;
                while (beginOp->m_op != OpBodyAlternativeBegin) {
                    ASSERT(beginOp->m_op == OpBodyAlternativeNext);
                    beginOp = &m_ops[beginOp->m_previousOp];
                }

                bool onceThrough = endOp.m_nextOp == notFound;

                // First, generate code to handle cases where we backtrack out of an attempted match
                // of the last alternative. If this is a 'once through' set of alternatives then we
                // have nothing to do - link this straight through to the End.
                if (onceThrough)
                    m_backtrackingState.linkTo(endOp.m_reentry, this);
                else {
                    // If we don't need to move the input position, and the pattern has a fixed size
                    // (in which case we omit the store of the start index until the pattern has matched)
                    // then we can just link the backtrack out of the last alternative straight to the
                    // head of the first alternative.
                    if (m_pattern.m_body->m_hasFixedSize
                        && (alternative->m_minimumSize > beginOp->m_alternative->m_minimumSize)
                        && (alternative->m_minimumSize - beginOp->m_alternative->m_minimumSize == 1))
                        m_backtrackingState.linkTo(beginOp->m_reentry, this);
                    else {
                        // We need to generate a trampoline of code to execute before looping back
                        // around to the first alternative.
                        m_backtrackingState.link(this);

                        // If the pattern size is not fixed, then store the start index, for use if we match.
                        if (!m_pattern.m_body->m_hasFixedSize) {
                            if (alternative->m_minimumSize == 1)
                                setMatchStart(index);
                            else {
                                move(index, regT0);
                                if (alternative->m_minimumSize)
                                    sub32(Imm32(alternative->m_minimumSize - 1), regT0);
                                else
                                    add32(TrustedImm32(1), regT0);
                                setMatchStart(regT0);
                            }
                        }

                        // Generate code to loop. Check whether the last alternative is longer than the
                        // first (e.g. /a|xy/ or /a|xyz/).
                        if (alternative->m_minimumSize > beginOp->m_alternative->m_minimumSize) {
                            // We want to loop, and increment input position. If the delta is 1, it is
                            // already correctly incremented, if more than one then decrement as appropriate.
                            unsigned delta = alternative->m_minimumSize - beginOp->m_alternative->m_minimumSize;
                            ASSERT(delta);
                            if (delta != 1)
                                sub32(Imm32(delta - 1), index);
                            jump(beginOp->m_reentry);
                        } else {
                            // If the first alternative has minimum size 0xFFFFFFFFu, then there cannot
                            // be sufficient input available to handle this, so just fall through.
                            unsigned delta = beginOp->m_alternative->m_minimumSize - alternative->m_minimumSize;
                            if (delta != 0xFFFFFFFFu) {
                                // We need to check input because we are incrementing the input.
                                add32(Imm32(delta + 1), index);
                                checkInput().linkTo(beginOp->m_reentry, this);
                            }
                        }
                    }
                }

                // We can reach this point in the code in two ways:
                // - Fallthrough from the code above (a repeating alternative backtracked out of its
                //   last alternative, and did not have sufficient input to run the first).
                // - We will loop back up to the following label when a repeating alternative loops,
                //   following a failed input check.
                //
                // Either way, we have just failed the input check for the first alternative.
                Label firstInputCheckFailed(this);

                // Generate code to handle input check failures from alternatives except the last.
                // prevOp is the alternative we're handling a bail out from (initially Begin), and
                // nextOp is the alternative we will be attempting to reenter into.
                //
                // We will link input check failures from the forwards matching path back to the code
                // that can handle them.
                YarrOp* prevOp = beginOp;
                YarrOp* nextOp = &m_ops[beginOp->m_nextOp];
                while (nextOp->m_op != OpBodyAlternativeEnd) {
                    prevOp->m_jumps.link(this);

                    // We only get here if an input check fails, it is only worth checking again
                    // if the next alternative has a minimum size less than the last.
                    if (prevOp->m_alternative->m_minimumSize > nextOp->m_alternative->m_minimumSize) {
                        // FIXME: if we added an extra label to YarrOp, we could avoid needing to
                        // subtract delta back out, and reduce this code. Should performance test
                        // the benefit of this.
                        unsigned delta = prevOp->m_alternative->m_minimumSize - nextOp->m_alternative->m_minimumSize;
                        sub32(Imm32(delta), index);
                        Jump fail = jumpIfNoAvailableInput();
                        add32(Imm32(delta), index);
                        jump(nextOp->m_reentry);
                        fail.link(this);
                    } else if (prevOp->m_alternative->m_minimumSize < nextOp->m_alternative->m_minimumSize)
                        add32(Imm32(nextOp->m_alternative->m_minimumSize - prevOp->m_alternative->m_minimumSize), index);
                    prevOp = nextOp;
                    nextOp = &m_ops[nextOp->m_nextOp];
                }

                // We fall through to here if there is insufficient input to run the last alternative.

                // If there is insufficient input to run the last alternative, then for 'once through'
                // alternatives we are done - just jump back up into the forwards matching path at the End.
                if (onceThrough) {
                    op.m_jumps.linkTo(endOp.m_reentry, this);
                    jump(endOp.m_reentry);
                    break;
                }

                // For repeating alternatives, link any input check failure from the last alternative to
                // this point.
                op.m_jumps.link(this);

                bool needsToUpdateMatchStart = !m_pattern.m_body->m_hasFixedSize;

                // Check for cases where input position is already incremented by 1 for the last
                // alternative (this is particularly useful where the minimum size of the body
                // disjunction is 0, e.g. /a*|b/).
                if (needsToUpdateMatchStart && alternative->m_minimumSize == 1) {
                    // index is already incremented by 1, so just store it now!
                    setMatchStart(index);
                    needsToUpdateMatchStart = false;
                }

                // Check whether there is sufficient input to loop. Increment the input position by
                // one, and check. Also add in the minimum disjunction size before checking - there
                // is no point in looping if we're just going to fail all the input checks around
                // the next iteration.
                ASSERT(alternative->m_minimumSize >= m_pattern.m_body->m_minimumSize);
                if (alternative->m_minimumSize == m_pattern.m_body->m_minimumSize) {
                    // If the last alternative had the same minimum size as the disjunction,
                    // just simply increment input pos by 1, no adjustment based on minimum size.
                    add32(TrustedImm32(1), index);
                } else {
                    // If the minimum for the last alternative was one greater than that
                    // for the disjunction, we're already progressed by 1, nothing to do!
                    unsigned delta = (alternative->m_minimumSize - m_pattern.m_body->m_minimumSize) - 1;
                    if (delta)
                        sub32(Imm32(delta), index);
                }
                Jump matchFailed = jumpIfNoAvailableInput();

                if (needsToUpdateMatchStart) {
                    if (!m_pattern.m_body->m_minimumSize)
                        setMatchStart(index);
                    else {
                        move(index, regT0);
                        sub32(Imm32(m_pattern.m_body->m_minimumSize), regT0);
                        setMatchStart(regT0);
                    }
                }

                // Calculate how much more input the first alternative requires than the minimum
                // for the body as a whole. If no more is needed then we don't need an additional
                // input check here - jump straight back up to the start of the first alternative.
                if (beginOp->m_alternative->m_minimumSize == m_pattern.m_body->m_minimumSize)
                    jump(beginOp->m_reentry);
                else {
                    if (beginOp->m_alternative->m_minimumSize > m_pattern.m_body->m_minimumSize)
                        add32(Imm32(beginOp->m_alternative->m_minimumSize - m_pattern.m_body->m_minimumSize), index);
                    else
                        sub32(Imm32(m_pattern.m_body->m_minimumSize - beginOp->m_alternative->m_minimumSize), index);
                    checkInput().linkTo(beginOp->m_reentry, this);
                    jump(firstInputCheckFailed);
                }

                // We jump to here if we iterate to the point that there is insufficient input to
                // run any matches, and need to return a failure state from JIT code.
                matchFailed.link(this);

                removeCallFrame();
                move(TrustedImmPtr((void*)WTF::notFound), returnRegister);
                move(TrustedImm32(0), returnRegister2);
                generateReturn();
                break;
            }
            case OpBodyAlternativeEnd: {
                // We should never backtrack back into a body disjunction.
                ASSERT(m_backtrackingState.isEmpty());

                PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
                m_checked += priorAlternative->m_minimumSize;
                break;
            }

            // OpSimpleNestedAlternativeBegin/Next/End
            // OpNestedAlternativeBegin/Next/End
            //
            // Generate code for when we backtrack back out of an alternative into
            // a Begin or Next node, or when the entry input count check fails. If
            // there are more alternatives we need to jump to the next alternative,
            // if not we backtrack back out of the current set of parentheses.
            //
            // In the case of non-simple nested assertions we need to also link the
            // 'return address' appropriately to backtrack back out into the correct
            // alternative.
            case OpSimpleNestedAlternativeBegin:
            case OpSimpleNestedAlternativeNext:
            case OpNestedAlternativeBegin:
            case OpNestedAlternativeNext: {
                YarrOp& nextOp = m_ops[op.m_nextOp];
                bool isBegin = op.m_previousOp == notFound;
                bool isLastAlternative = nextOp.m_nextOp == notFound;
                ASSERT(isBegin == (op.m_op == OpSimpleNestedAlternativeBegin || op.m_op == OpNestedAlternativeBegin));
                ASSERT(isLastAlternative == (nextOp.m_op == OpSimpleNestedAlternativeEnd || nextOp.m_op == OpNestedAlternativeEnd));

                // Treat an input check failure the same as a failed match.
                m_backtrackingState.append(op.m_jumps);

                // Set the backtracks to jump to the appropriate place. We may need
                // to link the backtracks in one of three different ways depending on
                // the type of alternative we are dealing with:
                // - A single alternative, with no siblings.
                // - The last alternative of a set of two or more.
                // - An alternative other than the last of a set of two or more.
                //
                // In the case of a single alternative on its own, we don't need to
                // jump anywhere - if the alternative fails to match we can just
                // continue to backtrack out of the parentheses without jumping.
                //
                // In the case of the last alternative in a set of more than one, we
                // need to jump to return back out to the beginning. We'll do so by
                // adding a jump to the End node's m_jumps list, and linking this
                // when we come to generate the Begin node. For alternatives other
                // than the last, we need to jump to the next alternative.
                //
                // If the alternative had adjusted the input position we must link
                // backtracking to here, correct, and then jump on. If not we can
                // link the backtracks directly to their destination.
                if (op.m_checkAdjust) {
                    // Handle the cases where we need to link the backtracks here.
                    m_backtrackingState.link(this);
                    sub32(Imm32(op.m_checkAdjust), index);
                    if (!isLastAlternative) {
                        // An alternative that is not the last should jump to its successor.
                        jump(nextOp.m_reentry);
                    } else if (!isBegin) {
                        // The last of more than one alternatives must jump back to the beginning.
                        nextOp.m_jumps.append(jump());
                    } else {
                        // A single alternative on its own can fall through.
                        m_backtrackingState.fallthrough();
                    }
                } else {
                    // Handle the cases where we can link the backtracks directly to their destinations.
                    if (!isLastAlternative) {
                        // An alternative that is not the last should jump to its successor.
                        m_backtrackingState.linkTo(nextOp.m_reentry, this);
                    } else if (!isBegin) {
                        // The last of more than one alternatives must jump back to the beginning.
                        m_backtrackingState.takeBacktracksToJumpList(nextOp.m_jumps, this);
                    }
                    // In the case of a single alternative on its own do nothing - it can fall through.
                }

                // If there is a backtrack jump from a zero length match link it here.
                if (op.m_zeroLengthMatch.isSet())
                    m_backtrackingState.append(op.m_zeroLengthMatch);

                // At this point we've handled the backtracking back into this node.
                // Now link any backtracks that need to jump to here.

                // For non-simple alternatives, link the alternative's 'return address'
                // so that we backtrack back out into the previous alternative.
                if (op.m_op == OpNestedAlternativeNext)
                    m_backtrackingState.append(op.m_returnAddress);

                // If there is more than one alternative, then the last alternative will
                // have planted a jump to be linked to the end. This jump was added to the
                // End node's m_jumps list. If we are back at the beginning, link it here.
                if (isBegin) {
                    YarrOp* endOp = &m_ops[op.m_nextOp];
                    while (endOp->m_nextOp != notFound) {
                        ASSERT(endOp->m_op == OpSimpleNestedAlternativeNext || endOp->m_op == OpNestedAlternativeNext);
                        endOp = &m_ops[endOp->m_nextOp];
                    }
                    ASSERT(endOp->m_op == OpSimpleNestedAlternativeEnd || endOp->m_op == OpNestedAlternativeEnd);
                    m_backtrackingState.append(endOp->m_jumps);
                }

                if (!isBegin) {
                    YarrOp& lastOp = m_ops[op.m_previousOp];
                    m_checked += lastOp.m_checkAdjust;
                }
                m_checked -= op.m_checkAdjust;
                break;
            }
            case OpSimpleNestedAlternativeEnd:
            case OpNestedAlternativeEnd: {
                PatternTerm* term = op.m_term;

                // If there is a backtrack jump from a zero length match link it here.
                if (op.m_zeroLengthMatch.isSet())
                    m_backtrackingState.append(op.m_zeroLengthMatch);

                // If we backtrack into the end of a simple subpattern do nothing;
                // just continue through into the last alternative. If we backtrack
                // into the end of a non-simple set of alternatives we need to jump
                // to the backtracking return address set up during generation.
                if (op.m_op == OpNestedAlternativeEnd) {
                    m_backtrackingState.link(this);

                    // Plant a jump to the return address.
                    unsigned parenthesesFrameLocation = term->frameLocation;
                    unsigned alternativeFrameLocation = parenthesesFrameLocation;
                    if (term->quantityType != QuantifierFixedCount)
                        alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
                    loadFromFrameAndJump(alternativeFrameLocation);

                    // Link the DataLabelPtr associated with the end of the last
                    // alternative to this point.
                    m_backtrackingState.append(op.m_returnAddress);
                }

                YarrOp& lastOp = m_ops[op.m_previousOp];
                m_checked += lastOp.m_checkAdjust;
                break;
            }

            // OpParenthesesSubpatternOnceBegin/End
            //
            // When we are backtracking back out of a capturing subpattern we need
            // to clear the start index in the matches output array, to record that
            // this subpattern has not been captured.
            //
            // When backtracking back out of a Greedy quantified subpattern we need
            // to catch this, and try running the remainder of the alternative after
            // the subpattern again, skipping the parentheses.
            //
            // Upon backtracking back into a quantified set of parentheses we need to
            // check whether we were currently skipping the subpattern. If not, we
            // can backtrack into them, if we were we need to either backtrack back
            // out of the start of the parentheses, or jump back to the forwards
            // matching start, depending of whether the match is Greedy or NonGreedy.
            case OpParenthesesSubpatternOnceBegin: {
                PatternTerm* term = op.m_term;
                ASSERT(term->quantityCount == 1);

                // We only need to backtrack to this point if capturing or greedy.
                if ((term->capture() && compileMode == IncludeSubpatterns) || term->quantityType == QuantifierGreedy) {
                    m_backtrackingState.link(this);

                    // If capturing, clear the capture (we only need to reset start).
                    if (term->capture() && compileMode == IncludeSubpatterns)
                        clearSubpatternStart(term->parentheses.subpatternId);

                    // If Greedy, jump to the end.
                    if (term->quantityType == QuantifierGreedy) {
                        // Clear the flag in the stackframe indicating we ran through the subpattern.
                        unsigned parenthesesFrameLocation = term->frameLocation;
                        storeToFrame(TrustedImm32(-1), parenthesesFrameLocation);
                        // Jump to after the parentheses, skipping the subpattern.
                        jump(m_ops[op.m_nextOp].m_reentry);
                        // A backtrack from after the parentheses, when skipping the subpattern,
                        // will jump back to here.
                        op.m_jumps.link(this);
                    }

                    m_backtrackingState.fallthrough();
                }
                break;
            }
            case OpParenthesesSubpatternOnceEnd: {
                PatternTerm* term = op.m_term;

                if (term->quantityType != QuantifierFixedCount) {
                    m_backtrackingState.link(this);

                    // Check whether we should backtrack back into the parentheses, or if we
                    // are currently in a state where we had skipped over the subpattern
                    // (in which case the flag value on the stack will be -1).
                    unsigned parenthesesFrameLocation = term->frameLocation;
                    Jump hadSkipped = branch32(Equal, Address(stackPointerRegister, parenthesesFrameLocation * sizeof(void*)), TrustedImm32(-1));

                    if (term->quantityType == QuantifierGreedy) {
                        // For Greedy parentheses, we skip after having already tried going
                        // through the subpattern, so if we get here we're done.
                        YarrOp& beginOp = m_ops[op.m_previousOp];
                        beginOp.m_jumps.append(hadSkipped);
                    } else {
                        // For NonGreedy parentheses, we try skipping the subpattern first,
                        // so if we get here we need to try running through the subpattern
                        // next. Jump back to the start of the parentheses in the forwards
                        // matching path.
                        ASSERT(term->quantityType == QuantifierNonGreedy);
                        YarrOp& beginOp = m_ops[op.m_previousOp];
                        hadSkipped.linkTo(beginOp.m_reentry, this);
                    }

                    m_backtrackingState.fallthrough();
                }

                m_backtrackingState.append(op.m_jumps);
                break;
            }

            // OpParenthesesSubpatternTerminalBegin/End
            //
            // Terminal subpatterns will always match - there is nothing after them to
            // force a backtrack, and they have a minimum count of 0, and as such will
            // always produce an acceptable result.
            case OpParenthesesSubpatternTerminalBegin: {
                // We will backtrack to this point once the subpattern cannot match any
                // more. Since no match is accepted as a successful match (we are Greedy
                // quantified with a minimum of zero) jump back to the forwards matching
                // path at the end.
                YarrOp& endOp = m_ops[op.m_nextOp];
                m_backtrackingState.linkTo(endOp.m_reentry, this);
                break;
            }
            case OpParenthesesSubpatternTerminalEnd:
                // We should never be backtracking to here (hence the 'terminal' in the name).
                ASSERT(m_backtrackingState.isEmpty());
                m_backtrackingState.append(op.m_jumps);
                break;

            // OpParentheticalAssertionBegin/End
            case OpParentheticalAssertionBegin: {
                PatternTerm* term = op.m_term;
                YarrOp& endOp = m_ops[op.m_nextOp];

                // We need to handle the backtracks upon backtracking back out
                // of a parenthetical assertion if either we need to correct
                // the input index, or the assertion was inverted.
                if (op.m_checkAdjust || term->invert()) {
                    m_backtrackingState.link(this);

                    if (op.m_checkAdjust)
                        add32(Imm32(op.m_checkAdjust), index);

                    // In an inverted assertion failure to match the subpattern
                    // is treated as a successful match - jump to the end of the
                    // subpattern. We already have adjusted the input position
                    // back to that before the assertion, which is correct.
                    if (term->invert())
                        jump(endOp.m_reentry);

                    m_backtrackingState.fallthrough();
                }

                // The End node's jump list will contain any backtracks into
                // the end of the assertion. Also, if inverted, we will have
                // added the failure caused by a successful match to this.
                m_backtrackingState.append(endOp.m_jumps);

                m_checked += op.m_checkAdjust;
                break;
            }
            case OpParentheticalAssertionEnd: {
                // FIXME: We should really be clearing any nested subpattern
                // matches on bailing out from after the pattern. Firefox has
                // this bug too (presumably because they use YARR!)

                // Never backtrack into an assertion; later failures bail to before the begin.
                m_backtrackingState.takeBacktracksToJumpList(op.m_jumps, this);

                YarrOp& lastOp = m_ops[op.m_previousOp];
                m_checked -= lastOp.m_checkAdjust;
                break;
            }

            case OpMatchFailed:
                break;
            }

        } while (opIndex);
    }
2249
2250 // Compilation methods:
2251 // ====================
2252
2253 // opCompileParenthesesSubpattern
2254 // Emits ops for a subpattern (set of parentheses). These consist
2255 // of a set of alternatives wrapped in an outer set of nodes for
2256 // the parentheses.
2257 // Supported types of parentheses are 'Once' (quantityCount == 1)
2258 // and 'Terminal' (non-capturing parentheses quantified as greedy
2259 // and infinite).
2260 // Alternatives will use the 'Simple' set of ops if either the
2261 // subpattern is terminal (in which case we will never need to
2262 // backtrack), or if the subpattern only contains one alternative.
2263 void opCompileParenthesesSubpattern(PatternTerm* term)
2264 {
2265 YarrOpCode parenthesesBeginOpCode;
2266 YarrOpCode parenthesesEndOpCode;
2267 YarrOpCode alternativeBeginOpCode = OpSimpleNestedAlternativeBegin;
2268 YarrOpCode alternativeNextOpCode = OpSimpleNestedAlternativeNext;
2269 YarrOpCode alternativeEndOpCode = OpSimpleNestedAlternativeEnd;
2270
2271 // We can currently only compile quantity 1 subpatterns that are
2272 // not copies. We generate a copy in the case of a range quantifier,
2273 // e.g. /(?:x){3,9}/, or /(?:x)+/ (These are effectively expanded to
2274 // /(?:x){3,3}(?:x){0,6}/ and /(?:x)(?:x)*/ repectively). The problem
2275 // comes where the subpattern is capturing, in which case we would
2276 // need to restore the capture from the first subpattern upon a
2277 // failure in the second.
2278 if (term->quantityCount == 1 && !term->parentheses.isCopy) {
2279 // Select the 'Once' nodes.
2280 parenthesesBeginOpCode = OpParenthesesSubpatternOnceBegin;
2281 parenthesesEndOpCode = OpParenthesesSubpatternOnceEnd;
2282
2283 // If there is more than one alternative we cannot use the 'simple' nodes.
2284 if (term->parentheses.disjunction->m_alternatives.size() != 1) {
2285 alternativeBeginOpCode = OpNestedAlternativeBegin;
2286 alternativeNextOpCode = OpNestedAlternativeNext;
2287 alternativeEndOpCode = OpNestedAlternativeEnd;
2288 }
2289 } else if (term->parentheses.isTerminal) {
2290 // Select the 'Terminal' nodes.
2291 parenthesesBeginOpCode = OpParenthesesSubpatternTerminalBegin;
2292 parenthesesEndOpCode = OpParenthesesSubpatternTerminalEnd;
2293 } else {
2294 // This subpattern is not supported by the JIT.
2295 m_shouldFallBack = true;
2296 return;
2297 }
2298
2299 size_t parenBegin = m_ops.size();
2300 m_ops.append(parenthesesBeginOpCode);
2301
2302 m_ops.append(alternativeBeginOpCode);
2303 m_ops.last().m_previousOp = notFound;
2304 m_ops.last().m_term = term;
2305 Vector<PatternAlternative*>& alternatives = term->parentheses.disjunction->m_alternatives;
2306 for (unsigned i = 0; i < alternatives.size(); ++i) {
2307 size_t lastOpIndex = m_ops.size() - 1;
2308
2309 PatternAlternative* nestedAlternative = alternatives[i];
2310 opCompileAlternative(nestedAlternative);
2311
2312 size_t thisOpIndex = m_ops.size();
2313 m_ops.append(YarrOp(alternativeNextOpCode));
2314
2315 YarrOp& lastOp = m_ops[lastOpIndex];
2316 YarrOp& thisOp = m_ops[thisOpIndex];
2317
2318 lastOp.m_alternative = nestedAlternative;
2319 lastOp.m_nextOp = thisOpIndex;
2320 thisOp.m_previousOp = lastOpIndex;
2321 thisOp.m_term = term;
2322 }
2323 YarrOp& lastOp = m_ops.last();
2324 ASSERT(lastOp.m_op == alternativeNextOpCode);
2325 lastOp.m_op = alternativeEndOpCode;
2326 lastOp.m_alternative = 0;
2327 lastOp.m_nextOp = notFound;
2328
2329 size_t parenEnd = m_ops.size();
2330 m_ops.append(parenthesesEndOpCode);
2331
2332 m_ops[parenBegin].m_term = term;
2333 m_ops[parenBegin].m_previousOp = notFound;
2334 m_ops[parenBegin].m_nextOp = parenEnd;
2335 m_ops[parenEnd].m_term = term;
2336 m_ops[parenEnd].m_previousOp = parenBegin;
2337 m_ops[parenEnd].m_nextOp = notFound;
2338 }
2339
2340 // opCompileParentheticalAssertion
2341 // Emits ops for a parenthetical assertion. These consist of an
2342 // OpSimpleNestedAlternativeBegin/Next/End set of nodes wrapping
2343 // the alternatives, with these wrapped by an outer pair of
2344 // OpParentheticalAssertionBegin/End nodes.
2345 // We can always use the OpSimpleNestedAlternative nodes in the
2346 // case of parenthetical assertions since these only ever match
2347 // once, and will never backtrack back into the assertion.
2348 void opCompileParentheticalAssertion(PatternTerm* term)
2349 {
2350 size_t parenBegin = m_ops.size();
2351 m_ops.append(OpParentheticalAssertionBegin);
2352
2353 m_ops.append(OpSimpleNestedAlternativeBegin);
2354 m_ops.last().m_previousOp = notFound;
2355 m_ops.last().m_term = term;
2356 Vector<PatternAlternative*>& alternatives = term->parentheses.disjunction->m_alternatives;
2357 for (unsigned i = 0; i < alternatives.size(); ++i) {
2358 size_t lastOpIndex = m_ops.size() - 1;
2359
2360 PatternAlternative* nestedAlternative = alternatives[i];
2361 opCompileAlternative(nestedAlternative);
2362
2363 size_t thisOpIndex = m_ops.size();
2364 m_ops.append(YarrOp(OpSimpleNestedAlternativeNext));
2365
2366 YarrOp& lastOp = m_ops[lastOpIndex];
2367 YarrOp& thisOp = m_ops[thisOpIndex];
2368
2369 lastOp.m_alternative = nestedAlternative;
2370 lastOp.m_nextOp = thisOpIndex;
2371 thisOp.m_previousOp = lastOpIndex;
2372 thisOp.m_term = term;
2373 }
2374 YarrOp& lastOp = m_ops.last();
2375 ASSERT(lastOp.m_op == OpSimpleNestedAlternativeNext);
2376 lastOp.m_op = OpSimpleNestedAlternativeEnd;
2377 lastOp.m_alternative = 0;
2378 lastOp.m_nextOp = notFound;
2379
2380 size_t parenEnd = m_ops.size();
2381 m_ops.append(OpParentheticalAssertionEnd);
2382
2383 m_ops[parenBegin].m_term = term;
2384 m_ops[parenBegin].m_previousOp = notFound;
2385 m_ops[parenBegin].m_nextOp = parenEnd;
2386 m_ops[parenEnd].m_term = term;
2387 m_ops[parenEnd].m_previousOp = parenBegin;
2388 m_ops[parenEnd].m_nextOp = notFound;
2389 }
2390
2391 // opCompileAlternative
2392 // Called to emit nodes for all terms in an alternative.
2393 void opCompileAlternative(PatternAlternative* alternative)
2394 {
2395 optimizeAlternative(alternative);
2396
2397 for (unsigned i = 0; i < alternative->m_terms.size(); ++i) {
2398 PatternTerm* term = &alternative->m_terms[i];
2399
2400 switch (term->type) {
2401 case PatternTerm::TypeParenthesesSubpattern:
2402 opCompileParenthesesSubpattern(term);
2403 break;
2404
2405 case PatternTerm::TypeParentheticalAssertion:
2406 opCompileParentheticalAssertion(term);
2407 break;
2408
2409 default:
2410 m_ops.append(term);
2411 }
2412 }
2413 }
2414
    // opCompileBody
    // This method compiles the body disjunction of the regular expression.
    // The body consists of two sets of alternatives - zero or more 'once
    // through' (BOL anchored) alternatives, followed by zero or more
    // repeated alternatives.
    // For each of these two sets of alternatives, if not empty they will be
    // wrapped in a set of OpBodyAlternativeBegin/Next/End nodes (with the
    // 'begin' node referencing the first alternative, and 'next' nodes
    // referencing any further alternatives. The begin/next/end nodes are
    // linked together in a doubly linked list. In the case of repeating
    // alternatives, the end node is also linked back to the beginning.
    // If no repeating alternatives exist, then a OpMatchFailed node exists
    // to return the failing result.
    void opCompileBody(PatternDisjunction* disjunction)
    {
        Vector<PatternAlternative*>& alternatives = disjunction->m_alternatives;
        size_t currentAlternativeIndex = 0;

        // Emit the 'once through' alternatives.
        if (alternatives.size() && alternatives[0]->onceThrough()) {
            m_ops.append(YarrOp(OpBodyAlternativeBegin));
            m_ops.last().m_previousOp = notFound;

            do {
                // Remember the index of the node preceding this alternative's
                // ops (the begin node, or the previous alternative's next node),
                // then emit the alternative's own ops.
                size_t lastOpIndex = m_ops.size() - 1;
                PatternAlternative* alternative = alternatives[currentAlternativeIndex];
                opCompileAlternative(alternative);

                size_t thisOpIndex = m_ops.size();
                m_ops.append(YarrOp(OpBodyAlternativeNext));

                // NOTE: references are taken only after the append above —
                // appending may reallocate the ops vector, so indices (not
                // references) are held across opCompileAlternative/append.
                YarrOp& lastOp = m_ops[lastOpIndex];
                YarrOp& thisOp = m_ops[thisOpIndex];

                // Wire this pair of nodes into the doubly linked list.
                lastOp.m_alternative = alternative;
                lastOp.m_nextOp = thisOpIndex;
                thisOp.m_previousOp = lastOpIndex;

                ++currentAlternativeIndex;
            } while (currentAlternativeIndex < alternatives.size() && alternatives[currentAlternativeIndex]->onceThrough());

            // Convert the trailing 'next' node into the terminating 'end' node.
            YarrOp& lastOp = m_ops.last();

            ASSERT(lastOp.m_op == OpBodyAlternativeNext);
            lastOp.m_op = OpBodyAlternativeEnd;
            lastOp.m_alternative = 0;
            lastOp.m_nextOp = notFound;
        }

        if (currentAlternativeIndex == alternatives.size()) {
            // All alternatives were 'once through'; emit a node to return
            // the failing result, since there is nothing to repeat.
            m_ops.append(YarrOp(OpMatchFailed));
            return;
        }

        // Emit the repeated alternatives.
        size_t repeatLoop = m_ops.size();
        m_ops.append(YarrOp(OpBodyAlternativeBegin));
        m_ops.last().m_previousOp = notFound;
        do {
            size_t lastOpIndex = m_ops.size() - 1;
            PatternAlternative* alternative = alternatives[currentAlternativeIndex];
            ASSERT(!alternative->onceThrough());
            opCompileAlternative(alternative);

            size_t thisOpIndex = m_ops.size();
            m_ops.append(YarrOp(OpBodyAlternativeNext));

            // References taken after the append, for the same reallocation
            // reason as in the 'once through' loop above.
            YarrOp& lastOp = m_ops[lastOpIndex];
            YarrOp& thisOp = m_ops[thisOpIndex];

            lastOp.m_alternative = alternative;
            lastOp.m_nextOp = thisOpIndex;
            thisOp.m_previousOp = lastOpIndex;

            ++currentAlternativeIndex;
        } while (currentAlternativeIndex < alternatives.size());
        // The final 'next' node becomes the 'end' node, and is linked back to
        // the 'begin' of the repeating alternatives (the loop structure).
        YarrOp& lastOp = m_ops.last();
        ASSERT(lastOp.m_op == OpBodyAlternativeNext);
        lastOp.m_op = OpBodyAlternativeEnd;
        lastOp.m_alternative = 0;
        lastOp.m_nextOp = repeatLoop;
    }
2497
    // Emits the function prologue for the generated matcher: saves the
    // callee-saved registers the matcher uses and, where the platform's
    // calling convention requires it, loads arguments (input/index/length/
    // output) into the registers the matcher expects. Must remain the exact
    // mirror of generateReturn() below.
    void generateEnter()
    {
#if CPU(X86_64)
        push(X86Registers::ebp);
        move(stackPointerRegister, X86Registers::ebp);
        push(X86Registers::ebx);
#elif CPU(X86)
        push(X86Registers::ebp);
        move(stackPointerRegister, X86Registers::ebp);
        // TODO: do we need spill registers to fill the output pointer if there are no sub captures?
        push(X86Registers::ebx);
        push(X86Registers::edi);
        push(X86Registers::esi);
        // Arguments are read from the caller's stack frame; offsets are in
        // pointer-sized slots relative to the saved ebp (slot 0 = saved ebp,
        // slot 1 = return address, arguments start at slot 2).
    #if COMPILER(MSVC)
        // MSVC passes all four arguments on the stack.
        loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), input);
        loadPtr(Address(X86Registers::ebp, 3 * sizeof(void*)), index);
        loadPtr(Address(X86Registers::ebp, 4 * sizeof(void*)), length);
        if (compileMode == IncludeSubpatterns)
            loadPtr(Address(X86Registers::ebp, 5 * sizeof(void*)), output);
    #else
        // Non-MSVC: only the output pointer needs loading from the stack here.
        if (compileMode == IncludeSubpatterns)
            loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), output);
    #endif
#elif CPU(ARM)
        push(ARMRegisters::r4);
        push(ARMRegisters::r5);
        push(ARMRegisters::r6);
#if CPU(ARM_TRADITIONAL)
        push(ARMRegisters::r8); // scratch register
#endif
        // Fourth argument register (r3) carries the output pointer.
        if (compileMode == IncludeSubpatterns)
            move(ARMRegisters::r3, output);
#elif CPU(SH4)
        push(SH4Registers::r11);
        push(SH4Registers::r13);
#elif CPU(MIPS)
        // Do nothing.
#endif
    }
2538
    // Emits the function epilogue: restores the callee-saved registers in
    // exactly the reverse order of the pushes in generateEnter(), then
    // returns to the caller.
    void generateReturn()
    {
#if CPU(X86_64)
        pop(X86Registers::ebx);
        pop(X86Registers::ebp);
#elif CPU(X86)
        pop(X86Registers::esi);
        pop(X86Registers::edi);
        pop(X86Registers::ebx);
        pop(X86Registers::ebp);
#elif CPU(ARM)
#if CPU(ARM_TRADITIONAL)
        pop(ARMRegisters::r8); // scratch register
#endif
        pop(ARMRegisters::r6);
        pop(ARMRegisters::r5);
        pop(ARMRegisters::r4);
#elif CPU(SH4)
        pop(SH4Registers::r13);
        pop(SH4Registers::r11);
#elif CPU(MIPS)
        // Do nothing
#endif
        ret();
    }
2564
public:
    // Constructs a generator for the given pattern. charSize selects the
    // 8-bit or 16-bit subject-string code path; the matching address scale
    // (TimesOne/TimesTwo) is derived from it for input indexing.
    YarrGenerator(YarrPattern& pattern, YarrCharSize charSize)
        : m_pattern(pattern)
        , m_charSize(charSize)
        , m_charScale(m_charSize == Char8 ? TimesOne: TimesTwo)
        , m_shouldFallBack(false)
        , m_checked(0)
    {
    }
2574
    // Drives JIT compilation of the pattern: emits the entry/exit sequences,
    // lowers the pattern into the linear YarrOp representation, generates
    // the matching and backtracking code, and links the finalized code into
    // jitObject. If an unsupported construct was encountered during
    // lowering, the code block is instead flagged to fall back to the
    // interpreter.
    void compile(JSGlobalData* globalData, YarrCodeBlock& jitObject)
    {
        generateEnter();

        // If the input is already exhausted, return WTF::notFound immediately.
        Jump hasInput = checkInput();
        move(TrustedImmPtr((void*)WTF::notFound), returnRegister);
        move(TrustedImm32(0), returnRegister2);
        generateReturn();
        hasInput.link(this);

        // Initialize each subpattern's first output slot (the even entries
        // of the output array) to -1, meaning 'no match'.
        if (compileMode == IncludeSubpatterns) {
            for (unsigned i = 0; i < m_pattern.m_numSubpatterns + 1; ++i)
                store32(TrustedImm32(-1), Address(output, (i << 1) * sizeof(int)));
        }

        // For variable-sized bodies, record the starting index up front.
        if (!m_pattern.m_body->m_hasFixedSize)
            setMatchStart(index);

        initCallFrame();

        // Compile the pattern to the internal 'YarrOp' representation.
        opCompileBody(m_pattern.m_body);

        // If we encountered anything we can't handle in the JIT code
        // (e.g. backreferences) then return early.
        if (m_shouldFallBack) {
            jitObject.setFallBack(true);
            return;
        }

        generate();
        backtrack();

        // Link & finalize the code.
        LinkBuffer linkBuffer(*globalData, this, REGEXP_CODE_ID);
        m_backtrackingState.linkDataLabels(linkBuffer);

        // Store the finalized code in the jitObject slot matching the
        // compile mode and character size it was generated for.
        if (compileMode == MatchOnly) {
            if (m_charSize == Char8)
                jitObject.set8BitCodeMatchOnly(linkBuffer.finalizeCode());
            else
                jitObject.set16BitCodeMatchOnly(linkBuffer.finalizeCode());
        } else {
            if (m_charSize == Char8)
                jitObject.set8BitCode(linkBuffer.finalizeCode());
            else
                jitObject.set16BitCode(linkBuffer.finalizeCode());
        }
        jitObject.setFallBack(m_shouldFallBack);
    }
2625
private:
    // The parsed pattern being compiled.
    YarrPattern& m_pattern;

    // Character width of the subject string (Char8 or Char16).
    YarrCharSize m_charSize;

    // Address scale matching m_charSize (TimesOne for 8-bit, TimesTwo for
    // 16-bit), used when indexing into the input string.
    Scale m_charScale;

    // Used to detect regular expression constructs that are not currently
    // supported in the JIT; fall back to the interpreter when this is detected.
    bool m_shouldFallBack;

    // The regular expression expressed as a linear sequence of operations.
    Vector<YarrOp, 128> m_ops;

    // This records the current input offset being applied due to the current
    // set of alternatives we are nested within. E.g. when matching the
    // character 'b' within the regular expression /abc/, we will know that
    // the minimum size for the alternative is 3, checked upon entry to the
    // alternative, and that 'b' is at offset 1 from the start, and as such
    // when matching 'b' we need to apply an offset of -2 to the load.
    //
    // FIXME: This should go away. Rather than tracking this value throughout
    // code generation, we should gather this information up front & store it
    // on the YarrOp structure.
    int m_checked;

    // This class records state whilst generating the backtracking path of code.
    BacktrackingState m_backtrackingState;
};
2655
6fe7ccc8 2656void jitCompile(YarrPattern& pattern, YarrCharSize charSize, JSGlobalData* globalData, YarrCodeBlock& jitObject, YarrJITCompileMode mode)
14957cd0 2657{
6fe7ccc8
A
2658 if (mode == MatchOnly)
2659 YarrGenerator<MatchOnly>(pattern, charSize).compile(globalData, jitObject);
2660 else
2661 YarrGenerator<IncludeSubpatterns>(pattern, charSize).compile(globalData, jitObject);
14957cd0
A
2662}
2663
2664}}
2665
2666#endif