2 * Copyright (C) 2009 Apple Inc. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 #include "ASCIICType.h"
30 #include "LinkBuffer.h"
37 namespace JSC
{ namespace Yarr
{
// YarrGenerator emits native code for a compiled Yarr regular expression.
// It derives privately from MacroAssembler so the assembler's emission
// helpers (branch32, load16, or32, jump, ...) are directly in scope.
39 class YarrGenerator
: private MacroAssembler
{
// jitCompile() is the external entry point that drives this generator;
// it is granted friendship so it can reach the private machinery.
40 friend void jitCompile(JSGlobalData
*, YarrCodeBlock
& jitObject
, const UString
& pattern
, unsigned& numSubpatterns
, const char*& error
, bool ignoreCase
, bool multiline
);
// Fixed register assignments used throughout code generation:
//   input  - base pointer of the subject string
//   index  - current character position (cursor)
//   length - subject length; output - results/subpattern array
//   regT0/regT1 - scratch; returnRegister - match result out.
// NOTE(review): the groups below redefine the same names once per target
// architecture (ARM, MIPS, SH4, and two x86 flavours); presumably each
// group is wrapped in CPU(...) preprocessor conditionals that are not
// visible in this chunk - confirm against the original file.
// ARM register set:
43 static const RegisterID input
= ARMRegisters::r0
;
44 static const RegisterID index
= ARMRegisters::r1
;
45 static const RegisterID length
= ARMRegisters::r2
;
46 static const RegisterID output
= ARMRegisters::r4
;
48 static const RegisterID regT0
= ARMRegisters::r5
;
49 static const RegisterID regT1
= ARMRegisters::r6
;
51 static const RegisterID returnRegister
= ARMRegisters::r0
;
// MIPS register set (arguments a0-a3, temporaries t4/t5, result v0):
53 static const RegisterID input
= MIPSRegisters::a0
;
54 static const RegisterID index
= MIPSRegisters::a1
;
55 static const RegisterID length
= MIPSRegisters::a2
;
56 static const RegisterID output
= MIPSRegisters::a3
;
58 static const RegisterID regT0
= MIPSRegisters::t4
;
59 static const RegisterID regT1
= MIPSRegisters::t5
;
61 static const RegisterID returnRegister
= MIPSRegisters::v0
;
// SH4 register set (r4-r7 are the argument registers on SH4):
63 static const RegisterID input
= SH4Registers::r4
;
64 static const RegisterID index
= SH4Registers::r5
;
65 static const RegisterID length
= SH4Registers::r6
;
66 static const RegisterID output
= SH4Registers::r7
;
68 static const RegisterID regT0
= SH4Registers::r0
;
69 static const RegisterID regT1
= SH4Registers::r1
;
71 static const RegisterID returnRegister
= SH4Registers::r0
;
// x86 register set (32-bit, presumably - arguments arrive on the stack):
73 static const RegisterID input
= X86Registers::eax
;
74 static const RegisterID index
= X86Registers::edx
;
75 static const RegisterID length
= X86Registers::ecx
;
76 static const RegisterID output
= X86Registers::edi
;
78 static const RegisterID regT0
= X86Registers::ebx
;
79 static const RegisterID regT1
= X86Registers::esi
;
81 static const RegisterID returnRegister
= X86Registers::eax
;
// Second x86 set - edi/esi/edx/ecx matches the System V x86-64 argument
// order, so this is presumably the X86_64 branch; confirm.
83 static const RegisterID input
= X86Registers::edi
;
84 static const RegisterID index
= X86Registers::esi
;
85 static const RegisterID length
= X86Registers::edx
;
86 static const RegisterID output
= X86Registers::ecx
;
88 static const RegisterID regT0
= X86Registers::eax
;
89 static const RegisterID regT1
= X86Registers::ebx
;
91 static const RegisterID returnRegister
= X86Registers::eax
;
// Peephole optimization over one alternative's term list: where a
// fixed-count character class is immediately followed by a fixed-count
// literal character, swap the two so the cheap single-character
// comparison is emitted (and fails) first.
94 void optimizeAlternative(PatternAlternative
* alternative
)
// Nothing to do for an empty alternative (the early-exit's 'return'
// line is not visible in this chunk).
96 if (!alternative
->m_terms
.size())
// Walk adjacent pairs; note the loop bound is size() - 1 so [i + 1]
// below is always valid.
99 for (unsigned i
= 0; i
< alternative
->m_terms
.size() - 1; ++i
) {
100 PatternTerm
& term
= alternative
->m_terms
[i
];
101 PatternTerm
& nextTerm
= alternative
->m_terms
[i
+ 1];
// Only swap a {class, literal} pair when both have fixed quantifiers,
// so reordering cannot change matching semantics.
103 if ((term
.type
== PatternTerm::TypeCharacterClass
)
104 && (term
.quantityType
== QuantifierFixedCount
)
105 && (nextTerm
.type
== PatternTerm::TypePatternCharacter
)
106 && (nextTerm
.quantityType
== QuantifierFixedCount
)) {
// Swap via a copy; references above would alias after assignment.
107 PatternTerm termCopy
= term
;
108 alternative
->m_terms
[i
] = nextTerm
;
109 alternative
->m_terms
[i
+ 1] = termCopy
;
// Recursively emits a divide-and-conquer comparison tree over the sorted
// 'ranges' array, interleaving the sorted single-character 'matches' in
// value order. Jumps to 'matchDest' on a hit and appends failure exits
// to 'failures'; '*matchIndex' tracks which single characters have
// already been emitted by earlier recursion levels.
114 void matchCharacterClassRange(RegisterID character
, JumpList
& failures
, JumpList
& matchDest
, const CharacterRange
* ranges
, unsigned count
, unsigned* matchIndex
, const UChar
* matches
, unsigned matchCount
)
117 // pick which range we're going to generate
118 int which
= count
>> 1;
119 char lo
= ranges
[which
].begin
;
120 char hi
= ranges
[which
].end
;
122 // check if there are any ranges or matches below lo. If not, just jl to failure -
123 // if there is anything else to check, check that first, if it falls through jmp to failure.
124 if ((*matchIndex
< matchCount
) && (matches
[*matchIndex
] < lo
)) {
125 Jump loOrAbove
= branch32(GreaterThanOrEqual
, character
, Imm32((unsigned short)lo
));
127 // generate code for all ranges before this one
// (recurse on the left half; NOTE(review): this call is presumably
// guarded by 'if (which)' - that line is not visible in this chunk)
129 matchCharacterClassRange(character
, failures
, matchDest
, ranges
, which
, matchIndex
, matches
, matchCount
);
// Emit equality tests for every single-character match below 'lo'
// that has not been consumed by the recursion above.
131 while ((*matchIndex
< matchCount
) && (matches
[*matchIndex
] < lo
)) {
132 matchDest
.append(branch32(Equal
, character
, Imm32((unsigned short)matches
[*matchIndex
])));
// Below 'lo' and nothing matched: fail.
135 failures
.append(jump());
137 loOrAbove
.link(this);
// Alternate path (no single-char matches below 'lo'); NOTE(review):
// the else/branch structure joining these paths is not fully visible.
139 Jump loOrAbove
= branch32(GreaterThanOrEqual
, character
, Imm32((unsigned short)lo
));
141 matchCharacterClassRange(character
, failures
, matchDest
, ranges
, which
, matchIndex
, matches
, matchCount
);
142 failures
.append(jump());
144 loOrAbove
.link(this);
// No left subtree at all: anything below 'lo' is an outright failure.
146 failures
.append(branch32(LessThan
, character
, Imm32((unsigned short)lo
)));
// Single-character matches inside [lo, hi] are subsumed by the range
// test itself - just consume them.
148 while ((*matchIndex
< matchCount
) && (matches
[*matchIndex
] <= hi
))
// In range => match.
151 matchDest
.append(branch32(LessThanOrEqual
, character
, Imm32((unsigned short)hi
)));
152 // fall through to here, the value is above hi.
154 // shuffle along & loop around if there are any more matches to handle.
155 unsigned next
= which
+ 1;
// Emits code testing 'character' against a whole CharacterClass,
// appending a jump to 'matchDest' for every way the class can match.
// Falls through when the character does not match. Handles, in order:
// a precomputed lookup table, non-ASCII (unicode) matches/ranges, then
// ASCII ranges and single characters.
// NOTE(review): declarations of 'failures', 'isAscii' linkage and
// 'unicodeFail' (and several braces) are not visible in this chunk.
161 void matchCharacterClass(RegisterID character
, JumpList
& matchDest
, const CharacterClass
* charClass
)
// Fast path: a 256-entry table indexed by the character byte.
163 if (charClass
->m_table
) {
164 ExtendedAddress
tableEntry(character
, reinterpret_cast<intptr_t>(charClass
->m_table
->m_table
));
// For an inverted table a zero entry means "match", otherwise non-zero.
165 matchDest
.append(branchTest8(charClass
->m_table
->m_inverted
? Zero
: NonZero
, tableEntry
));
// Non-ASCII handling: only emitted when the class actually has
// unicode matches or ranges.
169 if (charClass
->m_matchesUnicode
.size() || charClass
->m_rangesUnicode
.size()) {
// Characters <= 0x7f skip the unicode checks entirely.
170 Jump isAscii
= branch32(LessThanOrEqual
, character
, TrustedImm32(0x7f));
172 if (charClass
->m_matchesUnicode
.size()) {
// One equality test per unicode single-character match.
173 for (unsigned i
= 0; i
< charClass
->m_matchesUnicode
.size(); ++i
) {
174 UChar ch
= charClass
->m_matchesUnicode
[i
];
175 matchDest
.append(branch32(Equal
, character
, Imm32(ch
)));
179 if (charClass
->m_rangesUnicode
.size()) {
// Lower/upper bound test per unicode range; 'below' skips the
// upper-bound match test when the character is under the range.
180 for (unsigned i
= 0; i
< charClass
->m_rangesUnicode
.size(); ++i
) {
181 UChar lo
= charClass
->m_rangesUnicode
[i
].begin
;
182 UChar hi
= charClass
->m_rangesUnicode
[i
].end
;
184 Jump below
= branch32(LessThan
, character
, Imm32(lo
));
185 matchDest
.append(branch32(LessThanOrEqual
, character
, Imm32(hi
)));
// No unicode alternative matched: jump past the ASCII checks to fail.
190 unicodeFail
= jump();
// ASCII ranges: delegate to the recursive range/match tree generator.
194 if (charClass
->m_ranges
.size()) {
195 unsigned matchIndex
= 0;
197 matchCharacterClassRange(character
, failures
, matchDest
, charClass
->m_ranges
.begin(), charClass
->m_ranges
.size(), &matchIndex
, charClass
->m_matches
.begin(), charClass
->m_matches
.size());
// Emit any single-character matches the range tree did not consume.
198 while (matchIndex
< charClass
->m_matches
.size())
199 matchDest
.append(branch32(Equal
, character
, Imm32((unsigned short)charClass
->m_matches
[matchIndex
++])));
// No ranges - only single characters to test.
202 } else if (charClass
->m_matches
.size()) {
203 // optimization: gather 'a','A' etc back together, can mask & test once.
204 Vector
<char> matchesAZaz
;
206 for (unsigned i
= 0; i
< charClass
->m_matches
.size(); ++i
) {
207 char ch
= charClass
->m_matches
[i
];
208 if (m_pattern
.m_ignoreCase
) {
// Case-insensitive: collect lowercase letters for the masked test
// below, and skip their uppercase twins (handled by the OR-32 trick).
209 if (isASCIILower(ch
)) {
210 matchesAZaz
.append(ch
);
213 if (isASCIIUpper(ch
))
// Default: plain equality test for this character.
216 matchDest
.append(branch32(Equal
, character
, Imm32((unsigned short)ch
)));
// Masked case-insensitive letter tests: OR in 0x20 to fold upper
// to lower case, then compare against each collected lowercase char.
// Note this clobbers 'character', which is acceptable as the last test.
219 if (unsigned countAZaz
= matchesAZaz
.size()) {
220 or32(TrustedImm32(32), character
);
221 for (unsigned i
= 0; i
< countAZaz
; ++i
)
222 matchDest
.append(branch32(Equal
, character
, TrustedImm32(matchesAZaz
[i
])));
// Land the unicode failure jump after all ASCII tests.
226 if (charClass
->m_matchesUnicode
.size() || charClass
->m_rangesUnicode
.size())
227 unicodeFail
.link(this);
230 // Jumps if input not available; will have (incorrectly) incremented already!
// Advances 'index' by countToCheck, then jumps if that ran past 'length'.
// Uses unsigned Above so a wrapped index also counts as out of input.
231 Jump
jumpIfNoAvailableInput(unsigned countToCheck
= 0)
234 add32(Imm32(countToCheck
), index
);
235 return branch32(Above
, index
, length
);
// Inverse of the above: advances 'index' and jumps when input IS available.
238 Jump
jumpIfAvailableInput(unsigned countToCheck
)
240 add32(Imm32(countToCheck
), index
);
241 return branch32(BelowOrEqual
, index
, length
);
// NOTE(review): orphaned body - its signature (presumably checkInput(),
// jumping while index <= length) is not visible in this chunk.
246 return branch32(BelowOrEqual
, index
, length
);
// NOTE(review): orphaned body - presumably atEndOfInput(), jumping when
// the cursor sits exactly at the end of the subject; signature missing.
251 return branch32(Equal
, index
, length
);
// Jumps while the cursor has not yet reached the end of the subject.
254 Jump
notAtEndOfInput()
256 return branch32(NotEqual
, index
, length
);
// Compares the 16-bit character at (index + inputPosition) with 'ch';
// jumps on equality.
259 Jump
jumpIfCharEquals(UChar ch
, int inputPosition
)
261 return branch16(Equal
, BaseIndex(input
, index
, TimesTwo
, inputPosition
* sizeof(UChar
)), Imm32(ch
));
// As above, but jumps on inequality.
264 Jump
jumpIfCharNotEquals(UChar ch
, int inputPosition
)
266 return branch16(NotEqual
, BaseIndex(input
, index
, TimesTwo
, inputPosition
* sizeof(UChar
)), Imm32(ch
));
// Loads the 16-bit character at (index + inputPosition) into 'reg'.
269 void readCharacter(int inputPosition
, RegisterID reg
)
271 load16(BaseIndex(input
, index
, TimesTwo
, inputPosition
* sizeof(UChar
)), reg
);
// The matcher keeps per-term backtracking state in a stack frame of
// machine-word slots; these helpers store/load by slot index.
274 void storeToFrame(RegisterID reg
, unsigned frameLocation
)
276 poke(reg
, frameLocation
);
279 void storeToFrame(TrustedImm32 imm
, unsigned frameLocation
)
281 poke(imm
, frameLocation
);
// Stores a placeholder pointer into the frame, returning a DataLabelPtr
// so the real return address can be patched in at link time.
284 DataLabelPtr
storeToFrameWithPatch(unsigned frameLocation
)
286 return storePtrWithPatch(TrustedImmPtr(0), Address(stackPointerRegister
, frameLocation
* sizeof(void*)));
289 void loadFromFrame(unsigned frameLocation
, RegisterID reg
)
291 peek(reg
, frameLocation
);
// Indirect jump through a return address previously stored in the frame.
294 void loadFromFrameAndJump(unsigned frameLocation
)
296 jump(Address(stackPointerRegister
, frameLocation
* sizeof(void*)));
// YarrOpCode enumerators: the compiled pattern is flattened into a linear
// Vector of YarrOp nodes tagged with these opcodes, which the generator
// then walks forwards (matching) and backwards (backtracking).
// NOTE(review): the enum's opening line is not visible in this chunk.
300 // These nodes wrap body alternatives - those in the main disjunction,
301 // rather than subpatterns or assertions. These are chained together in
302 // a doubly linked list, with a 'begin' node for the first alternative,
303 // a 'next' node for each subsequent alternative, and an 'end' node at
304 // the end. In the case of repeating alternatives, the 'end' node also
305 // has a reference back to 'begin'.
306 OpBodyAlternativeBegin
,
307 OpBodyAlternativeNext
,
308 OpBodyAlternativeEnd
,
309 // Similar to the body alternatives, but used for subpatterns with two
310 // or more alternatives.
311 OpNestedAlternativeBegin
,
312 OpNestedAlternativeNext
,
313 OpNestedAlternativeEnd
,
314 // Used for alternatives in subpatterns where there is only a single
315 // alternative (backtracking is easier in these cases), or for alternatives
316 // which never need to be backtracked (those in parenthetical assertions,
317 // terminal subpatterns).
318 OpSimpleNestedAlternativeBegin
,
319 OpSimpleNestedAlternativeNext
,
320 OpSimpleNestedAlternativeEnd
,
321 // Used to wrap 'Once' subpattern matches (quantityCount == 1).
322 OpParenthesesSubpatternOnceBegin
,
323 OpParenthesesSubpatternOnceEnd
,
324 // Used to wrap 'Terminal' subpattern matches (at the end of the regexp).
325 OpParenthesesSubpatternTerminalBegin
,
326 OpParenthesesSubpatternTerminalEnd
,
327 // Used to wrap parenthetical assertions.
328 OpParentheticalAssertionBegin
,
329 OpParentheticalAssertionEnd
,
330 // Wraps all simple terms (pattern characters, character classes).
332 // Where an expression contains only 'once through' body alternatives
333 // and no repeating ones, this op is used to return match failure.
337 // This structure is used to hold the compiled opcode information,
338 // including reference back to the original PatternTerm/PatternAlternatives,
339 // and JIT compilation data structures.
// Construct a YarrOp for a simple term; NOTE(review): the initializer
// list is only partially visible (m_isDeadCode plus missing members).
341 explicit YarrOp(PatternTerm
* term
)
344 , m_isDeadCode(false)
// Construct a YarrOp for a structural opcode (no term).
348 explicit YarrOp(YarrOpCode op
)
350 , m_isDeadCode(false)
354 // The operation, as a YarrOpCode, and also a reference to the PatternTerm.
358 // For alternatives, this holds the PatternAlternative and doubly linked
359 // references to this alternative's siblings. In the case of the
360 // OpBodyAlternativeEnd node at the end of a section of repeating nodes,
361 // m_nextOp will reference the OpBodyAlternativeBegin node of the first
362 // repeating alternative.
363 PatternAlternative
* m_alternative
;
367 // Used to record a set of Jumps out of the generated code, typically
368 // used for jumps out to backtracking code, and a single reentry back
369 // into the code for a node (likely where a backtrack will trigger
374 // This flag is used to null out the second pattern character, when
375 // two are fused to match a pair together.
378 // Currently used in the case of some of the more complex management of
379 // 'm_checked', to cache the offset used in this alternative, to avoid
383 // Used by OpNestedAlternativeNext/End to hold the pointer to the
384 // value that will be pushed into the pattern's frame to return to,
385 // upon backtracking back into the disjunction.
386 DataLabelPtr m_returnAddress
;
390 // This class encapsulates information about the state of code generation
391 // whilst generating the code for backtracking, when a term fails to match.
392 // Upon entry to code generation of the backtracking code for a given node,
393 // the Backtracking state will hold references to all control flow sources
394 // that are outputs in need of further backtracking from the prior node
395 // generated (which is the subsequent operation in the regular expression,
396 // and in the m_ops Vector, since we generated backtracking backwards).
397 // These references to control flow take the form of:
398 //  - A jump list of jumps, to be linked to code that will backtrack them
400 //  - A set of DataLabelPtr values, to be populated with values to be
401 //    treated effectively as return addresses backtracking into complex
403 //  - A flag indicating that the current sequence of generated code up to
404 //    this point requires backtracking.
405 class BacktrackingState
{
// Constructor: starts with no pending fallthrough (initializer list is
// only partially visible in this chunk).
408 : m_pendingFallthrough(false)
412 // Add a jump or jumps, a return address, or set the flag indicating
413 // that the current 'fallthrough' control flow requires backtracking.
414 void append(const Jump
& jump
)
416 m_laterFailures
.append(jump
);
418 void append(JumpList
& jumpList
)
420 m_laterFailures
.append(jumpList
);
// A pending DataLabelPtr "return address" to be resolved later via
// link()/linkTo()/linkDataLabels().
422 void append(const DataLabelPtr
& returnAddress
)
424 m_pendingReturns
.append(returnAddress
);
// Marks that the code currently being generated falls through into
// backtracking; must not already be set (that would lose a path).
428 ASSERT(!m_pendingFallthrough
);
429 m_pendingFallthrough
= true;
432 // These methods clear the backtracking state, either linking to the
433 // current location, a provided label, or copying the backtracking out
434 // to a JumpList. All actions may require code generation to take place,
435 // and as such are passed a pointer to the assembler.
436 void link(MacroAssembler
* assembler
)
// Resolve pending return addresses to the current location by recording
// them for the final linkDataLabels() pass.
438 if (m_pendingReturns
.size()) {
439 Label
here(assembler
);
440 for (unsigned i
= 0; i
< m_pendingReturns
.size(); ++i
)
441 m_backtrackRecords
.append(ReturnAddressRecord(m_pendingReturns
[i
], here
));
442 m_pendingReturns
.clear();
444 m_laterFailures
.link(assembler
);
445 m_laterFailures
.clear();
446 m_pendingFallthrough
= false;
// As link(), but targets an existing label; a pending fallthrough
// requires emitting an explicit jump to it.
448 void linkTo(Label label
, MacroAssembler
* assembler
)
450 if (m_pendingReturns
.size()) {
451 for (unsigned i
= 0; i
< m_pendingReturns
.size(); ++i
)
452 m_backtrackRecords
.append(ReturnAddressRecord(m_pendingReturns
[i
], label
));
453 m_pendingReturns
.clear();
455 if (m_pendingFallthrough
)
456 assembler
->jump(label
);
457 m_laterFailures
.linkTo(label
, assembler
);
458 m_laterFailures
.clear();
459 m_pendingFallthrough
= false;
// Drains all pending backtracks into the caller's JumpList, converting
// pending return addresses into a fallthrough first so they are also
// captured as a jump.
461 void takeBacktracksToJumpList(JumpList
& jumpList
, MacroAssembler
* assembler
)
463 if (m_pendingReturns
.size()) {
464 Label
here(assembler
);
465 for (unsigned i
= 0; i
< m_pendingReturns
.size(); ++i
)
466 m_backtrackRecords
.append(ReturnAddressRecord(m_pendingReturns
[i
], here
));
467 m_pendingReturns
.clear();
468 m_pendingFallthrough
= true;
470 if (m_pendingFallthrough
)
471 jumpList
.append(assembler
->jump());
472 jumpList
.append(m_laterFailures
);
473 m_laterFailures
.clear();
474 m_pendingFallthrough
= false;
// True when no jumps, return addresses, or fallthrough remain pending
// (presumably the body of an isEmpty()-style query; signature missing).
479 return m_laterFailures
.empty() && m_pendingReturns
.isEmpty() && !m_pendingFallthrough
;
482 // Called at the end of code generation to link all return addresses.
483 void linkDataLabels(LinkBuffer
& linkBuffer
)
486 for (unsigned i
= 0; i
< m_backtrackRecords
.size(); ++i
)
487 linkBuffer
.patch(m_backtrackRecords
[i
].m_dataLabel
, linkBuffer
.locationOf(m_backtrackRecords
[i
].m_backtrackLocation
));
// Pairs a patchable DataLabelPtr with the label whose final address
// should be written into it at link time.
491 struct ReturnAddressRecord
{
492 ReturnAddressRecord(DataLabelPtr dataLabel
, Label backtrackLocation
)
493 : m_dataLabel(dataLabel
)
494 , m_backtrackLocation(backtrackLocation
)
498 DataLabelPtr m_dataLabel
;
499 Label m_backtrackLocation
;
// Jumps awaiting a backtracking target.
502 JumpList m_laterFailures
;
// Set when the current emission point itself needs a backtrack target.
503 bool m_pendingFallthrough
;
// Return addresses awaiting resolution, and resolved records awaiting
// the final linkDataLabels() patch pass.
504 Vector
<DataLabelPtr
, 4> m_pendingReturns
;
505 Vector
<ReturnAddressRecord
, 4> m_backtrackRecords
;
508 // Generation methods:
509 // ===================
511 // This method provides a default implementation of backtracking common
512 // to many terms; terms commonly jump out of the forwards matching path
513 // on any failed conditions, and add these jumps to the m_jumps list. If
514 // no special handling is required we can often just backtrack to m_jumps.
515 void backtrackTermDefault(size_t opIndex
)
517 YarrOp
& op
= m_ops
[opIndex
];
// Hand the term's failure jumps to the shared backtracking state.
518 m_backtrackingState
.append(op
.m_jumps
);
// Emits the '^' (beginning-of-line) assertion. In multiline mode this
// matches at input start or just after a newline; otherwise only at the
// very start of the subject.
521 void generateAssertionBOL(size_t opIndex
)
523 YarrOp
& op
= m_ops
[opIndex
];
524 PatternTerm
* term
= op
.m_term
;
526 if (m_pattern
.m_multiline
) {
527 const RegisterID character
= regT0
;
// At input position 0 the assertion trivially holds (matchDest's
// declaration is not visible in this chunk).
530 if (!term
->inputPosition
)
531 matchDest
.append(branch32(Equal
, index
, Imm32(m_checked
)));
// Otherwise the preceding character must be a newline.
533 readCharacter((term
->inputPosition
- m_checked
) - 1, character
);
534 matchCharacterClass(character
, matchDest
, m_pattern
.newlineCharacterClass());
535 op
.m_jumps
.append(jump());
537 matchDest
.link(this);
539 // Erk, really should poison out these alternatives early. :-/
// Non-multiline: a term at a non-zero fixed position can never be BOL.
540 if (term
->inputPosition
)
541 op
.m_jumps
.append(jump());
// Fail unless the cursor is at the very start of the subject.
543 op
.m_jumps
.append(branch32(NotEqual
, index
, Imm32(m_checked
)));
546 void backtrackAssertionBOL(size_t opIndex
)
548 backtrackTermDefault(opIndex
);
// Emits the '$' (end-of-line) assertion; mirror image of BOL above.
551 void generateAssertionEOL(size_t opIndex
)
553 YarrOp
& op
= m_ops
[opIndex
];
554 PatternTerm
* term
= op
.m_term
;
556 if (m_pattern
.m_multiline
) {
557 const RegisterID character
= regT0
;
// End of subject satisfies '$'; otherwise the next character must be
// a newline.
560 if (term
->inputPosition
== m_checked
)
561 matchDest
.append(atEndOfInput());
563 readCharacter(term
->inputPosition
- m_checked
, character
);
564 matchCharacterClass(character
, matchDest
, m_pattern
.newlineCharacterClass());
565 op
.m_jumps
.append(jump());
567 matchDest
.link(this);
// Non-multiline: must be exactly at the end of the subject.
569 if (term
->inputPosition
== m_checked
)
570 op
.m_jumps
.append(notAtEndOfInput());
571 // Erk, really should poison out these alternatives early. :-/
573 op
.m_jumps
.append(jump());
576 void backtrackAssertionEOL(size_t opIndex
)
578 backtrackTermDefault(opIndex
);
581 // Also falls through on nextIsNotWordChar.
// Helper for word-boundary assertions: classifies the character at the
// current position, jumping to 'nextIsWordChar' when it is a wordchar
// and to 'nextIsNotWordChar' at end of input.
582 void matchAssertionWordchar(size_t opIndex
, JumpList
& nextIsWordChar
, JumpList
& nextIsNotWordChar
)
584 YarrOp
& op
= m_ops
[opIndex
];
585 PatternTerm
* term
= op
.m_term
;
587 const RegisterID character
= regT0
;
// End of input: there is no next character, so it is not a wordchar.
589 if (term
->inputPosition
== m_checked
)
590 nextIsNotWordChar
.append(atEndOfInput());
592 readCharacter((term
->inputPosition
- m_checked
), character
);
593 matchCharacterClass(character
, nextIsWordChar
, m_pattern
.wordcharCharacterClass());
// Emits the '\b' / '\B' word-boundary assertion: classifies the previous
// character, then the next character, and fails on the combinations that
// do not form a boundary (inverted for '\B' via term->invert()).
596 void generateAssertionWordBoundary(size_t opIndex
)
598 YarrOp
& op
= m_ops
[opIndex
];
599 PatternTerm
* term
= op
.m_term
;
601 const RegisterID character
= regT0
;
// At position 0 there is no previous character - treated like a
// non-wordchar ('atBegin' / 'matchDest' declarations not visible here).
605 if (!term
->inputPosition
)
606 atBegin
= branch32(Equal
, index
, Imm32(m_checked
));
// Classify the previous character as word/non-word.
607 readCharacter((term
->inputPosition
- m_checked
) - 1, character
);
608 matchCharacterClass(character
, matchDest
, m_pattern
.wordcharCharacterClass());
609 if (!term
->inputPosition
)
612 // We fall through to here if the last character was not a wordchar.
613 JumpList nonWordCharThenWordChar
;
614 JumpList nonWordCharThenNonWordChar
;
// For '\B' the success/failure lists are swapped relative to '\b'.
615 if (term
->invert()) {
616 matchAssertionWordchar(opIndex
, nonWordCharThenNonWordChar
, nonWordCharThenWordChar
);
617 nonWordCharThenWordChar
.append(jump());
619 matchAssertionWordchar(opIndex
, nonWordCharThenWordChar
, nonWordCharThenNonWordChar
);
620 nonWordCharThenNonWordChar
.append(jump());
// non-word -> non-word is never a boundary: failure path.
622 op
.m_jumps
.append(nonWordCharThenNonWordChar
);
624 // We jump here if the last character was a wordchar.
625 matchDest
.link(this);
626 JumpList wordCharThenWordChar
;
627 JumpList wordCharThenNonWordChar
;
628 if (term
->invert()) {
629 matchAssertionWordchar(opIndex
, wordCharThenNonWordChar
, wordCharThenWordChar
);
630 wordCharThenWordChar
.append(jump());
632 matchAssertionWordchar(opIndex
, wordCharThenWordChar
, wordCharThenNonWordChar
);
633 // This can fall-through!
// word -> word is never a boundary: failure path.
636 op
.m_jumps
.append(wordCharThenWordChar
);
// Both mixed cases succeed and converge here.
638 nonWordCharThenWordChar
.link(this);
639 wordCharThenNonWordChar
.link(this);
641 void backtrackAssertionWordBoundary(size_t opIndex
)
643 backtrackTermDefault(opIndex
);
// Emits a match of a single literal pattern character. When the very next
// op is another fixed single character at the adjacent input position,
// the two are fused into one 32-bit compare and the second op is marked
// dead.
646 void generatePatternCharacterOnce(size_t opIndex
)
648 YarrOp
& op
= m_ops
[opIndex
];
650 // m_ops always ends with a OpBodyAlternativeEnd or OpMatchFailed
651 // node, so there must always be at least one more node.
652 ASSERT(opIndex
+ 1 < m_ops
.size());
653 YarrOp
& nextOp
= m_ops
[opIndex
+ 1];
658 PatternTerm
* term
= op
.m_term
;
659 UChar ch
= term
->patternCharacter
;
661 const RegisterID character
= regT0
;
// Pair-fusing fast path: next op must be a fixed-count, count-1 literal
// character at exactly the following input position.
663 if (nextOp
.m_op
== OpTerm
) {
664 PatternTerm
* nextTerm
= nextOp
.m_term
;
665 if (nextTerm
->type
== PatternTerm::TypePatternCharacter
666 && nextTerm
->quantityType
== QuantifierFixedCount
667 && nextTerm
->quantityCount
== 1
668 && nextTerm
->inputPosition
== (term
->inputPosition
+ 1)) {
670 UChar ch2
= nextTerm
->patternCharacter
;
// Both UTF-16 code units packed little-end-first into one word.
673 int chPair
= ch
| (ch2
<< 16);
// Case-insensitive: build an OR-mask of 0x20 per alphabetic half
// (NOTE(review): the declaration/assembly of 'mask' is not visible
// in this chunk).
675 if (m_pattern
.m_ignoreCase
) {
676 if (isASCIIAlpha(ch
))
678 if (isASCIIAlpha(ch2
))
682 BaseIndex
address(input
, index
, TimesTwo
, (term
->inputPosition
- m_checked
) * sizeof(UChar
));
// Masked path: load both characters, fold case via OR, compare once.
684 load32WithUnalignedHalfWords(address
, character
);
685 or32(Imm32(mask
), character
);
686 op
.m_jumps
.append(branch32(NotEqual
, character
, Imm32(chPair
| mask
)));
// Unmasked path: direct 32-bit memory compare of the pair.
688 op
.m_jumps
.append(branch32WithUnalignedHalfWords(NotEqual
, address
, Imm32(chPair
)));
// The fused second character no longer needs its own code.
690 nextOp
.m_isDeadCode
= true;
// Single-character slow path. Case-insensitive ASCII letters are
// matched by OR-ing in 0x20 and comparing to the lowercase form.
695 if (m_pattern
.m_ignoreCase
&& isASCIIAlpha(ch
)) {
696 readCharacter(term
->inputPosition
- m_checked
, character
);
697 or32(TrustedImm32(32), character
);
698 op
.m_jumps
.append(branch32(NotEqual
, character
, Imm32(Unicode::toLower(ch
))));
// Non-letters (or case-sensitive): a direct compare is valid only if
// the character is caseless - asserted here.
700 ASSERT(!m_pattern
.m_ignoreCase
|| (Unicode::toLower(ch
) == Unicode::toUpper(ch
)));
701 op
.m_jumps
.append(jumpIfCharNotEquals(ch
, term
->inputPosition
- m_checked
));
704 void backtrackPatternCharacterOnce(size_t opIndex
)
706 backtrackTermDefault(opIndex
);
// Emits a counted loop matching the same literal character exactly
// quantityCount times. countRegister runs from (index - count) up to
// index, addressing each position relative to it.
709 void generatePatternCharacterFixed(size_t opIndex
)
711 YarrOp
& op
= m_ops
[opIndex
];
712 PatternTerm
* term
= op
.m_term
;
713 UChar ch
= term
->patternCharacter
;
715 const RegisterID character
= regT0
;
716 const RegisterID countRegister
= regT1
;
// Start the counter 'quantityCount' positions behind the cursor.
718 move(index
, countRegister
);
719 sub32(Imm32(term
->quantityCount
.unsafeGet()), countRegister
);
// Per-iteration address; Checked<int> guards the offset arithmetic
// against overflow. NOTE(review): the 'loop' Label declaration used by
// the backward branch below is not visible in this chunk.
722 BaseIndex
address(input
, countRegister
, TimesTwo
, ((term
->inputPosition
- m_checked
+ Checked
<int>(term
->quantityCount
)) * static_cast<int>(sizeof(UChar
))).unsafeGet());
// Case-insensitive ASCII letter: fold case with OR 0x20, compare lower.
724 if (m_pattern
.m_ignoreCase
&& isASCIIAlpha(ch
)) {
725 load16(address
, character
);
726 or32(TrustedImm32(32), character
);
727 op
.m_jumps
.append(branch32(NotEqual
, character
, Imm32(Unicode::toLower(ch
))));
// Otherwise a direct 16-bit compare; only valid for caseless chars
// when ignoreCase is set.
729 ASSERT(!m_pattern
.m_ignoreCase
|| (Unicode::toLower(ch
) == Unicode::toUpper(ch
)));
730 op
.m_jumps
.append(branch16(NotEqual
, address
, Imm32(ch
)));
// Advance the counter; loop until it catches up with the cursor.
732 add32(TrustedImm32(1), countRegister
);
733 branch32(NotEqual
, countRegister
, index
).linkTo(loop
, this);
735 void backtrackPatternCharacterFixed(size_t opIndex
)
737 backtrackTermDefault(opIndex
);
// Emits a greedy literal-character quantifier: consume as many matching
// characters as possible (bounded by quantityCount unless infinite),
// recording the count in the term's frame slot for later backtracking.
740 void generatePatternCharacterGreedy(size_t opIndex
)
742 YarrOp
& op
= m_ops
[opIndex
];
743 PatternTerm
* term
= op
.m_term
;
744 UChar ch
= term
->patternCharacter
;
746 const RegisterID character
= regT0
;
747 const RegisterID countRegister
= regT1
;
749 move(TrustedImm32(0), countRegister
);
// Loop body: stop at end of input or on a non-matching character.
// NOTE(review): the 'failures' JumpList and 'loop' Label declarations
// are not visible in this chunk.
753 failures
.append(atEndOfInput());
754 if (m_pattern
.m_ignoreCase
&& isASCIIAlpha(ch
)) {
755 readCharacter(term
->inputPosition
- m_checked
, character
);
756 or32(TrustedImm32(32), character
);
757 failures
.append(branch32(NotEqual
, character
, Imm32(Unicode::toLower(ch
))));
759 ASSERT(!m_pattern
.m_ignoreCase
|| (Unicode::toLower(ch
) == Unicode::toUpper(ch
)));
760 failures
.append(jumpIfCharNotEquals(ch
, term
->inputPosition
- m_checked
));
// Matched one more: bump both the repeat count and the input cursor.
763 add32(TrustedImm32(1), countRegister
);
764 add32(TrustedImm32(1), index
);
// Unbounded '*'/'+' loops forever until failure; bounded loops stop
// at quantityCount.
765 if (term
->quantityCount
== quantifyInfinite
)
768 branch32(NotEqual
, countRegister
, Imm32(term
->quantityCount
.unsafeGet())).linkTo(loop
, this);
// Backtracking re-enters here after giving back one character.
771 op
.m_reentry
= label();
773 storeToFrame(countRegister
, term
->frameLocation
);
// Backtrack: give back one consumed character; when the count hits
// zero there is nothing left to give back, so propagate the failure.
776 void backtrackPatternCharacterGreedy(size_t opIndex
)
778 YarrOp
& op
= m_ops
[opIndex
];
779 PatternTerm
* term
= op
.m_term
;
781 const RegisterID countRegister
= regT1
;
783 m_backtrackingState
.link(this);
785 loadFromFrame(term
->frameLocation
, countRegister
);
786 m_backtrackingState
.append(branchTest32(Zero
, countRegister
));
787 sub32(TrustedImm32(1), countRegister
);
788 sub32(TrustedImm32(1), index
);
// Emits a non-greedy literal-character quantifier. Forwards matching
// consumes nothing initially - characters are only consumed on demand
// when backtracking re-enters below.
792 void generatePatternCharacterNonGreedy(size_t opIndex
)
794 YarrOp
& op
= m_ops
[opIndex
];
795 PatternTerm
* term
= op
.m_term
;
797 const RegisterID countRegister
= regT1
;
799 move(TrustedImm32(0), countRegister
);
800 op
.m_reentry
= label();
801 storeToFrame(countRegister
, term
->frameLocation
);
// Backtrack: try to consume one more character; fail out when at end
// of input, at the quantity limit, or on a non-matching character
// (rewinding the cursor by the whole consumed count first).
803 void backtrackPatternCharacterNonGreedy(size_t opIndex
)
805 YarrOp
& op
= m_ops
[opIndex
];
806 PatternTerm
* term
= op
.m_term
;
807 UChar ch
= term
->patternCharacter
;
809 const RegisterID character
= regT0
;
810 const RegisterID countRegister
= regT1
;
812 JumpList nonGreedyFailures
;
814 m_backtrackingState
.link(this);
816 loadFromFrame(term
->frameLocation
, countRegister
);
818 nonGreedyFailures
.append(atEndOfInput());
// A bounded quantifier cannot grow beyond quantityCount.
819 if (term
->quantityCount
!= quantifyInfinite
)
820 nonGreedyFailures
.append(branch32(Equal
, countRegister
, Imm32(term
->quantityCount
.unsafeGet())));
// Same case-folding comparison scheme as the greedy variant.
821 if (m_pattern
.m_ignoreCase
&& isASCIIAlpha(ch
)) {
822 readCharacter(term
->inputPosition
- m_checked
, character
);
823 or32(TrustedImm32(32), character
);
824 nonGreedyFailures
.append(branch32(NotEqual
, character
, Imm32(Unicode::toLower(ch
))));
826 ASSERT(!m_pattern
.m_ignoreCase
|| (Unicode::toLower(ch
) == Unicode::toUpper(ch
)));
827 nonGreedyFailures
.append(jumpIfCharNotEquals(ch
, term
->inputPosition
- m_checked
));
// Consumed one more character successfully.
830 add32(TrustedImm32(1), countRegister
);
831 add32(TrustedImm32(1), index
);
// Failure: rewind the cursor by everything this term had consumed and
// continue backtracking into earlier terms.
835 nonGreedyFailures
.link(this);
836 sub32(countRegister
, index
);
837 m_backtrackingState
.fallthrough();
// Emits a single match of a character class. For an inverted class the
// class-match jumps become failures; otherwise failing to match any
// branch of the class is the failure.
840 void generateCharacterClassOnce(size_t opIndex
)
842 YarrOp
& op
= m_ops
[opIndex
];
843 PatternTerm
* term
= op
.m_term
;
845 const RegisterID character
= regT0
;
// NOTE(review): the 'matchDest' JumpList declaration and the
// invert()-branch structure are not visible in this chunk.
848 readCharacter(term
->inputPosition
- m_checked
, character
);
849 matchCharacterClass(character
, matchDest
, term
->characterClass
);
// Inverted class: a class hit is a term failure.
852 op
.m_jumps
.append(matchDest
);
// Normal class: fall-through (no hit) is the failure; hits continue.
854 op
.m_jumps
.append(jump());
855 matchDest
.link(this);
858 void backtrackCharacterClassOnce(size_t opIndex
)
860 backtrackTermDefault(opIndex
);
// Emits a counted loop matching a character class exactly quantityCount
// times; structure parallels generatePatternCharacterFixed above.
863 void generateCharacterClassFixed(size_t opIndex
)
865 YarrOp
& op
= m_ops
[opIndex
];
866 PatternTerm
* term
= op
.m_term
;
868 const RegisterID character
= regT0
;
869 const RegisterID countRegister
= regT1
;
// Counter starts 'quantityCount' positions behind the cursor.
871 move(index
, countRegister
);
872 sub32(Imm32(term
->quantityCount
.unsafeGet()), countRegister
);
// Load the character for this iteration (Checked<int> guards the
// offset arithmetic) and test it against the class.
876 load16(BaseIndex(input
, countRegister
, TimesTwo
, ((term
->inputPosition
- m_checked
+ Checked
<int>(term
->quantityCount
)) * static_cast<int>(sizeof(UChar
))).unsafeGet()), character
);
877 matchCharacterClass(character
, matchDest
, term
->characterClass
);
// Inverted/normal handling as in the 'Once' case above.
880 op
.m_jumps
.append(matchDest
);
882 op
.m_jumps
.append(jump());
883 matchDest
.link(this);
// Loop until the counter catches up with the cursor. NOTE(review):
// the 'loop' Label declaration is not visible in this chunk.
886 add32(TrustedImm32(1), countRegister
);
887 branch32(NotEqual
, countRegister
, index
).linkTo(loop
, this);
889 void backtrackCharacterClassFixed(size_t opIndex
)
891 backtrackTermDefault(opIndex
);
// Emits a greedy character-class quantifier: consume matching characters
// until end of input, a non-member character, or the quantity bound,
// recording the consumed count in the term's frame slot.
894 void generateCharacterClassGreedy(size_t opIndex
)
896 YarrOp
& op
= m_ops
[opIndex
];
897 PatternTerm
* term
= op
.m_term
;
899 const RegisterID character
= regT0
;
900 const RegisterID countRegister
= regT1
;
902 move(TrustedImm32(0), countRegister
);
// NOTE(review): the 'failures'/'matchDest' JumpLists and the 'loop'
// Label declarations are not visible in this chunk.
906 failures
.append(atEndOfInput());
// Inverted class: any class hit ends the run.
908 if (term
->invert()) {
909 readCharacter(term
->inputPosition
- m_checked
, character
);
910 matchCharacterClass(character
, failures
, term
->characterClass
);
// Normal class: a miss (fall-through) ends the run; hits continue.
913 readCharacter(term
->inputPosition
- m_checked
, character
);
914 matchCharacterClass(character
, matchDest
, term
->characterClass
);
915 failures
.append(jump());
916 matchDest
.link(this);
// Matched one more: bump the count and the input cursor.
919 add32(TrustedImm32(1), countRegister
);
920 add32(TrustedImm32(1), index
);
// Bounded quantifier: loop below the limit, then deliberately take
// the failure path so the shared "stop consuming" code runs.
921 if (term
->quantityCount
!= quantifyInfinite
) {
922 branch32(NotEqual
, countRegister
, Imm32(term
->quantityCount
.unsafeGet())).linkTo(loop
, this);
923 failures
.append(jump());
// Backtracking re-enters here after giving back one character.
928 op
.m_reentry
= label();
930 storeToFrame(countRegister
, term
->frameLocation
);
// Backtrack: give back one consumed character, or propagate the
// failure once the count reaches zero.
932 void backtrackCharacterClassGreedy(size_t opIndex
)
934 YarrOp
& op
= m_ops
[opIndex
];
935 PatternTerm
* term
= op
.m_term
;
937 const RegisterID countRegister
= regT1
;
939 m_backtrackingState
.link(this);
941 loadFromFrame(term
->frameLocation
, countRegister
);
942 m_backtrackingState
.append(branchTest32(Zero
, countRegister
));
943 sub32(TrustedImm32(1), countRegister
);
944 sub32(TrustedImm32(1), index
);
// Emits a non-greedy character-class quantifier. Forwards matching
// consumes nothing; characters are consumed on demand during
// backtracking below.
948 void generateCharacterClassNonGreedy(size_t opIndex
)
950 YarrOp
& op
= m_ops
[opIndex
];
951 PatternTerm
* term
= op
.m_term
;
953 const RegisterID countRegister
= regT1
;
955 move(TrustedImm32(0), countRegister
);
956 op
.m_reentry
= label();
957 storeToFrame(countRegister
, term
->frameLocation
);
// Backtrack: try to consume one more class member; fail out at end of
// input, at the quantity limit, or on a non-member character -
// rewinding the cursor by the whole consumed count on failure.
959 void backtrackCharacterClassNonGreedy(size_t opIndex
)
961 YarrOp
& op
= m_ops
[opIndex
];
962 PatternTerm
* term
= op
.m_term
;
964 const RegisterID character
= regT0
;
965 const RegisterID countRegister
= regT1
;
967 JumpList nonGreedyFailures
;
969 m_backtrackingState
.link(this);
971 Label
backtrackBegin(this);
972 loadFromFrame(term
->frameLocation
, countRegister
);
974 nonGreedyFailures
.append(atEndOfInput());
975 nonGreedyFailures
.append(branch32(Equal
, countRegister
, Imm32(term
->quantityCount
.unsafeGet())));
// Test the next character against the class. NOTE(review): the
// 'matchDest' declaration and invert()-branch structure are not
// visible in this chunk.
978 readCharacter(term
->inputPosition
- m_checked
, character
);
979 matchCharacterClass(character
, matchDest
, term
->characterClass
);
// Inverted class: a class hit is a failure.
982 nonGreedyFailures
.append(matchDest
);
// Normal class: a miss (fall-through) is a failure.
984 nonGreedyFailures
.append(jump());
985 matchDest
.link(this);
// Consumed one more class member.
988 add32(TrustedImm32(1), countRegister
);
989 add32(TrustedImm32(1), index
);
// Failure: rewind the cursor by everything this term had consumed and
// continue backtracking into earlier terms.
993 nonGreedyFailures
.link(this);
994 sub32(countRegister
, index
);
995 m_backtrackingState
.fallthrough();
// Emits the optimized /.*x.*/ "dot-star enclosure" form: instead of
// backtracking, scan backwards from the match point to the start of the
// line and forwards to the end of the line, and report that whole span
// as the match (honoring non-multiline ^/$ anchors).
998 void generateDotStarEnclosure(size_t opIndex
)
1000 YarrOp
& op
= m_ops
[opIndex
];
1001 PatternTerm
* term
= op
.m_term
;
1003 const RegisterID character
= regT0
;
1004 const RegisterID matchPos
= regT1
;
1006 JumpList foundBeginningNewLine
;
1007 JumpList saveStartIndex
;
1008 JumpList foundEndingNewLine
;
// Establish the match start position: derived from the cursor for
// fixed-size bodies, otherwise read back from the output array.
1010 if (m_pattern
.m_body
->m_hasFixedSize
) {
1011 move(index
, matchPos
);
1012 sub32(Imm32(m_checked
), matchPos
);
1014 load32(Address(output
), matchPos
);
// Scan backwards from matchPos for a newline (or position 0).
1016 saveStartIndex
.append(branchTest32(Zero
, matchPos
));
1017 Label
findBOLLoop(this);
1018 sub32(TrustedImm32(1), matchPos
);
1019 load16(BaseIndex(input
, matchPos
, TimesTwo
, 0), character
);
1020 matchCharacterClass(character
, foundBeginningNewLine
, m_pattern
.newlineCharacterClass());
1021 branchTest32(NonZero
, matchPos
).linkTo(findBOLLoop
, this);
1022 saveStartIndex
.append(jump());
1024 foundBeginningNewLine
.link(this);
1025 add32(TrustedImm32(1), matchPos
); // Advance past newline
1026 saveStartIndex
.link(this);
// Non-multiline with a '^' anchor: the match must start at position 0.
1028 if (!m_pattern
.m_multiline
&& term
->anchors
.bolAnchor
)
1029 op
.m_jumps
.append(branchTest32(NonZero
, matchPos
));
// Record the (possibly adjusted) match start.
1031 store32(matchPos
, Address(output
));
// Scan forwards from the cursor for the end of line / end of input.
1033 move(index
, matchPos
);
1035 Label
findEOLLoop(this);
1036 foundEndingNewLine
.append(branch32(Equal
, matchPos
, length
));
1037 load16(BaseIndex(input
, matchPos
, TimesTwo
, 0), character
);
1038 matchCharacterClass(character
, foundEndingNewLine
, m_pattern
.newlineCharacterClass());
1039 add32(TrustedImm32(1), matchPos
);
1042 foundEndingNewLine
.link(this);
// Non-multiline with a '$' anchor: the match must end at input end.
1044 if (!m_pattern
.m_multiline
&& term
->anchors
.eolAnchor
)
1045 op
.m_jumps
.append(branch32(NotEqual
, matchPos
, length
));
// Advance the cursor to the end of the matched span.
1047 move(matchPos
, index
);
1050 void backtrackDotStarEnclosure(size_t opIndex
)
1052 backtrackTermDefault(opIndex
);
1055 // Code generation/backtracking for simple terms
1056 // (pattern characters, character classes, and assertions).
1057 // These methods farm out work to the set of functions above.
1058 void generateTerm(size_t opIndex
)
1060 YarrOp
& op
= m_ops
[opIndex
];
1061 PatternTerm
* term
= op
.m_term
;
1063 switch (term
->type
) {
1064 case PatternTerm::TypePatternCharacter
:
1065 switch (term
->quantityType
) {
1066 case QuantifierFixedCount
:
1067 if (term
->quantityCount
== 1)
1068 generatePatternCharacterOnce(opIndex
);
1070 generatePatternCharacterFixed(opIndex
);
1072 case QuantifierGreedy
:
1073 generatePatternCharacterGreedy(opIndex
);
1075 case QuantifierNonGreedy
:
1076 generatePatternCharacterNonGreedy(opIndex
);
1081 case PatternTerm::TypeCharacterClass
:
1082 switch (term
->quantityType
) {
1083 case QuantifierFixedCount
:
1084 if (term
->quantityCount
== 1)
1085 generateCharacterClassOnce(opIndex
);
1087 generateCharacterClassFixed(opIndex
);
1089 case QuantifierGreedy
:
1090 generateCharacterClassGreedy(opIndex
);
1092 case QuantifierNonGreedy
:
1093 generateCharacterClassNonGreedy(opIndex
);
1098 case PatternTerm::TypeAssertionBOL
:
1099 generateAssertionBOL(opIndex
);
1102 case PatternTerm::TypeAssertionEOL
:
1103 generateAssertionEOL(opIndex
);
1106 case PatternTerm::TypeAssertionWordBoundary
:
1107 generateAssertionWordBoundary(opIndex
);
1110 case PatternTerm::TypeForwardReference
:
1113 case PatternTerm::TypeParenthesesSubpattern
:
1114 case PatternTerm::TypeParentheticalAssertion
:
1115 ASSERT_NOT_REACHED();
1116 case PatternTerm::TypeBackReference
:
1117 m_shouldFallBack
= true;
1119 case PatternTerm::TypeDotStarEnclosure
:
1120 generateDotStarEnclosure(opIndex
);
1124 void backtrackTerm(size_t opIndex
)
1126 YarrOp
& op
= m_ops
[opIndex
];
1127 PatternTerm
* term
= op
.m_term
;
1129 switch (term
->type
) {
1130 case PatternTerm::TypePatternCharacter
:
1131 switch (term
->quantityType
) {
1132 case QuantifierFixedCount
:
1133 if (term
->quantityCount
== 1)
1134 backtrackPatternCharacterOnce(opIndex
);
1136 backtrackPatternCharacterFixed(opIndex
);
1138 case QuantifierGreedy
:
1139 backtrackPatternCharacterGreedy(opIndex
);
1141 case QuantifierNonGreedy
:
1142 backtrackPatternCharacterNonGreedy(opIndex
);
1147 case PatternTerm::TypeCharacterClass
:
1148 switch (term
->quantityType
) {
1149 case QuantifierFixedCount
:
1150 if (term
->quantityCount
== 1)
1151 backtrackCharacterClassOnce(opIndex
);
1153 backtrackCharacterClassFixed(opIndex
);
1155 case QuantifierGreedy
:
1156 backtrackCharacterClassGreedy(opIndex
);
1158 case QuantifierNonGreedy
:
1159 backtrackCharacterClassNonGreedy(opIndex
);
1164 case PatternTerm::TypeAssertionBOL
:
1165 backtrackAssertionBOL(opIndex
);
1168 case PatternTerm::TypeAssertionEOL
:
1169 backtrackAssertionEOL(opIndex
);
1172 case PatternTerm::TypeAssertionWordBoundary
:
1173 backtrackAssertionWordBoundary(opIndex
);
1176 case PatternTerm::TypeForwardReference
:
1179 case PatternTerm::TypeParenthesesSubpattern
:
1180 case PatternTerm::TypeParentheticalAssertion
:
1181 ASSERT_NOT_REACHED();
1183 case PatternTerm::TypeDotStarEnclosure
:
1184 backtrackDotStarEnclosure(opIndex
);
1187 case PatternTerm::TypeBackReference
:
1188 m_shouldFallBack
= true;
1195 // Forwards generate the matching code.
1196 ASSERT(m_ops
.size());
1200 YarrOp
& op
= m_ops
[opIndex
];
1204 generateTerm(opIndex
);
1207 // OpBodyAlternativeBegin/Next/End
1209 // These nodes wrap the set of alternatives in the body of the regular expression.
1210 // There may be either one or two chains of OpBodyAlternative nodes, one representing
1211 // the 'once through' sequence of alternatives (if any exist), and one representing
1212 // the repeating alternatives (again, if any exist).
1214 // Upon normal entry to the Begin alternative, we will check that input is available.
1215 // Reentry to the Begin alternative will take place after the check has taken place,
1216 // and will assume that the input position has already been progressed as appropriate.
1218 // Entry to subsequent Next/End alternatives occurs when the prior alternative has
1219 // successfully completed a match - return a success state from JIT code.
1221 // Next alternatives allow for reentry optimized to suit backtracking from its
1222 // preceding alternative. It expects the input position to still be set to a position
1223 // appropriate to its predecessor, and it will only perform an input check if the
1224 // predecessor had a minimum size less than its own.
1226 // In the case 'once through' expressions, the End node will also have a reentry
1227 // point to jump to when the last alternative fails. Again, this expects the input
1228 // position to still reflect that expected by the prior alternative.
1229 case OpBodyAlternativeBegin
: {
1230 PatternAlternative
* alternative
= op
.m_alternative
;
1232 // Upon entry at the head of the set of alternatives, check if input is available
1233 // to run the first alternative. (This progresses the input position).
1234 op
.m_jumps
.append(jumpIfNoAvailableInput(alternative
->m_minimumSize
));
1235 // We will reenter after the check, and assume the input position to have been
1236 // set as appropriate to this alternative.
1237 op
.m_reentry
= label();
1239 m_checked
+= alternative
->m_minimumSize
;
1242 case OpBodyAlternativeNext
:
1243 case OpBodyAlternativeEnd
: {
1244 PatternAlternative
* priorAlternative
= m_ops
[op
.m_previousOp
].m_alternative
;
1245 PatternAlternative
* alternative
= op
.m_alternative
;
1247 // If we get here, the prior alternative matched - return success.
1249 // Adjust the stack pointer to remove the pattern's frame.
1250 if (m_pattern
.m_body
->m_callFrameSize
)
1251 addPtr(Imm32(m_pattern
.m_body
->m_callFrameSize
* sizeof(void*)), stackPointerRegister
);
1253 // Load appropriate values into the return register and the first output
1254 // slot, and return. In the case of pattern with a fixed size, we will
1255 // not have yet set the value in the first
1256 ASSERT(index
!= returnRegister
);
1257 if (m_pattern
.m_body
->m_hasFixedSize
) {
1258 move(index
, returnRegister
);
1259 if (priorAlternative
->m_minimumSize
)
1260 sub32(Imm32(priorAlternative
->m_minimumSize
), returnRegister
);
1261 store32(returnRegister
, output
);
1263 load32(Address(output
), returnRegister
);
1264 store32(index
, Address(output
, 4));
1267 // This is the divide between the tail of the prior alternative, above, and
1268 // the head of the subsequent alternative, below.
1270 if (op
.m_op
== OpBodyAlternativeNext
) {
1271 // This is the reentry point for the Next alternative. We expect any code
1272 // that jumps here to do so with the input position matching that of the
1273 // PRIOR alternative, and we will only check input availability if we
1274 // need to progress it forwards.
1275 op
.m_reentry
= label();
1276 if (alternative
->m_minimumSize
> priorAlternative
->m_minimumSize
) {
1277 add32(Imm32(alternative
->m_minimumSize
- priorAlternative
->m_minimumSize
), index
);
1278 op
.m_jumps
.append(jumpIfNoAvailableInput());
1279 } else if (priorAlternative
->m_minimumSize
> alternative
->m_minimumSize
)
1280 sub32(Imm32(priorAlternative
->m_minimumSize
- alternative
->m_minimumSize
), index
);
1281 } else if (op
.m_nextOp
== notFound
) {
1282 // This is the reentry point for the End of 'once through' alternatives,
1283 // jumped to when the last alternative fails to match.
1284 op
.m_reentry
= label();
1285 sub32(Imm32(priorAlternative
->m_minimumSize
), index
);
1288 if (op
.m_op
== OpBodyAlternativeNext
)
1289 m_checked
+= alternative
->m_minimumSize
;
1290 m_checked
-= priorAlternative
->m_minimumSize
;
1294 // OpSimpleNestedAlternativeBegin/Next/End
1295 // OpNestedAlternativeBegin/Next/End
1297 // These nodes are used to handle sets of alternatives that are nested within
1298 // subpatterns and parenthetical assertions. The 'simple' forms are used where
1299 // we do not need to be able to backtrack back into any alternative other than
1300 // the last, the normal forms allow backtracking into any alternative.
1302 // Each Begin/Next node is responsible for planting an input check to ensure
1303 // sufficient input is available on entry. Next nodes additionally need to
1304 // jump to the end - Next nodes use the End node's m_jumps list to hold this
1307 // In the non-simple forms, successful alternative matches must store a
1308 // 'return address' using a DataLabelPtr, used to store the address to jump
1309 // to when backtracking, to get to the code for the appropriate alternative.
1310 case OpSimpleNestedAlternativeBegin
:
1311 case OpNestedAlternativeBegin
: {
1312 PatternTerm
* term
= op
.m_term
;
1313 PatternAlternative
* alternative
= op
.m_alternative
;
1314 PatternDisjunction
* disjunction
= term
->parentheses
.disjunction
;
1316 // Calculate how much input we need to check for, and if non-zero check.
1317 op
.m_checkAdjust
= alternative
->m_minimumSize
;
1318 if ((term
->quantityType
== QuantifierFixedCount
) && (term
->type
!= PatternTerm::TypeParentheticalAssertion
))
1319 op
.m_checkAdjust
-= disjunction
->m_minimumSize
;
1320 if (op
.m_checkAdjust
)
1321 op
.m_jumps
.append(jumpIfNoAvailableInput(op
.m_checkAdjust
));
1323 m_checked
+= op
.m_checkAdjust
;
1326 case OpSimpleNestedAlternativeNext
:
1327 case OpNestedAlternativeNext
: {
1328 PatternTerm
* term
= op
.m_term
;
1329 PatternAlternative
* alternative
= op
.m_alternative
;
1330 PatternDisjunction
* disjunction
= term
->parentheses
.disjunction
;
1332 // In the non-simple case, store a 'return address' so we can backtrack correctly.
1333 if (op
.m_op
== OpNestedAlternativeNext
) {
1334 unsigned parenthesesFrameLocation
= term
->frameLocation
;
1335 unsigned alternativeFrameLocation
= parenthesesFrameLocation
;
1336 if (term
->quantityType
!= QuantifierFixedCount
)
1337 alternativeFrameLocation
+= YarrStackSpaceForBackTrackInfoParenthesesOnce
;
1338 op
.m_returnAddress
= storeToFrameWithPatch(alternativeFrameLocation
);
1341 // If we reach here then the last alternative has matched - jump to the
1342 // End node, to skip over any further alternatives.
1344 // FIXME: this is logically O(N^2) (though N can be expected to be very
1345 // small). We could avoid this either by adding an extra jump to the JIT
1346 // data structures, or by making backtracking code that jumps to Next
1347 // alternatives are responsible for checking that input is available (if
1348 // we didn't need to plant the input checks, then m_jumps would be free).
1349 YarrOp
* endOp
= &m_ops
[op
.m_nextOp
];
1350 while (endOp
->m_nextOp
!= notFound
) {
1351 ASSERT(endOp
->m_op
== OpSimpleNestedAlternativeNext
|| endOp
->m_op
== OpNestedAlternativeNext
);
1352 endOp
= &m_ops
[endOp
->m_nextOp
];
1354 ASSERT(endOp
->m_op
== OpSimpleNestedAlternativeEnd
|| endOp
->m_op
== OpNestedAlternativeEnd
);
1355 endOp
->m_jumps
.append(jump());
1357 // This is the entry point for the next alternative.
1358 op
.m_reentry
= label();
1360 // Calculate how much input we need to check for, and if non-zero check.
1361 op
.m_checkAdjust
= alternative
->m_minimumSize
;
1362 if ((term
->quantityType
== QuantifierFixedCount
) && (term
->type
!= PatternTerm::TypeParentheticalAssertion
))
1363 op
.m_checkAdjust
-= disjunction
->m_minimumSize
;
1364 if (op
.m_checkAdjust
)
1365 op
.m_jumps
.append(jumpIfNoAvailableInput(op
.m_checkAdjust
));
1367 YarrOp
& lastOp
= m_ops
[op
.m_previousOp
];
1368 m_checked
-= lastOp
.m_checkAdjust
;
1369 m_checked
+= op
.m_checkAdjust
;
1372 case OpSimpleNestedAlternativeEnd
:
1373 case OpNestedAlternativeEnd
: {
1374 PatternTerm
* term
= op
.m_term
;
1376 // In the non-simple case, store a 'return address' so we can backtrack correctly.
1377 if (op
.m_op
== OpNestedAlternativeEnd
) {
1378 unsigned parenthesesFrameLocation
= term
->frameLocation
;
1379 unsigned alternativeFrameLocation
= parenthesesFrameLocation
;
1380 if (term
->quantityType
!= QuantifierFixedCount
)
1381 alternativeFrameLocation
+= YarrStackSpaceForBackTrackInfoParenthesesOnce
;
1382 op
.m_returnAddress
= storeToFrameWithPatch(alternativeFrameLocation
);
1385 // If this set of alternatives contains more than one alternative,
1386 // then the Next nodes will have planted jumps to the End, and added
1387 // them to this node's m_jumps list.
1388 op
.m_jumps
.link(this);
1391 YarrOp
& lastOp
= m_ops
[op
.m_previousOp
];
1392 m_checked
-= lastOp
.m_checkAdjust
;
1396 // OpParenthesesSubpatternOnceBegin/End
1398 // These nodes support (optionally) capturing subpatterns, that have a
1399 // quantity count of 1 (this covers fixed once, and ?/?? quantifiers).
1400 case OpParenthesesSubpatternOnceBegin
: {
1401 PatternTerm
* term
= op
.m_term
;
1402 unsigned parenthesesFrameLocation
= term
->frameLocation
;
1403 const RegisterID indexTemporary
= regT0
;
1404 ASSERT(term
->quantityCount
== 1);
1406 // Upon entry to a Greedy quantified set of parenthese store the index.
1407 // We'll use this for two purposes:
1408 // - To indicate which iteration we are on of matching the remainder of
1409 // the expression after the parentheses - the first, including the
1410 // match within the parentheses, or the second having skipped over them.
1411 // - To check for empty matches, which must be rejected.
1413 // At the head of a NonGreedy set of parentheses we'll immediately set the
1414 // value on the stack to -1 (indicating a match skipping the subpattern),
1415 // and plant a jump to the end. We'll also plant a label to backtrack to
1416 // to reenter the subpattern later, with a store to set up index on the
1417 // second iteration.
1419 // FIXME: for capturing parens, could use the index in the capture array?
1420 if (term
->quantityType
== QuantifierGreedy
)
1421 storeToFrame(index
, parenthesesFrameLocation
);
1422 else if (term
->quantityType
== QuantifierNonGreedy
) {
1423 storeToFrame(TrustedImm32(-1), parenthesesFrameLocation
);
1424 op
.m_jumps
.append(jump());
1425 op
.m_reentry
= label();
1426 storeToFrame(index
, parenthesesFrameLocation
);
1429 // If the parenthese are capturing, store the starting index value to the
1430 // captures array, offsetting as necessary.
1432 // FIXME: could avoid offsetting this value in JIT code, apply
1433 // offsets only afterwards, at the point the results array is
1435 if (term
->capture()) {
1436 int offsetId
= term
->parentheses
.subpatternId
<< 1;
1437 int inputOffset
= term
->inputPosition
- m_checked
;
1438 if (term
->quantityType
== QuantifierFixedCount
)
1439 inputOffset
-= term
->parentheses
.disjunction
->m_minimumSize
;
1441 move(index
, indexTemporary
);
1442 add32(Imm32(inputOffset
), indexTemporary
);
1443 store32(indexTemporary
, Address(output
, offsetId
* sizeof(int)));
1445 store32(index
, Address(output
, offsetId
* sizeof(int)));
1449 case OpParenthesesSubpatternOnceEnd
: {
1450 PatternTerm
* term
= op
.m_term
;
1451 unsigned parenthesesFrameLocation
= term
->frameLocation
;
1452 const RegisterID indexTemporary
= regT0
;
1453 ASSERT(term
->quantityCount
== 1);
1455 // For Greedy/NonGreedy quantified parentheses, we must reject zero length
1456 // matches. If the minimum size is known to be non-zero we need not check.
1457 if (term
->quantityType
!= QuantifierFixedCount
&& !term
->parentheses
.disjunction
->m_minimumSize
)
1458 op
.m_jumps
.append(branch32(Equal
, index
, Address(stackPointerRegister
, parenthesesFrameLocation
* sizeof(void*))));
1460 // If the parenthese are capturing, store the ending index value to the
1461 // captures array, offsetting as necessary.
1463 // FIXME: could avoid offsetting this value in JIT code, apply
1464 // offsets only afterwards, at the point the results array is
1466 if (term
->capture()) {
1467 int offsetId
= (term
->parentheses
.subpatternId
<< 1) + 1;
1468 int inputOffset
= term
->inputPosition
- m_checked
;
1470 move(index
, indexTemporary
);
1471 add32(Imm32(inputOffset
), indexTemporary
);
1472 store32(indexTemporary
, Address(output
, offsetId
* sizeof(int)));
1474 store32(index
, Address(output
, offsetId
* sizeof(int)));
1477 // If the parentheses are quantified Greedy then add a label to jump back
1478 // to if get a failed match from after the parentheses. For NonGreedy
1479 // parentheses, link the jump from before the subpattern to here.
1480 if (term
->quantityType
== QuantifierGreedy
)
1481 op
.m_reentry
= label();
1482 else if (term
->quantityType
== QuantifierNonGreedy
) {
1483 YarrOp
& beginOp
= m_ops
[op
.m_previousOp
];
1484 beginOp
.m_jumps
.link(this);
1489 // OpParenthesesSubpatternTerminalBegin/End
1490 case OpParenthesesSubpatternTerminalBegin
: {
1491 PatternTerm
* term
= op
.m_term
;
1492 ASSERT(term
->quantityType
== QuantifierGreedy
);
1493 ASSERT(term
->quantityCount
== quantifyInfinite
);
1494 ASSERT(!term
->capture());
1496 // Upon entry set a label to loop back to.
1497 op
.m_reentry
= label();
1499 // Store the start index of the current match; we need to reject zero
1501 storeToFrame(index
, term
->frameLocation
);
1504 case OpParenthesesSubpatternTerminalEnd
: {
1505 PatternTerm
* term
= op
.m_term
;
1507 // Check for zero length matches - if the match is non-zero, then we
1508 // can accept it & loop back up to the head of the subpattern.
1509 YarrOp
& beginOp
= m_ops
[op
.m_previousOp
];
1510 branch32(NotEqual
, index
, Address(stackPointerRegister
, term
->frameLocation
* sizeof(void*)), beginOp
.m_reentry
);
1512 // Reject the match - backtrack back into the subpattern.
1513 op
.m_jumps
.append(jump());
1515 // This is the entry point to jump to when we stop matching - we will
1516 // do so once the subpattern cannot match any more.
1517 op
.m_reentry
= label();
1521 // OpParentheticalAssertionBegin/End
1522 case OpParentheticalAssertionBegin
: {
1523 PatternTerm
* term
= op
.m_term
;
1525 // Store the current index - assertions should not update index, so
1526 // we will need to restore it upon a successful match.
1527 unsigned parenthesesFrameLocation
= term
->frameLocation
;
1528 storeToFrame(index
, parenthesesFrameLocation
);
1531 op
.m_checkAdjust
= m_checked
- term
->inputPosition
;
1532 if (op
.m_checkAdjust
)
1533 sub32(Imm32(op
.m_checkAdjust
), index
);
1535 m_checked
-= op
.m_checkAdjust
;
1538 case OpParentheticalAssertionEnd
: {
1539 PatternTerm
* term
= op
.m_term
;
1541 // Restore the input index value.
1542 unsigned parenthesesFrameLocation
= term
->frameLocation
;
1543 loadFromFrame(parenthesesFrameLocation
, index
);
1545 // If inverted, a successful match of the assertion must be treated
1546 // as a failure, so jump to backtracking.
1547 if (term
->invert()) {
1548 op
.m_jumps
.append(jump());
1549 op
.m_reentry
= label();
1552 YarrOp
& lastOp
= m_ops
[op
.m_previousOp
];
1553 m_checked
+= lastOp
.m_checkAdjust
;
1558 if (m_pattern
.m_body
->m_callFrameSize
)
1559 addPtr(Imm32(m_pattern
.m_body
->m_callFrameSize
* sizeof(void*)), stackPointerRegister
);
1560 move(TrustedImm32(-1), returnRegister
);
1566 } while (opIndex
< m_ops
.size());
1571 // Backwards generate the backtracking code.
1572 size_t opIndex
= m_ops
.size();
1577 YarrOp
& op
= m_ops
[opIndex
];
1581 backtrackTerm(opIndex
);
1584 // OpBodyAlternativeBegin/Next/End
1586 // For each Begin/Next node representing an alternative, we need to decide what to do
1587 // in two circumstances:
1588 // - If we backtrack back into this node, from within the alternative.
1589 // - If the input check at the head of the alternative fails (if this exists).
1591 // We treat these two cases differently since in the former case we have slightly
1592 // more information - since we are backtracking out of a prior alternative we know
1593 // that at least enough input was available to run it. For example, given the regular
1594 // expression /a|b/, if we backtrack out of the first alternative (a failed pattern
1595 // character match of 'a'), then we need not perform an additional input availability
1596 // check before running the second alternative.
1598 // Backtracking required differs for the last alternative, which in the case of the
1599 // repeating set of alternatives must loop. The code generated for the last alternative
1600 // will also be used to handle all input check failures from any prior alternatives -
1601 // these require similar functionality, in seeking the next available alternative for
1602 // which there is sufficient input.
1604 // Since backtracking of all other alternatives simply requires us to link backtracks
1605 // to the reentry point for the subsequent alternative, we will only be generating any
1606 // code when backtracking the last alternative.
1607 case OpBodyAlternativeBegin
:
1608 case OpBodyAlternativeNext
: {
1609 PatternAlternative
* alternative
= op
.m_alternative
;
1611 if (op
.m_op
== OpBodyAlternativeNext
) {
1612 PatternAlternative
* priorAlternative
= m_ops
[op
.m_previousOp
].m_alternative
;
1613 m_checked
+= priorAlternative
->m_minimumSize
;
1615 m_checked
-= alternative
->m_minimumSize
;
1617 // Is this the last alternative? If not, then if we backtrack to this point we just
1618 // need to jump to try to match the next alternative.
1619 if (m_ops
[op
.m_nextOp
].m_op
!= OpBodyAlternativeEnd
) {
1620 m_backtrackingState
.linkTo(m_ops
[op
.m_nextOp
].m_reentry
, this);
1623 YarrOp
& endOp
= m_ops
[op
.m_nextOp
];
1625 YarrOp
* beginOp
= &op
;
1626 while (beginOp
->m_op
!= OpBodyAlternativeBegin
) {
1627 ASSERT(beginOp
->m_op
== OpBodyAlternativeNext
);
1628 beginOp
= &m_ops
[beginOp
->m_previousOp
];
1631 bool onceThrough
= endOp
.m_nextOp
== notFound
;
1633 // First, generate code to handle cases where we backtrack out of an attempted match
1634 // of the last alternative. If this is a 'once through' set of alternatives then we
1635 // have nothing to do - link this straight through to the End.
1637 m_backtrackingState
.linkTo(endOp
.m_reentry
, this);
1639 // If we don't need to move the input position, and the pattern has a fixed size
1640 // (in which case we omit the store of the start index until the pattern has matched)
1641 // then we can just link the backtrack out of the last alternative straight to the
1642 // head of the first alternative.
1643 if (m_pattern
.m_body
->m_hasFixedSize
1644 && (alternative
->m_minimumSize
> beginOp
->m_alternative
->m_minimumSize
)
1645 && (alternative
->m_minimumSize
- beginOp
->m_alternative
->m_minimumSize
== 1))
1646 m_backtrackingState
.linkTo(beginOp
->m_reentry
, this);
1648 // We need to generate a trampoline of code to execute before looping back
1649 // around to the first alternative.
1650 m_backtrackingState
.link(this);
1652 // If the pattern size is not fixed, then store the start index, for use if we match.
1653 if (!m_pattern
.m_body
->m_hasFixedSize
) {
1654 if (alternative
->m_minimumSize
== 1)
1655 store32(index
, Address(output
));
1658 if (alternative
->m_minimumSize
)
1659 sub32(Imm32(alternative
->m_minimumSize
- 1), regT0
);
1661 add32(Imm32(1), regT0
);
1662 store32(regT0
, Address(output
));
1666 // Generate code to loop. Check whether the last alternative is longer than the
1667 // first (e.g. /a|xy/ or /a|xyz/).
1668 if (alternative
->m_minimumSize
> beginOp
->m_alternative
->m_minimumSize
) {
1669 // We want to loop, and increment input position. If the delta is 1, it is
1670 // already correctly incremented, if more than one then decrement as appropriate.
1671 unsigned delta
= alternative
->m_minimumSize
- beginOp
->m_alternative
->m_minimumSize
;
1674 sub32(Imm32(delta
- 1), index
);
1675 jump(beginOp
->m_reentry
);
1677 // If the first alternative has minimum size 0xFFFFFFFFu, then there cannot
1678 // be sufficient input available to handle this, so just fall through.
1679 unsigned delta
= beginOp
->m_alternative
->m_minimumSize
- alternative
->m_minimumSize
;
1680 if (delta
!= 0xFFFFFFFFu
) {
1681 // We need to check input because we are incrementing the input.
1682 add32(Imm32(delta
+ 1), index
);
1683 checkInput().linkTo(beginOp
->m_reentry
, this);
1689 // We can reach this point in the code in two ways:
1690 // - Fallthrough from the code above (a repeating alternative backtracked out of its
1691 // last alternative, and did not have sufficent input to run the first).
1692 // - We will loop back up to the following label when a repeating alternative loops,
1693 // following a failed input check.
1695 // Either way, we have just failed the input check for the first alternative.
1696 Label
firstInputCheckFailed(this);
1698 // Generate code to handle input check failures from alternatives except the last.
1699 // prevOp is the alternative we're handling a bail out from (initially Begin), and
1700 // nextOp is the alternative we will be attempting to reenter into.
1702 // We will link input check failures from the forwards matching path back to the code
1703 // that can handle them.
1704 YarrOp
* prevOp
= beginOp
;
1705 YarrOp
* nextOp
= &m_ops
[beginOp
->m_nextOp
];
1706 while (nextOp
->m_op
!= OpBodyAlternativeEnd
) {
1707 prevOp
->m_jumps
.link(this);
1709 // We only get here if an input check fails, it is only worth checking again
1710 // if the next alternative has a minimum size less than the last.
1711 if (prevOp
->m_alternative
->m_minimumSize
> nextOp
->m_alternative
->m_minimumSize
) {
1712 // FIXME: if we added an extra label to YarrOp, we could avoid needing to
1713 // subtract delta back out, and reduce this code. Should performance test
1714 // the benefit of this.
1715 unsigned delta
= prevOp
->m_alternative
->m_minimumSize
- nextOp
->m_alternative
->m_minimumSize
;
1716 sub32(Imm32(delta
), index
);
1717 Jump fail
= jumpIfNoAvailableInput();
1718 add32(Imm32(delta
), index
);
1719 jump(nextOp
->m_reentry
);
1721 } else if (prevOp
->m_alternative
->m_minimumSize
< nextOp
->m_alternative
->m_minimumSize
)
1722 add32(Imm32(nextOp
->m_alternative
->m_minimumSize
- prevOp
->m_alternative
->m_minimumSize
), index
);
1724 nextOp
= &m_ops
[nextOp
->m_nextOp
];
1727 // We fall through to here if there is insufficient input to run the last alternative.
1729 // If there is insufficient input to run the last alternative, then for 'once through'
1730 // alternatives we are done - just jump back up into the forwards matching path at the End.
1732 op
.m_jumps
.linkTo(endOp
.m_reentry
, this);
1733 jump(endOp
.m_reentry
);
1737 // For repeating alternatives, link any input check failure from the last alternative to
1739 op
.m_jumps
.link(this);
1741 bool needsToUpdateMatchStart
= !m_pattern
.m_body
->m_hasFixedSize
;
1743 // Check for cases where input position is already incremented by 1 for the last
1744 // alternative (this is particularly useful where the minimum size of the body
1745 // disjunction is 0, e.g. /a*|b/).
1746 if (needsToUpdateMatchStart
&& alternative
->m_minimumSize
== 1) {
1747 // index is already incremented by 1, so just store it now!
1748 store32(index
, Address(output
));
1749 needsToUpdateMatchStart
= false;
1752 // Check whether there is sufficient input to loop. Increment the input position by
1753 // one, and check. Also add in the minimum disjunction size before checking - there
1754 // is no point in looping if we're just going to fail all the input checks around
1755 // the next iteration.
1756 ASSERT(alternative
->m_minimumSize
>= m_pattern
.m_body
->m_minimumSize
);
1757 if (alternative
->m_minimumSize
== m_pattern
.m_body
->m_minimumSize
) {
1758 // If the last alternative had the same minimum size as the disjunction,
1759 // just simply increment input pos by 1, no adjustment based on minimum size.
1760 add32(Imm32(1), index
);
1762 // If the minimum for the last alternative was one greater than that
1763 // for the disjunction, we're already progressed by 1, nothing to do!
1764 unsigned delta
= (alternative
->m_minimumSize
- m_pattern
.m_body
->m_minimumSize
) - 1;
1766 sub32(Imm32(delta
), index
);
1768 Jump matchFailed
= jumpIfNoAvailableInput();
1770 if (needsToUpdateMatchStart
) {
1771 if (!m_pattern
.m_body
->m_minimumSize
)
1772 store32(index
, Address(output
));
1775 sub32(Imm32(m_pattern
.m_body
->m_minimumSize
), regT0
);
1776 store32(regT0
, Address(output
));
1780 // Calculate how much more input the first alternative requires than the minimum
1781 // for the body as a whole. If no more is needed then we dont need an additional
1782 // input check here - jump straight back up to the start of the first alternative.
1783 if (beginOp
->m_alternative
->m_minimumSize
== m_pattern
.m_body
->m_minimumSize
)
1784 jump(beginOp
->m_reentry
);
1786 if (beginOp
->m_alternative
->m_minimumSize
> m_pattern
.m_body
->m_minimumSize
)
1787 add32(Imm32(beginOp
->m_alternative
->m_minimumSize
- m_pattern
.m_body
->m_minimumSize
), index
);
1789 sub32(Imm32(m_pattern
.m_body
->m_minimumSize
- beginOp
->m_alternative
->m_minimumSize
), index
);
1790 checkInput().linkTo(beginOp
->m_reentry
, this);
1791 jump(firstInputCheckFailed
);
1794 // We jump to here if we iterate to the point that there is insufficient input to
1795 // run any matches, and need to return a failure state from JIT code.
1796 matchFailed
.link(this);
1798 if (m_pattern
.m_body
->m_callFrameSize
)
1799 addPtr(Imm32(m_pattern
.m_body
->m_callFrameSize
* sizeof(void*)), stackPointerRegister
);
1800 move(TrustedImm32(-1), returnRegister
);
1804 case OpBodyAlternativeEnd
: {
1805 // We should never backtrack back into a body disjunction.
1806 ASSERT(m_backtrackingState
.isEmpty());
1808 PatternAlternative
* priorAlternative
= m_ops
[op
.m_previousOp
].m_alternative
;
1809 m_checked
+= priorAlternative
->m_minimumSize
;
1813 // OpSimpleNestedAlternativeBegin/Next/End
1814 // OpNestedAlternativeBegin/Next/End
1816 // Generate code for when we backtrack back out of an alternative into
1817 // a Begin or Next node, or when the entry input count check fails. If
1818 // there are more alternatives we need to jump to the next alternative,
1819 // if not we backtrack back out of the current set of parentheses.
1821 // In the case of non-simple nested assertions we need to also link the
1822 // 'return address' appropriately to backtrack back out into the correct
1824 case OpSimpleNestedAlternativeBegin
:
1825 case OpSimpleNestedAlternativeNext
:
1826 case OpNestedAlternativeBegin
:
1827 case OpNestedAlternativeNext
: {
1828 YarrOp
& nextOp
= m_ops
[op
.m_nextOp
];
1829 bool isBegin
= op
.m_previousOp
== notFound
;
1830 bool isLastAlternative
= nextOp
.m_nextOp
== notFound
;
1831 ASSERT(isBegin
== (op
.m_op
== OpSimpleNestedAlternativeBegin
|| op
.m_op
== OpNestedAlternativeBegin
));
1832 ASSERT(isLastAlternative
== (nextOp
.m_op
== OpSimpleNestedAlternativeEnd
|| nextOp
.m_op
== OpNestedAlternativeEnd
));
1834 // Treat an input check failure the same as a failed match.
1835 m_backtrackingState
.append(op
.m_jumps
);
1837 // Set the backtracks to jump to the appropriate place. We may need
1838 // to link the backtracks in one of three different way depending on
1839 // the type of alternative we are dealing with:
1840 // - A single alternative, with no simplings.
1841 // - The last alternative of a set of two or more.
1842 // - An alternative other than the last of a set of two or more.
1844 // In the case of a single alternative on its own, we don't need to
1845 // jump anywhere - if the alternative fails to match we can just
1846 // continue to backtrack out of the parentheses without jumping.
1848 // In the case of the last alternative in a set of more than one, we
1849 // need to jump to return back out to the beginning. We'll do so by
1850 // adding a jump to the End node's m_jumps list, and linking this
1851 // when we come to generate the Begin node. For alternatives other
1852 // than the last, we need to jump to the next alternative.
1854 // If the alternative had adjusted the input position we must link
1855 // backtracking to here, correct, and then jump on. If not we can
1856 // link the backtracks directly to their destination.
1857 if (op
.m_checkAdjust
) {
1858 // Handle the cases where we need to link the backtracks here.
1859 m_backtrackingState
.link(this);
1860 sub32(Imm32(op
.m_checkAdjust
), index
);
1861 if (!isLastAlternative
) {
1862 // An alternative that is not the last should jump to its successor.
1863 jump(nextOp
.m_reentry
);
1864 } else if (!isBegin
) {
1865 // The last of more than one alternatives must jump back to the begnning.
1866 nextOp
.m_jumps
.append(jump());
1868 // A single alternative on its own can fall through.
1869 m_backtrackingState
.fallthrough();
1872 // Handle the cases where we can link the backtracks directly to their destinations.
1873 if (!isLastAlternative
) {
1874 // An alternative that is not the last should jump to its successor.
1875 m_backtrackingState
.linkTo(nextOp
.m_reentry
, this);
1876 } else if (!isBegin
) {
1877 // The last of more than one alternatives must jump back to the begnning.
1878 m_backtrackingState
.takeBacktracksToJumpList(nextOp
.m_jumps
, this);
1880 // In the case of a single alternative on its own do nothing - it can fall through.
1883 // At this point we've handled the backtracking back into this node.
1884 // Now link any backtracks that need to jump to here.
1886 // For non-simple alternatives, link the alternative's 'return address'
1887 // so that we backtrack back out into the previous alternative.
1888 if (op
.m_op
== OpNestedAlternativeNext
)
1889 m_backtrackingState
.append(op
.m_returnAddress
);
1891 // If there is more than one alternative, then the last alternative will
1892 // have planted a jump to be linked to the end. This jump was added to the
1893 // End node's m_jumps list. If we are back at the beginning, link it here.
1895 YarrOp
* endOp
= &m_ops
[op
.m_nextOp
];
1896 while (endOp
->m_nextOp
!= notFound
) {
1897 ASSERT(endOp
->m_op
== OpSimpleNestedAlternativeNext
|| endOp
->m_op
== OpNestedAlternativeNext
);
1898 endOp
= &m_ops
[endOp
->m_nextOp
];
1900 ASSERT(endOp
->m_op
== OpSimpleNestedAlternativeEnd
|| endOp
->m_op
== OpNestedAlternativeEnd
);
1901 m_backtrackingState
.append(endOp
->m_jumps
);
1905 YarrOp
& lastOp
= m_ops
[op
.m_previousOp
];
1906 m_checked
+= lastOp
.m_checkAdjust
;
1908 m_checked
-= op
.m_checkAdjust
;
1911 case OpSimpleNestedAlternativeEnd
:
1912 case OpNestedAlternativeEnd
: {
1913 PatternTerm
* term
= op
.m_term
;
1915 // If we backtrack into the end of a simple subpattern do nothing;
1916 // just continue through into the last alternative. If we backtrack
1917 // into the end of a non-simple set of alterntives we need to jump
1918 // to the backtracking return address set up during generation.
1919 if (op
.m_op
== OpNestedAlternativeEnd
) {
1920 m_backtrackingState
.link(this);
1922 // Plant a jump to the return address.
1923 unsigned parenthesesFrameLocation
= term
->frameLocation
;
1924 unsigned alternativeFrameLocation
= parenthesesFrameLocation
;
1925 if (term
->quantityType
!= QuantifierFixedCount
)
1926 alternativeFrameLocation
+= YarrStackSpaceForBackTrackInfoParenthesesOnce
;
1927 loadFromFrameAndJump(alternativeFrameLocation
);
1929 // Link the DataLabelPtr associated with the end of the last
1930 // alternative to this point.
1931 m_backtrackingState
.append(op
.m_returnAddress
);
1934 YarrOp
& lastOp
= m_ops
[op
.m_previousOp
];
1935 m_checked
+= lastOp
.m_checkAdjust
;
1939 // OpParenthesesSubpatternOnceBegin/End
1941 // When we are backtracking back out of a capturing subpattern we need
1942 // to clear the start index in the matches output array, to record that
1943 // this subpattern has not been captured.
1945 // When backtracking back out of a Greedy quantified subpattern we need
1946 // to catch this, and try running the remainder of the alternative after
1947 // the subpattern again, skipping the parentheses.
1949 // Upon backtracking back into a quantified set of parentheses we need to
1950 // check whether we were currently skipping the subpattern. If not, we
1951 // can backtrack into them, if we were we need to either backtrack back
1952 // out of the start of the parentheses, or jump back to the forwards
1953 // matching start, depending of whether the match is Greedy or NonGreedy.
1954 case OpParenthesesSubpatternOnceBegin
: {
1955 PatternTerm
* term
= op
.m_term
;
1956 ASSERT(term
->quantityCount
== 1);
1958 // We only need to backtrack to thispoint if capturing or greedy.
1959 if (term
->capture() || term
->quantityType
== QuantifierGreedy
) {
1960 m_backtrackingState
.link(this);
1962 // If capturing, clear the capture (we only need to reset start).
1963 if (term
->capture())
1964 store32(TrustedImm32(-1), Address(output
, (term
->parentheses
.subpatternId
<< 1) * sizeof(int)));
1966 // If Greedy, jump to the end.
1967 if (term
->quantityType
== QuantifierGreedy
) {
1968 // Clear the flag in the stackframe indicating we ran through the subpattern.
1969 unsigned parenthesesFrameLocation
= term
->frameLocation
;
1970 storeToFrame(TrustedImm32(-1), parenthesesFrameLocation
);
1971 // Jump to after the parentheses, skipping the subpattern.
1972 jump(m_ops
[op
.m_nextOp
].m_reentry
);
1973 // A backtrack from after the parentheses, when skipping the subpattern,
1974 // will jump back to here.
1975 op
.m_jumps
.link(this);
1978 m_backtrackingState
.fallthrough();
1982 case OpParenthesesSubpatternOnceEnd
: {
1983 PatternTerm
* term
= op
.m_term
;
1985 if (term
->quantityType
!= QuantifierFixedCount
) {
1986 m_backtrackingState
.link(this);
1988 // Check whether we should backtrack back into the parentheses, or if we
1989 // are currently in a state where we had skipped over the subpattern
1990 // (in which case the flag value on the stack will be -1).
1991 unsigned parenthesesFrameLocation
= term
->frameLocation
;
1992 Jump hadSkipped
= branch32(Equal
, Address(stackPointerRegister
, parenthesesFrameLocation
* sizeof(void*)), TrustedImm32(-1));
1994 if (term
->quantityType
== QuantifierGreedy
) {
1995 // For Greedy parentheses, we skip after having already tried going
1996 // through the subpattern, so if we get here we're done.
1997 YarrOp
& beginOp
= m_ops
[op
.m_previousOp
];
1998 beginOp
.m_jumps
.append(hadSkipped
);
2000 // For NonGreedy parentheses, we try skipping the subpattern first,
2001 // so if we get here we need to try running through the subpattern
2002 // next. Jump back to the start of the parentheses in the forwards
2004 ASSERT(term
->quantityType
== QuantifierNonGreedy
);
2005 YarrOp
& beginOp
= m_ops
[op
.m_previousOp
];
2006 hadSkipped
.linkTo(beginOp
.m_reentry
, this);
2009 m_backtrackingState
.fallthrough();
2012 m_backtrackingState
.append(op
.m_jumps
);
2016 // OpParenthesesSubpatternTerminalBegin/End
2018 // Terminal subpatterns will always match - there is nothing after them to
2019 // force a backtrack, and they have a minimum count of 0, and as such will
2020 // always produce an acceptable result.
2021 case OpParenthesesSubpatternTerminalBegin
: {
2022 // We will backtrack to this point once the subpattern cannot match any
2023 // more. Since no match is accepted as a successful match (we are Greedy
2024 // quantified with a minimum of zero) jump back to the forwards matching
2026 YarrOp
& endOp
= m_ops
[op
.m_nextOp
];
2027 m_backtrackingState
.linkTo(endOp
.m_reentry
, this);
2030 case OpParenthesesSubpatternTerminalEnd
:
2031 // We should never be backtracking to here (hence the 'terminal' in the name).
2032 ASSERT(m_backtrackingState
.isEmpty());
2033 m_backtrackingState
.append(op
.m_jumps
);
2036 // OpParentheticalAssertionBegin/End
2037 case OpParentheticalAssertionBegin
: {
2038 PatternTerm
* term
= op
.m_term
;
2039 YarrOp
& endOp
= m_ops
[op
.m_nextOp
];
2041 // We need to handle the backtracks upon backtracking back out
2042 // of a parenthetical assertion if either we need to correct
2043 // the input index, or the assertion was inverted.
2044 if (op
.m_checkAdjust
|| term
->invert()) {
2045 m_backtrackingState
.link(this);
2047 if (op
.m_checkAdjust
)
2048 add32(Imm32(op
.m_checkAdjust
), index
);
2050 // In an inverted assertion failure to match the subpattern
2051 // is treated as a successful match - jump to the end of the
2052 // subpattern. We already have adjusted the input position
2053 // back to that before the assertion, which is correct.
2055 jump(endOp
.m_reentry
);
2057 m_backtrackingState
.fallthrough();
2060 // The End node's jump list will contain any backtracks into
2061 // the end of the assertion. Also, if inverted, we will have
2062 // added the failure caused by a successful match to this.
2063 m_backtrackingState
.append(endOp
.m_jumps
);
2065 m_checked
+= op
.m_checkAdjust
;
2068 case OpParentheticalAssertionEnd
: {
2069 // FIXME: We should really be clearing any nested subpattern
2070 // matches on bailing out from after the pattern. Firefox has
2071 // this bug too (presumably because they use YARR!)
2073 // Never backtrack into an assertion; later failures bail to before the begin.
2074 m_backtrackingState
.takeBacktracksToJumpList(op
.m_jumps
, this);
2076 YarrOp
& lastOp
= m_ops
[op
.m_previousOp
];
2077 m_checked
-= lastOp
.m_checkAdjust
;
2088 // Compilation methods:
2089 // ====================
2091 // opCompileParenthesesSubpattern
2092 // Emits ops for a subpattern (set of parentheses). These consist
2093 // of a set of alternatives wrapped in an outer set of nodes for
2095 // Supported types of parentheses are 'Once' (quantityCount == 1)
2096 // and 'Terminal' (non-capturing parentheses quantified as greedy
2098 // Alternatives will use the 'Simple' set of ops if either the
2099 // subpattern is terminal (in which case we will never need to
2100 // backtrack), or if the subpattern only contains one alternative.
2101 void opCompileParenthesesSubpattern(PatternTerm
* term
)
2103 YarrOpCode parenthesesBeginOpCode
;
2104 YarrOpCode parenthesesEndOpCode
;
2105 YarrOpCode alternativeBeginOpCode
= OpSimpleNestedAlternativeBegin
;
2106 YarrOpCode alternativeNextOpCode
= OpSimpleNestedAlternativeNext
;
2107 YarrOpCode alternativeEndOpCode
= OpSimpleNestedAlternativeEnd
;
2109 // We can currently only compile quantity 1 subpatterns that are
2110 // not copies. We generate a copy in the case of a range quantifier,
2111 // e.g. /(?:x){3,9}/, or /(?:x)+/ (These are effectively expanded to
2112 // /(?:x){3,3}(?:x){0,6}/ and /(?:x)(?:x)*/ repectively). The problem
2113 // comes where the subpattern is capturing, in which case we would
2114 // need to restore the capture from the first subpattern upon a
2115 // failure in the second.
2116 if (term
->quantityCount
== 1 && !term
->parentheses
.isCopy
) {
2117 // Select the 'Once' nodes.
2118 parenthesesBeginOpCode
= OpParenthesesSubpatternOnceBegin
;
2119 parenthesesEndOpCode
= OpParenthesesSubpatternOnceEnd
;
2121 // If there is more than one alternative we cannot use the 'simple' nodes.
2122 if (term
->parentheses
.disjunction
->m_alternatives
.size() != 1) {
2123 alternativeBeginOpCode
= OpNestedAlternativeBegin
;
2124 alternativeNextOpCode
= OpNestedAlternativeNext
;
2125 alternativeEndOpCode
= OpNestedAlternativeEnd
;
2127 } else if (term
->parentheses
.isTerminal
) {
2128 // Select the 'Terminal' nodes.
2129 parenthesesBeginOpCode
= OpParenthesesSubpatternTerminalBegin
;
2130 parenthesesEndOpCode
= OpParenthesesSubpatternTerminalEnd
;
2132 // This subpattern is not supported by the JIT.
2133 m_shouldFallBack
= true;
2137 size_t parenBegin
= m_ops
.size();
2138 m_ops
.append(parenthesesBeginOpCode
);
2140 m_ops
.append(alternativeBeginOpCode
);
2141 m_ops
.last().m_previousOp
= notFound
;
2142 m_ops
.last().m_term
= term
;
2143 Vector
<PatternAlternative
*>& alternatives
= term
->parentheses
.disjunction
->m_alternatives
;
2144 for (unsigned i
= 0; i
< alternatives
.size(); ++i
) {
2145 size_t lastOpIndex
= m_ops
.size() - 1;
2147 PatternAlternative
* nestedAlternative
= alternatives
[i
];
2148 opCompileAlternative(nestedAlternative
);
2150 size_t thisOpIndex
= m_ops
.size();
2151 m_ops
.append(YarrOp(alternativeNextOpCode
));
2153 YarrOp
& lastOp
= m_ops
[lastOpIndex
];
2154 YarrOp
& thisOp
= m_ops
[thisOpIndex
];
2156 lastOp
.m_alternative
= nestedAlternative
;
2157 lastOp
.m_nextOp
= thisOpIndex
;
2158 thisOp
.m_previousOp
= lastOpIndex
;
2159 thisOp
.m_term
= term
;
2161 YarrOp
& lastOp
= m_ops
.last();
2162 ASSERT(lastOp
.m_op
== alternativeNextOpCode
);
2163 lastOp
.m_op
= alternativeEndOpCode
;
2164 lastOp
.m_alternative
= 0;
2165 lastOp
.m_nextOp
= notFound
;
2167 size_t parenEnd
= m_ops
.size();
2168 m_ops
.append(parenthesesEndOpCode
);
2170 m_ops
[parenBegin
].m_term
= term
;
2171 m_ops
[parenBegin
].m_previousOp
= notFound
;
2172 m_ops
[parenBegin
].m_nextOp
= parenEnd
;
2173 m_ops
[parenEnd
].m_term
= term
;
2174 m_ops
[parenEnd
].m_previousOp
= parenBegin
;
2175 m_ops
[parenEnd
].m_nextOp
= notFound
;
2178 // opCompileParentheticalAssertion
2179 // Emits ops for a parenthetical assertion. These consist of an
2180 // OpSimpleNestedAlternativeBegin/Next/End set of nodes wrapping
2181 // the alternatives, with these wrapped by an outer pair of
2182 // OpParentheticalAssertionBegin/End nodes.
2183 // We can always use the OpSimpleNestedAlternative nodes in the
2184 // case of parenthetical assertions since these only ever match
2185 // once, and will never backtrack back into the assertion.
2186 void opCompileParentheticalAssertion(PatternTerm
* term
)
2188 size_t parenBegin
= m_ops
.size();
2189 m_ops
.append(OpParentheticalAssertionBegin
);
2191 m_ops
.append(OpSimpleNestedAlternativeBegin
);
2192 m_ops
.last().m_previousOp
= notFound
;
2193 m_ops
.last().m_term
= term
;
2194 Vector
<PatternAlternative
*>& alternatives
= term
->parentheses
.disjunction
->m_alternatives
;
2195 for (unsigned i
= 0; i
< alternatives
.size(); ++i
) {
2196 size_t lastOpIndex
= m_ops
.size() - 1;
2198 PatternAlternative
* nestedAlternative
= alternatives
[i
];
2199 opCompileAlternative(nestedAlternative
);
2201 size_t thisOpIndex
= m_ops
.size();
2202 m_ops
.append(YarrOp(OpSimpleNestedAlternativeNext
));
2204 YarrOp
& lastOp
= m_ops
[lastOpIndex
];
2205 YarrOp
& thisOp
= m_ops
[thisOpIndex
];
2207 lastOp
.m_alternative
= nestedAlternative
;
2208 lastOp
.m_nextOp
= thisOpIndex
;
2209 thisOp
.m_previousOp
= lastOpIndex
;
2210 thisOp
.m_term
= term
;
2212 YarrOp
& lastOp
= m_ops
.last();
2213 ASSERT(lastOp
.m_op
== OpSimpleNestedAlternativeNext
);
2214 lastOp
.m_op
= OpSimpleNestedAlternativeEnd
;
2215 lastOp
.m_alternative
= 0;
2216 lastOp
.m_nextOp
= notFound
;
2218 size_t parenEnd
= m_ops
.size();
2219 m_ops
.append(OpParentheticalAssertionEnd
);
2221 m_ops
[parenBegin
].m_term
= term
;
2222 m_ops
[parenBegin
].m_previousOp
= notFound
;
2223 m_ops
[parenBegin
].m_nextOp
= parenEnd
;
2224 m_ops
[parenEnd
].m_term
= term
;
2225 m_ops
[parenEnd
].m_previousOp
= parenBegin
;
2226 m_ops
[parenEnd
].m_nextOp
= notFound
;
2229 // opCompileAlternative
2230 // Called to emit nodes for all terms in an alternative.
2231 void opCompileAlternative(PatternAlternative
* alternative
)
2233 optimizeAlternative(alternative
);
2235 for (unsigned i
= 0; i
< alternative
->m_terms
.size(); ++i
) {
2236 PatternTerm
* term
= &alternative
->m_terms
[i
];
2238 switch (term
->type
) {
2239 case PatternTerm::TypeParenthesesSubpattern
:
2240 opCompileParenthesesSubpattern(term
);
2243 case PatternTerm::TypeParentheticalAssertion
:
2244 opCompileParentheticalAssertion(term
);
2254 // This method compiles the body disjunction of the regular expression.
2255 // The body consists of two sets of alternatives - zero or more 'once
2256 // through' (BOL anchored) alternatives, followed by zero or more
2257 // repeated alternatives.
2258 // For each of these two sets of alteratives, if not empty they will be
2259 // wrapped in a set of OpBodyAlternativeBegin/Next/End nodes (with the
2260 // 'begin' node referencing the first alternative, and 'next' nodes
2261 // referencing any further alternatives. The begin/next/end nodes are
2262 // linked together in a doubly linked list. In the case of repeating
2263 // alternatives, the end node is also linked back to the beginning.
2264 // If no repeating alternatives exist, then a OpMatchFailed node exists
2265 // to return the failing result.
2266 void opCompileBody(PatternDisjunction
* disjunction
)
2268 Vector
<PatternAlternative
*>& alternatives
= disjunction
->m_alternatives
;
2269 size_t currentAlternativeIndex
= 0;
2271 // Emit the 'once through' alternatives.
2272 if (alternatives
.size() && alternatives
[0]->onceThrough()) {
2273 m_ops
.append(YarrOp(OpBodyAlternativeBegin
));
2274 m_ops
.last().m_previousOp
= notFound
;
2277 size_t lastOpIndex
= m_ops
.size() - 1;
2278 PatternAlternative
* alternative
= alternatives
[currentAlternativeIndex
];
2279 opCompileAlternative(alternative
);
2281 size_t thisOpIndex
= m_ops
.size();
2282 m_ops
.append(YarrOp(OpBodyAlternativeNext
));
2284 YarrOp
& lastOp
= m_ops
[lastOpIndex
];
2285 YarrOp
& thisOp
= m_ops
[thisOpIndex
];
2287 lastOp
.m_alternative
= alternative
;
2288 lastOp
.m_nextOp
= thisOpIndex
;
2289 thisOp
.m_previousOp
= lastOpIndex
;
2291 ++currentAlternativeIndex
;
2292 } while (currentAlternativeIndex
< alternatives
.size() && alternatives
[currentAlternativeIndex
]->onceThrough());
2294 YarrOp
& lastOp
= m_ops
.last();
2296 ASSERT(lastOp
.m_op
== OpBodyAlternativeNext
);
2297 lastOp
.m_op
= OpBodyAlternativeEnd
;
2298 lastOp
.m_alternative
= 0;
2299 lastOp
.m_nextOp
= notFound
;
2302 if (currentAlternativeIndex
== alternatives
.size()) {
2303 m_ops
.append(YarrOp(OpMatchFailed
));
2307 // Emit the repeated alternatives.
2308 size_t repeatLoop
= m_ops
.size();
2309 m_ops
.append(YarrOp(OpBodyAlternativeBegin
));
2310 m_ops
.last().m_previousOp
= notFound
;
2312 size_t lastOpIndex
= m_ops
.size() - 1;
2313 PatternAlternative
* alternative
= alternatives
[currentAlternativeIndex
];
2314 ASSERT(!alternative
->onceThrough());
2315 opCompileAlternative(alternative
);
2317 size_t thisOpIndex
= m_ops
.size();
2318 m_ops
.append(YarrOp(OpBodyAlternativeNext
));
2320 YarrOp
& lastOp
= m_ops
[lastOpIndex
];
2321 YarrOp
& thisOp
= m_ops
[thisOpIndex
];
2323 lastOp
.m_alternative
= alternative
;
2324 lastOp
.m_nextOp
= thisOpIndex
;
2325 thisOp
.m_previousOp
= lastOpIndex
;
2327 ++currentAlternativeIndex
;
2328 } while (currentAlternativeIndex
< alternatives
.size());
2329 YarrOp
& lastOp
= m_ops
.last();
2330 ASSERT(lastOp
.m_op
== OpBodyAlternativeNext
);
2331 lastOp
.m_op
= OpBodyAlternativeEnd
;
2332 lastOp
.m_alternative
= 0;
2333 lastOp
.m_nextOp
= repeatLoop
;
2336 void generateEnter()
2339 push(X86Registers::ebp
);
2340 move(stackPointerRegister
, X86Registers::ebp
);
2341 push(X86Registers::ebx
);
2343 push(X86Registers::ebp
);
2344 move(stackPointerRegister
, X86Registers::ebp
);
2345 // TODO: do we need spill registers to fill the output pointer if there are no sub captures?
2346 push(X86Registers::ebx
);
2347 push(X86Registers::edi
);
2348 push(X86Registers::esi
);
2349 // load output into edi (2 = saved ebp + return address).
2351 loadPtr(Address(X86Registers::ebp
, 2 * sizeof(void*)), input
);
2352 loadPtr(Address(X86Registers::ebp
, 3 * sizeof(void*)), index
);
2353 loadPtr(Address(X86Registers::ebp
, 4 * sizeof(void*)), length
);
2354 loadPtr(Address(X86Registers::ebp
, 5 * sizeof(void*)), output
);
2356 loadPtr(Address(X86Registers::ebp
, 2 * sizeof(void*)), output
);
2359 push(ARMRegisters::r4
);
2360 push(ARMRegisters::r5
);
2361 push(ARMRegisters::r6
);
2362 #if CPU(ARM_TRADITIONAL)
2363 push(ARMRegisters::r8
); // scratch register
2365 move(ARMRegisters::r3
, output
);
2367 push(SH4Registers::r11
);
2368 push(SH4Registers::r13
);
2374 void generateReturn()
2377 pop(X86Registers::ebx
);
2378 pop(X86Registers::ebp
);
2380 pop(X86Registers::esi
);
2381 pop(X86Registers::edi
);
2382 pop(X86Registers::ebx
);
2383 pop(X86Registers::ebp
);
2385 #if CPU(ARM_TRADITIONAL)
2386 pop(ARMRegisters::r8
); // scratch register
2388 pop(ARMRegisters::r6
);
2389 pop(ARMRegisters::r5
);
2390 pop(ARMRegisters::r4
);
2392 pop(SH4Registers::r13
);
2393 pop(SH4Registers::r11
);
2401 YarrGenerator(YarrPattern
& pattern
)
2402 : m_pattern(pattern
)
2403 , m_shouldFallBack(false)
2408 void compile(JSGlobalData
* globalData
, YarrCodeBlock
& jitObject
)
2412 if (!m_pattern
.m_body
->m_hasFixedSize
)
2413 store32(index
, Address(output
));
2415 if (m_pattern
.m_body
->m_callFrameSize
)
2416 subPtr(Imm32(m_pattern
.m_body
->m_callFrameSize
* sizeof(void*)), stackPointerRegister
);
2418 // Compile the pattern to the internal 'YarrOp' representation.
2419 opCompileBody(m_pattern
.m_body
);
2421 // If we encountered anything we can't handle in the JIT code
2422 // (e.g. backreferences) then return early.
2423 if (m_shouldFallBack
) {
2424 jitObject
.setFallBack(true);
2431 // Link & finalize the code.
2432 LinkBuffer
linkBuffer(*globalData
, this, globalData
->regexAllocator
);
2433 m_backtrackingState
.linkDataLabels(linkBuffer
);
2434 jitObject
.set(linkBuffer
.finalizeCode());
2435 jitObject
.setFallBack(m_shouldFallBack
);
2439 YarrPattern
& m_pattern
;
2441 // Used to detect regular expression constructs that are not currently
2442 // supported in the JIT; fall back to the interpreter when this is detected.
2443 bool m_shouldFallBack
;
2445 // The regular expression expressed as a linear sequence of operations.
2446 Vector
<YarrOp
, 128> m_ops
;
2448 // This records the current input offset being applied due to the current
2449 // set of alternatives we are nested within. E.g. when matching the
2450 // character 'b' within the regular expression /abc/, we will know that
2451 // the minimum size for the alternative is 3, checked upon entry to the
2452 // alternative, and that 'b' is at offset 1 from the start, and as such
2453 // when matching 'b' we need to apply an offset of -2 to the load.
2455 // FIXME: This should go away. Rather than tracking this value throughout
2456 // code generation, we should gather this information up front & store it
2457 // on the YarrOp structure.
2460 // This class records state whilst generating the backtracking path of code.
2461 BacktrackingState m_backtrackingState
;
2464 void jitCompile(YarrPattern
& pattern
, JSGlobalData
* globalData
, YarrCodeBlock
& jitObject
)
2466 YarrGenerator(pattern
).compile(globalData
, jitObject
);
2469 int execute(YarrCodeBlock
& jitObject
, const UChar
* input
, unsigned start
, unsigned length
, int* output
)
2471 return jitObject
.execute(input
, start
, length
, output
);