 * Copyright (C) 2009 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 #include <wtf/ASCIICType.h>
30 #include "LinkBuffer.h"
32 #include "YarrCanonicalizeUCS2.h"
38 namespace JSC
{ namespace Yarr
{
40 template<YarrJITCompileMode compileMode
>
41 class YarrGenerator
: private MacroAssembler
{
42 friend void jitCompile(JSGlobalData
*, YarrCodeBlock
& jitObject
, const UString
& pattern
, unsigned& numSubpatterns
, const char*& error
, bool ignoreCase
, bool multiline
);
45 static const RegisterID input
= ARMRegisters::r0
;
46 static const RegisterID index
= ARMRegisters::r1
;
47 static const RegisterID length
= ARMRegisters::r2
;
48 static const RegisterID output
= ARMRegisters::r4
;
50 static const RegisterID regT0
= ARMRegisters::r5
;
51 static const RegisterID regT1
= ARMRegisters::r6
;
53 static const RegisterID returnRegister
= ARMRegisters::r0
;
54 static const RegisterID returnRegister2
= ARMRegisters::r1
;
56 static const RegisterID input
= MIPSRegisters::a0
;
57 static const RegisterID index
= MIPSRegisters::a1
;
58 static const RegisterID length
= MIPSRegisters::a2
;
59 static const RegisterID output
= MIPSRegisters::a3
;
61 static const RegisterID regT0
= MIPSRegisters::t4
;
62 static const RegisterID regT1
= MIPSRegisters::t5
;
64 static const RegisterID returnRegister
= MIPSRegisters::v0
;
65 static const RegisterID returnRegister2
= MIPSRegisters::v1
;
67 static const RegisterID input
= SH4Registers::r4
;
68 static const RegisterID index
= SH4Registers::r5
;
69 static const RegisterID length
= SH4Registers::r6
;
70 static const RegisterID output
= SH4Registers::r7
;
72 static const RegisterID regT0
= SH4Registers::r0
;
73 static const RegisterID regT1
= SH4Registers::r1
;
75 static const RegisterID returnRegister
= SH4Registers::r0
;
76 static const RegisterID returnRegister2
= SH4Registers::r1
;
78 static const RegisterID input
= X86Registers::eax
;
79 static const RegisterID index
= X86Registers::edx
;
80 static const RegisterID length
= X86Registers::ecx
;
81 static const RegisterID output
= X86Registers::edi
;
83 static const RegisterID regT0
= X86Registers::ebx
;
84 static const RegisterID regT1
= X86Registers::esi
;
86 static const RegisterID returnRegister
= X86Registers::eax
;
87 static const RegisterID returnRegister2
= X86Registers::edx
;
89 static const RegisterID input
= X86Registers::edi
;
90 static const RegisterID index
= X86Registers::esi
;
91 static const RegisterID length
= X86Registers::edx
;
92 static const RegisterID output
= X86Registers::ecx
;
94 static const RegisterID regT0
= X86Registers::eax
;
95 static const RegisterID regT1
= X86Registers::ebx
;
97 static const RegisterID returnRegister
= X86Registers::eax
;
98 static const RegisterID returnRegister2
= X86Registers::edx
;
101 void optimizeAlternative(PatternAlternative
* alternative
)
103 if (!alternative
->m_terms
.size())
106 for (unsigned i
= 0; i
< alternative
->m_terms
.size() - 1; ++i
) {
107 PatternTerm
& term
= alternative
->m_terms
[i
];
108 PatternTerm
& nextTerm
= alternative
->m_terms
[i
+ 1];
110 if ((term
.type
== PatternTerm::TypeCharacterClass
)
111 && (term
.quantityType
== QuantifierFixedCount
)
112 && (nextTerm
.type
== PatternTerm::TypePatternCharacter
)
113 && (nextTerm
.quantityType
== QuantifierFixedCount
)) {
114 PatternTerm termCopy
= term
;
115 alternative
->m_terms
[i
] = nextTerm
;
116 alternative
->m_terms
[i
+ 1] = termCopy
;
121 void matchCharacterClassRange(RegisterID character
, JumpList
& failures
, JumpList
& matchDest
, const CharacterRange
* ranges
, unsigned count
, unsigned* matchIndex
, const UChar
* matches
, unsigned matchCount
)
124 // pick which range we're going to generate
125 int which
= count
>> 1;
126 char lo
= ranges
[which
].begin
;
127 char hi
= ranges
[which
].end
;
129 // check if there are any ranges or matches below lo. If not, just jl to failure -
130 // if there is anything else to check, check that first, if it falls through jmp to failure.
131 if ((*matchIndex
< matchCount
) && (matches
[*matchIndex
] < lo
)) {
132 Jump loOrAbove
= branch32(GreaterThanOrEqual
, character
, Imm32((unsigned short)lo
));
134 // generate code for all ranges before this one
136 matchCharacterClassRange(character
, failures
, matchDest
, ranges
, which
, matchIndex
, matches
, matchCount
);
138 while ((*matchIndex
< matchCount
) && (matches
[*matchIndex
] < lo
)) {
139 matchDest
.append(branch32(Equal
, character
, Imm32((unsigned short)matches
[*matchIndex
])));
142 failures
.append(jump());
144 loOrAbove
.link(this);
146 Jump loOrAbove
= branch32(GreaterThanOrEqual
, character
, Imm32((unsigned short)lo
));
148 matchCharacterClassRange(character
, failures
, matchDest
, ranges
, which
, matchIndex
, matches
, matchCount
);
149 failures
.append(jump());
151 loOrAbove
.link(this);
153 failures
.append(branch32(LessThan
, character
, Imm32((unsigned short)lo
)));
155 while ((*matchIndex
< matchCount
) && (matches
[*matchIndex
] <= hi
))
158 matchDest
.append(branch32(LessThanOrEqual
, character
, Imm32((unsigned short)hi
)));
159 // fall through to here, the value is above hi.
161 // shuffle along & loop around if there are any more matches to handle.
162 unsigned next
= which
+ 1;
168 void matchCharacterClass(RegisterID character
, JumpList
& matchDest
, const CharacterClass
* charClass
)
170 if (charClass
->m_table
) {
171 ExtendedAddress
tableEntry(character
, reinterpret_cast<intptr_t>(charClass
->m_table
->m_table
));
172 matchDest
.append(branchTest8(charClass
->m_table
->m_inverted
? Zero
: NonZero
, tableEntry
));
176 if (charClass
->m_matchesUnicode
.size() || charClass
->m_rangesUnicode
.size()) {
177 Jump isAscii
= branch32(LessThanOrEqual
, character
, TrustedImm32(0x7f));
179 if (charClass
->m_matchesUnicode
.size()) {
180 for (unsigned i
= 0; i
< charClass
->m_matchesUnicode
.size(); ++i
) {
181 UChar ch
= charClass
->m_matchesUnicode
[i
];
182 matchDest
.append(branch32(Equal
, character
, Imm32(ch
)));
186 if (charClass
->m_rangesUnicode
.size()) {
187 for (unsigned i
= 0; i
< charClass
->m_rangesUnicode
.size(); ++i
) {
188 UChar lo
= charClass
->m_rangesUnicode
[i
].begin
;
189 UChar hi
= charClass
->m_rangesUnicode
[i
].end
;
191 Jump below
= branch32(LessThan
, character
, Imm32(lo
));
192 matchDest
.append(branch32(LessThanOrEqual
, character
, Imm32(hi
)));
197 unicodeFail
= jump();
201 if (charClass
->m_ranges
.size()) {
202 unsigned matchIndex
= 0;
204 matchCharacterClassRange(character
, failures
, matchDest
, charClass
->m_ranges
.begin(), charClass
->m_ranges
.size(), &matchIndex
, charClass
->m_matches
.begin(), charClass
->m_matches
.size());
205 while (matchIndex
< charClass
->m_matches
.size())
206 matchDest
.append(branch32(Equal
, character
, Imm32((unsigned short)charClass
->m_matches
[matchIndex
++])));
209 } else if (charClass
->m_matches
.size()) {
210 // optimization: gather 'a','A' etc back together, can mask & test once.
211 Vector
<char> matchesAZaz
;
213 for (unsigned i
= 0; i
< charClass
->m_matches
.size(); ++i
) {
214 char ch
= charClass
->m_matches
[i
];
215 if (m_pattern
.m_ignoreCase
) {
216 if (isASCIILower(ch
)) {
217 matchesAZaz
.append(ch
);
220 if (isASCIIUpper(ch
))
223 matchDest
.append(branch32(Equal
, character
, Imm32((unsigned short)ch
)));
226 if (unsigned countAZaz
= matchesAZaz
.size()) {
227 or32(TrustedImm32(32), character
);
228 for (unsigned i
= 0; i
< countAZaz
; ++i
)
229 matchDest
.append(branch32(Equal
, character
, TrustedImm32(matchesAZaz
[i
])));
233 if (charClass
->m_matchesUnicode
.size() || charClass
->m_rangesUnicode
.size())
234 unicodeFail
.link(this);
237 // Jumps if input not available; will have (incorrectly) incremented already!
238 Jump
jumpIfNoAvailableInput(unsigned countToCheck
= 0)
241 add32(Imm32(countToCheck
), index
);
242 return branch32(Above
, index
, length
);
245 Jump
jumpIfAvailableInput(unsigned countToCheck
)
247 add32(Imm32(countToCheck
), index
);
248 return branch32(BelowOrEqual
, index
, length
);
253 return branch32(BelowOrEqual
, index
, length
);
258 return branch32(Equal
, index
, length
);
261 Jump
notAtEndOfInput()
263 return branch32(NotEqual
, index
, length
);
266 Jump
jumpIfCharNotEquals(UChar ch
, int inputPosition
, RegisterID character
)
268 readCharacter(inputPosition
, character
);
270 // For case-insensitive compares, non-ascii characters that have different
271 // upper & lower case representations are converted to a character class.
272 ASSERT(!m_pattern
.m_ignoreCase
|| isASCIIAlpha(ch
) || isCanonicallyUnique(ch
));
273 if (m_pattern
.m_ignoreCase
&& isASCIIAlpha(ch
)) {
274 or32(TrustedImm32(0x20), character
);
278 return branch32(NotEqual
, character
, Imm32(ch
));
281 void readCharacter(int inputPosition
, RegisterID reg
)
283 if (m_charSize
== Char8
)
284 load8(BaseIndex(input
, index
, TimesOne
, inputPosition
* sizeof(char)), reg
);
286 load16(BaseIndex(input
, index
, TimesTwo
, inputPosition
* sizeof(UChar
)), reg
);
289 void storeToFrame(RegisterID reg
, unsigned frameLocation
)
291 poke(reg
, frameLocation
);
294 void storeToFrame(TrustedImm32 imm
, unsigned frameLocation
)
296 poke(imm
, frameLocation
);
299 DataLabelPtr
storeToFrameWithPatch(unsigned frameLocation
)
301 return storePtrWithPatch(TrustedImmPtr(0), Address(stackPointerRegister
, frameLocation
* sizeof(void*)));
304 void loadFromFrame(unsigned frameLocation
, RegisterID reg
)
306 peek(reg
, frameLocation
);
309 void loadFromFrameAndJump(unsigned frameLocation
)
311 jump(Address(stackPointerRegister
, frameLocation
* sizeof(void*)));
316 unsigned callFrameSize
= m_pattern
.m_body
->m_callFrameSize
;
318 subPtr(Imm32(callFrameSize
* sizeof(void*)), stackPointerRegister
);
320 void removeCallFrame()
322 unsigned callFrameSize
= m_pattern
.m_body
->m_callFrameSize
;
324 addPtr(Imm32(callFrameSize
* sizeof(void*)), stackPointerRegister
);
327 // Used to record subpatterns, should only be called if compileMode is IncludeSubpatterns.
328 void setSubpatternStart(RegisterID reg
, unsigned subpattern
)
331 // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-(
332 store32(reg
, Address(output
, (subpattern
<< 1) * sizeof(int)));
334 void setSubpatternEnd(RegisterID reg
, unsigned subpattern
)
337 // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-(
338 store32(reg
, Address(output
, ((subpattern
<< 1) + 1) * sizeof(int)));
340 void clearSubpatternStart(unsigned subpattern
)
343 // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-(
344 store32(TrustedImm32(-1), Address(output
, (subpattern
<< 1) * sizeof(int)));
347 // We use one of three different strategies to track the start of the current match,
349 // 1) If the pattern has a fixed size, do nothing! - we calculate the value lazily
350 // at the end of matching. This is irrespective of compileMode, and in this case
351 // these methods should never be called.
352 // 2) If we're compiling IncludeSubpatterns, 'output' contains a pointer to an output
353 // vector, store the match start in the output vector.
354 // 3) If we're compiling MatchOnly, 'output' is unused, store the match start directly
356 void setMatchStart(RegisterID reg
)
358 ASSERT(!m_pattern
.m_body
->m_hasFixedSize
);
359 if (compileMode
== IncludeSubpatterns
)
360 store32(reg
, output
);
364 void getMatchStart(RegisterID reg
)
366 ASSERT(!m_pattern
.m_body
->m_hasFixedSize
);
367 if (compileMode
== IncludeSubpatterns
)
374 // These nodes wrap body alternatives - those in the main disjunction,
375 // rather than subpatterns or assertions. These are chained together in
376 // a doubly linked list, with a 'begin' node for the first alternative,
377 // a 'next' node for each subsequent alternative, and an 'end' node at
378 // the end. In the case of repeating alternatives, the 'end' node also
379 // has a reference back to 'begin'.
380 OpBodyAlternativeBegin
,
381 OpBodyAlternativeNext
,
382 OpBodyAlternativeEnd
,
383 // Similar to the body alternatives, but used for subpatterns with two
384 // or more alternatives.
385 OpNestedAlternativeBegin
,
386 OpNestedAlternativeNext
,
387 OpNestedAlternativeEnd
,
388 // Used for alternatives in subpatterns where there is only a single
389 // alternative (backtracking is easier in these cases), or for alternatives
390 // which never need to be backtracked (those in parenthetical assertions,
391 // terminal subpatterns).
392 OpSimpleNestedAlternativeBegin
,
393 OpSimpleNestedAlternativeNext
,
394 OpSimpleNestedAlternativeEnd
,
395 // Used to wrap 'Once' subpattern matches (quantityCount == 1).
396 OpParenthesesSubpatternOnceBegin
,
397 OpParenthesesSubpatternOnceEnd
,
398 // Used to wrap 'Terminal' subpattern matches (at the end of the regexp).
399 OpParenthesesSubpatternTerminalBegin
,
400 OpParenthesesSubpatternTerminalEnd
,
401 // Used to wrap parenthetical assertions.
402 OpParentheticalAssertionBegin
,
403 OpParentheticalAssertionEnd
,
404 // Wraps all simple terms (pattern characters, character classes).
406 // Where an expression contains only 'once through' body alternatives
407 // and no repeating ones, this op is used to return match failure.
411 // This structure is used to hold the compiled opcode information,
412 // including reference back to the original PatternTerm/PatternAlternatives,
413 // and JIT compilation data structures.
415 explicit YarrOp(PatternTerm
* term
)
418 , m_isDeadCode(false)
422 explicit YarrOp(YarrOpCode op
)
424 , m_isDeadCode(false)
428 // The operation, as a YarrOpCode, and also a reference to the PatternTerm.
432 // For alternatives, this holds the PatternAlternative and doubly linked
433 // references to this alternative's siblings. In the case of the
434 // OpBodyAlternativeEnd node at the end of a section of repeating nodes,
435 // m_nextOp will reference the OpBodyAlternativeBegin node of the first
436 // repeating alternative.
437 PatternAlternative
* m_alternative
;
441 // Used to record a set of Jumps out of the generated code, typically
442 // used for jumps out to backtracking code, and a single reentry back
443 // into the code for a node (likely where a backtrack will trigger
448 // Used for backtracking when the prior alternative did not consume any
449 // characters but matched.
450 Jump m_zeroLengthMatch
;
452 // This flag is used to null out the second pattern character, when
453 // two are fused to match a pair together.
456 // Currently used in the case of some of the more complex management of
457 // 'm_checked', to cache the offset used in this alternative, to avoid
461 // Used by OpNestedAlternativeNext/End to hold the pointer to the
462 // value that will be pushed into the pattern's frame to return to,
463 // upon backtracking back into the disjunction.
464 DataLabelPtr m_returnAddress
;
468 // This class encapsulates information about the state of code generation
469 // whilst generating the code for backtracking, when a term fails to match.
470 // Upon entry to code generation of the backtracking code for a given node,
471 // the Backtracking state will hold references to all control flow sources
472 // that are outputs in need of further backtracking from the prior node
473 // generated (which is the subsequent operation in the regular expression,
474 // and in the m_ops Vector, since we generated backtracking backwards).
475 // These references to control flow take the form of:
476 // - A jump list of jumps, to be linked to code that will backtrack them
478 // - A set of DataLabelPtr values, to be populated with values to be
479 // treated effectively as return addresses backtracking into complex
481 // - A flag indicating that the current sequence of generated code up to
482 // this point requires backtracking.
483 class BacktrackingState
{
486 : m_pendingFallthrough(false)
490 // Add a jump or jumps, a return address, or set the flag indicating
491 // that the current 'fallthrough' control flow requires backtracking.
492 void append(const Jump
& jump
)
494 m_laterFailures
.append(jump
);
496 void append(JumpList
& jumpList
)
498 m_laterFailures
.append(jumpList
);
500 void append(const DataLabelPtr
& returnAddress
)
502 m_pendingReturns
.append(returnAddress
);
506 ASSERT(!m_pendingFallthrough
);
507 m_pendingFallthrough
= true;
510 // These methods clear the backtracking state, either linking to the
511 // current location, a provided label, or copying the backtracking out
512 // to a JumpList. All actions may require code generation to take place,
513 // and as such are passed a pointer to the assembler.
514 void link(MacroAssembler
* assembler
)
516 if (m_pendingReturns
.size()) {
517 Label
here(assembler
);
518 for (unsigned i
= 0; i
< m_pendingReturns
.size(); ++i
)
519 m_backtrackRecords
.append(ReturnAddressRecord(m_pendingReturns
[i
], here
));
520 m_pendingReturns
.clear();
522 m_laterFailures
.link(assembler
);
523 m_laterFailures
.clear();
524 m_pendingFallthrough
= false;
526 void linkTo(Label label
, MacroAssembler
* assembler
)
528 if (m_pendingReturns
.size()) {
529 for (unsigned i
= 0; i
< m_pendingReturns
.size(); ++i
)
530 m_backtrackRecords
.append(ReturnAddressRecord(m_pendingReturns
[i
], label
));
531 m_pendingReturns
.clear();
533 if (m_pendingFallthrough
)
534 assembler
->jump(label
);
535 m_laterFailures
.linkTo(label
, assembler
);
536 m_laterFailures
.clear();
537 m_pendingFallthrough
= false;
539 void takeBacktracksToJumpList(JumpList
& jumpList
, MacroAssembler
* assembler
)
541 if (m_pendingReturns
.size()) {
542 Label
here(assembler
);
543 for (unsigned i
= 0; i
< m_pendingReturns
.size(); ++i
)
544 m_backtrackRecords
.append(ReturnAddressRecord(m_pendingReturns
[i
], here
));
545 m_pendingReturns
.clear();
546 m_pendingFallthrough
= true;
548 if (m_pendingFallthrough
)
549 jumpList
.append(assembler
->jump());
550 jumpList
.append(m_laterFailures
);
551 m_laterFailures
.clear();
552 m_pendingFallthrough
= false;
557 return m_laterFailures
.empty() && m_pendingReturns
.isEmpty() && !m_pendingFallthrough
;
560 // Called at the end of code generation to link all return addresses.
561 void linkDataLabels(LinkBuffer
& linkBuffer
)
564 for (unsigned i
= 0; i
< m_backtrackRecords
.size(); ++i
)
565 linkBuffer
.patch(m_backtrackRecords
[i
].m_dataLabel
, linkBuffer
.locationOf(m_backtrackRecords
[i
].m_backtrackLocation
));
569 struct ReturnAddressRecord
{
570 ReturnAddressRecord(DataLabelPtr dataLabel
, Label backtrackLocation
)
571 : m_dataLabel(dataLabel
)
572 , m_backtrackLocation(backtrackLocation
)
576 DataLabelPtr m_dataLabel
;
577 Label m_backtrackLocation
;
580 JumpList m_laterFailures
;
581 bool m_pendingFallthrough
;
582 Vector
<DataLabelPtr
, 4> m_pendingReturns
;
583 Vector
<ReturnAddressRecord
, 4> m_backtrackRecords
;
586 // Generation methods:
587 // ===================
589 // This method provides a default implementation of backtracking common
590 // to many terms; terms commonly jump out of the forwards matching path
591 // on any failed conditions, and add these jumps to the m_jumps list. If
592 // no special handling is required we can often just backtrack to m_jumps.
593 void backtrackTermDefault(size_t opIndex
)
595 YarrOp
& op
= m_ops
[opIndex
];
596 m_backtrackingState
.append(op
.m_jumps
);
599 void generateAssertionBOL(size_t opIndex
)
601 YarrOp
& op
= m_ops
[opIndex
];
602 PatternTerm
* term
= op
.m_term
;
604 if (m_pattern
.m_multiline
) {
605 const RegisterID character
= regT0
;
608 if (!term
->inputPosition
)
609 matchDest
.append(branch32(Equal
, index
, Imm32(m_checked
)));
611 readCharacter((term
->inputPosition
- m_checked
) - 1, character
);
612 matchCharacterClass(character
, matchDest
, m_pattern
.newlineCharacterClass());
613 op
.m_jumps
.append(jump());
615 matchDest
.link(this);
617 // Erk, really should poison out these alternatives early. :-/
618 if (term
->inputPosition
)
619 op
.m_jumps
.append(jump());
621 op
.m_jumps
.append(branch32(NotEqual
, index
, Imm32(m_checked
)));
624 void backtrackAssertionBOL(size_t opIndex
)
626 backtrackTermDefault(opIndex
);
629 void generateAssertionEOL(size_t opIndex
)
631 YarrOp
& op
= m_ops
[opIndex
];
632 PatternTerm
* term
= op
.m_term
;
634 if (m_pattern
.m_multiline
) {
635 const RegisterID character
= regT0
;
638 if (term
->inputPosition
== m_checked
)
639 matchDest
.append(atEndOfInput());
641 readCharacter(term
->inputPosition
- m_checked
, character
);
642 matchCharacterClass(character
, matchDest
, m_pattern
.newlineCharacterClass());
643 op
.m_jumps
.append(jump());
645 matchDest
.link(this);
647 if (term
->inputPosition
== m_checked
)
648 op
.m_jumps
.append(notAtEndOfInput());
649 // Erk, really should poison out these alternatives early. :-/
651 op
.m_jumps
.append(jump());
654 void backtrackAssertionEOL(size_t opIndex
)
656 backtrackTermDefault(opIndex
);
659 // Also falls through on nextIsNotWordChar.
660 void matchAssertionWordchar(size_t opIndex
, JumpList
& nextIsWordChar
, JumpList
& nextIsNotWordChar
)
662 YarrOp
& op
= m_ops
[opIndex
];
663 PatternTerm
* term
= op
.m_term
;
665 const RegisterID character
= regT0
;
667 if (term
->inputPosition
== m_checked
)
668 nextIsNotWordChar
.append(atEndOfInput());
670 readCharacter((term
->inputPosition
- m_checked
), character
);
671 matchCharacterClass(character
, nextIsWordChar
, m_pattern
.wordcharCharacterClass());
674 void generateAssertionWordBoundary(size_t opIndex
)
676 YarrOp
& op
= m_ops
[opIndex
];
677 PatternTerm
* term
= op
.m_term
;
679 const RegisterID character
= regT0
;
683 if (!term
->inputPosition
)
684 atBegin
= branch32(Equal
, index
, Imm32(m_checked
));
685 readCharacter((term
->inputPosition
- m_checked
) - 1, character
);
686 matchCharacterClass(character
, matchDest
, m_pattern
.wordcharCharacterClass());
687 if (!term
->inputPosition
)
690 // We fall through to here if the last character was not a wordchar.
691 JumpList nonWordCharThenWordChar
;
692 JumpList nonWordCharThenNonWordChar
;
693 if (term
->invert()) {
694 matchAssertionWordchar(opIndex
, nonWordCharThenNonWordChar
, nonWordCharThenWordChar
);
695 nonWordCharThenWordChar
.append(jump());
697 matchAssertionWordchar(opIndex
, nonWordCharThenWordChar
, nonWordCharThenNonWordChar
);
698 nonWordCharThenNonWordChar
.append(jump());
700 op
.m_jumps
.append(nonWordCharThenNonWordChar
);
702 // We jump here if the last character was a wordchar.
703 matchDest
.link(this);
704 JumpList wordCharThenWordChar
;
705 JumpList wordCharThenNonWordChar
;
706 if (term
->invert()) {
707 matchAssertionWordchar(opIndex
, wordCharThenNonWordChar
, wordCharThenWordChar
);
708 wordCharThenWordChar
.append(jump());
710 matchAssertionWordchar(opIndex
, wordCharThenWordChar
, wordCharThenNonWordChar
);
711 // This can fall through!
714 op
.m_jumps
.append(wordCharThenWordChar
);
716 nonWordCharThenWordChar
.link(this);
717 wordCharThenNonWordChar
.link(this);
719 void backtrackAssertionWordBoundary(size_t opIndex
)
721 backtrackTermDefault(opIndex
);
724 void generatePatternCharacterOnce(size_t opIndex
)
726 YarrOp
& op
= m_ops
[opIndex
];
731 // m_ops always ends with a OpBodyAlternativeEnd or OpMatchFailed
732 // node, so there must always be at least one more node.
733 ASSERT(opIndex
+ 1 < m_ops
.size());
734 YarrOp
* nextOp
= &m_ops
[opIndex
+ 1];
736 PatternTerm
* term
= op
.m_term
;
737 UChar ch
= term
->patternCharacter
;
739 if ((ch
> 0xff) && (m_charSize
== Char8
)) {
740 // Have a 16 bit pattern character and an 8 bit string - short circuit
741 op
.m_jumps
.append(jump());
745 const RegisterID character
= regT0
;
746 int maxCharactersAtOnce
= m_charSize
== Char8
? 4 : 2;
747 unsigned ignoreCaseMask
= 0;
748 int allCharacters
= ch
;
749 int numberCharacters
;
750 int startTermPosition
= term
->inputPosition
;
752 // For case-insensitive compares, non-ascii characters that have different
753 // upper & lower case representations are converted to a character class.
754 ASSERT(!m_pattern
.m_ignoreCase
|| isASCIIAlpha(ch
) || isCanonicallyUnique(ch
));
756 if (m_pattern
.m_ignoreCase
&& isASCIIAlpha(ch
))
757 ignoreCaseMask
|= 32;
759 for (numberCharacters
= 1; numberCharacters
< maxCharactersAtOnce
&& nextOp
->m_op
== OpTerm
; ++numberCharacters
, nextOp
= &m_ops
[opIndex
+ numberCharacters
]) {
760 PatternTerm
* nextTerm
= nextOp
->m_term
;
762 if (nextTerm
->type
!= PatternTerm::TypePatternCharacter
763 || nextTerm
->quantityType
!= QuantifierFixedCount
764 || nextTerm
->quantityCount
!= 1
765 || nextTerm
->inputPosition
!= (startTermPosition
+ numberCharacters
))
768 nextOp
->m_isDeadCode
= true;
770 int shiftAmount
= (m_charSize
== Char8
? 8 : 16) * numberCharacters
;
772 UChar currentCharacter
= nextTerm
->patternCharacter
;
774 if ((currentCharacter
> 0xff) && (m_charSize
== Char8
)) {
775 // Have a 16 bit pattern character and an 8 bit string - short circuit
776 op
.m_jumps
.append(jump());
780 // For case-insensitive compares, non-ascii characters that have different
781 // upper & lower case representations are converted to a character class.
782 ASSERT(!m_pattern
.m_ignoreCase
|| isASCIIAlpha(currentCharacter
) || isCanonicallyUnique(currentCharacter
));
784 allCharacters
|= (currentCharacter
<< shiftAmount
);
786 if ((m_pattern
.m_ignoreCase
) && (isASCIIAlpha(currentCharacter
)))
787 ignoreCaseMask
|= 32 << shiftAmount
;
790 if (m_charSize
== Char8
) {
791 switch (numberCharacters
) {
793 op
.m_jumps
.append(jumpIfCharNotEquals(ch
, startTermPosition
- m_checked
, character
));
796 BaseIndex
address(input
, index
, TimesOne
, (startTermPosition
- m_checked
) * sizeof(LChar
));
797 load16Unaligned(address
, character
);
801 BaseIndex
highAddress(input
, index
, TimesOne
, (startTermPosition
- m_checked
) * sizeof(LChar
));
802 load16Unaligned(highAddress
, character
);
804 or32(Imm32(ignoreCaseMask
), character
);
805 op
.m_jumps
.append(branch32(NotEqual
, character
, Imm32((allCharacters
& 0xffff) | ignoreCaseMask
)));
806 op
.m_jumps
.append(jumpIfCharNotEquals(allCharacters
>> 16, startTermPosition
+ 2 - m_checked
, character
));
810 BaseIndex
address(input
, index
, TimesOne
, (startTermPosition
- m_checked
) * sizeof(LChar
));
811 load32WithUnalignedHalfWords(address
, character
);
816 switch (numberCharacters
) {
818 op
.m_jumps
.append(jumpIfCharNotEquals(ch
, term
->inputPosition
- m_checked
, character
));
821 BaseIndex
address(input
, index
, TimesTwo
, (term
->inputPosition
- m_checked
) * sizeof(UChar
));
822 load32WithUnalignedHalfWords(address
, character
);
828 or32(Imm32(ignoreCaseMask
), character
);
829 op
.m_jumps
.append(branch32(NotEqual
, character
, Imm32(allCharacters
| ignoreCaseMask
)));
832 void backtrackPatternCharacterOnce(size_t opIndex
)
834 backtrackTermDefault(opIndex
);
837 void generatePatternCharacterFixed(size_t opIndex
)
839 YarrOp
& op
= m_ops
[opIndex
];
840 PatternTerm
* term
= op
.m_term
;
841 UChar ch
= term
->patternCharacter
;
843 const RegisterID character
= regT0
;
844 const RegisterID countRegister
= regT1
;
846 move(index
, countRegister
);
847 sub32(Imm32(term
->quantityCount
.unsafeGet()), countRegister
);
850 BaseIndex
address(input
, countRegister
, m_charScale
, (Checked
<int>(term
->inputPosition
- m_checked
+ Checked
<int64_t>(term
->quantityCount
)) * static_cast<int>(m_charSize
== Char8
? sizeof(char) : sizeof(UChar
))).unsafeGet());
852 if (m_charSize
== Char8
)
853 load8(address
, character
);
855 load16(address
, character
);
857 // For case-insensitive compares, non-ascii characters that have different
858 // upper & lower case representations are converted to a character class.
859 ASSERT(!m_pattern
.m_ignoreCase
|| isASCIIAlpha(ch
) || isCanonicallyUnique(ch
));
860 if (m_pattern
.m_ignoreCase
&& isASCIIAlpha(ch
)) {
861 or32(TrustedImm32(0x20), character
);
865 op
.m_jumps
.append(branch32(NotEqual
, character
, Imm32(ch
)));
866 add32(TrustedImm32(1), countRegister
);
867 branch32(NotEqual
, countRegister
, index
).linkTo(loop
, this);
869 void backtrackPatternCharacterFixed(size_t opIndex
)
871 backtrackTermDefault(opIndex
);
874 void generatePatternCharacterGreedy(size_t opIndex
)
876 YarrOp
& op
= m_ops
[opIndex
];
877 PatternTerm
* term
= op
.m_term
;
878 UChar ch
= term
->patternCharacter
;
880 const RegisterID character
= regT0
;
881 const RegisterID countRegister
= regT1
;
883 move(TrustedImm32(0), countRegister
);
885 // Unless have a 16 bit pattern character and an 8 bit string - short circuit
886 if (!((ch
> 0xff) && (m_charSize
== Char8
))) {
889 failures
.append(atEndOfInput());
890 failures
.append(jumpIfCharNotEquals(ch
, term
->inputPosition
- m_checked
, character
));
892 add32(TrustedImm32(1), countRegister
);
893 add32(TrustedImm32(1), index
);
894 if (term
->quantityCount
== quantifyInfinite
)
897 branch32(NotEqual
, countRegister
, Imm32(term
->quantityCount
.unsafeGet())).linkTo(loop
, this);
901 op
.m_reentry
= label();
903 storeToFrame(countRegister
, term
->frameLocation
);
905 void backtrackPatternCharacterGreedy(size_t opIndex
)
907 YarrOp
& op
= m_ops
[opIndex
];
908 PatternTerm
* term
= op
.m_term
;
910 const RegisterID countRegister
= regT1
;
912 m_backtrackingState
.link(this);
914 loadFromFrame(term
->frameLocation
, countRegister
);
915 m_backtrackingState
.append(branchTest32(Zero
, countRegister
));
916 sub32(TrustedImm32(1), countRegister
);
917 sub32(TrustedImm32(1), index
);
921 void generatePatternCharacterNonGreedy(size_t opIndex
)
923 YarrOp
& op
= m_ops
[opIndex
];
924 PatternTerm
* term
= op
.m_term
;
926 const RegisterID countRegister
= regT1
;
928 move(TrustedImm32(0), countRegister
);
929 op
.m_reentry
= label();
930 storeToFrame(countRegister
, term
->frameLocation
);
932 void backtrackPatternCharacterNonGreedy(size_t opIndex
)
934 YarrOp
& op
= m_ops
[opIndex
];
935 PatternTerm
* term
= op
.m_term
;
936 UChar ch
= term
->patternCharacter
;
938 const RegisterID character
= regT0
;
939 const RegisterID countRegister
= regT1
;
941 m_backtrackingState
.link(this);
943 loadFromFrame(term
->frameLocation
, countRegister
);
945 // Unless have a 16 bit pattern character and an 8 bit string - short circuit
946 if (!((ch
> 0xff) && (m_charSize
== Char8
))) {
947 JumpList nonGreedyFailures
;
948 nonGreedyFailures
.append(atEndOfInput());
949 if (term
->quantityCount
!= quantifyInfinite
)
950 nonGreedyFailures
.append(branch32(Equal
, countRegister
, Imm32(term
->quantityCount
.unsafeGet())));
951 nonGreedyFailures
.append(jumpIfCharNotEquals(ch
, term
->inputPosition
- m_checked
, character
));
953 add32(TrustedImm32(1), countRegister
);
954 add32(TrustedImm32(1), index
);
957 nonGreedyFailures
.link(this);
960 sub32(countRegister
, index
);
961 m_backtrackingState
.fallthrough();
964 void generateCharacterClassOnce(size_t opIndex
)
966 YarrOp
& op
= m_ops
[opIndex
];
967 PatternTerm
* term
= op
.m_term
;
969 const RegisterID character
= regT0
;
972 readCharacter(term
->inputPosition
- m_checked
, character
);
973 matchCharacterClass(character
, matchDest
, term
->characterClass
);
976 op
.m_jumps
.append(matchDest
);
978 op
.m_jumps
.append(jump());
979 matchDest
.link(this);
982 void backtrackCharacterClassOnce(size_t opIndex
)
984 backtrackTermDefault(opIndex
);
987 void generateCharacterClassFixed(size_t opIndex
)
989 YarrOp
& op
= m_ops
[opIndex
];
990 PatternTerm
* term
= op
.m_term
;
992 const RegisterID character
= regT0
;
993 const RegisterID countRegister
= regT1
;
995 move(index
, countRegister
);
996 sub32(Imm32(term
->quantityCount
.unsafeGet()), countRegister
);
1000 if (m_charSize
== Char8
)
1001 load8(BaseIndex(input
, countRegister
, TimesOne
, (Checked
<int>(term
->inputPosition
- m_checked
+ Checked
<int64_t>(term
->quantityCount
)) * static_cast<int>(sizeof(char))).unsafeGet()), character
);
1003 load16(BaseIndex(input
, countRegister
, TimesTwo
, (Checked
<int>(term
->inputPosition
- m_checked
+ Checked
<int64_t>(term
->quantityCount
)) * static_cast<int>(sizeof(UChar
))).unsafeGet()), character
);
1004 matchCharacterClass(character
, matchDest
, term
->characterClass
);
1007 op
.m_jumps
.append(matchDest
);
1009 op
.m_jumps
.append(jump());
1010 matchDest
.link(this);
1013 add32(TrustedImm32(1), countRegister
);
1014 branch32(NotEqual
, countRegister
, index
).linkTo(loop
, this);
1016 void backtrackCharacterClassFixed(size_t opIndex
)
1018 backtrackTermDefault(opIndex
);
1021 void generateCharacterClassGreedy(size_t opIndex
)
1023 YarrOp
& op
= m_ops
[opIndex
];
1024 PatternTerm
* term
= op
.m_term
;
1026 const RegisterID character
= regT0
;
1027 const RegisterID countRegister
= regT1
;
1029 move(TrustedImm32(0), countRegister
);
1033 failures
.append(atEndOfInput());
1035 if (term
->invert()) {
1036 readCharacter(term
->inputPosition
- m_checked
, character
);
1037 matchCharacterClass(character
, failures
, term
->characterClass
);
1040 readCharacter(term
->inputPosition
- m_checked
, character
);
1041 matchCharacterClass(character
, matchDest
, term
->characterClass
);
1042 failures
.append(jump());
1043 matchDest
.link(this);
1046 add32(TrustedImm32(1), countRegister
);
1047 add32(TrustedImm32(1), index
);
1048 if (term
->quantityCount
!= quantifyInfinite
) {
1049 branch32(NotEqual
, countRegister
, Imm32(term
->quantityCount
.unsafeGet())).linkTo(loop
, this);
1050 failures
.append(jump());
1054 failures
.link(this);
1055 op
.m_reentry
= label();
1057 storeToFrame(countRegister
, term
->frameLocation
);
1059 void backtrackCharacterClassGreedy(size_t opIndex
)
1061 YarrOp
& op
= m_ops
[opIndex
];
1062 PatternTerm
* term
= op
.m_term
;
1064 const RegisterID countRegister
= regT1
;
1066 m_backtrackingState
.link(this);
1068 loadFromFrame(term
->frameLocation
, countRegister
);
1069 m_backtrackingState
.append(branchTest32(Zero
, countRegister
));
1070 sub32(TrustedImm32(1), countRegister
);
1071 sub32(TrustedImm32(1), index
);
1075 void generateCharacterClassNonGreedy(size_t opIndex
)
1077 YarrOp
& op
= m_ops
[opIndex
];
1078 PatternTerm
* term
= op
.m_term
;
1080 const RegisterID countRegister
= regT1
;
1082 move(TrustedImm32(0), countRegister
);
1083 op
.m_reentry
= label();
1084 storeToFrame(countRegister
, term
->frameLocation
);
1086 void backtrackCharacterClassNonGreedy(size_t opIndex
)
1088 YarrOp
& op
= m_ops
[opIndex
];
1089 PatternTerm
* term
= op
.m_term
;
1091 const RegisterID character
= regT0
;
1092 const RegisterID countRegister
= regT1
;
1094 JumpList nonGreedyFailures
;
1096 m_backtrackingState
.link(this);
1098 loadFromFrame(term
->frameLocation
, countRegister
);
1100 nonGreedyFailures
.append(atEndOfInput());
1101 nonGreedyFailures
.append(branch32(Equal
, countRegister
, Imm32(term
->quantityCount
.unsafeGet())));
1104 readCharacter(term
->inputPosition
- m_checked
, character
);
1105 matchCharacterClass(character
, matchDest
, term
->characterClass
);
1108 nonGreedyFailures
.append(matchDest
);
1110 nonGreedyFailures
.append(jump());
1111 matchDest
.link(this);
1114 add32(TrustedImm32(1), countRegister
);
1115 add32(TrustedImm32(1), index
);
1119 nonGreedyFailures
.link(this);
1120 sub32(countRegister
, index
);
1121 m_backtrackingState
.fallthrough();
1124 void generateDotStarEnclosure(size_t opIndex
)
1126 YarrOp
& op
= m_ops
[opIndex
];
1127 PatternTerm
* term
= op
.m_term
;
1129 const RegisterID character
= regT0
;
1130 const RegisterID matchPos
= regT1
;
1132 JumpList foundBeginningNewLine
;
1133 JumpList saveStartIndex
;
1134 JumpList foundEndingNewLine
;
1136 ASSERT(!m_pattern
.m_body
->m_hasFixedSize
);
1137 getMatchStart(matchPos
);
1139 saveStartIndex
.append(branchTest32(Zero
, matchPos
));
1140 Label
findBOLLoop(this);
1141 sub32(TrustedImm32(1), matchPos
);
1142 if (m_charSize
== Char8
)
1143 load8(BaseIndex(input
, matchPos
, TimesOne
, 0), character
);
1145 load16(BaseIndex(input
, matchPos
, TimesTwo
, 0), character
);
1146 matchCharacterClass(character
, foundBeginningNewLine
, m_pattern
.newlineCharacterClass());
1147 branchTest32(NonZero
, matchPos
).linkTo(findBOLLoop
, this);
1148 saveStartIndex
.append(jump());
1150 foundBeginningNewLine
.link(this);
1151 add32(TrustedImm32(1), matchPos
); // Advance past newline
1152 saveStartIndex
.link(this);
1154 if (!m_pattern
.m_multiline
&& term
->anchors
.bolAnchor
)
1155 op
.m_jumps
.append(branchTest32(NonZero
, matchPos
));
1157 ASSERT(!m_pattern
.m_body
->m_hasFixedSize
);
1158 setMatchStart(matchPos
);
1160 move(index
, matchPos
);
1162 Label
findEOLLoop(this);
1163 foundEndingNewLine
.append(branch32(Equal
, matchPos
, length
));
1164 if (m_charSize
== Char8
)
1165 load8(BaseIndex(input
, matchPos
, TimesOne
, 0), character
);
1167 load16(BaseIndex(input
, matchPos
, TimesTwo
, 0), character
);
1168 matchCharacterClass(character
, foundEndingNewLine
, m_pattern
.newlineCharacterClass());
1169 add32(TrustedImm32(1), matchPos
);
1172 foundEndingNewLine
.link(this);
1174 if (!m_pattern
.m_multiline
&& term
->anchors
.eolAnchor
)
1175 op
.m_jumps
.append(branch32(NotEqual
, matchPos
, length
));
1177 move(matchPos
, index
);
1180 void backtrackDotStarEnclosure(size_t opIndex
)
1182 backtrackTermDefault(opIndex
);
1185 // Code generation/backtracking for simple terms
1186 // (pattern characters, character classes, and assertions).
1187 // These methods farm out work to the set of functions above.
1188 void generateTerm(size_t opIndex
)
1190 YarrOp
& op
= m_ops
[opIndex
];
1191 PatternTerm
* term
= op
.m_term
;
1193 switch (term
->type
) {
1194 case PatternTerm::TypePatternCharacter
:
1195 switch (term
->quantityType
) {
1196 case QuantifierFixedCount
:
1197 if (term
->quantityCount
== 1)
1198 generatePatternCharacterOnce(opIndex
);
1200 generatePatternCharacterFixed(opIndex
);
1202 case QuantifierGreedy
:
1203 generatePatternCharacterGreedy(opIndex
);
1205 case QuantifierNonGreedy
:
1206 generatePatternCharacterNonGreedy(opIndex
);
1211 case PatternTerm::TypeCharacterClass
:
1212 switch (term
->quantityType
) {
1213 case QuantifierFixedCount
:
1214 if (term
->quantityCount
== 1)
1215 generateCharacterClassOnce(opIndex
);
1217 generateCharacterClassFixed(opIndex
);
1219 case QuantifierGreedy
:
1220 generateCharacterClassGreedy(opIndex
);
1222 case QuantifierNonGreedy
:
1223 generateCharacterClassNonGreedy(opIndex
);
1228 case PatternTerm::TypeAssertionBOL
:
1229 generateAssertionBOL(opIndex
);
1232 case PatternTerm::TypeAssertionEOL
:
1233 generateAssertionEOL(opIndex
);
1236 case PatternTerm::TypeAssertionWordBoundary
:
1237 generateAssertionWordBoundary(opIndex
);
1240 case PatternTerm::TypeForwardReference
:
1243 case PatternTerm::TypeParenthesesSubpattern
:
1244 case PatternTerm::TypeParentheticalAssertion
:
1245 ASSERT_NOT_REACHED();
1246 case PatternTerm::TypeBackReference
:
1247 m_shouldFallBack
= true;
1249 case PatternTerm::TypeDotStarEnclosure
:
1250 generateDotStarEnclosure(opIndex
);
1254 void backtrackTerm(size_t opIndex
)
1256 YarrOp
& op
= m_ops
[opIndex
];
1257 PatternTerm
* term
= op
.m_term
;
1259 switch (term
->type
) {
1260 case PatternTerm::TypePatternCharacter
:
1261 switch (term
->quantityType
) {
1262 case QuantifierFixedCount
:
1263 if (term
->quantityCount
== 1)
1264 backtrackPatternCharacterOnce(opIndex
);
1266 backtrackPatternCharacterFixed(opIndex
);
1268 case QuantifierGreedy
:
1269 backtrackPatternCharacterGreedy(opIndex
);
1271 case QuantifierNonGreedy
:
1272 backtrackPatternCharacterNonGreedy(opIndex
);
1277 case PatternTerm::TypeCharacterClass
:
1278 switch (term
->quantityType
) {
1279 case QuantifierFixedCount
:
1280 if (term
->quantityCount
== 1)
1281 backtrackCharacterClassOnce(opIndex
);
1283 backtrackCharacterClassFixed(opIndex
);
1285 case QuantifierGreedy
:
1286 backtrackCharacterClassGreedy(opIndex
);
1288 case QuantifierNonGreedy
:
1289 backtrackCharacterClassNonGreedy(opIndex
);
1294 case PatternTerm::TypeAssertionBOL
:
1295 backtrackAssertionBOL(opIndex
);
1298 case PatternTerm::TypeAssertionEOL
:
1299 backtrackAssertionEOL(opIndex
);
1302 case PatternTerm::TypeAssertionWordBoundary
:
1303 backtrackAssertionWordBoundary(opIndex
);
1306 case PatternTerm::TypeForwardReference
:
1309 case PatternTerm::TypeParenthesesSubpattern
:
1310 case PatternTerm::TypeParentheticalAssertion
:
1311 ASSERT_NOT_REACHED();
1313 case PatternTerm::TypeDotStarEnclosure
:
1314 backtrackDotStarEnclosure(opIndex
);
1317 case PatternTerm::TypeBackReference
:
1318 m_shouldFallBack
= true;
1325 // Forwards generate the matching code.
1326 ASSERT(m_ops
.size());
1330 YarrOp
& op
= m_ops
[opIndex
];
1334 generateTerm(opIndex
);
1337 // OpBodyAlternativeBegin/Next/End
1339 // These nodes wrap the set of alternatives in the body of the regular expression.
1340 // There may be either one or two chains of OpBodyAlternative nodes, one representing
1341 // the 'once through' sequence of alternatives (if any exist), and one representing
1342 // the repeating alternatives (again, if any exist).
1344 // Upon normal entry to the Begin alternative, we will check that input is available.
1345 // Reentry to the Begin alternative will take place after the check has taken place,
1346 // and will assume that the input position has already been progressed as appropriate.
1348 // Entry to subsequent Next/End alternatives occurs when the prior alternative has
1349 // successfully completed a match - return a success state from JIT code.
1351 // Next alternatives allow for reentry optimized to suit backtracking from its
1352 // preceding alternative. It expects the input position to still be set to a position
1353 // appropriate to its predecessor, and it will only perform an input check if the
1354 // predecessor had a minimum size less than its own.
1356 // In the case 'once through' expressions, the End node will also have a reentry
1357 // point to jump to when the last alternative fails. Again, this expects the input
1358 // position to still reflect that expected by the prior alternative.
1359 case OpBodyAlternativeBegin
: {
1360 PatternAlternative
* alternative
= op
.m_alternative
;
1362 // Upon entry at the head of the set of alternatives, check if input is available
1363 // to run the first alternative. (This progresses the input position).
1364 op
.m_jumps
.append(jumpIfNoAvailableInput(alternative
->m_minimumSize
));
1365 // We will reenter after the check, and assume the input position to have been
1366 // set as appropriate to this alternative.
1367 op
.m_reentry
= label();
1369 m_checked
+= alternative
->m_minimumSize
;
1372 case OpBodyAlternativeNext
:
1373 case OpBodyAlternativeEnd
: {
1374 PatternAlternative
* priorAlternative
= m_ops
[op
.m_previousOp
].m_alternative
;
1375 PatternAlternative
* alternative
= op
.m_alternative
;
1377 // If we get here, the prior alternative matched - return success.
1379 // Adjust the stack pointer to remove the pattern's frame.
1382 // Load appropriate values into the return register and the first output
1383 // slot, and return. In the case of pattern with a fixed size, we will
1384 // not have yet set the value in the first
1385 ASSERT(index
!= returnRegister
);
1386 if (m_pattern
.m_body
->m_hasFixedSize
) {
1387 move(index
, returnRegister
);
1388 if (priorAlternative
->m_minimumSize
)
1389 sub32(Imm32(priorAlternative
->m_minimumSize
), returnRegister
);
1390 if (compileMode
== IncludeSubpatterns
)
1391 store32(returnRegister
, output
);
1393 getMatchStart(returnRegister
);
1394 if (compileMode
== IncludeSubpatterns
)
1395 store32(index
, Address(output
, 4));
1396 move(index
, returnRegister2
);
1400 // This is the divide between the tail of the prior alternative, above, and
1401 // the head of the subsequent alternative, below.
1403 if (op
.m_op
== OpBodyAlternativeNext
) {
1404 // This is the reentry point for the Next alternative. We expect any code
1405 // that jumps here to do so with the input position matching that of the
1406 // PRIOR alteranative, and we will only check input availability if we
1407 // need to progress it forwards.
1408 op
.m_reentry
= label();
1409 if (alternative
->m_minimumSize
> priorAlternative
->m_minimumSize
) {
1410 add32(Imm32(alternative
->m_minimumSize
- priorAlternative
->m_minimumSize
), index
);
1411 op
.m_jumps
.append(jumpIfNoAvailableInput());
1412 } else if (priorAlternative
->m_minimumSize
> alternative
->m_minimumSize
)
1413 sub32(Imm32(priorAlternative
->m_minimumSize
- alternative
->m_minimumSize
), index
);
1414 } else if (op
.m_nextOp
== notFound
) {
1415 // This is the reentry point for the End of 'once through' alternatives,
1416 // jumped to when the last alternative fails to match.
1417 op
.m_reentry
= label();
1418 sub32(Imm32(priorAlternative
->m_minimumSize
), index
);
1421 if (op
.m_op
== OpBodyAlternativeNext
)
1422 m_checked
+= alternative
->m_minimumSize
;
1423 m_checked
-= priorAlternative
->m_minimumSize
;
1427 // OpSimpleNestedAlternativeBegin/Next/End
1428 // OpNestedAlternativeBegin/Next/End
1430 // These nodes are used to handle sets of alternatives that are nested within
1431 // subpatterns and parenthetical assertions. The 'simple' forms are used where
1432 // we do not need to be able to backtrack back into any alternative other than
1433 // the last, the normal forms allow backtracking into any alternative.
1435 // Each Begin/Next node is responsible for planting an input check to ensure
1436 // sufficient input is available on entry. Next nodes additionally need to
1437 // jump to the end - Next nodes use the End node's m_jumps list to hold this
1440 // In the non-simple forms, successful alternative matches must store a
1441 // 'return address' using a DataLabelPtr, used to store the address to jump
1442 // to when backtracking, to get to the code for the appropriate alternative.
1443 case OpSimpleNestedAlternativeBegin
:
1444 case OpNestedAlternativeBegin
: {
1445 PatternTerm
* term
= op
.m_term
;
1446 PatternAlternative
* alternative
= op
.m_alternative
;
1447 PatternDisjunction
* disjunction
= term
->parentheses
.disjunction
;
1449 // Calculate how much input we need to check for, and if non-zero check.
1450 op
.m_checkAdjust
= alternative
->m_minimumSize
;
1451 if ((term
->quantityType
== QuantifierFixedCount
) && (term
->type
!= PatternTerm::TypeParentheticalAssertion
))
1452 op
.m_checkAdjust
-= disjunction
->m_minimumSize
;
1453 if (op
.m_checkAdjust
)
1454 op
.m_jumps
.append(jumpIfNoAvailableInput(op
.m_checkAdjust
));
1456 m_checked
+= op
.m_checkAdjust
;
1459 case OpSimpleNestedAlternativeNext
:
1460 case OpNestedAlternativeNext
: {
1461 PatternTerm
* term
= op
.m_term
;
1462 PatternAlternative
* alternative
= op
.m_alternative
;
1463 PatternDisjunction
* disjunction
= term
->parentheses
.disjunction
;
1465 // In the non-simple case, store a 'return address' so we can backtrack correctly.
1466 if (op
.m_op
== OpNestedAlternativeNext
) {
1467 unsigned parenthesesFrameLocation
= term
->frameLocation
;
1468 unsigned alternativeFrameLocation
= parenthesesFrameLocation
;
1469 if (term
->quantityType
!= QuantifierFixedCount
)
1470 alternativeFrameLocation
+= YarrStackSpaceForBackTrackInfoParenthesesOnce
;
1471 op
.m_returnAddress
= storeToFrameWithPatch(alternativeFrameLocation
);
1474 if (term
->quantityType
!= QuantifierFixedCount
&& !m_ops
[op
.m_previousOp
].m_alternative
->m_minimumSize
) {
1475 // If the previous alternative matched without consuming characters then
1476 // backtrack to try to match while consumming some input.
1477 op
.m_zeroLengthMatch
= branch32(Equal
, index
, Address(stackPointerRegister
, term
->frameLocation
* sizeof(void*)));
1480 // If we reach here then the last alternative has matched - jump to the
1481 // End node, to skip over any further alternatives.
1483 // FIXME: this is logically O(N^2) (though N can be expected to be very
1484 // small). We could avoid this either by adding an extra jump to the JIT
1485 // data structures, or by making backtracking code that jumps to Next
1486 // alternatives are responsible for checking that input is available (if
1487 // we didn't need to plant the input checks, then m_jumps would be free).
1488 YarrOp
* endOp
= &m_ops
[op
.m_nextOp
];
1489 while (endOp
->m_nextOp
!= notFound
) {
1490 ASSERT(endOp
->m_op
== OpSimpleNestedAlternativeNext
|| endOp
->m_op
== OpNestedAlternativeNext
);
1491 endOp
= &m_ops
[endOp
->m_nextOp
];
1493 ASSERT(endOp
->m_op
== OpSimpleNestedAlternativeEnd
|| endOp
->m_op
== OpNestedAlternativeEnd
);
1494 endOp
->m_jumps
.append(jump());
1496 // This is the entry point for the next alternative.
1497 op
.m_reentry
= label();
1499 // Calculate how much input we need to check for, and if non-zero check.
1500 op
.m_checkAdjust
= alternative
->m_minimumSize
;
1501 if ((term
->quantityType
== QuantifierFixedCount
) && (term
->type
!= PatternTerm::TypeParentheticalAssertion
))
1502 op
.m_checkAdjust
-= disjunction
->m_minimumSize
;
1503 if (op
.m_checkAdjust
)
1504 op
.m_jumps
.append(jumpIfNoAvailableInput(op
.m_checkAdjust
));
1506 YarrOp
& lastOp
= m_ops
[op
.m_previousOp
];
1507 m_checked
-= lastOp
.m_checkAdjust
;
1508 m_checked
+= op
.m_checkAdjust
;
1511 case OpSimpleNestedAlternativeEnd
:
1512 case OpNestedAlternativeEnd
: {
1513 PatternTerm
* term
= op
.m_term
;
1515 // In the non-simple case, store a 'return address' so we can backtrack correctly.
1516 if (op
.m_op
== OpNestedAlternativeEnd
) {
1517 unsigned parenthesesFrameLocation
= term
->frameLocation
;
1518 unsigned alternativeFrameLocation
= parenthesesFrameLocation
;
1519 if (term
->quantityType
!= QuantifierFixedCount
)
1520 alternativeFrameLocation
+= YarrStackSpaceForBackTrackInfoParenthesesOnce
;
1521 op
.m_returnAddress
= storeToFrameWithPatch(alternativeFrameLocation
);
1524 if (term
->quantityType
!= QuantifierFixedCount
&& !m_ops
[op
.m_previousOp
].m_alternative
->m_minimumSize
) {
1525 // If the previous alternative matched without consuming characters then
1526 // backtrack to try to match while consumming some input.
1527 op
.m_zeroLengthMatch
= branch32(Equal
, index
, Address(stackPointerRegister
, term
->frameLocation
* sizeof(void*)));
1530 // If this set of alternatives contains more than one alternative,
1531 // then the Next nodes will have planted jumps to the End, and added
1532 // them to this node's m_jumps list.
1533 op
.m_jumps
.link(this);
1536 YarrOp
& lastOp
= m_ops
[op
.m_previousOp
];
1537 m_checked
-= lastOp
.m_checkAdjust
;
1541 // OpParenthesesSubpatternOnceBegin/End
1543 // These nodes support (optionally) capturing subpatterns, that have a
1544 // quantity count of 1 (this covers fixed once, and ?/?? quantifiers).
1545 case OpParenthesesSubpatternOnceBegin
: {
1546 PatternTerm
* term
= op
.m_term
;
1547 unsigned parenthesesFrameLocation
= term
->frameLocation
;
1548 const RegisterID indexTemporary
= regT0
;
1549 ASSERT(term
->quantityCount
== 1);
1551 // Upon entry to a Greedy quantified set of parenthese store the index.
1552 // We'll use this for two purposes:
1553 // - To indicate which iteration we are on of mathing the remainder of
1554 // the expression after the parentheses - the first, including the
1555 // match within the parentheses, or the second having skipped over them.
1556 // - To check for empty matches, which must be rejected.
1558 // At the head of a NonGreedy set of parentheses we'll immediately set the
1559 // value on the stack to -1 (indicating a match skipping the subpattern),
1560 // and plant a jump to the end. We'll also plant a label to backtrack to
1561 // to reenter the subpattern later, with a store to set up index on the
1562 // second iteration.
1564 // FIXME: for capturing parens, could use the index in the capture array?
1565 if (term
->quantityType
== QuantifierGreedy
)
1566 storeToFrame(index
, parenthesesFrameLocation
);
1567 else if (term
->quantityType
== QuantifierNonGreedy
) {
1568 storeToFrame(TrustedImm32(-1), parenthesesFrameLocation
);
1569 op
.m_jumps
.append(jump());
1570 op
.m_reentry
= label();
1571 storeToFrame(index
, parenthesesFrameLocation
);
1574 // If the parenthese are capturing, store the starting index value to the
1575 // captures array, offsetting as necessary.
1577 // FIXME: could avoid offsetting this value in JIT code, apply
1578 // offsets only afterwards, at the point the results array is
1580 if (term
->capture() && compileMode
== IncludeSubpatterns
) {
1581 int inputOffset
= term
->inputPosition
- m_checked
;
1582 if (term
->quantityType
== QuantifierFixedCount
)
1583 inputOffset
-= term
->parentheses
.disjunction
->m_minimumSize
;
1585 move(index
, indexTemporary
);
1586 add32(Imm32(inputOffset
), indexTemporary
);
1587 setSubpatternStart(indexTemporary
, term
->parentheses
.subpatternId
);
1589 setSubpatternStart(index
, term
->parentheses
.subpatternId
);
1593 case OpParenthesesSubpatternOnceEnd
: {
1594 PatternTerm
* term
= op
.m_term
;
1595 const RegisterID indexTemporary
= regT0
;
1596 ASSERT(term
->quantityCount
== 1);
1599 // Runtime ASSERT to make sure that the nested alternative handled the
1600 // "no input consumed" check.
1601 if (term
->quantityType
!= QuantifierFixedCount
&& !term
->parentheses
.disjunction
->m_minimumSize
) {
1602 Jump pastBreakpoint
;
1603 pastBreakpoint
= branch32(NotEqual
, index
, Address(stackPointerRegister
, term
->frameLocation
* sizeof(void*)));
1605 pastBreakpoint
.link(this);
1609 // If the parenthese are capturing, store the ending index value to the
1610 // captures array, offsetting as necessary.
1612 // FIXME: could avoid offsetting this value in JIT code, apply
1613 // offsets only afterwards, at the point the results array is
1615 if (term
->capture() && compileMode
== IncludeSubpatterns
) {
1616 int inputOffset
= term
->inputPosition
- m_checked
;
1618 move(index
, indexTemporary
);
1619 add32(Imm32(inputOffset
), indexTemporary
);
1620 setSubpatternEnd(indexTemporary
, term
->parentheses
.subpatternId
);
1622 setSubpatternEnd(index
, term
->parentheses
.subpatternId
);
1625 // If the parentheses are quantified Greedy then add a label to jump back
1626 // to if get a failed match from after the parentheses. For NonGreedy
1627 // parentheses, link the jump from before the subpattern to here.
1628 if (term
->quantityType
== QuantifierGreedy
)
1629 op
.m_reentry
= label();
1630 else if (term
->quantityType
== QuantifierNonGreedy
) {
1631 YarrOp
& beginOp
= m_ops
[op
.m_previousOp
];
1632 beginOp
.m_jumps
.link(this);
1637 // OpParenthesesSubpatternTerminalBegin/End
1638 case OpParenthesesSubpatternTerminalBegin
: {
1639 PatternTerm
* term
= op
.m_term
;
1640 ASSERT(term
->quantityType
== QuantifierGreedy
);
1641 ASSERT(term
->quantityCount
== quantifyInfinite
);
1642 ASSERT(!term
->capture());
1644 // Upon entry set a label to loop back to.
1645 op
.m_reentry
= label();
1647 // Store the start index of the current match; we need to reject zero
1649 storeToFrame(index
, term
->frameLocation
);
1652 case OpParenthesesSubpatternTerminalEnd
: {
1653 YarrOp
& beginOp
= m_ops
[op
.m_previousOp
];
1655 PatternTerm
* term
= op
.m_term
;
1657 // Runtime ASSERT to make sure that the nested alternative handled the
1658 // "no input consumed" check.
1659 Jump pastBreakpoint
;
1660 pastBreakpoint
= branch32(NotEqual
, index
, Address(stackPointerRegister
, term
->frameLocation
* sizeof(void*)));
1662 pastBreakpoint
.link(this);
1665 // We know that the match is non-zero, we can accept it and
1666 // loop back up to the head of the subpattern.
1667 jump(beginOp
.m_reentry
);
1669 // This is the entry point to jump to when we stop matching - we will
1670 // do so once the subpattern cannot match any more.
1671 op
.m_reentry
= label();
1675 // OpParentheticalAssertionBegin/End
1676 case OpParentheticalAssertionBegin
: {
1677 PatternTerm
* term
= op
.m_term
;
1679 // Store the current index - assertions should not update index, so
1680 // we will need to restore it upon a successful match.
1681 unsigned parenthesesFrameLocation
= term
->frameLocation
;
1682 storeToFrame(index
, parenthesesFrameLocation
);
1685 op
.m_checkAdjust
= m_checked
- term
->inputPosition
;
1686 if (op
.m_checkAdjust
)
1687 sub32(Imm32(op
.m_checkAdjust
), index
);
1689 m_checked
-= op
.m_checkAdjust
;
1692 case OpParentheticalAssertionEnd
: {
1693 PatternTerm
* term
= op
.m_term
;
1695 // Restore the input index value.
1696 unsigned parenthesesFrameLocation
= term
->frameLocation
;
1697 loadFromFrame(parenthesesFrameLocation
, index
);
1699 // If inverted, a successful match of the assertion must be treated
1700 // as a failure, so jump to backtracking.
1701 if (term
->invert()) {
1702 op
.m_jumps
.append(jump());
1703 op
.m_reentry
= label();
1706 YarrOp
& lastOp
= m_ops
[op
.m_previousOp
];
1707 m_checked
+= lastOp
.m_checkAdjust
;
1713 move(TrustedImmPtr((void*)WTF::notFound
), returnRegister
);
1714 move(TrustedImm32(0), returnRegister2
);
1720 } while (opIndex
< m_ops
.size());
1725 // Backwards generate the backtracking code.
1726 size_t opIndex
= m_ops
.size();
1731 YarrOp
& op
= m_ops
[opIndex
];
1735 backtrackTerm(opIndex
);
1738 // OpBodyAlternativeBegin/Next/End
1740 // For each Begin/Next node representing an alternative, we need to decide what to do
1741 // in two circumstances:
1742 // - If we backtrack back into this node, from within the alternative.
1743 // - If the input check at the head of the alternative fails (if this exists).
1745 // We treat these two cases differently since in the former case we have slightly
1746 // more information - since we are backtracking out of a prior alternative we know
1747 // that at least enough input was available to run it. For example, given the regular
1748 // expression /a|b/, if we backtrack out of the first alternative (a failed pattern
1749 // character match of 'a'), then we need not perform an additional input availability
1750 // check before running the second alternative.
1752 // Backtracking required differs for the last alternative, which in the case of the
1753 // repeating set of alternatives must loop. The code generated for the last alternative
1754 // will also be used to handle all input check failures from any prior alternatives -
1755 // these require similar functionality, in seeking the next available alternative for
1756 // which there is sufficient input.
1758 // Since backtracking of all other alternatives simply requires us to link backtracks
1759 // to the reentry point for the subsequent alternative, we will only be generating any
1760 // code when backtracking the last alternative.
1761 case OpBodyAlternativeBegin
:
1762 case OpBodyAlternativeNext
: {
1763 PatternAlternative
* alternative
= op
.m_alternative
;
1765 if (op
.m_op
== OpBodyAlternativeNext
) {
1766 PatternAlternative
* priorAlternative
= m_ops
[op
.m_previousOp
].m_alternative
;
1767 m_checked
+= priorAlternative
->m_minimumSize
;
1769 m_checked
-= alternative
->m_minimumSize
;
1771 // Is this the last alternative? If not, then if we backtrack to this point we just
1772 // need to jump to try to match the next alternative.
1773 if (m_ops
[op
.m_nextOp
].m_op
!= OpBodyAlternativeEnd
) {
1774 m_backtrackingState
.linkTo(m_ops
[op
.m_nextOp
].m_reentry
, this);
1777 YarrOp
& endOp
= m_ops
[op
.m_nextOp
];
1779 YarrOp
* beginOp
= &op
;
1780 while (beginOp
->m_op
!= OpBodyAlternativeBegin
) {
1781 ASSERT(beginOp
->m_op
== OpBodyAlternativeNext
);
1782 beginOp
= &m_ops
[beginOp
->m_previousOp
];
1785 bool onceThrough
= endOp
.m_nextOp
== notFound
;
1787 // First, generate code to handle cases where we backtrack out of an attempted match
1788 // of the last alternative. If this is a 'once through' set of alternatives then we
1789 // have nothing to do - link this straight through to the End.
1791 m_backtrackingState
.linkTo(endOp
.m_reentry
, this);
1793 // If we don't need to move the input poistion, and the pattern has a fixed size
1794 // (in which case we omit the store of the start index until the pattern has matched)
1795 // then we can just link the backtrack out of the last alternative straight to the
1796 // head of the first alternative.
1797 if (m_pattern
.m_body
->m_hasFixedSize
1798 && (alternative
->m_minimumSize
> beginOp
->m_alternative
->m_minimumSize
)
1799 && (alternative
->m_minimumSize
- beginOp
->m_alternative
->m_minimumSize
== 1))
1800 m_backtrackingState
.linkTo(beginOp
->m_reentry
, this);
1802 // We need to generate a trampoline of code to execute before looping back
1803 // around to the first alternative.
1804 m_backtrackingState
.link(this);
1806 // If the pattern size is not fixed, then store the start index, for use if we match.
1807 if (!m_pattern
.m_body
->m_hasFixedSize
) {
1808 if (alternative
->m_minimumSize
== 1)
1809 setMatchStart(index
);
1812 if (alternative
->m_minimumSize
)
1813 sub32(Imm32(alternative
->m_minimumSize
- 1), regT0
);
1815 add32(TrustedImm32(1), regT0
);
1816 setMatchStart(regT0
);
1820 // Generate code to loop. Check whether the last alternative is longer than the
1821 // first (e.g. /a|xy/ or /a|xyz/).
1822 if (alternative
->m_minimumSize
> beginOp
->m_alternative
->m_minimumSize
) {
1823 // We want to loop, and increment input position. If the delta is 1, it is
1824 // already correctly incremented, if more than one then decrement as appropriate.
1825 unsigned delta
= alternative
->m_minimumSize
- beginOp
->m_alternative
->m_minimumSize
;
1828 sub32(Imm32(delta
- 1), index
);
1829 jump(beginOp
->m_reentry
);
1831 // If the first alternative has minimum size 0xFFFFFFFFu, then there cannot
1832 // be sufficent input available to handle this, so just fall through.
1833 unsigned delta
= beginOp
->m_alternative
->m_minimumSize
- alternative
->m_minimumSize
;
1834 if (delta
!= 0xFFFFFFFFu
) {
1835 // We need to check input because we are incrementing the input.
1836 add32(Imm32(delta
+ 1), index
);
1837 checkInput().linkTo(beginOp
->m_reentry
, this);
1843 // We can reach this point in the code in two ways:
1844 // - Fallthrough from the code above (a repeating alternative backtracked out of its
1845 // last alternative, and did not have sufficent input to run the first).
1846 // - We will loop back up to the following label when a releating alternative loops,
1847 // following a failed input check.
1849 // Either way, we have just failed the input check for the first alternative.
1850 Label
firstInputCheckFailed(this);
1852 // Generate code to handle input check failures from alternatives except the last.
1853 // prevOp is the alternative we're handling a bail out from (initially Begin), and
1854 // nextOp is the alternative we will be attempting to reenter into.
1856 // We will link input check failures from the forwards matching path back to the code
1857 // that can handle them.
1858 YarrOp
* prevOp
= beginOp
;
1859 YarrOp
* nextOp
= &m_ops
[beginOp
->m_nextOp
];
1860 while (nextOp
->m_op
!= OpBodyAlternativeEnd
) {
1861 prevOp
->m_jumps
.link(this);
1863 // We only get here if an input check fails, it is only worth checking again
1864 // if the next alternative has a minimum size less than the last.
1865 if (prevOp
->m_alternative
->m_minimumSize
> nextOp
->m_alternative
->m_minimumSize
) {
1866 // FIXME: if we added an extra label to YarrOp, we could avoid needing to
1867 // subtract delta back out, and reduce this code. Should performance test
1868 // the benefit of this.
1869 unsigned delta
= prevOp
->m_alternative
->m_minimumSize
- nextOp
->m_alternative
->m_minimumSize
;
1870 sub32(Imm32(delta
), index
);
1871 Jump fail
= jumpIfNoAvailableInput();
1872 add32(Imm32(delta
), index
);
1873 jump(nextOp
->m_reentry
);
1875 } else if (prevOp
->m_alternative
->m_minimumSize
< nextOp
->m_alternative
->m_minimumSize
)
1876 add32(Imm32(nextOp
->m_alternative
->m_minimumSize
- prevOp
->m_alternative
->m_minimumSize
), index
);
1878 nextOp
= &m_ops
[nextOp
->m_nextOp
];
1881 // We fall through to here if there is insufficient input to run the last alternative.
1883 // If there is insufficient input to run the last alternative, then for 'once through'
1884 // alternatives we are done - just jump back up into the forwards matching path at the End.
1886 op
.m_jumps
.linkTo(endOp
.m_reentry
, this);
1887 jump(endOp
.m_reentry
);
1891 // For repeating alternatives, link any input check failure from the last alternative to
1893 op
.m_jumps
.link(this);
1895 bool needsToUpdateMatchStart
= !m_pattern
.m_body
->m_hasFixedSize
;
1897 // Check for cases where input position is already incremented by 1 for the last
1898 // alternative (this is particularly useful where the minimum size of the body
1899 // disjunction is 0, e.g. /a*|b/).
1900 if (needsToUpdateMatchStart
&& alternative
->m_minimumSize
== 1) {
1901 // index is already incremented by 1, so just store it now!
1902 setMatchStart(index
);
1903 needsToUpdateMatchStart
= false;
1906 // Check whether there is sufficient input to loop. Increment the input position by
1907 // one, and check. Also add in the minimum disjunction size before checking - there
1908 // is no point in looping if we're just going to fail all the input checks around
1909 // the next iteration.
1910 ASSERT(alternative
->m_minimumSize
>= m_pattern
.m_body
->m_minimumSize
);
1911 if (alternative
->m_minimumSize
== m_pattern
.m_body
->m_minimumSize
) {
1912 // If the last alternative had the same minimum size as the disjunction,
1913 // just simply increment input pos by 1, no adjustment based on minimum size.
1914 add32(TrustedImm32(1), index
);
1916 // If the minimum for the last alternative was one greater than that
1917 // for the disjunction, we're already progressed by 1, nothing to do!
1918 unsigned delta
= (alternative
->m_minimumSize
- m_pattern
.m_body
->m_minimumSize
) - 1;
1920 sub32(Imm32(delta
), index
);
1922 Jump matchFailed
= jumpIfNoAvailableInput();
1924 if (needsToUpdateMatchStart
) {
1925 if (!m_pattern
.m_body
->m_minimumSize
)
1926 setMatchStart(index
);
1929 sub32(Imm32(m_pattern
.m_body
->m_minimumSize
), regT0
);
1930 setMatchStart(regT0
);
1934 // Calculate how much more input the first alternative requires than the minimum
1935 // for the body as a whole. If no more is needed then we dont need an additional
1936 // input check here - jump straight back up to the start of the first alternative.
1937 if (beginOp
->m_alternative
->m_minimumSize
== m_pattern
.m_body
->m_minimumSize
)
1938 jump(beginOp
->m_reentry
);
1940 if (beginOp
->m_alternative
->m_minimumSize
> m_pattern
.m_body
->m_minimumSize
)
1941 add32(Imm32(beginOp
->m_alternative
->m_minimumSize
- m_pattern
.m_body
->m_minimumSize
), index
);
1943 sub32(Imm32(m_pattern
.m_body
->m_minimumSize
- beginOp
->m_alternative
->m_minimumSize
), index
);
1944 checkInput().linkTo(beginOp
->m_reentry
, this);
1945 jump(firstInputCheckFailed
);
1948 // We jump to here if we iterate to the point that there is insufficient input to
1949 // run any matches, and need to return a failure state from JIT code.
1950 matchFailed
.link(this);
1953 move(TrustedImmPtr((void*)WTF::notFound
), returnRegister
);
1954 move(TrustedImm32(0), returnRegister2
);
1958 case OpBodyAlternativeEnd
: {
1959 // We should never backtrack back into a body disjunction.
1960 ASSERT(m_backtrackingState
.isEmpty());
1962 PatternAlternative
* priorAlternative
= m_ops
[op
.m_previousOp
].m_alternative
;
1963 m_checked
+= priorAlternative
->m_minimumSize
;
1967 // OpSimpleNestedAlternativeBegin/Next/End
1968 // OpNestedAlternativeBegin/Next/End
1970 // Generate code for when we backtrack back out of an alternative into
1971 // a Begin or Next node, or when the entry input count check fails. If
1972 // there are more alternatives we need to jump to the next alternative,
1973 // if not we backtrack back out of the current set of parentheses.
1975 // In the case of non-simple nested assertions we need to also link the
1976 // 'return address' appropriately to backtrack back out into the correct
1978 case OpSimpleNestedAlternativeBegin
:
1979 case OpSimpleNestedAlternativeNext
:
1980 case OpNestedAlternativeBegin
:
1981 case OpNestedAlternativeNext
: {
1982 YarrOp
& nextOp
= m_ops
[op
.m_nextOp
];
1983 bool isBegin
= op
.m_previousOp
== notFound
;
1984 bool isLastAlternative
= nextOp
.m_nextOp
== notFound
;
1985 ASSERT(isBegin
== (op
.m_op
== OpSimpleNestedAlternativeBegin
|| op
.m_op
== OpNestedAlternativeBegin
));
1986 ASSERT(isLastAlternative
== (nextOp
.m_op
== OpSimpleNestedAlternativeEnd
|| nextOp
.m_op
== OpNestedAlternativeEnd
));
1988 // Treat an input check failure the same as a failed match.
1989 m_backtrackingState
.append(op
.m_jumps
);
1991 // Set the backtracks to jump to the appropriate place. We may need
1992 // to link the backtracks in one of three different way depending on
1993 // the type of alternative we are dealing with:
1994 // - A single alternative, with no siblings.
1995 // - The last alternative of a set of two or more.
1996 // - An alternative other than the last of a set of two or more.
1998 // In the case of a single alternative on its own, we don't need to
1999 // jump anywhere - if the alternative fails to match we can just
2000 // continue to backtrack out of the parentheses without jumping.
2002 // In the case of the last alternative in a set of more than one, we
2003 // need to jump to return back out to the beginning. We'll do so by
2004 // adding a jump to the End node's m_jumps list, and linking this
2005 // when we come to generate the Begin node. For alternatives other
2006 // than the last, we need to jump to the next alternative.
2008 // If the alternative had adjusted the input position we must link
2009 // backtracking to here, correct, and then jump on. If not we can
2010 // link the backtracks directly to their destination.
2011 if (op
.m_checkAdjust
) {
2012 // Handle the cases where we need to link the backtracks here.
2013 m_backtrackingState
.link(this);
2014 sub32(Imm32(op
.m_checkAdjust
), index
);
2015 if (!isLastAlternative
) {
2016 // An alternative that is not the last should jump to its successor.
2017 jump(nextOp
.m_reentry
);
2018 } else if (!isBegin
) {
2019 // The last of more than one alternatives must jump back to the beginning.
2020 nextOp
.m_jumps
.append(jump());
2022 // A single alternative on its own can fall through.
2023 m_backtrackingState
.fallthrough();
2026 // Handle the cases where we can link the backtracks directly to their destinations.
2027 if (!isLastAlternative
) {
2028 // An alternative that is not the last should jump to its successor.
2029 m_backtrackingState
.linkTo(nextOp
.m_reentry
, this);
2030 } else if (!isBegin
) {
2031 // The last of more than one alternatives must jump back to the beginning.
2032 m_backtrackingState
.takeBacktracksToJumpList(nextOp
.m_jumps
, this);
2034 // In the case of a single alternative on its own do nothing - it can fall through.
2037 // If there is a backtrack jump from a zero length match link it here.
2038 if (op
.m_zeroLengthMatch
.isSet())
2039 m_backtrackingState
.append(op
.m_zeroLengthMatch
);
2041 // At this point we've handled the backtracking back into this node.
2042 // Now link any backtracks that need to jump to here.
2044 // For non-simple alternatives, link the alternative's 'return address'
2045 // so that we backtrack back out into the previous alternative.
2046 if (op
.m_op
== OpNestedAlternativeNext
)
2047 m_backtrackingState
.append(op
.m_returnAddress
);
2049 // If there is more than one alternative, then the last alternative will
2050 // have planted a jump to be linked to the end. This jump was added to the
2051 // End node's m_jumps list. If we are back at the beginning, link it here.
2053 YarrOp
* endOp
= &m_ops
[op
.m_nextOp
];
2054 while (endOp
->m_nextOp
!= notFound
) {
2055 ASSERT(endOp
->m_op
== OpSimpleNestedAlternativeNext
|| endOp
->m_op
== OpNestedAlternativeNext
);
2056 endOp
= &m_ops
[endOp
->m_nextOp
];
2058 ASSERT(endOp
->m_op
== OpSimpleNestedAlternativeEnd
|| endOp
->m_op
== OpNestedAlternativeEnd
);
2059 m_backtrackingState
.append(endOp
->m_jumps
);
2063 YarrOp
& lastOp
= m_ops
[op
.m_previousOp
];
2064 m_checked
+= lastOp
.m_checkAdjust
;
2066 m_checked
-= op
.m_checkAdjust
;
2069 case OpSimpleNestedAlternativeEnd
:
2070 case OpNestedAlternativeEnd
: {
2071 PatternTerm
* term
= op
.m_term
;
2073 // If there is a backtrack jump from a zero length match link it here.
2074 if (op
.m_zeroLengthMatch
.isSet())
2075 m_backtrackingState
.append(op
.m_zeroLengthMatch
);
2077 // If we backtrack into the end of a simple subpattern do nothing;
2078 // just continue through into the last alternative. If we backtrack
2079 // into the end of a non-simple set of alternatives we need to jump
2080 // to the backtracking return address set up during generation.
2081 if (op
.m_op
== OpNestedAlternativeEnd
) {
2082 m_backtrackingState
.link(this);
2084 // Plant a jump to the return address.
2085 unsigned parenthesesFrameLocation
= term
->frameLocation
;
2086 unsigned alternativeFrameLocation
= parenthesesFrameLocation
;
2087 if (term
->quantityType
!= QuantifierFixedCount
)
2088 alternativeFrameLocation
+= YarrStackSpaceForBackTrackInfoParenthesesOnce
;
2089 loadFromFrameAndJump(alternativeFrameLocation
);
2091 // Link the DataLabelPtr associated with the end of the last
2092 // alternative to this point.
2093 m_backtrackingState
.append(op
.m_returnAddress
);
2096 YarrOp
& lastOp
= m_ops
[op
.m_previousOp
];
2097 m_checked
+= lastOp
.m_checkAdjust
;
2101 // OpParenthesesSubpatternOnceBegin/End
2103 // When we are backtracking back out of a capturing subpattern we need
2104 // to clear the start index in the matches output array, to record that
2105 // this subpattern has not been captured.
2107 // When backtracking back out of a Greedy quantified subpattern we need
2108 // to catch this, and try running the remainder of the alternative after
2109 // the subpattern again, skipping the parentheses.
2111 // Upon backtracking back into a quantified set of parentheses we need to
2112 // check whether we were currently skipping the subpattern. If not, we
2113 // can backtrack into them, if we were we need to either backtrack back
2114 // out of the start of the parentheses, or jump back to the forwards
2115 // matching start, depending of whether the match is Greedy or NonGreedy.
2116 case OpParenthesesSubpatternOnceBegin
: {
2117 PatternTerm
* term
= op
.m_term
;
2118 ASSERT(term
->quantityCount
== 1);
2120 // We only need to backtrack to this point if capturing or greedy.
2121 if ((term
->capture() && compileMode
== IncludeSubpatterns
) || term
->quantityType
== QuantifierGreedy
) {
2122 m_backtrackingState
.link(this);
2124 // If capturing, clear the capture (we only need to reset start).
2125 if (term
->capture() && compileMode
== IncludeSubpatterns
)
2126 clearSubpatternStart(term
->parentheses
.subpatternId
);
2128 // If Greedy, jump to the end.
2129 if (term
->quantityType
== QuantifierGreedy
) {
2130 // Clear the flag in the stackframe indicating we ran through the subpattern.
2131 unsigned parenthesesFrameLocation
= term
->frameLocation
;
2132 storeToFrame(TrustedImm32(-1), parenthesesFrameLocation
);
2133 // Jump to after the parentheses, skipping the subpattern.
2134 jump(m_ops
[op
.m_nextOp
].m_reentry
);
2135 // A backtrack from after the parentheses, when skipping the subpattern,
2136 // will jump back to here.
2137 op
.m_jumps
.link(this);
2140 m_backtrackingState
.fallthrough();
2144 case OpParenthesesSubpatternOnceEnd
: {
2145 PatternTerm
* term
= op
.m_term
;
2147 if (term
->quantityType
!= QuantifierFixedCount
) {
2148 m_backtrackingState
.link(this);
2150 // Check whether we should backtrack back into the parentheses, or if we
2151 // are currently in a state where we had skipped over the subpattern
2152 // (in which case the flag value on the stack will be -1).
2153 unsigned parenthesesFrameLocation
= term
->frameLocation
;
2154 Jump hadSkipped
= branch32(Equal
, Address(stackPointerRegister
, parenthesesFrameLocation
* sizeof(void*)), TrustedImm32(-1));
2156 if (term
->quantityType
== QuantifierGreedy
) {
2157 // For Greedy parentheses, we skip after having already tried going
2158 // through the subpattern, so if we get here we're done.
2159 YarrOp
& beginOp
= m_ops
[op
.m_previousOp
];
2160 beginOp
.m_jumps
.append(hadSkipped
);
2162 // For NonGreedy parentheses, we try skipping the subpattern first,
2163 // so if we get here we need to try running through the subpattern
2164 // next. Jump back to the start of the parentheses in the forwards
2166 ASSERT(term
->quantityType
== QuantifierNonGreedy
);
2167 YarrOp
& beginOp
= m_ops
[op
.m_previousOp
];
2168 hadSkipped
.linkTo(beginOp
.m_reentry
, this);
2171 m_backtrackingState
.fallthrough();
2174 m_backtrackingState
.append(op
.m_jumps
);
2178 // OpParenthesesSubpatternTerminalBegin/End
2180 // Terminal subpatterns will always match - there is nothing after them to
2181 // force a backtrack, and they have a minimum count of 0, and as such will
2182 // always produce an acceptable result.
2183 case OpParenthesesSubpatternTerminalBegin
: {
2184 // We will backtrack to this point once the subpattern cannot match any
2185 // more. Since no match is accepted as a successful match (we are Greedy
2186 // quantified with a minimum of zero) jump back to the forwards matching
2188 YarrOp
& endOp
= m_ops
[op
.m_nextOp
];
2189 m_backtrackingState
.linkTo(endOp
.m_reentry
, this);
2192 case OpParenthesesSubpatternTerminalEnd
:
2193 // We should never be backtracking to here (hence the 'terminal' in the name).
2194 ASSERT(m_backtrackingState
.isEmpty());
2195 m_backtrackingState
.append(op
.m_jumps
);
2198 // OpParentheticalAssertionBegin/End
2199 case OpParentheticalAssertionBegin
: {
2200 PatternTerm
* term
= op
.m_term
;
2201 YarrOp
& endOp
= m_ops
[op
.m_nextOp
];
2203 // We need to handle the backtracks upon backtracking back out
2204 // of a parenthetical assertion if either we need to correct
2205 // the input index, or the assertion was inverted.
2206 if (op
.m_checkAdjust
|| term
->invert()) {
2207 m_backtrackingState
.link(this);
2209 if (op
.m_checkAdjust
)
2210 add32(Imm32(op
.m_checkAdjust
), index
);
2212 // In an inverted assertion failure to match the subpattern
2213 // is treated as a successful match - jump to the end of the
2214 // subpattern. We already have adjusted the input position
2215 // back to that before the assertion, which is correct.
2217 jump(endOp
.m_reentry
);
2219 m_backtrackingState
.fallthrough();
2222 // The End node's jump list will contain any backtracks into
2223 // the end of the assertion. Also, if inverted, we will have
2224 // added the failure caused by a successful match to this.
2225 m_backtrackingState
.append(endOp
.m_jumps
);
2227 m_checked
+= op
.m_checkAdjust
;
2230 case OpParentheticalAssertionEnd
: {
2231 // FIXME: We should really be clearing any nested subpattern
2232 // matches on bailing out from after the pattern. Firefox has
2233 // this bug too (presumably because they use YARR!)
2235 // Never backtrack into an assertion; later failures bail to before the begin.
2236 m_backtrackingState
.takeBacktracksToJumpList(op
.m_jumps
, this);
2238 YarrOp
& lastOp
= m_ops
[op
.m_previousOp
];
2239 m_checked
-= lastOp
.m_checkAdjust
;
2250 // Compilation methods:
2251 // ====================
2253 // opCompileParenthesesSubpattern
2254 // Emits ops for a subpattern (set of parentheses). These consist
2255 // of a set of alternatives wrapped in an outer set of nodes for
2257 // Supported types of parentheses are 'Once' (quantityCount == 1)
2258 // and 'Terminal' (non-capturing parentheses quantified as greedy
2260 // Alternatives will use the 'Simple' set of ops if either the
2261 // subpattern is terminal (in which case we will never need to
2262 // backtrack), or if the subpattern only contains one alternative.
2263 void opCompileParenthesesSubpattern(PatternTerm
* term
)
2265 YarrOpCode parenthesesBeginOpCode
;
2266 YarrOpCode parenthesesEndOpCode
;
2267 YarrOpCode alternativeBeginOpCode
= OpSimpleNestedAlternativeBegin
;
2268 YarrOpCode alternativeNextOpCode
= OpSimpleNestedAlternativeNext
;
2269 YarrOpCode alternativeEndOpCode
= OpSimpleNestedAlternativeEnd
;
2271 // We can currently only compile quantity 1 subpatterns that are
2272 // not copies. We generate a copy in the case of a range quantifier,
2273 // e.g. /(?:x){3,9}/, or /(?:x)+/ (These are effectively expanded to
2274 // /(?:x){3,3}(?:x){0,6}/ and /(?:x)(?:x)*/ repectively). The problem
2275 // comes where the subpattern is capturing, in which case we would
2276 // need to restore the capture from the first subpattern upon a
2277 // failure in the second.
2278 if (term
->quantityCount
== 1 && !term
->parentheses
.isCopy
) {
2279 // Select the 'Once' nodes.
2280 parenthesesBeginOpCode
= OpParenthesesSubpatternOnceBegin
;
2281 parenthesesEndOpCode
= OpParenthesesSubpatternOnceEnd
;
2283 // If there is more than one alternative we cannot use the 'simple' nodes.
2284 if (term
->parentheses
.disjunction
->m_alternatives
.size() != 1) {
2285 alternativeBeginOpCode
= OpNestedAlternativeBegin
;
2286 alternativeNextOpCode
= OpNestedAlternativeNext
;
2287 alternativeEndOpCode
= OpNestedAlternativeEnd
;
2289 } else if (term
->parentheses
.isTerminal
) {
2290 // Select the 'Terminal' nodes.
2291 parenthesesBeginOpCode
= OpParenthesesSubpatternTerminalBegin
;
2292 parenthesesEndOpCode
= OpParenthesesSubpatternTerminalEnd
;
2294 // This subpattern is not supported by the JIT.
2295 m_shouldFallBack
= true;
2299 size_t parenBegin
= m_ops
.size();
2300 m_ops
.append(parenthesesBeginOpCode
);
2302 m_ops
.append(alternativeBeginOpCode
);
2303 m_ops
.last().m_previousOp
= notFound
;
2304 m_ops
.last().m_term
= term
;
2305 Vector
<PatternAlternative
*>& alternatives
= term
->parentheses
.disjunction
->m_alternatives
;
2306 for (unsigned i
= 0; i
< alternatives
.size(); ++i
) {
2307 size_t lastOpIndex
= m_ops
.size() - 1;
2309 PatternAlternative
* nestedAlternative
= alternatives
[i
];
2310 opCompileAlternative(nestedAlternative
);
2312 size_t thisOpIndex
= m_ops
.size();
2313 m_ops
.append(YarrOp(alternativeNextOpCode
));
2315 YarrOp
& lastOp
= m_ops
[lastOpIndex
];
2316 YarrOp
& thisOp
= m_ops
[thisOpIndex
];
2318 lastOp
.m_alternative
= nestedAlternative
;
2319 lastOp
.m_nextOp
= thisOpIndex
;
2320 thisOp
.m_previousOp
= lastOpIndex
;
2321 thisOp
.m_term
= term
;
2323 YarrOp
& lastOp
= m_ops
.last();
2324 ASSERT(lastOp
.m_op
== alternativeNextOpCode
);
2325 lastOp
.m_op
= alternativeEndOpCode
;
2326 lastOp
.m_alternative
= 0;
2327 lastOp
.m_nextOp
= notFound
;
2329 size_t parenEnd
= m_ops
.size();
2330 m_ops
.append(parenthesesEndOpCode
);
2332 m_ops
[parenBegin
].m_term
= term
;
2333 m_ops
[parenBegin
].m_previousOp
= notFound
;
2334 m_ops
[parenBegin
].m_nextOp
= parenEnd
;
2335 m_ops
[parenEnd
].m_term
= term
;
2336 m_ops
[parenEnd
].m_previousOp
= parenBegin
;
2337 m_ops
[parenEnd
].m_nextOp
= notFound
;
2340 // opCompileParentheticalAssertion
2341 // Emits ops for a parenthetical assertion. These consist of an
2342 // OpSimpleNestedAlternativeBegin/Next/End set of nodes wrapping
2343 // the alternatives, with these wrapped by an outer pair of
2344 // OpParentheticalAssertionBegin/End nodes.
2345 // We can always use the OpSimpleNestedAlternative nodes in the
2346 // case of parenthetical assertions since these only ever match
2347 // once, and will never backtrack back into the assertion.
2348 void opCompileParentheticalAssertion(PatternTerm
* term
)
2350 size_t parenBegin
= m_ops
.size();
2351 m_ops
.append(OpParentheticalAssertionBegin
);
2353 m_ops
.append(OpSimpleNestedAlternativeBegin
);
2354 m_ops
.last().m_previousOp
= notFound
;
2355 m_ops
.last().m_term
= term
;
2356 Vector
<PatternAlternative
*>& alternatives
= term
->parentheses
.disjunction
->m_alternatives
;
2357 for (unsigned i
= 0; i
< alternatives
.size(); ++i
) {
2358 size_t lastOpIndex
= m_ops
.size() - 1;
2360 PatternAlternative
* nestedAlternative
= alternatives
[i
];
2361 opCompileAlternative(nestedAlternative
);
2363 size_t thisOpIndex
= m_ops
.size();
2364 m_ops
.append(YarrOp(OpSimpleNestedAlternativeNext
));
2366 YarrOp
& lastOp
= m_ops
[lastOpIndex
];
2367 YarrOp
& thisOp
= m_ops
[thisOpIndex
];
2369 lastOp
.m_alternative
= nestedAlternative
;
2370 lastOp
.m_nextOp
= thisOpIndex
;
2371 thisOp
.m_previousOp
= lastOpIndex
;
2372 thisOp
.m_term
= term
;
2374 YarrOp
& lastOp
= m_ops
.last();
2375 ASSERT(lastOp
.m_op
== OpSimpleNestedAlternativeNext
);
2376 lastOp
.m_op
= OpSimpleNestedAlternativeEnd
;
2377 lastOp
.m_alternative
= 0;
2378 lastOp
.m_nextOp
= notFound
;
2380 size_t parenEnd
= m_ops
.size();
2381 m_ops
.append(OpParentheticalAssertionEnd
);
2383 m_ops
[parenBegin
].m_term
= term
;
2384 m_ops
[parenBegin
].m_previousOp
= notFound
;
2385 m_ops
[parenBegin
].m_nextOp
= parenEnd
;
2386 m_ops
[parenEnd
].m_term
= term
;
2387 m_ops
[parenEnd
].m_previousOp
= parenBegin
;
2388 m_ops
[parenEnd
].m_nextOp
= notFound
;
2391 // opCompileAlternative
2392 // Called to emit nodes for all terms in an alternative.
2393 void opCompileAlternative(PatternAlternative
* alternative
)
2395 optimizeAlternative(alternative
);
2397 for (unsigned i
= 0; i
< alternative
->m_terms
.size(); ++i
) {
2398 PatternTerm
* term
= &alternative
->m_terms
[i
];
2400 switch (term
->type
) {
2401 case PatternTerm::TypeParenthesesSubpattern
:
2402 opCompileParenthesesSubpattern(term
);
2405 case PatternTerm::TypeParentheticalAssertion
:
2406 opCompileParentheticalAssertion(term
);
2416 // This method compiles the body disjunction of the regular expression.
2417 // The body consists of two sets of alternatives - zero or more 'once
2418 // through' (BOL anchored) alternatives, followed by zero or more
2419 // repeated alternatives.
2420 // For each of these two sets of alteratives, if not empty they will be
2421 // wrapped in a set of OpBodyAlternativeBegin/Next/End nodes (with the
2422 // 'begin' node referencing the first alternative, and 'next' nodes
2423 // referencing any further alternatives. The begin/next/end nodes are
2424 // linked together in a doubly linked list. In the case of repeating
2425 // alternatives, the end node is also linked back to the beginning.
2426 // If no repeating alternatives exist, then a OpMatchFailed node exists
2427 // to return the failing result.
2428 void opCompileBody(PatternDisjunction
* disjunction
)
2430 Vector
<PatternAlternative
*>& alternatives
= disjunction
->m_alternatives
;
2431 size_t currentAlternativeIndex
= 0;
2433 // Emit the 'once through' alternatives.
2434 if (alternatives
.size() && alternatives
[0]->onceThrough()) {
2435 m_ops
.append(YarrOp(OpBodyAlternativeBegin
));
2436 m_ops
.last().m_previousOp
= notFound
;
2439 size_t lastOpIndex
= m_ops
.size() - 1;
2440 PatternAlternative
* alternative
= alternatives
[currentAlternativeIndex
];
2441 opCompileAlternative(alternative
);
2443 size_t thisOpIndex
= m_ops
.size();
2444 m_ops
.append(YarrOp(OpBodyAlternativeNext
));
2446 YarrOp
& lastOp
= m_ops
[lastOpIndex
];
2447 YarrOp
& thisOp
= m_ops
[thisOpIndex
];
2449 lastOp
.m_alternative
= alternative
;
2450 lastOp
.m_nextOp
= thisOpIndex
;
2451 thisOp
.m_previousOp
= lastOpIndex
;
2453 ++currentAlternativeIndex
;
2454 } while (currentAlternativeIndex
< alternatives
.size() && alternatives
[currentAlternativeIndex
]->onceThrough());
2456 YarrOp
& lastOp
= m_ops
.last();
2458 ASSERT(lastOp
.m_op
== OpBodyAlternativeNext
);
2459 lastOp
.m_op
= OpBodyAlternativeEnd
;
2460 lastOp
.m_alternative
= 0;
2461 lastOp
.m_nextOp
= notFound
;
2464 if (currentAlternativeIndex
== alternatives
.size()) {
2465 m_ops
.append(YarrOp(OpMatchFailed
));
2469 // Emit the repeated alternatives.
2470 size_t repeatLoop
= m_ops
.size();
2471 m_ops
.append(YarrOp(OpBodyAlternativeBegin
));
2472 m_ops
.last().m_previousOp
= notFound
;
2474 size_t lastOpIndex
= m_ops
.size() - 1;
2475 PatternAlternative
* alternative
= alternatives
[currentAlternativeIndex
];
2476 ASSERT(!alternative
->onceThrough());
2477 opCompileAlternative(alternative
);
2479 size_t thisOpIndex
= m_ops
.size();
2480 m_ops
.append(YarrOp(OpBodyAlternativeNext
));
2482 YarrOp
& lastOp
= m_ops
[lastOpIndex
];
2483 YarrOp
& thisOp
= m_ops
[thisOpIndex
];
2485 lastOp
.m_alternative
= alternative
;
2486 lastOp
.m_nextOp
= thisOpIndex
;
2487 thisOp
.m_previousOp
= lastOpIndex
;
2489 ++currentAlternativeIndex
;
2490 } while (currentAlternativeIndex
< alternatives
.size());
2491 YarrOp
& lastOp
= m_ops
.last();
2492 ASSERT(lastOp
.m_op
== OpBodyAlternativeNext
);
2493 lastOp
.m_op
= OpBodyAlternativeEnd
;
2494 lastOp
.m_alternative
= 0;
2495 lastOp
.m_nextOp
= repeatLoop
;
2498 void generateEnter()
2501 push(X86Registers::ebp
);
2502 move(stackPointerRegister
, X86Registers::ebp
);
2503 push(X86Registers::ebx
);
2505 push(X86Registers::ebp
);
2506 move(stackPointerRegister
, X86Registers::ebp
);
2507 // TODO: do we need spill registers to fill the output pointer if there are no sub captures?
2508 push(X86Registers::ebx
);
2509 push(X86Registers::edi
);
2510 push(X86Registers::esi
);
2511 // load output into edi (2 = saved ebp + return address).
2513 loadPtr(Address(X86Registers::ebp
, 2 * sizeof(void*)), input
);
2514 loadPtr(Address(X86Registers::ebp
, 3 * sizeof(void*)), index
);
2515 loadPtr(Address(X86Registers::ebp
, 4 * sizeof(void*)), length
);
2516 if (compileMode
== IncludeSubpatterns
)
2517 loadPtr(Address(X86Registers::ebp
, 5 * sizeof(void*)), output
);
2519 if (compileMode
== IncludeSubpatterns
)
2520 loadPtr(Address(X86Registers::ebp
, 2 * sizeof(void*)), output
);
2523 push(ARMRegisters::r4
);
2524 push(ARMRegisters::r5
);
2525 push(ARMRegisters::r6
);
2526 #if CPU(ARM_TRADITIONAL)
2527 push(ARMRegisters::r8
); // scratch register
2529 if (compileMode
== IncludeSubpatterns
)
2530 move(ARMRegisters::r3
, output
);
2532 push(SH4Registers::r11
);
2533 push(SH4Registers::r13
);
2539 void generateReturn()
2542 pop(X86Registers::ebx
);
2543 pop(X86Registers::ebp
);
2545 pop(X86Registers::esi
);
2546 pop(X86Registers::edi
);
2547 pop(X86Registers::ebx
);
2548 pop(X86Registers::ebp
);
2550 #if CPU(ARM_TRADITIONAL)
2551 pop(ARMRegisters::r8
); // scratch register
2553 pop(ARMRegisters::r6
);
2554 pop(ARMRegisters::r5
);
2555 pop(ARMRegisters::r4
);
2557 pop(SH4Registers::r13
);
2558 pop(SH4Registers::r11
);
2566 YarrGenerator(YarrPattern
& pattern
, YarrCharSize charSize
)
2567 : m_pattern(pattern
)
2568 , m_charSize(charSize
)
2569 , m_charScale(m_charSize
== Char8
? TimesOne
: TimesTwo
)
2570 , m_shouldFallBack(false)
2575 void compile(JSGlobalData
* globalData
, YarrCodeBlock
& jitObject
)
2579 Jump hasInput
= checkInput();
2580 move(TrustedImmPtr((void*)WTF::notFound
), returnRegister
);
2581 move(TrustedImm32(0), returnRegister2
);
2583 hasInput
.link(this);
2585 if (compileMode
== IncludeSubpatterns
) {
2586 for (unsigned i
= 0; i
< m_pattern
.m_numSubpatterns
+ 1; ++i
)
2587 store32(TrustedImm32(-1), Address(output
, (i
<< 1) * sizeof(int)));
2590 if (!m_pattern
.m_body
->m_hasFixedSize
)
2591 setMatchStart(index
);
2595 // Compile the pattern to the internal 'YarrOp' representation.
2596 opCompileBody(m_pattern
.m_body
);
2598 // If we encountered anything we can't handle in the JIT code
2599 // (e.g. backreferences) then return early.
2600 if (m_shouldFallBack
) {
2601 jitObject
.setFallBack(true);
2608 // Link & finalize the code.
2609 LinkBuffer
linkBuffer(*globalData
, this, REGEXP_CODE_ID
);
2610 m_backtrackingState
.linkDataLabels(linkBuffer
);
2612 if (compileMode
== MatchOnly
) {
2613 if (m_charSize
== Char8
)
2614 jitObject
.set8BitCodeMatchOnly(linkBuffer
.finalizeCode());
2616 jitObject
.set16BitCodeMatchOnly(linkBuffer
.finalizeCode());
2618 if (m_charSize
== Char8
)
2619 jitObject
.set8BitCode(linkBuffer
.finalizeCode());
2621 jitObject
.set16BitCode(linkBuffer
.finalizeCode());
2623 jitObject
.setFallBack(m_shouldFallBack
);
2627 YarrPattern
& m_pattern
;
2629 YarrCharSize m_charSize
;
2633 // Used to detect regular expression constructs that are not currently
2634 // supported in the JIT; fall back to the interpreter when this is detected.
2635 bool m_shouldFallBack
;
2637 // The regular expression expressed as a linear sequence of operations.
2638 Vector
<YarrOp
, 128> m_ops
;
2640 // This records the current input offset being applied due to the current
2641 // set of alternatives we are nested within. E.g. when matching the
2642 // character 'b' within the regular expression /abc/, we will know that
2643 // the minimum size for the alternative is 3, checked upon entry to the
2644 // alternative, and that 'b' is at offset 1 from the start, and as such
2645 // when matching 'b' we need to apply an offset of -2 to the load.
2647 // FIXME: This should go away. Rather than tracking this value throughout
2648 // code generation, we should gather this information up front & store it
2649 // on the YarrOp structure.
2652 // This class records state whilst generating the backtracking path of code.
2653 BacktrackingState m_backtrackingState
;
2656 void jitCompile(YarrPattern
& pattern
, YarrCharSize charSize
, JSGlobalData
* globalData
, YarrCodeBlock
& jitObject
, YarrJITCompileMode mode
)
2658 if (mode
== MatchOnly
)
2659 YarrGenerator
<MatchOnly
>(pattern
, charSize
).compile(globalData
, jitObject
);
2661 YarrGenerator
<IncludeSubpatterns
>(pattern
, charSize
).compile(globalData
, jitObject
);