2 * Copyright (C) 2009 Apple Inc. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 #include "ASCIICType.h"
30 #include "JSGlobalData.h"
31 #include "LinkBuffer.h"
32 #include "MacroAssembler.h"
33 #include "RegexCompiler.h"
35 #include "pcre.h" // temporary, remove when fallback is removed.
41 namespace JSC
{ namespace Yarr
{
43 class RegexGenerator
: private MacroAssembler
{
44 friend void jitCompileRegex(JSGlobalData
* globalData
, RegexCodeBlock
& jitObject
, const UString
& pattern
, unsigned& numSubpatterns
, const char*& error
, bool ignoreCase
, bool multiline
);
47 static const RegisterID input
= ARMRegisters::r0
;
48 static const RegisterID index
= ARMRegisters::r1
;
49 static const RegisterID length
= ARMRegisters::r2
;
50 static const RegisterID output
= ARMRegisters::r4
;
52 static const RegisterID regT0
= ARMRegisters::r5
;
53 static const RegisterID regT1
= ARMRegisters::r6
;
55 static const RegisterID returnRegister
= ARMRegisters::r0
;
57 static const RegisterID input
= MIPSRegisters::a0
;
58 static const RegisterID index
= MIPSRegisters::a1
;
59 static const RegisterID length
= MIPSRegisters::a2
;
60 static const RegisterID output
= MIPSRegisters::a3
;
62 static const RegisterID regT0
= MIPSRegisters::t4
;
63 static const RegisterID regT1
= MIPSRegisters::t5
;
65 static const RegisterID returnRegister
= MIPSRegisters::v0
;
67 static const RegisterID input
= X86Registers::eax
;
68 static const RegisterID index
= X86Registers::edx
;
69 static const RegisterID length
= X86Registers::ecx
;
70 static const RegisterID output
= X86Registers::edi
;
72 static const RegisterID regT0
= X86Registers::ebx
;
73 static const RegisterID regT1
= X86Registers::esi
;
75 static const RegisterID returnRegister
= X86Registers::eax
;
77 static const RegisterID input
= X86Registers::edi
;
78 static const RegisterID index
= X86Registers::esi
;
79 static const RegisterID length
= X86Registers::edx
;
80 static const RegisterID output
= X86Registers::ecx
;
82 static const RegisterID regT0
= X86Registers::eax
;
83 static const RegisterID regT1
= X86Registers::ebx
;
85 static const RegisterID returnRegister
= X86Registers::eax
;
88 void optimizeAlternative(PatternAlternative
* alternative
)
90 if (!alternative
->m_terms
.size())
93 for (unsigned i
= 0; i
< alternative
->m_terms
.size() - 1; ++i
) {
94 PatternTerm
& term
= alternative
->m_terms
[i
];
95 PatternTerm
& nextTerm
= alternative
->m_terms
[i
+ 1];
97 if ((term
.type
== PatternTerm::TypeCharacterClass
)
98 && (term
.quantityType
== QuantifierFixedCount
)
99 && (nextTerm
.type
== PatternTerm::TypePatternCharacter
)
100 && (nextTerm
.quantityType
== QuantifierFixedCount
)) {
101 PatternTerm termCopy
= term
;
102 alternative
->m_terms
[i
] = nextTerm
;
103 alternative
->m_terms
[i
+ 1] = termCopy
;
108 void matchCharacterClassRange(RegisterID character
, JumpList
& failures
, JumpList
& matchDest
, const CharacterRange
* ranges
, unsigned count
, unsigned* matchIndex
, const UChar
* matches
, unsigned matchCount
)
111 // pick which range we're going to generate
112 int which
= count
>> 1;
113 char lo
= ranges
[which
].begin
;
114 char hi
= ranges
[which
].end
;
116 // check if there are any ranges or matches below lo. If not, just jl to failure -
117 // if there is anything else to check, check that first, if it falls through jmp to failure.
118 if ((*matchIndex
< matchCount
) && (matches
[*matchIndex
] < lo
)) {
119 Jump loOrAbove
= branch32(GreaterThanOrEqual
, character
, Imm32((unsigned short)lo
));
121 // generate code for all ranges before this one
123 matchCharacterClassRange(character
, failures
, matchDest
, ranges
, which
, matchIndex
, matches
, matchCount
);
125 while ((*matchIndex
< matchCount
) && (matches
[*matchIndex
] < lo
)) {
126 matchDest
.append(branch32(Equal
, character
, Imm32((unsigned short)matches
[*matchIndex
])));
129 failures
.append(jump());
131 loOrAbove
.link(this);
133 Jump loOrAbove
= branch32(GreaterThanOrEqual
, character
, Imm32((unsigned short)lo
));
135 matchCharacterClassRange(character
, failures
, matchDest
, ranges
, which
, matchIndex
, matches
, matchCount
);
136 failures
.append(jump());
138 loOrAbove
.link(this);
140 failures
.append(branch32(LessThan
, character
, Imm32((unsigned short)lo
)));
142 while ((*matchIndex
< matchCount
) && (matches
[*matchIndex
] <= hi
))
145 matchDest
.append(branch32(LessThanOrEqual
, character
, Imm32((unsigned short)hi
)));
146 // fall through to here, the value is above hi.
148 // shuffle along & loop around if there are any more matches to handle.
149 unsigned next
= which
+ 1;
155 void matchCharacterClass(RegisterID character
, JumpList
& matchDest
, const CharacterClass
* charClass
)
157 if (charClass
->m_table
) {
158 ExtendedAddress
tableEntry(character
, reinterpret_cast<intptr_t>(charClass
->m_table
->m_table
));
159 matchDest
.append(branchTest8(charClass
->m_table
->m_inverted
? Zero
: NonZero
, tableEntry
));
163 if (charClass
->m_matchesUnicode
.size() || charClass
->m_rangesUnicode
.size()) {
164 Jump isAscii
= branch32(LessThanOrEqual
, character
, Imm32(0x7f));
166 if (charClass
->m_matchesUnicode
.size()) {
167 for (unsigned i
= 0; i
< charClass
->m_matchesUnicode
.size(); ++i
) {
168 UChar ch
= charClass
->m_matchesUnicode
[i
];
169 matchDest
.append(branch32(Equal
, character
, Imm32(ch
)));
173 if (charClass
->m_rangesUnicode
.size()) {
174 for (unsigned i
= 0; i
< charClass
->m_rangesUnicode
.size(); ++i
) {
175 UChar lo
= charClass
->m_rangesUnicode
[i
].begin
;
176 UChar hi
= charClass
->m_rangesUnicode
[i
].end
;
178 Jump below
= branch32(LessThan
, character
, Imm32(lo
));
179 matchDest
.append(branch32(LessThanOrEqual
, character
, Imm32(hi
)));
184 unicodeFail
= jump();
188 if (charClass
->m_ranges
.size()) {
189 unsigned matchIndex
= 0;
191 matchCharacterClassRange(character
, failures
, matchDest
, charClass
->m_ranges
.begin(), charClass
->m_ranges
.size(), &matchIndex
, charClass
->m_matches
.begin(), charClass
->m_matches
.size());
192 while (matchIndex
< charClass
->m_matches
.size())
193 matchDest
.append(branch32(Equal
, character
, Imm32((unsigned short)charClass
->m_matches
[matchIndex
++])));
196 } else if (charClass
->m_matches
.size()) {
197 // optimization: gather 'a','A' etc back together, can mask & test once.
198 Vector
<char> matchesAZaz
;
200 for (unsigned i
= 0; i
< charClass
->m_matches
.size(); ++i
) {
201 char ch
= charClass
->m_matches
[i
];
202 if (m_pattern
.m_ignoreCase
) {
203 if (isASCIILower(ch
)) {
204 matchesAZaz
.append(ch
);
207 if (isASCIIUpper(ch
))
210 matchDest
.append(branch32(Equal
, character
, Imm32((unsigned short)ch
)));
213 if (unsigned countAZaz
= matchesAZaz
.size()) {
214 or32(Imm32(32), character
);
215 for (unsigned i
= 0; i
< countAZaz
; ++i
)
216 matchDest
.append(branch32(Equal
, character
, Imm32(matchesAZaz
[i
])));
220 if (charClass
->m_matchesUnicode
.size() || charClass
->m_rangesUnicode
.size())
221 unicodeFail
.link(this);
224 // Jumps if input not available; will have (incorrectly) incremented already!
225 Jump
jumpIfNoAvailableInput(unsigned countToCheck
)
227 add32(Imm32(countToCheck
), index
);
228 return branch32(Above
, index
, length
);
231 Jump
jumpIfAvailableInput(unsigned countToCheck
)
233 add32(Imm32(countToCheck
), index
);
234 return branch32(BelowOrEqual
, index
, length
);
239 return branch32(BelowOrEqual
, index
, length
);
244 return branch32(Equal
, index
, length
);
247 Jump
notAtEndOfInput()
249 return branch32(NotEqual
, index
, length
);
252 Jump
jumpIfCharEquals(UChar ch
, int inputPosition
)
254 return branch16(Equal
, BaseIndex(input
, index
, TimesTwo
, inputPosition
* sizeof(UChar
)), Imm32(ch
));
257 Jump
jumpIfCharNotEquals(UChar ch
, int inputPosition
)
259 return branch16(NotEqual
, BaseIndex(input
, index
, TimesTwo
, inputPosition
* sizeof(UChar
)), Imm32(ch
));
262 void readCharacter(int inputPosition
, RegisterID reg
)
264 load16(BaseIndex(input
, index
, TimesTwo
, inputPosition
* sizeof(UChar
)), reg
);
267 void storeToFrame(RegisterID reg
, unsigned frameLocation
)
269 poke(reg
, frameLocation
);
272 void storeToFrame(Imm32 imm
, unsigned frameLocation
)
274 poke(imm
, frameLocation
);
277 DataLabelPtr
storeToFrameWithPatch(unsigned frameLocation
)
279 return storePtrWithPatch(ImmPtr(0), Address(stackPointerRegister
, frameLocation
* sizeof(void*)));
282 void loadFromFrame(unsigned frameLocation
, RegisterID reg
)
284 peek(reg
, frameLocation
);
287 void loadFromFrameAndJump(unsigned frameLocation
)
289 jump(Address(stackPointerRegister
, frameLocation
* sizeof(void*)));
292 struct AlternativeBacktrackRecord
{
293 DataLabelPtr dataLabel
;
294 Label backtrackLocation
;
296 AlternativeBacktrackRecord(DataLabelPtr dataLabel
, Label backtrackLocation
)
297 : dataLabel(dataLabel
)
298 , backtrackLocation(backtrackLocation
)
303 struct TermGenerationState
{
304 TermGenerationState(PatternDisjunction
* disjunction
, unsigned checkedTotal
)
305 : disjunction(disjunction
)
306 , checkedTotal(checkedTotal
)
310 void resetAlternative()
312 isBackTrackGenerated
= false;
315 bool alternativeValid()
317 return alt
< disjunction
->m_alternatives
.size();
319 void nextAlternative()
323 PatternAlternative
* alternative()
325 return disjunction
->m_alternatives
[alt
];
330 ASSERT(alternativeValid());
335 ASSERT(alternativeValid());
336 return t
< alternative()->m_terms
.size();
340 ASSERT(alternativeValid());
345 ASSERT(alternativeValid());
346 return alternative()->m_terms
[t
];
349 PatternTerm
& lookaheadTerm()
351 ASSERT(alternativeValid());
352 ASSERT((t
+ 1) < alternative()->m_terms
.size());
353 return alternative()->m_terms
[t
+ 1];
355 bool isSinglePatternCharacterLookaheadTerm()
357 ASSERT(alternativeValid());
358 return ((t
+ 1) < alternative()->m_terms
.size())
359 && (lookaheadTerm().type
== PatternTerm::TypePatternCharacter
)
360 && (lookaheadTerm().quantityType
== QuantifierFixedCount
)
361 && (lookaheadTerm().quantityCount
== 1);
366 return term().inputPosition
- checkedTotal
;
369 void jumpToBacktrack(Jump jump
, MacroAssembler
* masm
)
371 if (isBackTrackGenerated
)
372 jump
.linkTo(backtrackLabel
, masm
);
374 backTrackJumps
.append(jump
);
376 void jumpToBacktrack(JumpList
& jumps
, MacroAssembler
* masm
)
378 if (isBackTrackGenerated
)
379 jumps
.linkTo(backtrackLabel
, masm
);
381 backTrackJumps
.append(jumps
);
383 bool plantJumpToBacktrackIfExists(MacroAssembler
* masm
)
385 if (isBackTrackGenerated
) {
386 masm
->jump(backtrackLabel
);
391 void addBacktrackJump(Jump jump
)
393 backTrackJumps
.append(jump
);
395 void setBacktrackGenerated(Label label
)
397 isBackTrackGenerated
= true;
398 backtrackLabel
= label
;
400 void linkAlternativeBacktracks(MacroAssembler
* masm
)
402 isBackTrackGenerated
= false;
403 backTrackJumps
.link(masm
);
405 void linkAlternativeBacktracksTo(Label label
, MacroAssembler
* masm
)
407 isBackTrackGenerated
= false;
408 backTrackJumps
.linkTo(label
, masm
);
410 void propagateBacktrackingFrom(TermGenerationState
& nestedParenthesesState
, MacroAssembler
* masm
)
412 jumpToBacktrack(nestedParenthesesState
.backTrackJumps
, masm
);
413 if (nestedParenthesesState
.isBackTrackGenerated
)
414 setBacktrackGenerated(nestedParenthesesState
.backtrackLabel
);
417 PatternDisjunction
* disjunction
;
422 JumpList backTrackJumps
;
423 Label backtrackLabel
;
424 bool isBackTrackGenerated
;
427 void generateAssertionBOL(TermGenerationState
& state
)
429 PatternTerm
& term
= state
.term();
431 if (m_pattern
.m_multiline
) {
432 const RegisterID character
= regT0
;
435 if (!term
.inputPosition
)
436 matchDest
.append(branch32(Equal
, index
, Imm32(state
.checkedTotal
)));
438 readCharacter(state
.inputOffset() - 1, character
);
439 matchCharacterClass(character
, matchDest
, m_pattern
.newlineCharacterClass());
440 state
.jumpToBacktrack(jump(), this);
442 matchDest
.link(this);
444 // Erk, really should poison out these alternatives early. :-/
445 if (term
.inputPosition
)
446 state
.jumpToBacktrack(jump(), this);
448 state
.jumpToBacktrack(branch32(NotEqual
, index
, Imm32(state
.checkedTotal
)), this);
452 void generateAssertionEOL(TermGenerationState
& state
)
454 PatternTerm
& term
= state
.term();
456 if (m_pattern
.m_multiline
) {
457 const RegisterID character
= regT0
;
460 if (term
.inputPosition
== state
.checkedTotal
)
461 matchDest
.append(atEndOfInput());
463 readCharacter(state
.inputOffset(), character
);
464 matchCharacterClass(character
, matchDest
, m_pattern
.newlineCharacterClass());
465 state
.jumpToBacktrack(jump(), this);
467 matchDest
.link(this);
469 if (term
.inputPosition
== state
.checkedTotal
)
470 state
.jumpToBacktrack(notAtEndOfInput(), this);
471 // Erk, really should poison out these alternatives early. :-/
473 state
.jumpToBacktrack(jump(), this);
477 // Also falls though on nextIsNotWordChar.
478 void matchAssertionWordchar(TermGenerationState
& state
, JumpList
& nextIsWordChar
, JumpList
& nextIsNotWordChar
)
480 const RegisterID character
= regT0
;
481 PatternTerm
& term
= state
.term();
483 if (term
.inputPosition
== state
.checkedTotal
)
484 nextIsNotWordChar
.append(atEndOfInput());
486 readCharacter(state
.inputOffset(), character
);
487 matchCharacterClass(character
, nextIsWordChar
, m_pattern
.wordcharCharacterClass());
490 void generateAssertionWordBoundary(TermGenerationState
& state
)
492 const RegisterID character
= regT0
;
493 PatternTerm
& term
= state
.term();
497 if (!term
.inputPosition
)
498 atBegin
= branch32(Equal
, index
, Imm32(state
.checkedTotal
));
499 readCharacter(state
.inputOffset() - 1, character
);
500 matchCharacterClass(character
, matchDest
, m_pattern
.wordcharCharacterClass());
501 if (!term
.inputPosition
)
504 // We fall through to here if the last character was not a wordchar.
505 JumpList nonWordCharThenWordChar
;
506 JumpList nonWordCharThenNonWordChar
;
507 if (term
.invertOrCapture
) {
508 matchAssertionWordchar(state
, nonWordCharThenNonWordChar
, nonWordCharThenWordChar
);
509 nonWordCharThenWordChar
.append(jump());
511 matchAssertionWordchar(state
, nonWordCharThenWordChar
, nonWordCharThenNonWordChar
);
512 nonWordCharThenNonWordChar
.append(jump());
514 state
.jumpToBacktrack(nonWordCharThenNonWordChar
, this);
516 // We jump here if the last character was a wordchar.
517 matchDest
.link(this);
518 JumpList wordCharThenWordChar
;
519 JumpList wordCharThenNonWordChar
;
520 if (term
.invertOrCapture
) {
521 matchAssertionWordchar(state
, wordCharThenNonWordChar
, wordCharThenWordChar
);
522 wordCharThenWordChar
.append(jump());
524 matchAssertionWordchar(state
, wordCharThenWordChar
, wordCharThenNonWordChar
);
525 // This can fall-though!
528 state
.jumpToBacktrack(wordCharThenWordChar
, this);
530 nonWordCharThenWordChar
.link(this);
531 wordCharThenNonWordChar
.link(this);
534 void generatePatternCharacterSingle(TermGenerationState
& state
)
536 const RegisterID character
= regT0
;
537 UChar ch
= state
.term().patternCharacter
;
539 if (m_pattern
.m_ignoreCase
&& isASCIIAlpha(ch
)) {
540 readCharacter(state
.inputOffset(), character
);
541 or32(Imm32(32), character
);
542 state
.jumpToBacktrack(branch32(NotEqual
, character
, Imm32(Unicode::toLower(ch
))), this);
544 ASSERT(!m_pattern
.m_ignoreCase
|| (Unicode::toLower(ch
) == Unicode::toUpper(ch
)));
545 state
.jumpToBacktrack(jumpIfCharNotEquals(ch
, state
.inputOffset()), this);
549 void generatePatternCharacterPair(TermGenerationState
& state
)
551 const RegisterID character
= regT0
;
552 UChar ch1
= state
.term().patternCharacter
;
553 UChar ch2
= state
.lookaheadTerm().patternCharacter
;
556 int chPair
= ch1
| (ch2
<< 16);
558 if (m_pattern
.m_ignoreCase
) {
559 if (isASCIIAlpha(ch1
))
561 if (isASCIIAlpha(ch2
))
566 load32WithUnalignedHalfWords(BaseIndex(input
, index
, TimesTwo
, state
.inputOffset() * sizeof(UChar
)), character
);
567 or32(Imm32(mask
), character
);
568 state
.jumpToBacktrack(branch32(NotEqual
, character
, Imm32(chPair
| mask
)), this);
570 state
.jumpToBacktrack(branch32WithUnalignedHalfWords(NotEqual
, BaseIndex(input
, index
, TimesTwo
, state
.inputOffset() * sizeof(UChar
)), Imm32(chPair
)), this);
573 void generatePatternCharacterFixed(TermGenerationState
& state
)
575 const RegisterID character
= regT0
;
576 const RegisterID countRegister
= regT1
;
577 PatternTerm
& term
= state
.term();
578 UChar ch
= term
.patternCharacter
;
580 move(index
, countRegister
);
581 sub32(Imm32(term
.quantityCount
), countRegister
);
584 if (m_pattern
.m_ignoreCase
&& isASCIIAlpha(ch
)) {
585 load16(BaseIndex(input
, countRegister
, TimesTwo
, (state
.inputOffset() + term
.quantityCount
) * sizeof(UChar
)), character
);
586 or32(Imm32(32), character
);
587 state
.jumpToBacktrack(branch32(NotEqual
, character
, Imm32(Unicode::toLower(ch
))), this);
589 ASSERT(!m_pattern
.m_ignoreCase
|| (Unicode::toLower(ch
) == Unicode::toUpper(ch
)));
590 state
.jumpToBacktrack(branch16(NotEqual
, BaseIndex(input
, countRegister
, TimesTwo
, (state
.inputOffset() + term
.quantityCount
) * sizeof(UChar
)), Imm32(ch
)), this);
592 add32(Imm32(1), countRegister
);
593 branch32(NotEqual
, countRegister
, index
).linkTo(loop
, this);
596 void generatePatternCharacterGreedy(TermGenerationState
& state
)
598 const RegisterID character
= regT0
;
599 const RegisterID countRegister
= regT1
;
600 PatternTerm
& term
= state
.term();
601 UChar ch
= term
.patternCharacter
;
603 move(Imm32(0), countRegister
);
607 failures
.append(atEndOfInput());
608 if (m_pattern
.m_ignoreCase
&& isASCIIAlpha(ch
)) {
609 readCharacter(state
.inputOffset(), character
);
610 or32(Imm32(32), character
);
611 failures
.append(branch32(NotEqual
, character
, Imm32(Unicode::toLower(ch
))));
613 ASSERT(!m_pattern
.m_ignoreCase
|| (Unicode::toLower(ch
) == Unicode::toUpper(ch
)));
614 failures
.append(jumpIfCharNotEquals(ch
, state
.inputOffset()));
617 add32(Imm32(1), countRegister
);
618 add32(Imm32(1), index
);
619 if (term
.quantityCount
!= 0xffffffff)
620 branch32(NotEqual
, countRegister
, Imm32(term
.quantityCount
)).linkTo(loop
, this);
624 failures
.append(jump());
626 Label
backtrackBegin(this);
627 loadFromFrame(term
.frameLocation
, countRegister
);
628 state
.jumpToBacktrack(branchTest32(Zero
, countRegister
), this);
629 sub32(Imm32(1), countRegister
);
630 sub32(Imm32(1), index
);
634 storeToFrame(countRegister
, term
.frameLocation
);
636 state
.setBacktrackGenerated(backtrackBegin
);
639 void generatePatternCharacterNonGreedy(TermGenerationState
& state
)
641 const RegisterID character
= regT0
;
642 const RegisterID countRegister
= regT1
;
643 PatternTerm
& term
= state
.term();
644 UChar ch
= term
.patternCharacter
;
646 move(Imm32(0), countRegister
);
648 Jump firstTimeDoNothing
= jump();
650 Label
hardFail(this);
651 sub32(countRegister
, index
);
652 state
.jumpToBacktrack(jump(), this);
654 Label
backtrackBegin(this);
655 loadFromFrame(term
.frameLocation
, countRegister
);
657 atEndOfInput().linkTo(hardFail
, this);
658 if (term
.quantityCount
!= 0xffffffff)
659 branch32(Equal
, countRegister
, Imm32(term
.quantityCount
), hardFail
);
660 if (m_pattern
.m_ignoreCase
&& isASCIIAlpha(ch
)) {
661 readCharacter(state
.inputOffset(), character
);
662 or32(Imm32(32), character
);
663 branch32(NotEqual
, character
, Imm32(Unicode::toLower(ch
))).linkTo(hardFail
, this);
665 ASSERT(!m_pattern
.m_ignoreCase
|| (Unicode::toLower(ch
) == Unicode::toUpper(ch
)));
666 jumpIfCharNotEquals(ch
, state
.inputOffset()).linkTo(hardFail
, this);
669 add32(Imm32(1), countRegister
);
670 add32(Imm32(1), index
);
672 firstTimeDoNothing
.link(this);
673 storeToFrame(countRegister
, term
.frameLocation
);
675 state
.setBacktrackGenerated(backtrackBegin
);
678 void generateCharacterClassSingle(TermGenerationState
& state
)
680 const RegisterID character
= regT0
;
681 PatternTerm
& term
= state
.term();
684 readCharacter(state
.inputOffset(), character
);
685 matchCharacterClass(character
, matchDest
, term
.characterClass
);
687 if (term
.invertOrCapture
)
688 state
.jumpToBacktrack(matchDest
, this);
690 state
.jumpToBacktrack(jump(), this);
691 matchDest
.link(this);
695 void generateCharacterClassFixed(TermGenerationState
& state
)
697 const RegisterID character
= regT0
;
698 const RegisterID countRegister
= regT1
;
699 PatternTerm
& term
= state
.term();
701 move(index
, countRegister
);
702 sub32(Imm32(term
.quantityCount
), countRegister
);
706 load16(BaseIndex(input
, countRegister
, TimesTwo
, (state
.inputOffset() + term
.quantityCount
) * sizeof(UChar
)), character
);
707 matchCharacterClass(character
, matchDest
, term
.characterClass
);
709 if (term
.invertOrCapture
)
710 state
.jumpToBacktrack(matchDest
, this);
712 state
.jumpToBacktrack(jump(), this);
713 matchDest
.link(this);
716 add32(Imm32(1), countRegister
);
717 branch32(NotEqual
, countRegister
, index
).linkTo(loop
, this);
720 void generateCharacterClassGreedy(TermGenerationState
& state
)
722 const RegisterID character
= regT0
;
723 const RegisterID countRegister
= regT1
;
724 PatternTerm
& term
= state
.term();
726 move(Imm32(0), countRegister
);
730 failures
.append(atEndOfInput());
732 if (term
.invertOrCapture
) {
733 readCharacter(state
.inputOffset(), character
);
734 matchCharacterClass(character
, failures
, term
.characterClass
);
737 readCharacter(state
.inputOffset(), character
);
738 matchCharacterClass(character
, matchDest
, term
.characterClass
);
739 failures
.append(jump());
740 matchDest
.link(this);
743 add32(Imm32(1), countRegister
);
744 add32(Imm32(1), index
);
745 if (term
.quantityCount
!= 0xffffffff)
746 branch32(NotEqual
, countRegister
, Imm32(term
.quantityCount
)).linkTo(loop
, this);
750 failures
.append(jump());
752 Label
backtrackBegin(this);
753 loadFromFrame(term
.frameLocation
, countRegister
);
754 state
.jumpToBacktrack(branchTest32(Zero
, countRegister
), this);
755 sub32(Imm32(1), countRegister
);
756 sub32(Imm32(1), index
);
760 storeToFrame(countRegister
, term
.frameLocation
);
762 state
.setBacktrackGenerated(backtrackBegin
);
765 void generateCharacterClassNonGreedy(TermGenerationState
& state
)
767 const RegisterID character
= regT0
;
768 const RegisterID countRegister
= regT1
;
769 PatternTerm
& term
= state
.term();
771 move(Imm32(0), countRegister
);
773 Jump firstTimeDoNothing
= jump();
775 Label
hardFail(this);
776 sub32(countRegister
, index
);
777 state
.jumpToBacktrack(jump(), this);
779 Label
backtrackBegin(this);
780 loadFromFrame(term
.frameLocation
, countRegister
);
782 atEndOfInput().linkTo(hardFail
, this);
783 branch32(Equal
, countRegister
, Imm32(term
.quantityCount
), hardFail
);
786 readCharacter(state
.inputOffset(), character
);
787 matchCharacterClass(character
, matchDest
, term
.characterClass
);
789 if (term
.invertOrCapture
)
790 matchDest
.linkTo(hardFail
, this);
793 matchDest
.link(this);
796 add32(Imm32(1), countRegister
);
797 add32(Imm32(1), index
);
799 firstTimeDoNothing
.link(this);
800 storeToFrame(countRegister
, term
.frameLocation
);
802 state
.setBacktrackGenerated(backtrackBegin
);
805 void generateParenthesesDisjunction(PatternTerm
& parenthesesTerm
, TermGenerationState
& state
, unsigned alternativeFrameLocation
)
807 ASSERT((parenthesesTerm
.type
== PatternTerm::TypeParenthesesSubpattern
) || (parenthesesTerm
.type
== PatternTerm::TypeParentheticalAssertion
));
808 ASSERT(parenthesesTerm
.quantityCount
== 1);
810 PatternDisjunction
* disjunction
= parenthesesTerm
.parentheses
.disjunction
;
811 unsigned preCheckedCount
= ((parenthesesTerm
.quantityType
== QuantifierFixedCount
) && (parenthesesTerm
.type
!= PatternTerm::TypeParentheticalAssertion
)) ? disjunction
->m_minimumSize
: 0;
813 if (disjunction
->m_alternatives
.size() == 1) {
814 state
.resetAlternative();
815 ASSERT(state
.alternativeValid());
816 PatternAlternative
* alternative
= state
.alternative();
817 optimizeAlternative(alternative
);
819 int countToCheck
= alternative
->m_minimumSize
- preCheckedCount
;
821 ASSERT((parenthesesTerm
.type
== PatternTerm::TypeParentheticalAssertion
) || (parenthesesTerm
.quantityType
!= QuantifierFixedCount
));
823 // FIXME: This is quite horrible. The call to 'plantJumpToBacktrackIfExists'
824 // will be forced to always trampoline into here, just to decrement the index.
828 Label
backtrackBegin(this);
829 sub32(Imm32(countToCheck
), index
);
830 state
.addBacktrackJump(jump());
834 state
.setBacktrackGenerated(backtrackBegin
);
836 state
.jumpToBacktrack(jumpIfNoAvailableInput(countToCheck
), this);
837 state
.checkedTotal
+= countToCheck
;
840 for (state
.resetTerm(); state
.termValid(); state
.nextTerm())
843 state
.checkedTotal
-= countToCheck
;
847 for (state
.resetAlternative(); state
.alternativeValid(); state
.nextAlternative()) {
849 PatternAlternative
* alternative
= state
.alternative();
850 optimizeAlternative(alternative
);
852 ASSERT(alternative
->m_minimumSize
>= preCheckedCount
);
853 int countToCheck
= alternative
->m_minimumSize
- preCheckedCount
;
855 state
.addBacktrackJump(jumpIfNoAvailableInput(countToCheck
));
856 state
.checkedTotal
+= countToCheck
;
859 for (state
.resetTerm(); state
.termValid(); state
.nextTerm())
862 // Matched an alternative.
863 DataLabelPtr dataLabel
= storeToFrameWithPatch(alternativeFrameLocation
);
864 successes
.append(jump());
866 // Alternative did not match.
867 Label
backtrackLocation(this);
869 // Can we backtrack the alternative? - if so, do so. If not, just fall through to the next one.
870 state
.plantJumpToBacktrackIfExists(this);
872 state
.linkAlternativeBacktracks(this);
875 sub32(Imm32(countToCheck
), index
);
876 state
.checkedTotal
-= countToCheck
;
879 m_backtrackRecords
.append(AlternativeBacktrackRecord(dataLabel
, backtrackLocation
));
881 // We fall through to here when the last alternative fails.
882 // Add a backtrack out of here for the parenthese handling code to link up.
883 state
.addBacktrackJump(jump());
885 // Generate a trampoline for the parens code to backtrack to, to retry the
887 state
.setBacktrackGenerated(label());
888 loadFromFrameAndJump(alternativeFrameLocation
);
890 // FIXME: both of the above hooks are a little inefficient, in that you
891 // may end up trampolining here, just to trampoline back out to the
892 // parentheses code, or vice versa. We can probably eliminate a jump
893 // by restructuring, but coding this way for now for simplicity during
896 successes
.link(this);
900 void generateParenthesesSingle(TermGenerationState
& state
)
902 const RegisterID indexTemporary
= regT0
;
903 PatternTerm
& term
= state
.term();
904 PatternDisjunction
* disjunction
= term
.parentheses
.disjunction
;
905 ASSERT(term
.quantityCount
== 1);
907 unsigned preCheckedCount
= ((term
.quantityCount
== 1) && (term
.quantityType
== QuantifierFixedCount
)) ? disjunction
->m_minimumSize
: 0;
909 unsigned parenthesesFrameLocation
= term
.frameLocation
;
910 unsigned alternativeFrameLocation
= parenthesesFrameLocation
;
911 if (term
.quantityType
!= QuantifierFixedCount
)
912 alternativeFrameLocation
+= RegexStackSpaceForBackTrackInfoParenthesesOnce
;
914 // optimized case - no capture & no quantifier can be handled in a light-weight manner.
915 if (!term
.invertOrCapture
&& (term
.quantityType
== QuantifierFixedCount
)) {
916 TermGenerationState
parenthesesState(disjunction
, state
.checkedTotal
);
917 generateParenthesesDisjunction(state
.term(), parenthesesState
, alternativeFrameLocation
);
918 // this expects that any backtracks back out of the parentheses will be in the
919 // parenthesesState's backTrackJumps vector, and that if they need backtracking
920 // they will have set an entry point on the parenthesesState's backtrackLabel.
921 state
.propagateBacktrackingFrom(parenthesesState
, this);
923 Jump nonGreedySkipParentheses
;
924 Label nonGreedyTryParentheses
;
925 if (term
.quantityType
== QuantifierGreedy
)
926 storeToFrame(Imm32(1), parenthesesFrameLocation
);
927 else if (term
.quantityType
== QuantifierNonGreedy
) {
928 storeToFrame(Imm32(0), parenthesesFrameLocation
);
929 nonGreedySkipParentheses
= jump();
930 nonGreedyTryParentheses
= label();
931 storeToFrame(Imm32(1), parenthesesFrameLocation
);
934 // store the match start index
935 if (term
.invertOrCapture
) {
936 int inputOffset
= state
.inputOffset() - preCheckedCount
;
938 move(index
, indexTemporary
);
939 add32(Imm32(inputOffset
), indexTemporary
);
940 store32(indexTemporary
, Address(output
, (term
.parentheses
.subpatternId
<< 1) * sizeof(int)));
942 store32(index
, Address(output
, (term
.parentheses
.subpatternId
<< 1) * sizeof(int)));
945 // generate the body of the parentheses
946 TermGenerationState
parenthesesState(disjunction
, state
.checkedTotal
);
947 generateParenthesesDisjunction(state
.term(), parenthesesState
, alternativeFrameLocation
);
949 // store the match end index
950 if (term
.invertOrCapture
) {
951 int inputOffset
= state
.inputOffset();
953 move(index
, indexTemporary
);
954 add32(Imm32(state
.inputOffset()), indexTemporary
);
955 store32(indexTemporary
, Address(output
, ((term
.parentheses
.subpatternId
<< 1) + 1) * sizeof(int)));
957 store32(index
, Address(output
, ((term
.parentheses
.subpatternId
<< 1) + 1) * sizeof(int)));
959 Jump success
= jump();
961 // A failure AFTER the parens jumps here
962 Label
backtrackFromAfterParens(this);
964 if (term
.quantityType
== QuantifierGreedy
) {
965 // If this is zero we have now tested with both with and without the parens.
966 loadFromFrame(parenthesesFrameLocation
, indexTemporary
);
967 state
.jumpToBacktrack(branchTest32(Zero
, indexTemporary
), this);
968 } else if (term
.quantityType
== QuantifierNonGreedy
) {
969 // If this is zero we have now tested with both with and without the parens.
970 loadFromFrame(parenthesesFrameLocation
, indexTemporary
);
971 branchTest32(Zero
, indexTemporary
).linkTo(nonGreedyTryParentheses
, this);
974 parenthesesState
.plantJumpToBacktrackIfExists(this);
975 // A failure WITHIN the parens jumps here
976 parenthesesState
.linkAlternativeBacktracks(this);
977 if (term
.invertOrCapture
) {
978 store32(Imm32(-1), Address(output
, (term
.parentheses
.subpatternId
<< 1) * sizeof(int)));
979 store32(Imm32(-1), Address(output
, ((term
.parentheses
.subpatternId
<< 1) + 1) * sizeof(int)));
982 if (term
.quantityType
== QuantifierGreedy
)
983 storeToFrame(Imm32(0), parenthesesFrameLocation
);
985 state
.jumpToBacktrack(jump(), this);
987 state
.setBacktrackGenerated(backtrackFromAfterParens
);
988 if (term
.quantityType
== QuantifierNonGreedy
)
989 nonGreedySkipParentheses
.link(this);
994 void generateParentheticalAssertion(TermGenerationState
& state
)
996 PatternTerm
& term
= state
.term();
997 PatternDisjunction
* disjunction
= term
.parentheses
.disjunction
;
998 ASSERT(term
.quantityCount
== 1);
999 ASSERT(term
.quantityType
== QuantifierFixedCount
);
1001 unsigned parenthesesFrameLocation
= term
.frameLocation
;
1002 unsigned alternativeFrameLocation
= parenthesesFrameLocation
+ RegexStackSpaceForBackTrackInfoParentheticalAssertion
;
1004 int countCheckedAfterAssertion
= state
.checkedTotal
- term
.inputPosition
;
1006 if (term
.invertOrCapture
) {
1008 storeToFrame(index
, parenthesesFrameLocation
);
1010 state
.checkedTotal
-= countCheckedAfterAssertion
;
1011 if (countCheckedAfterAssertion
)
1012 sub32(Imm32(countCheckedAfterAssertion
), index
);
1014 TermGenerationState
parenthesesState(disjunction
, state
.checkedTotal
);
1015 generateParenthesesDisjunction(state
.term(), parenthesesState
, alternativeFrameLocation
);
1016 // Success! - which means - Fail!
1017 loadFromFrame(parenthesesFrameLocation
, index
);
1018 state
.jumpToBacktrack(jump(), this);
1020 // And fail means success.
1021 parenthesesState
.linkAlternativeBacktracks(this);
1022 loadFromFrame(parenthesesFrameLocation
, index
);
1024 state
.checkedTotal
+= countCheckedAfterAssertion
;
1027 storeToFrame(index
, parenthesesFrameLocation
);
1029 state
.checkedTotal
-= countCheckedAfterAssertion
;
1030 if (countCheckedAfterAssertion
)
1031 sub32(Imm32(countCheckedAfterAssertion
), index
);
1033 TermGenerationState
parenthesesState(disjunction
, state
.checkedTotal
);
1034 generateParenthesesDisjunction(state
.term(), parenthesesState
, alternativeFrameLocation
);
1035 // Success! - which means - Success!
1036 loadFromFrame(parenthesesFrameLocation
, index
);
1037 Jump success
= jump();
1039 parenthesesState
.linkAlternativeBacktracks(this);
1040 loadFromFrame(parenthesesFrameLocation
, index
);
1041 state
.jumpToBacktrack(jump(), this);
1045 state
.checkedTotal
+= countCheckedAfterAssertion
;
1049 void generateTerm(TermGenerationState
& state
)
1051 PatternTerm
& term
= state
.term();
1053 switch (term
.type
) {
1054 case PatternTerm::TypeAssertionBOL
:
1055 generateAssertionBOL(state
);
1058 case PatternTerm::TypeAssertionEOL
:
1059 generateAssertionEOL(state
);
1062 case PatternTerm::TypeAssertionWordBoundary
:
1063 generateAssertionWordBoundary(state
);
1066 case PatternTerm::TypePatternCharacter
:
1067 switch (term
.quantityType
) {
1068 case QuantifierFixedCount
:
1069 if (term
.quantityCount
== 1) {
1070 if (state
.isSinglePatternCharacterLookaheadTerm() && (state
.lookaheadTerm().inputPosition
== (term
.inputPosition
+ 1))) {
1071 generatePatternCharacterPair(state
);
1074 generatePatternCharacterSingle(state
);
1076 generatePatternCharacterFixed(state
);
1078 case QuantifierGreedy
:
1079 generatePatternCharacterGreedy(state
);
1081 case QuantifierNonGreedy
:
1082 generatePatternCharacterNonGreedy(state
);
1087 case PatternTerm::TypeCharacterClass
:
1088 switch (term
.quantityType
) {
1089 case QuantifierFixedCount
:
1090 if (term
.quantityCount
== 1)
1091 generateCharacterClassSingle(state
);
1093 generateCharacterClassFixed(state
);
1095 case QuantifierGreedy
:
1096 generateCharacterClassGreedy(state
);
1098 case QuantifierNonGreedy
:
1099 generateCharacterClassNonGreedy(state
);
1104 case PatternTerm::TypeBackReference
:
1105 ASSERT_NOT_REACHED();
1108 case PatternTerm::TypeForwardReference
:
1111 case PatternTerm::TypeParenthesesSubpattern
:
1112 ASSERT((term
.quantityCount
== 1) && !term
.parentheses
.isCopy
); // must fallback to pcre before this point
1113 generateParenthesesSingle(state
);
1116 case PatternTerm::TypeParentheticalAssertion
:
1117 generateParentheticalAssertion(state
);
1122 void generateDisjunction(PatternDisjunction
* disjunction
)
1124 TermGenerationState
state(disjunction
, 0);
1125 state
.resetAlternative();
1127 // Plant a check to see if there is sufficient input available to run the first alternative.
1128 // Jumping back to the label 'firstAlternative' will get to this check, jumping to
1129 // 'firstAlternativeInputChecked' will jump directly to matching the alternative having
1130 // skipped this check.
1132 Label
firstAlternative(this);
1134 // check availability for the next alternative
1135 int countCheckedForCurrentAlternative
= 0;
1136 int countToCheckForFirstAlternative
= 0;
1137 bool hasShorterAlternatives
= false;
1138 JumpList notEnoughInputForPreviousAlternative
;
1140 if (state
.alternativeValid()) {
1141 PatternAlternative
* alternative
= state
.alternative();
1142 countToCheckForFirstAlternative
= alternative
->m_minimumSize
;
1143 state
.checkedTotal
+= countToCheckForFirstAlternative
;
1144 if (countToCheckForFirstAlternative
)
1145 notEnoughInputForPreviousAlternative
.append(jumpIfNoAvailableInput(countToCheckForFirstAlternative
));
1146 countCheckedForCurrentAlternative
= countToCheckForFirstAlternative
;
1149 Label
firstAlternativeInputChecked(this);
1151 while (state
.alternativeValid()) {
1152 // Track whether any alternatives are shorter than the first one.
1153 hasShorterAlternatives
= hasShorterAlternatives
|| (countCheckedForCurrentAlternative
< countToCheckForFirstAlternative
);
1155 PatternAlternative
* alternative
= state
.alternative();
1156 optimizeAlternative(alternative
);
1158 for (state
.resetTerm(); state
.termValid(); state
.nextTerm())
1159 generateTerm(state
);
1161 // If we get here, the alternative matched.
1162 if (m_pattern
.m_body
->m_callFrameSize
)
1163 addPtr(Imm32(m_pattern
.m_body
->m_callFrameSize
* sizeof(void*)), stackPointerRegister
);
1165 ASSERT(index
!= returnRegister
);
1166 if (m_pattern
.m_body
->m_hasFixedSize
) {
1167 move(index
, returnRegister
);
1168 if (alternative
->m_minimumSize
)
1169 sub32(Imm32(alternative
->m_minimumSize
), returnRegister
);
1171 pop(returnRegister
);
1172 store32(index
, Address(output
, 4));
1173 store32(returnRegister
, output
);
1177 state
.nextAlternative();
1179 // if there are any more alternatives, plant the check for input before looping.
1180 if (state
.alternativeValid()) {
1181 PatternAlternative
* nextAlternative
= state
.alternative();
1182 int countToCheckForNextAlternative
= nextAlternative
->m_minimumSize
;
1184 if (countCheckedForCurrentAlternative
> countToCheckForNextAlternative
) { // CASE 1: current alternative was longer than the next one.
1185 // If we get here, there the last input checked failed.
1186 notEnoughInputForPreviousAlternative
.link(this);
1188 // Check if sufficent input available to run the next alternative
1189 notEnoughInputForPreviousAlternative
.append(jumpIfNoAvailableInput(countToCheckForNextAlternative
- countCheckedForCurrentAlternative
));
1190 // We are now in the correct state to enter the next alternative; this add is only required
1191 // to mirror and revert operation of the sub32, just below.
1192 add32(Imm32(countCheckedForCurrentAlternative
- countToCheckForNextAlternative
), index
);
1194 // If we get here, there the last input checked passed.
1195 state
.linkAlternativeBacktracks(this);
1196 // No need to check if we can run the next alternative, since it is shorter -
1197 // just update index.
1198 sub32(Imm32(countCheckedForCurrentAlternative
- countToCheckForNextAlternative
), index
);
1199 } else if (countCheckedForCurrentAlternative
< countToCheckForNextAlternative
) { // CASE 2: next alternative is longer than the current one.
1200 // If we get here, there the last input checked failed.
1201 // If there is insufficient input to run the current alternative, and the next alternative is longer,
1202 // then there is definitely not enough input to run it - don't even check. Just adjust index, as if
1204 notEnoughInputForPreviousAlternative
.link(this);
1205 add32(Imm32(countToCheckForNextAlternative
- countCheckedForCurrentAlternative
), index
);
1206 notEnoughInputForPreviousAlternative
.append(jump());
1208 // The next alternative is longer than the current one; check the difference.
1209 state
.linkAlternativeBacktracks(this);
1210 notEnoughInputForPreviousAlternative
.append(jumpIfNoAvailableInput(countToCheckForNextAlternative
- countCheckedForCurrentAlternative
));
1211 } else { // CASE 3: Both alternatives are the same length.
1212 ASSERT(countCheckedForCurrentAlternative
== countToCheckForNextAlternative
);
1214 // If the next alterative is the same length as this one, then no need to check the input -
1215 // if there was sufficent input to run the current alternative then there is sufficient
1216 // input to run the next one; if not, there isn't.
1217 state
.linkAlternativeBacktracks(this);
1220 state
.checkedTotal
-= countCheckedForCurrentAlternative
;
1221 countCheckedForCurrentAlternative
= countToCheckForNextAlternative
;
1222 state
.checkedTotal
+= countCheckedForCurrentAlternative
;
1226 // If we get here, all Alternatives failed...
1228 state
.checkedTotal
-= countCheckedForCurrentAlternative
;
1230 // How much more input need there be to be able to retry from the first alternative?
1232 // /yarr_jit/ or /wrec|pcre/
1233 // In these examples we need check for one more input before looping.
1235 // In this case we need check for 5 more input to loop (+4 to allow for the first alterative
1236 // being four longer than the last alternative checked, and another +1 to effectively move
1237 // the start position along by one).
1238 // /yarr|rules/ or /wrec|notsomuch/
1239 // In these examples, provided that there was sufficient input to have just been matching for
1240 // the second alternative we can loop without checking for available input (since the second
1241 // alternative is longer than the first). In the latter example we need to decrement index
1242 // (by 4) so the start position is only progressed by 1 from the last iteration.
1243 int incrementForNextIter
= (countToCheckForFirstAlternative
- countCheckedForCurrentAlternative
) + 1;
1245 // First, deal with the cases where there was sufficient input to try the last alternative.
1246 if (incrementForNextIter
> 0) // We need to check for more input anyway, fall through to the checking below.
1247 state
.linkAlternativeBacktracks(this);
1248 else if (m_pattern
.m_body
->m_hasFixedSize
&& !incrementForNextIter
) // No need to update anything, link these backtracks straight to the to pof the loop!
1249 state
.linkAlternativeBacktracksTo(firstAlternativeInputChecked
, this);
1250 else { // no need to check the input, but we do have some bookkeeping to do first.
1251 state
.linkAlternativeBacktracks(this);
1253 // Where necessary update our preserved start position.
1254 if (!m_pattern
.m_body
->m_hasFixedSize
) {
1256 sub32(Imm32(countCheckedForCurrentAlternative
- 1), regT0
);
1257 poke(regT0
, m_pattern
.m_body
->m_callFrameSize
);
1260 // Update index if necessary, and loop (without checking).
1261 if (incrementForNextIter
)
1262 add32(Imm32(incrementForNextIter
), index
);
1263 jump().linkTo(firstAlternativeInputChecked
, this);
1266 notEnoughInputForPreviousAlternative
.link(this);
1267 // Update our idea of the start position, if we're tracking this.
1268 if (!m_pattern
.m_body
->m_hasFixedSize
) {
1269 if (countCheckedForCurrentAlternative
- 1) {
1271 sub32(Imm32(countCheckedForCurrentAlternative
- 1), regT0
);
1272 poke(regT0
, m_pattern
.m_body
->m_callFrameSize
);
1274 poke(index
, m_pattern
.m_body
->m_callFrameSize
);
1276 // Check if there is sufficent input to run the first alternative again.
1277 jumpIfAvailableInput(incrementForNextIter
).linkTo(firstAlternativeInputChecked
, this);
1278 // No - insufficent input to run the first alteranative, are there any other alternatives we
1279 // might need to check? If so, the last check will have left the index incremented by
1280 // (countToCheckForFirstAlternative + 1), so we need test whether countToCheckForFirstAlternative
1281 // LESS input is available, to have the effect of just progressing the start position by 1
1282 // from the last iteration. If this check passes we can just jump up to the check associated
1283 // with the first alternative in the loop. This is a bit sad, since we'll end up trying the
1284 // first alternative again, and this check will fail (otherwise the check planted just above
1285 // here would have passed). This is a bit sad, however it saves trying to do something more
1286 // complex here in compilation, and in the common case we should end up coallescing the checks.
1288 // FIXME: a nice improvement here may be to stop trying to match sooner, based on the least
1289 // of the minimum-alternative-lengths. E.g. if I have two alternatives of length 200 and 150,
1290 // and a string of length 100, we'll end up looping index from 0 to 100, checking whether there
1291 // is sufficient input to run either alternative (constantly failing). If there had been only
1292 // one alternative, or if the shorter alternative had come first, we would have terminated
1294 if (hasShorterAlternatives
)
1295 jumpIfAvailableInput(-countToCheckForFirstAlternative
).linkTo(firstAlternative
, this);
1296 // index will now be a bit garbled (depending on whether 'hasShorterAlternatives' is true,
1297 // it has either been incremented by 1 or by (countToCheckForFirstAlternative + 1) ...
1298 // but since we're about to return a failure this doesn't really matter!)
1300 unsigned frameSize
= m_pattern
.m_body
->m_callFrameSize
;
1301 if (!m_pattern
.m_body
->m_hasFixedSize
)
1304 addPtr(Imm32(frameSize
* sizeof(void*)), stackPointerRegister
);
1306 move(Imm32(-1), returnRegister
);
1311 void generateEnter()
1314 push(X86Registers::ebp
);
1315 move(stackPointerRegister
, X86Registers::ebp
);
1316 push(X86Registers::ebx
);
1318 push(X86Registers::ebp
);
1319 move(stackPointerRegister
, X86Registers::ebp
);
1320 // TODO: do we need spill registers to fill the output pointer if there are no sub captures?
1321 push(X86Registers::ebx
);
1322 push(X86Registers::edi
);
1323 push(X86Registers::esi
);
1324 // load output into edi (2 = saved ebp + return address).
1326 loadPtr(Address(X86Registers::ebp
, 2 * sizeof(void*)), input
);
1327 loadPtr(Address(X86Registers::ebp
, 3 * sizeof(void*)), index
);
1328 loadPtr(Address(X86Registers::ebp
, 4 * sizeof(void*)), length
);
1329 loadPtr(Address(X86Registers::ebp
, 5 * sizeof(void*)), output
);
1331 loadPtr(Address(X86Registers::ebp
, 2 * sizeof(void*)), output
);
1334 push(ARMRegisters::r4
);
1335 push(ARMRegisters::r5
);
1336 push(ARMRegisters::r6
);
1337 move(ARMRegisters::r3
, output
);
1343 void generateReturn()
1346 pop(X86Registers::ebx
);
1347 pop(X86Registers::ebp
);
1349 pop(X86Registers::esi
);
1350 pop(X86Registers::edi
);
1351 pop(X86Registers::ebx
);
1352 pop(X86Registers::ebp
);
1354 pop(ARMRegisters::r6
);
1355 pop(ARMRegisters::r5
);
1356 pop(ARMRegisters::r4
);
1364 RegexGenerator(RegexPattern
& pattern
)
1365 : m_pattern(pattern
)
1373 // TODO: do I really want this on the stack?
1374 if (!m_pattern
.m_body
->m_hasFixedSize
)
1377 if (m_pattern
.m_body
->m_callFrameSize
)
1378 subPtr(Imm32(m_pattern
.m_body
->m_callFrameSize
* sizeof(void*)), stackPointerRegister
);
1380 generateDisjunction(m_pattern
.m_body
);
1383 void compile(JSGlobalData
* globalData
, RegexCodeBlock
& jitObject
)
1387 LinkBuffer
patchBuffer(this, globalData
->executableAllocator
.poolForSize(size()));
1389 for (unsigned i
= 0; i
< m_backtrackRecords
.size(); ++i
)
1390 patchBuffer
.patch(m_backtrackRecords
[i
].dataLabel
, patchBuffer
.locationOf(m_backtrackRecords
[i
].backtrackLocation
));
1392 jitObject
.set(patchBuffer
.finalizeCode());
1396 RegexPattern
& m_pattern
;
1397 Vector
<AlternativeBacktrackRecord
> m_backtrackRecords
;
1400 void jitCompileRegex(JSGlobalData
* globalData
, RegexCodeBlock
& jitObject
, const UString
& patternString
, unsigned& numSubpatterns
, const char*& error
, bool ignoreCase
, bool multiline
)
1402 RegexPattern
pattern(ignoreCase
, multiline
);
1403 if ((error
= compileRegex(patternString
, pattern
)))
1405 numSubpatterns
= pattern
.m_numSubpatterns
;
1407 if (!pattern
.m_shouldFallBack
&& globalData
->canUseJIT()) {
1408 RegexGenerator
generator(pattern
);
1409 generator
.compile(globalData
, jitObject
);
1413 JSRegExpIgnoreCaseOption ignoreCaseOption
= ignoreCase
? JSRegExpIgnoreCase
: JSRegExpDoNotIgnoreCase
;
1414 JSRegExpMultilineOption multilineOption
= multiline
? JSRegExpMultiline
: JSRegExpSingleLine
;
1415 jitObject
.setFallback(jsRegExpCompile(reinterpret_cast<const UChar
*>(patternString
.data()), patternString
.size(), ignoreCaseOption
, multilineOption
, &numSubpatterns
, &error
));