2 * Copyright (C) 2009 Apple Inc. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 #include "ASCIICType.h"
30 #include "JSGlobalData.h"
31 #include "LinkBuffer.h"
32 #include "MacroAssembler.h"
33 #include "RegExpCache.h"
34 #include "RegexCompiler.h"
36 #include "pcre.h" // temporary, remove when fallback is removed.
42 namespace JSC
{ namespace Yarr
{
44 class RegexGenerator
: private MacroAssembler
{
45 friend void jitCompileRegex(JSGlobalData
* globalData
, RegexCodeBlock
& jitObject
, const UString
& pattern
, unsigned& numSubpatterns
, const char*& error
, bool ignoreCase
, bool multiline
);
48 static const RegisterID input
= ARMRegisters::r0
;
49 static const RegisterID index
= ARMRegisters::r1
;
50 static const RegisterID length
= ARMRegisters::r2
;
51 static const RegisterID output
= ARMRegisters::r4
;
53 static const RegisterID regT0
= ARMRegisters::r5
;
54 static const RegisterID regT1
= ARMRegisters::r6
;
56 static const RegisterID returnRegister
= ARMRegisters::r0
;
58 static const RegisterID input
= MIPSRegisters::a0
;
59 static const RegisterID index
= MIPSRegisters::a1
;
60 static const RegisterID length
= MIPSRegisters::a2
;
61 static const RegisterID output
= MIPSRegisters::a3
;
63 static const RegisterID regT0
= MIPSRegisters::t4
;
64 static const RegisterID regT1
= MIPSRegisters::t5
;
66 static const RegisterID returnRegister
= MIPSRegisters::v0
;
68 static const RegisterID input
= X86Registers::eax
;
69 static const RegisterID index
= X86Registers::edx
;
70 static const RegisterID length
= X86Registers::ecx
;
71 static const RegisterID output
= X86Registers::edi
;
73 static const RegisterID regT0
= X86Registers::ebx
;
74 static const RegisterID regT1
= X86Registers::esi
;
76 static const RegisterID returnRegister
= X86Registers::eax
;
78 static const RegisterID input
= X86Registers::edi
;
79 static const RegisterID index
= X86Registers::esi
;
80 static const RegisterID length
= X86Registers::edx
;
81 static const RegisterID output
= X86Registers::ecx
;
83 static const RegisterID regT0
= X86Registers::eax
;
84 static const RegisterID regT1
= X86Registers::ebx
;
86 static const RegisterID returnRegister
= X86Registers::eax
;
89 void optimizeAlternative(PatternAlternative
* alternative
)
91 if (!alternative
->m_terms
.size())
94 for (unsigned i
= 0; i
< alternative
->m_terms
.size() - 1; ++i
) {
95 PatternTerm
& term
= alternative
->m_terms
[i
];
96 PatternTerm
& nextTerm
= alternative
->m_terms
[i
+ 1];
98 if ((term
.type
== PatternTerm::TypeCharacterClass
)
99 && (term
.quantityType
== QuantifierFixedCount
)
100 && (nextTerm
.type
== PatternTerm::TypePatternCharacter
)
101 && (nextTerm
.quantityType
== QuantifierFixedCount
)) {
102 PatternTerm termCopy
= term
;
103 alternative
->m_terms
[i
] = nextTerm
;
104 alternative
->m_terms
[i
+ 1] = termCopy
;
109 void matchCharacterClassRange(RegisterID character
, JumpList
& failures
, JumpList
& matchDest
, const CharacterRange
* ranges
, unsigned count
, unsigned* matchIndex
, const UChar
* matches
, unsigned matchCount
)
112 // pick which range we're going to generate
113 int which
= count
>> 1;
114 char lo
= ranges
[which
].begin
;
115 char hi
= ranges
[which
].end
;
117 // check if there are any ranges or matches below lo. If not, just jl to failure -
118 // if there is anything else to check, check that first, if it falls through jmp to failure.
119 if ((*matchIndex
< matchCount
) && (matches
[*matchIndex
] < lo
)) {
120 Jump loOrAbove
= branch32(GreaterThanOrEqual
, character
, Imm32((unsigned short)lo
));
122 // generate code for all ranges before this one
124 matchCharacterClassRange(character
, failures
, matchDest
, ranges
, which
, matchIndex
, matches
, matchCount
);
126 while ((*matchIndex
< matchCount
) && (matches
[*matchIndex
] < lo
)) {
127 matchDest
.append(branch32(Equal
, character
, Imm32((unsigned short)matches
[*matchIndex
])));
130 failures
.append(jump());
132 loOrAbove
.link(this);
134 Jump loOrAbove
= branch32(GreaterThanOrEqual
, character
, Imm32((unsigned short)lo
));
136 matchCharacterClassRange(character
, failures
, matchDest
, ranges
, which
, matchIndex
, matches
, matchCount
);
137 failures
.append(jump());
139 loOrAbove
.link(this);
141 failures
.append(branch32(LessThan
, character
, Imm32((unsigned short)lo
)));
143 while ((*matchIndex
< matchCount
) && (matches
[*matchIndex
] <= hi
))
146 matchDest
.append(branch32(LessThanOrEqual
, character
, Imm32((unsigned short)hi
)));
147 // fall through to here, the value is above hi.
149 // shuffle along & loop around if there are any more matches to handle.
150 unsigned next
= which
+ 1;
156 void matchCharacterClass(RegisterID character
, JumpList
& matchDest
, const CharacterClass
* charClass
)
158 if (charClass
->m_table
) {
159 ExtendedAddress
tableEntry(character
, reinterpret_cast<intptr_t>(charClass
->m_table
->m_table
));
160 matchDest
.append(branchTest8(charClass
->m_table
->m_inverted
? Zero
: NonZero
, tableEntry
));
164 if (charClass
->m_matchesUnicode
.size() || charClass
->m_rangesUnicode
.size()) {
165 Jump isAscii
= branch32(LessThanOrEqual
, character
, Imm32(0x7f));
167 if (charClass
->m_matchesUnicode
.size()) {
168 for (unsigned i
= 0; i
< charClass
->m_matchesUnicode
.size(); ++i
) {
169 UChar ch
= charClass
->m_matchesUnicode
[i
];
170 matchDest
.append(branch32(Equal
, character
, Imm32(ch
)));
174 if (charClass
->m_rangesUnicode
.size()) {
175 for (unsigned i
= 0; i
< charClass
->m_rangesUnicode
.size(); ++i
) {
176 UChar lo
= charClass
->m_rangesUnicode
[i
].begin
;
177 UChar hi
= charClass
->m_rangesUnicode
[i
].end
;
179 Jump below
= branch32(LessThan
, character
, Imm32(lo
));
180 matchDest
.append(branch32(LessThanOrEqual
, character
, Imm32(hi
)));
185 unicodeFail
= jump();
189 if (charClass
->m_ranges
.size()) {
190 unsigned matchIndex
= 0;
192 matchCharacterClassRange(character
, failures
, matchDest
, charClass
->m_ranges
.begin(), charClass
->m_ranges
.size(), &matchIndex
, charClass
->m_matches
.begin(), charClass
->m_matches
.size());
193 while (matchIndex
< charClass
->m_matches
.size())
194 matchDest
.append(branch32(Equal
, character
, Imm32((unsigned short)charClass
->m_matches
[matchIndex
++])));
197 } else if (charClass
->m_matches
.size()) {
198 // optimization: gather 'a','A' etc back together, can mask & test once.
199 Vector
<char> matchesAZaz
;
201 for (unsigned i
= 0; i
< charClass
->m_matches
.size(); ++i
) {
202 char ch
= charClass
->m_matches
[i
];
203 if (m_pattern
.m_ignoreCase
) {
204 if (isASCIILower(ch
)) {
205 matchesAZaz
.append(ch
);
208 if (isASCIIUpper(ch
))
211 matchDest
.append(branch32(Equal
, character
, Imm32((unsigned short)ch
)));
214 if (unsigned countAZaz
= matchesAZaz
.size()) {
215 or32(Imm32(32), character
);
216 for (unsigned i
= 0; i
< countAZaz
; ++i
)
217 matchDest
.append(branch32(Equal
, character
, Imm32(matchesAZaz
[i
])));
221 if (charClass
->m_matchesUnicode
.size() || charClass
->m_rangesUnicode
.size())
222 unicodeFail
.link(this);
225 // Jumps if input not available; will have (incorrectly) incremented already!
226 Jump
jumpIfNoAvailableInput(unsigned countToCheck
)
228 add32(Imm32(countToCheck
), index
);
229 return branch32(Above
, index
, length
);
232 Jump
jumpIfAvailableInput(unsigned countToCheck
)
234 add32(Imm32(countToCheck
), index
);
235 return branch32(BelowOrEqual
, index
, length
);
240 return branch32(BelowOrEqual
, index
, length
);
245 return branch32(Equal
, index
, length
);
248 Jump
notAtEndOfInput()
250 return branch32(NotEqual
, index
, length
);
253 Jump
jumpIfCharEquals(UChar ch
, int inputPosition
)
255 return branch16(Equal
, BaseIndex(input
, index
, TimesTwo
, inputPosition
* sizeof(UChar
)), Imm32(ch
));
258 Jump
jumpIfCharNotEquals(UChar ch
, int inputPosition
)
260 return branch16(NotEqual
, BaseIndex(input
, index
, TimesTwo
, inputPosition
* sizeof(UChar
)), Imm32(ch
));
263 void readCharacter(int inputPosition
, RegisterID reg
)
265 load16(BaseIndex(input
, index
, TimesTwo
, inputPosition
* sizeof(UChar
)), reg
);
268 void storeToFrame(RegisterID reg
, unsigned frameLocation
)
270 poke(reg
, frameLocation
);
273 void storeToFrame(Imm32 imm
, unsigned frameLocation
)
275 poke(imm
, frameLocation
);
278 DataLabelPtr
storeToFrameWithPatch(unsigned frameLocation
)
280 return storePtrWithPatch(ImmPtr(0), Address(stackPointerRegister
, frameLocation
* sizeof(void*)));
283 void loadFromFrame(unsigned frameLocation
, RegisterID reg
)
285 peek(reg
, frameLocation
);
288 void loadFromFrameAndJump(unsigned frameLocation
)
290 jump(Address(stackPointerRegister
, frameLocation
* sizeof(void*)));
293 struct AlternativeBacktrackRecord
{
294 DataLabelPtr dataLabel
;
295 Label backtrackLocation
;
297 AlternativeBacktrackRecord(DataLabelPtr dataLabel
, Label backtrackLocation
)
298 : dataLabel(dataLabel
)
299 , backtrackLocation(backtrackLocation
)
304 struct TermGenerationState
{
305 TermGenerationState(PatternDisjunction
* disjunction
, unsigned checkedTotal
)
306 : disjunction(disjunction
)
307 , checkedTotal(checkedTotal
)
311 void resetAlternative()
313 isBackTrackGenerated
= false;
316 bool alternativeValid()
318 return alt
< disjunction
->m_alternatives
.size();
320 void nextAlternative()
324 PatternAlternative
* alternative()
326 return disjunction
->m_alternatives
[alt
];
331 ASSERT(alternativeValid());
336 ASSERT(alternativeValid());
337 return t
< alternative()->m_terms
.size();
341 ASSERT(alternativeValid());
346 ASSERT(alternativeValid());
347 return alternative()->m_terms
[t
];
350 PatternTerm
& lookaheadTerm()
352 ASSERT(alternativeValid());
353 ASSERT((t
+ 1) < alternative()->m_terms
.size());
354 return alternative()->m_terms
[t
+ 1];
356 bool isSinglePatternCharacterLookaheadTerm()
358 ASSERT(alternativeValid());
359 return ((t
+ 1) < alternative()->m_terms
.size())
360 && (lookaheadTerm().type
== PatternTerm::TypePatternCharacter
)
361 && (lookaheadTerm().quantityType
== QuantifierFixedCount
)
362 && (lookaheadTerm().quantityCount
== 1);
367 return term().inputPosition
- checkedTotal
;
370 void jumpToBacktrack(Jump jump
, MacroAssembler
* masm
)
372 if (isBackTrackGenerated
)
373 jump
.linkTo(backtrackLabel
, masm
);
375 backTrackJumps
.append(jump
);
377 void jumpToBacktrack(JumpList
& jumps
, MacroAssembler
* masm
)
379 if (isBackTrackGenerated
)
380 jumps
.linkTo(backtrackLabel
, masm
);
382 backTrackJumps
.append(jumps
);
384 bool plantJumpToBacktrackIfExists(MacroAssembler
* masm
)
386 if (isBackTrackGenerated
) {
387 masm
->jump(backtrackLabel
);
392 void addBacktrackJump(Jump jump
)
394 backTrackJumps
.append(jump
);
396 void setBacktrackGenerated(Label label
)
398 isBackTrackGenerated
= true;
399 backtrackLabel
= label
;
401 void linkAlternativeBacktracks(MacroAssembler
* masm
)
403 isBackTrackGenerated
= false;
404 backTrackJumps
.link(masm
);
406 void linkAlternativeBacktracksTo(Label label
, MacroAssembler
* masm
)
408 isBackTrackGenerated
= false;
409 backTrackJumps
.linkTo(label
, masm
);
411 void propagateBacktrackingFrom(TermGenerationState
& nestedParenthesesState
, MacroAssembler
* masm
)
413 jumpToBacktrack(nestedParenthesesState
.backTrackJumps
, masm
);
414 if (nestedParenthesesState
.isBackTrackGenerated
)
415 setBacktrackGenerated(nestedParenthesesState
.backtrackLabel
);
418 PatternDisjunction
* disjunction
;
423 JumpList backTrackJumps
;
424 Label backtrackLabel
;
425 bool isBackTrackGenerated
;
428 void generateAssertionBOL(TermGenerationState
& state
)
430 PatternTerm
& term
= state
.term();
432 if (m_pattern
.m_multiline
) {
433 const RegisterID character
= regT0
;
436 if (!term
.inputPosition
)
437 matchDest
.append(branch32(Equal
, index
, Imm32(state
.checkedTotal
)));
439 readCharacter(state
.inputOffset() - 1, character
);
440 matchCharacterClass(character
, matchDest
, m_pattern
.newlineCharacterClass());
441 state
.jumpToBacktrack(jump(), this);
443 matchDest
.link(this);
445 // Erk, really should poison out these alternatives early. :-/
446 if (term
.inputPosition
)
447 state
.jumpToBacktrack(jump(), this);
449 state
.jumpToBacktrack(branch32(NotEqual
, index
, Imm32(state
.checkedTotal
)), this);
453 void generateAssertionEOL(TermGenerationState
& state
)
455 PatternTerm
& term
= state
.term();
457 if (m_pattern
.m_multiline
) {
458 const RegisterID character
= regT0
;
461 if (term
.inputPosition
== state
.checkedTotal
)
462 matchDest
.append(atEndOfInput());
464 readCharacter(state
.inputOffset(), character
);
465 matchCharacterClass(character
, matchDest
, m_pattern
.newlineCharacterClass());
466 state
.jumpToBacktrack(jump(), this);
468 matchDest
.link(this);
470 if (term
.inputPosition
== state
.checkedTotal
)
471 state
.jumpToBacktrack(notAtEndOfInput(), this);
472 // Erk, really should poison out these alternatives early. :-/
474 state
.jumpToBacktrack(jump(), this);
478 // Also falls though on nextIsNotWordChar.
479 void matchAssertionWordchar(TermGenerationState
& state
, JumpList
& nextIsWordChar
, JumpList
& nextIsNotWordChar
)
481 const RegisterID character
= regT0
;
482 PatternTerm
& term
= state
.term();
484 if (term
.inputPosition
== state
.checkedTotal
)
485 nextIsNotWordChar
.append(atEndOfInput());
487 readCharacter(state
.inputOffset(), character
);
488 matchCharacterClass(character
, nextIsWordChar
, m_pattern
.wordcharCharacterClass());
491 void generateAssertionWordBoundary(TermGenerationState
& state
)
493 const RegisterID character
= regT0
;
494 PatternTerm
& term
= state
.term();
498 if (!term
.inputPosition
)
499 atBegin
= branch32(Equal
, index
, Imm32(state
.checkedTotal
));
500 readCharacter(state
.inputOffset() - 1, character
);
501 matchCharacterClass(character
, matchDest
, m_pattern
.wordcharCharacterClass());
502 if (!term
.inputPosition
)
505 // We fall through to here if the last character was not a wordchar.
506 JumpList nonWordCharThenWordChar
;
507 JumpList nonWordCharThenNonWordChar
;
508 if (term
.invertOrCapture
) {
509 matchAssertionWordchar(state
, nonWordCharThenNonWordChar
, nonWordCharThenWordChar
);
510 nonWordCharThenWordChar
.append(jump());
512 matchAssertionWordchar(state
, nonWordCharThenWordChar
, nonWordCharThenNonWordChar
);
513 nonWordCharThenNonWordChar
.append(jump());
515 state
.jumpToBacktrack(nonWordCharThenNonWordChar
, this);
517 // We jump here if the last character was a wordchar.
518 matchDest
.link(this);
519 JumpList wordCharThenWordChar
;
520 JumpList wordCharThenNonWordChar
;
521 if (term
.invertOrCapture
) {
522 matchAssertionWordchar(state
, wordCharThenNonWordChar
, wordCharThenWordChar
);
523 wordCharThenWordChar
.append(jump());
525 matchAssertionWordchar(state
, wordCharThenWordChar
, wordCharThenNonWordChar
);
526 // This can fall-though!
529 state
.jumpToBacktrack(wordCharThenWordChar
, this);
531 nonWordCharThenWordChar
.link(this);
532 wordCharThenNonWordChar
.link(this);
535 void generatePatternCharacterSingle(TermGenerationState
& state
)
537 const RegisterID character
= regT0
;
538 UChar ch
= state
.term().patternCharacter
;
540 if (m_pattern
.m_ignoreCase
&& isASCIIAlpha(ch
)) {
541 readCharacter(state
.inputOffset(), character
);
542 or32(Imm32(32), character
);
543 state
.jumpToBacktrack(branch32(NotEqual
, character
, Imm32(Unicode::toLower(ch
))), this);
545 ASSERT(!m_pattern
.m_ignoreCase
|| (Unicode::toLower(ch
) == Unicode::toUpper(ch
)));
546 state
.jumpToBacktrack(jumpIfCharNotEquals(ch
, state
.inputOffset()), this);
550 void generatePatternCharacterPair(TermGenerationState
& state
)
552 const RegisterID character
= regT0
;
553 UChar ch1
= state
.term().patternCharacter
;
554 UChar ch2
= state
.lookaheadTerm().patternCharacter
;
557 int chPair
= ch1
| (ch2
<< 16);
559 if (m_pattern
.m_ignoreCase
) {
560 if (isASCIIAlpha(ch1
))
562 if (isASCIIAlpha(ch2
))
567 load32WithUnalignedHalfWords(BaseIndex(input
, index
, TimesTwo
, state
.inputOffset() * sizeof(UChar
)), character
);
568 or32(Imm32(mask
), character
);
569 state
.jumpToBacktrack(branch32(NotEqual
, character
, Imm32(chPair
| mask
)), this);
571 state
.jumpToBacktrack(branch32WithUnalignedHalfWords(NotEqual
, BaseIndex(input
, index
, TimesTwo
, state
.inputOffset() * sizeof(UChar
)), Imm32(chPair
)), this);
574 void generatePatternCharacterFixed(TermGenerationState
& state
)
576 const RegisterID character
= regT0
;
577 const RegisterID countRegister
= regT1
;
578 PatternTerm
& term
= state
.term();
579 UChar ch
= term
.patternCharacter
;
581 move(index
, countRegister
);
582 sub32(Imm32(term
.quantityCount
), countRegister
);
585 if (m_pattern
.m_ignoreCase
&& isASCIIAlpha(ch
)) {
586 load16(BaseIndex(input
, countRegister
, TimesTwo
, (state
.inputOffset() + term
.quantityCount
) * sizeof(UChar
)), character
);
587 or32(Imm32(32), character
);
588 state
.jumpToBacktrack(branch32(NotEqual
, character
, Imm32(Unicode::toLower(ch
))), this);
590 ASSERT(!m_pattern
.m_ignoreCase
|| (Unicode::toLower(ch
) == Unicode::toUpper(ch
)));
591 state
.jumpToBacktrack(branch16(NotEqual
, BaseIndex(input
, countRegister
, TimesTwo
, (state
.inputOffset() + term
.quantityCount
) * sizeof(UChar
)), Imm32(ch
)), this);
593 add32(Imm32(1), countRegister
);
594 branch32(NotEqual
, countRegister
, index
).linkTo(loop
, this);
597 void generatePatternCharacterGreedy(TermGenerationState
& state
)
599 const RegisterID character
= regT0
;
600 const RegisterID countRegister
= regT1
;
601 PatternTerm
& term
= state
.term();
602 UChar ch
= term
.patternCharacter
;
604 move(Imm32(0), countRegister
);
608 failures
.append(atEndOfInput());
609 if (m_pattern
.m_ignoreCase
&& isASCIIAlpha(ch
)) {
610 readCharacter(state
.inputOffset(), character
);
611 or32(Imm32(32), character
);
612 failures
.append(branch32(NotEqual
, character
, Imm32(Unicode::toLower(ch
))));
614 ASSERT(!m_pattern
.m_ignoreCase
|| (Unicode::toLower(ch
) == Unicode::toUpper(ch
)));
615 failures
.append(jumpIfCharNotEquals(ch
, state
.inputOffset()));
618 add32(Imm32(1), countRegister
);
619 add32(Imm32(1), index
);
620 if (term
.quantityCount
!= 0xffffffff)
621 branch32(NotEqual
, countRegister
, Imm32(term
.quantityCount
)).linkTo(loop
, this);
625 failures
.append(jump());
627 Label
backtrackBegin(this);
628 loadFromFrame(term
.frameLocation
, countRegister
);
629 state
.jumpToBacktrack(branchTest32(Zero
, countRegister
), this);
630 sub32(Imm32(1), countRegister
);
631 sub32(Imm32(1), index
);
635 storeToFrame(countRegister
, term
.frameLocation
);
637 state
.setBacktrackGenerated(backtrackBegin
);
640 void generatePatternCharacterNonGreedy(TermGenerationState
& state
)
642 const RegisterID character
= regT0
;
643 const RegisterID countRegister
= regT1
;
644 PatternTerm
& term
= state
.term();
645 UChar ch
= term
.patternCharacter
;
647 move(Imm32(0), countRegister
);
649 Jump firstTimeDoNothing
= jump();
651 Label
hardFail(this);
652 sub32(countRegister
, index
);
653 state
.jumpToBacktrack(jump(), this);
655 Label
backtrackBegin(this);
656 loadFromFrame(term
.frameLocation
, countRegister
);
658 atEndOfInput().linkTo(hardFail
, this);
659 if (term
.quantityCount
!= 0xffffffff)
660 branch32(Equal
, countRegister
, Imm32(term
.quantityCount
), hardFail
);
661 if (m_pattern
.m_ignoreCase
&& isASCIIAlpha(ch
)) {
662 readCharacter(state
.inputOffset(), character
);
663 or32(Imm32(32), character
);
664 branch32(NotEqual
, character
, Imm32(Unicode::toLower(ch
))).linkTo(hardFail
, this);
666 ASSERT(!m_pattern
.m_ignoreCase
|| (Unicode::toLower(ch
) == Unicode::toUpper(ch
)));
667 jumpIfCharNotEquals(ch
, state
.inputOffset()).linkTo(hardFail
, this);
670 add32(Imm32(1), countRegister
);
671 add32(Imm32(1), index
);
673 firstTimeDoNothing
.link(this);
674 storeToFrame(countRegister
, term
.frameLocation
);
676 state
.setBacktrackGenerated(backtrackBegin
);
679 void generateCharacterClassSingle(TermGenerationState
& state
)
681 const RegisterID character
= regT0
;
682 PatternTerm
& term
= state
.term();
685 readCharacter(state
.inputOffset(), character
);
686 matchCharacterClass(character
, matchDest
, term
.characterClass
);
688 if (term
.invertOrCapture
)
689 state
.jumpToBacktrack(matchDest
, this);
691 state
.jumpToBacktrack(jump(), this);
692 matchDest
.link(this);
696 void generateCharacterClassFixed(TermGenerationState
& state
)
698 const RegisterID character
= regT0
;
699 const RegisterID countRegister
= regT1
;
700 PatternTerm
& term
= state
.term();
702 move(index
, countRegister
);
703 sub32(Imm32(term
.quantityCount
), countRegister
);
707 load16(BaseIndex(input
, countRegister
, TimesTwo
, (state
.inputOffset() + term
.quantityCount
) * sizeof(UChar
)), character
);
708 matchCharacterClass(character
, matchDest
, term
.characterClass
);
710 if (term
.invertOrCapture
)
711 state
.jumpToBacktrack(matchDest
, this);
713 state
.jumpToBacktrack(jump(), this);
714 matchDest
.link(this);
717 add32(Imm32(1), countRegister
);
718 branch32(NotEqual
, countRegister
, index
).linkTo(loop
, this);
721 void generateCharacterClassGreedy(TermGenerationState
& state
)
723 const RegisterID character
= regT0
;
724 const RegisterID countRegister
= regT1
;
725 PatternTerm
& term
= state
.term();
727 move(Imm32(0), countRegister
);
731 failures
.append(atEndOfInput());
733 if (term
.invertOrCapture
) {
734 readCharacter(state
.inputOffset(), character
);
735 matchCharacterClass(character
, failures
, term
.characterClass
);
738 readCharacter(state
.inputOffset(), character
);
739 matchCharacterClass(character
, matchDest
, term
.characterClass
);
740 failures
.append(jump());
741 matchDest
.link(this);
744 add32(Imm32(1), countRegister
);
745 add32(Imm32(1), index
);
746 if (term
.quantityCount
!= 0xffffffff)
747 branch32(NotEqual
, countRegister
, Imm32(term
.quantityCount
)).linkTo(loop
, this);
751 failures
.append(jump());
753 Label
backtrackBegin(this);
754 loadFromFrame(term
.frameLocation
, countRegister
);
755 state
.jumpToBacktrack(branchTest32(Zero
, countRegister
), this);
756 sub32(Imm32(1), countRegister
);
757 sub32(Imm32(1), index
);
761 storeToFrame(countRegister
, term
.frameLocation
);
763 state
.setBacktrackGenerated(backtrackBegin
);
766 void generateCharacterClassNonGreedy(TermGenerationState
& state
)
768 const RegisterID character
= regT0
;
769 const RegisterID countRegister
= regT1
;
770 PatternTerm
& term
= state
.term();
772 move(Imm32(0), countRegister
);
774 Jump firstTimeDoNothing
= jump();
776 Label
hardFail(this);
777 sub32(countRegister
, index
);
778 state
.jumpToBacktrack(jump(), this);
780 Label
backtrackBegin(this);
781 loadFromFrame(term
.frameLocation
, countRegister
);
783 atEndOfInput().linkTo(hardFail
, this);
784 branch32(Equal
, countRegister
, Imm32(term
.quantityCount
), hardFail
);
787 readCharacter(state
.inputOffset(), character
);
788 matchCharacterClass(character
, matchDest
, term
.characterClass
);
790 if (term
.invertOrCapture
)
791 matchDest
.linkTo(hardFail
, this);
794 matchDest
.link(this);
797 add32(Imm32(1), countRegister
);
798 add32(Imm32(1), index
);
800 firstTimeDoNothing
.link(this);
801 storeToFrame(countRegister
, term
.frameLocation
);
803 state
.setBacktrackGenerated(backtrackBegin
);
806 void generateParenthesesDisjunction(PatternTerm
& parenthesesTerm
, TermGenerationState
& state
, unsigned alternativeFrameLocation
)
808 ASSERT((parenthesesTerm
.type
== PatternTerm::TypeParenthesesSubpattern
) || (parenthesesTerm
.type
== PatternTerm::TypeParentheticalAssertion
));
809 ASSERT(parenthesesTerm
.quantityCount
== 1);
811 PatternDisjunction
* disjunction
= parenthesesTerm
.parentheses
.disjunction
;
812 unsigned preCheckedCount
= ((parenthesesTerm
.quantityType
== QuantifierFixedCount
) && (parenthesesTerm
.type
!= PatternTerm::TypeParentheticalAssertion
)) ? disjunction
->m_minimumSize
: 0;
814 if (disjunction
->m_alternatives
.size() == 1) {
815 state
.resetAlternative();
816 ASSERT(state
.alternativeValid());
817 PatternAlternative
* alternative
= state
.alternative();
818 optimizeAlternative(alternative
);
820 int countToCheck
= alternative
->m_minimumSize
- preCheckedCount
;
822 ASSERT((parenthesesTerm
.type
== PatternTerm::TypeParentheticalAssertion
) || (parenthesesTerm
.quantityType
!= QuantifierFixedCount
));
824 // FIXME: This is quite horrible. The call to 'plantJumpToBacktrackIfExists'
825 // will be forced to always trampoline into here, just to decrement the index.
829 Label
backtrackBegin(this);
830 sub32(Imm32(countToCheck
), index
);
831 state
.addBacktrackJump(jump());
835 state
.setBacktrackGenerated(backtrackBegin
);
837 state
.jumpToBacktrack(jumpIfNoAvailableInput(countToCheck
), this);
838 state
.checkedTotal
+= countToCheck
;
841 for (state
.resetTerm(); state
.termValid(); state
.nextTerm())
844 state
.checkedTotal
-= countToCheck
;
848 for (state
.resetAlternative(); state
.alternativeValid(); state
.nextAlternative()) {
850 PatternAlternative
* alternative
= state
.alternative();
851 optimizeAlternative(alternative
);
853 ASSERT(alternative
->m_minimumSize
>= preCheckedCount
);
854 int countToCheck
= alternative
->m_minimumSize
- preCheckedCount
;
856 state
.addBacktrackJump(jumpIfNoAvailableInput(countToCheck
));
857 state
.checkedTotal
+= countToCheck
;
860 for (state
.resetTerm(); state
.termValid(); state
.nextTerm())
863 // Matched an alternative.
864 DataLabelPtr dataLabel
= storeToFrameWithPatch(alternativeFrameLocation
);
865 successes
.append(jump());
867 // Alternative did not match.
868 Label
backtrackLocation(this);
870 // Can we backtrack the alternative? - if so, do so. If not, just fall through to the next one.
871 state
.plantJumpToBacktrackIfExists(this);
873 state
.linkAlternativeBacktracks(this);
876 sub32(Imm32(countToCheck
), index
);
877 state
.checkedTotal
-= countToCheck
;
880 m_backtrackRecords
.append(AlternativeBacktrackRecord(dataLabel
, backtrackLocation
));
882 // We fall through to here when the last alternative fails.
883 // Add a backtrack out of here for the parenthese handling code to link up.
884 state
.addBacktrackJump(jump());
886 // Generate a trampoline for the parens code to backtrack to, to retry the
888 state
.setBacktrackGenerated(label());
889 loadFromFrameAndJump(alternativeFrameLocation
);
891 // FIXME: both of the above hooks are a little inefficient, in that you
892 // may end up trampolining here, just to trampoline back out to the
893 // parentheses code, or vice versa. We can probably eliminate a jump
894 // by restructuring, but coding this way for now for simplicity during
897 successes
.link(this);
901 void generateParenthesesSingle(TermGenerationState
& state
)
903 const RegisterID indexTemporary
= regT0
;
904 PatternTerm
& term
= state
.term();
905 PatternDisjunction
* disjunction
= term
.parentheses
.disjunction
;
906 ASSERT(term
.quantityCount
== 1);
908 unsigned preCheckedCount
= ((term
.quantityCount
== 1) && (term
.quantityType
== QuantifierFixedCount
)) ? disjunction
->m_minimumSize
: 0;
910 unsigned parenthesesFrameLocation
= term
.frameLocation
;
911 unsigned alternativeFrameLocation
= parenthesesFrameLocation
;
912 if (term
.quantityType
!= QuantifierFixedCount
)
913 alternativeFrameLocation
+= RegexStackSpaceForBackTrackInfoParenthesesOnce
;
915 // optimized case - no capture & no quantifier can be handled in a light-weight manner.
916 if (!term
.invertOrCapture
&& (term
.quantityType
== QuantifierFixedCount
)) {
917 TermGenerationState
parenthesesState(disjunction
, state
.checkedTotal
);
918 generateParenthesesDisjunction(state
.term(), parenthesesState
, alternativeFrameLocation
);
919 // this expects that any backtracks back out of the parentheses will be in the
920 // parenthesesState's backTrackJumps vector, and that if they need backtracking
921 // they will have set an entry point on the parenthesesState's backtrackLabel.
922 state
.propagateBacktrackingFrom(parenthesesState
, this);
924 Jump nonGreedySkipParentheses
;
925 Label nonGreedyTryParentheses
;
926 if (term
.quantityType
== QuantifierGreedy
)
927 storeToFrame(Imm32(1), parenthesesFrameLocation
);
928 else if (term
.quantityType
== QuantifierNonGreedy
) {
929 storeToFrame(Imm32(0), parenthesesFrameLocation
);
930 nonGreedySkipParentheses
= jump();
931 nonGreedyTryParentheses
= label();
932 storeToFrame(Imm32(1), parenthesesFrameLocation
);
935 // store the match start index
936 if (term
.invertOrCapture
) {
937 int inputOffset
= state
.inputOffset() - preCheckedCount
;
939 move(index
, indexTemporary
);
940 add32(Imm32(inputOffset
), indexTemporary
);
941 store32(indexTemporary
, Address(output
, (term
.parentheses
.subpatternId
<< 1) * sizeof(int)));
943 store32(index
, Address(output
, (term
.parentheses
.subpatternId
<< 1) * sizeof(int)));
946 // generate the body of the parentheses
947 TermGenerationState
parenthesesState(disjunction
, state
.checkedTotal
);
948 generateParenthesesDisjunction(state
.term(), parenthesesState
, alternativeFrameLocation
);
950 // store the match end index
951 if (term
.invertOrCapture
) {
952 int inputOffset
= state
.inputOffset();
954 move(index
, indexTemporary
);
955 add32(Imm32(state
.inputOffset()), indexTemporary
);
956 store32(indexTemporary
, Address(output
, ((term
.parentheses
.subpatternId
<< 1) + 1) * sizeof(int)));
958 store32(index
, Address(output
, ((term
.parentheses
.subpatternId
<< 1) + 1) * sizeof(int)));
960 Jump success
= jump();
962 // A failure AFTER the parens jumps here
963 Label
backtrackFromAfterParens(this);
965 if (term
.quantityType
== QuantifierGreedy
) {
966 // If this is zero we have now tested with both with and without the parens.
967 loadFromFrame(parenthesesFrameLocation
, indexTemporary
);
968 state
.jumpToBacktrack(branchTest32(Zero
, indexTemporary
), this);
969 } else if (term
.quantityType
== QuantifierNonGreedy
) {
970 // If this is zero we have now tested with both with and without the parens.
971 loadFromFrame(parenthesesFrameLocation
, indexTemporary
);
972 branchTest32(Zero
, indexTemporary
).linkTo(nonGreedyTryParentheses
, this);
975 parenthesesState
.plantJumpToBacktrackIfExists(this);
976 // A failure WITHIN the parens jumps here
977 parenthesesState
.linkAlternativeBacktracks(this);
978 if (term
.invertOrCapture
) {
979 store32(Imm32(-1), Address(output
, (term
.parentheses
.subpatternId
<< 1) * sizeof(int)));
980 store32(Imm32(-1), Address(output
, ((term
.parentheses
.subpatternId
<< 1) + 1) * sizeof(int)));
983 if (term
.quantityType
== QuantifierGreedy
)
984 storeToFrame(Imm32(0), parenthesesFrameLocation
);
986 state
.jumpToBacktrack(jump(), this);
988 state
.setBacktrackGenerated(backtrackFromAfterParens
);
989 if (term
.quantityType
== QuantifierNonGreedy
)
990 nonGreedySkipParentheses
.link(this);
995 void generateParentheticalAssertion(TermGenerationState
& state
)
997 PatternTerm
& term
= state
.term();
998 PatternDisjunction
* disjunction
= term
.parentheses
.disjunction
;
999 ASSERT(term
.quantityCount
== 1);
1000 ASSERT(term
.quantityType
== QuantifierFixedCount
);
1002 unsigned parenthesesFrameLocation
= term
.frameLocation
;
1003 unsigned alternativeFrameLocation
= parenthesesFrameLocation
+ RegexStackSpaceForBackTrackInfoParentheticalAssertion
;
1005 int countCheckedAfterAssertion
= state
.checkedTotal
- term
.inputPosition
;
1007 if (term
.invertOrCapture
) {
1009 storeToFrame(index
, parenthesesFrameLocation
);
1011 state
.checkedTotal
-= countCheckedAfterAssertion
;
1012 if (countCheckedAfterAssertion
)
1013 sub32(Imm32(countCheckedAfterAssertion
), index
);
1015 TermGenerationState
parenthesesState(disjunction
, state
.checkedTotal
);
1016 generateParenthesesDisjunction(state
.term(), parenthesesState
, alternativeFrameLocation
);
1017 // Success! - which means - Fail!
1018 loadFromFrame(parenthesesFrameLocation
, index
);
1019 state
.jumpToBacktrack(jump(), this);
1021 // And fail means success.
1022 parenthesesState
.linkAlternativeBacktracks(this);
1023 loadFromFrame(parenthesesFrameLocation
, index
);
1025 state
.checkedTotal
+= countCheckedAfterAssertion
;
1028 storeToFrame(index
, parenthesesFrameLocation
);
1030 state
.checkedTotal
-= countCheckedAfterAssertion
;
1031 if (countCheckedAfterAssertion
)
1032 sub32(Imm32(countCheckedAfterAssertion
), index
);
1034 TermGenerationState
parenthesesState(disjunction
, state
.checkedTotal
);
1035 generateParenthesesDisjunction(state
.term(), parenthesesState
, alternativeFrameLocation
);
1036 // Success! - which means - Success!
1037 loadFromFrame(parenthesesFrameLocation
, index
);
1038 Jump success
= jump();
1040 parenthesesState
.linkAlternativeBacktracks(this);
1041 loadFromFrame(parenthesesFrameLocation
, index
);
1042 state
.jumpToBacktrack(jump(), this);
1046 state
.checkedTotal
+= countCheckedAfterAssertion
;
1050 void generateTerm(TermGenerationState
& state
)
1052 PatternTerm
& term
= state
.term();
1054 switch (term
.type
) {
1055 case PatternTerm::TypeAssertionBOL
:
1056 generateAssertionBOL(state
);
1059 case PatternTerm::TypeAssertionEOL
:
1060 generateAssertionEOL(state
);
1063 case PatternTerm::TypeAssertionWordBoundary
:
1064 generateAssertionWordBoundary(state
);
1067 case PatternTerm::TypePatternCharacter
:
1068 switch (term
.quantityType
) {
1069 case QuantifierFixedCount
:
1070 if (term
.quantityCount
== 1) {
1071 if (state
.isSinglePatternCharacterLookaheadTerm() && (state
.lookaheadTerm().inputPosition
== (term
.inputPosition
+ 1))) {
1072 generatePatternCharacterPair(state
);
1075 generatePatternCharacterSingle(state
);
1077 generatePatternCharacterFixed(state
);
1079 case QuantifierGreedy
:
1080 generatePatternCharacterGreedy(state
);
1082 case QuantifierNonGreedy
:
1083 generatePatternCharacterNonGreedy(state
);
1088 case PatternTerm::TypeCharacterClass
:
1089 switch (term
.quantityType
) {
1090 case QuantifierFixedCount
:
1091 if (term
.quantityCount
== 1)
1092 generateCharacterClassSingle(state
);
1094 generateCharacterClassFixed(state
);
1096 case QuantifierGreedy
:
1097 generateCharacterClassGreedy(state
);
1099 case QuantifierNonGreedy
:
1100 generateCharacterClassNonGreedy(state
);
1105 case PatternTerm::TypeBackReference
:
1106 ASSERT_NOT_REACHED();
1109 case PatternTerm::TypeForwardReference
:
1112 case PatternTerm::TypeParenthesesSubpattern
:
1113 ASSERT((term
.quantityCount
== 1) && !term
.parentheses
.isCopy
); // must fallback to pcre before this point
1114 generateParenthesesSingle(state
);
1117 case PatternTerm::TypeParentheticalAssertion
:
1118 generateParentheticalAssertion(state
);
1123 void generateDisjunction(PatternDisjunction
* disjunction
)
1125 TermGenerationState
state(disjunction
, 0);
1126 state
.resetAlternative();
1128 // Plant a check to see if there is sufficient input available to run the first alternative.
1129 // Jumping back to the label 'firstAlternative' will get to this check, jumping to
1130 // 'firstAlternativeInputChecked' will jump directly to matching the alternative having
1131 // skipped this check.
1133 Label
firstAlternative(this);
1135 // check availability for the next alternative
1136 int countCheckedForCurrentAlternative
= 0;
1137 int countToCheckForFirstAlternative
= 0;
1138 bool hasShorterAlternatives
= false;
1139 JumpList notEnoughInputForPreviousAlternative
;
1141 if (state
.alternativeValid()) {
1142 PatternAlternative
* alternative
= state
.alternative();
1143 countToCheckForFirstAlternative
= alternative
->m_minimumSize
;
1144 state
.checkedTotal
+= countToCheckForFirstAlternative
;
1145 if (countToCheckForFirstAlternative
)
1146 notEnoughInputForPreviousAlternative
.append(jumpIfNoAvailableInput(countToCheckForFirstAlternative
));
1147 countCheckedForCurrentAlternative
= countToCheckForFirstAlternative
;
1150 Label
firstAlternativeInputChecked(this);
1152 while (state
.alternativeValid()) {
1153 // Track whether any alternatives are shorter than the first one.
1154 hasShorterAlternatives
= hasShorterAlternatives
|| (countCheckedForCurrentAlternative
< countToCheckForFirstAlternative
);
1156 PatternAlternative
* alternative
= state
.alternative();
1157 optimizeAlternative(alternative
);
1159 for (state
.resetTerm(); state
.termValid(); state
.nextTerm())
1160 generateTerm(state
);
1162 // If we get here, the alternative matched.
1163 if (m_pattern
.m_body
->m_callFrameSize
)
1164 addPtr(Imm32(m_pattern
.m_body
->m_callFrameSize
* sizeof(void*)), stackPointerRegister
);
1166 ASSERT(index
!= returnRegister
);
1167 if (m_pattern
.m_body
->m_hasFixedSize
) {
1168 move(index
, returnRegister
);
1169 if (alternative
->m_minimumSize
)
1170 sub32(Imm32(alternative
->m_minimumSize
), returnRegister
);
1172 pop(returnRegister
);
1173 store32(index
, Address(output
, 4));
1174 store32(returnRegister
, output
);
1178 state
.nextAlternative();
1180 // if there are any more alternatives, plant the check for input before looping.
1181 if (state
.alternativeValid()) {
1182 PatternAlternative
* nextAlternative
= state
.alternative();
1183 int countToCheckForNextAlternative
= nextAlternative
->m_minimumSize
;
1185 if (countCheckedForCurrentAlternative
> countToCheckForNextAlternative
) { // CASE 1: current alternative was longer than the next one.
1186 // If we get here, there the last input checked failed.
1187 notEnoughInputForPreviousAlternative
.link(this);
1189 // Check if sufficent input available to run the next alternative
1190 notEnoughInputForPreviousAlternative
.append(jumpIfNoAvailableInput(countToCheckForNextAlternative
- countCheckedForCurrentAlternative
));
1191 // We are now in the correct state to enter the next alternative; this add is only required
1192 // to mirror and revert operation of the sub32, just below.
1193 add32(Imm32(countCheckedForCurrentAlternative
- countToCheckForNextAlternative
), index
);
1195 // If we get here, there the last input checked passed.
1196 state
.linkAlternativeBacktracks(this);
1197 // No need to check if we can run the next alternative, since it is shorter -
1198 // just update index.
1199 sub32(Imm32(countCheckedForCurrentAlternative
- countToCheckForNextAlternative
), index
);
1200 } else if (countCheckedForCurrentAlternative
< countToCheckForNextAlternative
) { // CASE 2: next alternative is longer than the current one.
1201 // If we get here, there the last input checked failed.
1202 // If there is insufficient input to run the current alternative, and the next alternative is longer,
1203 // then there is definitely not enough input to run it - don't even check. Just adjust index, as if
1205 notEnoughInputForPreviousAlternative
.link(this);
1206 add32(Imm32(countToCheckForNextAlternative
- countCheckedForCurrentAlternative
), index
);
1207 notEnoughInputForPreviousAlternative
.append(jump());
1209 // The next alternative is longer than the current one; check the difference.
1210 state
.linkAlternativeBacktracks(this);
1211 notEnoughInputForPreviousAlternative
.append(jumpIfNoAvailableInput(countToCheckForNextAlternative
- countCheckedForCurrentAlternative
));
1212 } else { // CASE 3: Both alternatives are the same length.
1213 ASSERT(countCheckedForCurrentAlternative
== countToCheckForNextAlternative
);
1215 // If the next alterative is the same length as this one, then no need to check the input -
1216 // if there was sufficent input to run the current alternative then there is sufficient
1217 // input to run the next one; if not, there isn't.
1218 state
.linkAlternativeBacktracks(this);
1221 state
.checkedTotal
-= countCheckedForCurrentAlternative
;
1222 countCheckedForCurrentAlternative
= countToCheckForNextAlternative
;
1223 state
.checkedTotal
+= countCheckedForCurrentAlternative
;
1227 // If we get here, all Alternatives failed...
1229 state
.checkedTotal
-= countCheckedForCurrentAlternative
;
1231 // How much more input need there be to be able to retry from the first alternative?
1233 // /yarr_jit/ or /wrec|pcre/
1234 // In these examples we need check for one more input before looping.
1236 // In this case we need check for 5 more input to loop (+4 to allow for the first alterative
1237 // being four longer than the last alternative checked, and another +1 to effectively move
1238 // the start position along by one).
1239 // /yarr|rules/ or /wrec|notsomuch/
1240 // In these examples, provided that there was sufficient input to have just been matching for
1241 // the second alternative we can loop without checking for available input (since the second
1242 // alternative is longer than the first). In the latter example we need to decrement index
1243 // (by 4) so the start position is only progressed by 1 from the last iteration.
1244 int incrementForNextIter
= (countToCheckForFirstAlternative
- countCheckedForCurrentAlternative
) + 1;
1246 // First, deal with the cases where there was sufficient input to try the last alternative.
1247 if (incrementForNextIter
> 0) // We need to check for more input anyway, fall through to the checking below.
1248 state
.linkAlternativeBacktracks(this);
1249 else if (m_pattern
.m_body
->m_hasFixedSize
&& !incrementForNextIter
) // No need to update anything, link these backtracks straight to the to pof the loop!
1250 state
.linkAlternativeBacktracksTo(firstAlternativeInputChecked
, this);
1251 else { // no need to check the input, but we do have some bookkeeping to do first.
1252 state
.linkAlternativeBacktracks(this);
1254 // Where necessary update our preserved start position.
1255 if (!m_pattern
.m_body
->m_hasFixedSize
) {
1257 sub32(Imm32(countCheckedForCurrentAlternative
- 1), regT0
);
1258 poke(regT0
, m_pattern
.m_body
->m_callFrameSize
);
1261 // Update index if necessary, and loop (without checking).
1262 if (incrementForNextIter
)
1263 add32(Imm32(incrementForNextIter
), index
);
1264 jump().linkTo(firstAlternativeInputChecked
, this);
1267 notEnoughInputForPreviousAlternative
.link(this);
1268 // Update our idea of the start position, if we're tracking this.
1269 if (!m_pattern
.m_body
->m_hasFixedSize
) {
1270 if (countCheckedForCurrentAlternative
- 1) {
1272 sub32(Imm32(countCheckedForCurrentAlternative
- 1), regT0
);
1273 poke(regT0
, m_pattern
.m_body
->m_callFrameSize
);
1275 poke(index
, m_pattern
.m_body
->m_callFrameSize
);
1277 // Check if there is sufficent input to run the first alternative again.
1278 jumpIfAvailableInput(incrementForNextIter
).linkTo(firstAlternativeInputChecked
, this);
1279 // No - insufficent input to run the first alteranative, are there any other alternatives we
1280 // might need to check? If so, the last check will have left the index incremented by
1281 // (countToCheckForFirstAlternative + 1), so we need test whether countToCheckForFirstAlternative
1282 // LESS input is available, to have the effect of just progressing the start position by 1
1283 // from the last iteration. If this check passes we can just jump up to the check associated
1284 // with the first alternative in the loop. This is a bit sad, since we'll end up trying the
1285 // first alternative again, and this check will fail (otherwise the check planted just above
1286 // here would have passed). This is a bit sad, however it saves trying to do something more
1287 // complex here in compilation, and in the common case we should end up coallescing the checks.
1289 // FIXME: a nice improvement here may be to stop trying to match sooner, based on the least
1290 // of the minimum-alternative-lengths. E.g. if I have two alternatives of length 200 and 150,
1291 // and a string of length 100, we'll end up looping index from 0 to 100, checking whether there
1292 // is sufficient input to run either alternative (constantly failing). If there had been only
1293 // one alternative, or if the shorter alternative had come first, we would have terminated
1295 if (hasShorterAlternatives
)
1296 jumpIfAvailableInput(-countToCheckForFirstAlternative
).linkTo(firstAlternative
, this);
1297 // index will now be a bit garbled (depending on whether 'hasShorterAlternatives' is true,
1298 // it has either been incremented by 1 or by (countToCheckForFirstAlternative + 1) ...
1299 // but since we're about to return a failure this doesn't really matter!)
1301 unsigned frameSize
= m_pattern
.m_body
->m_callFrameSize
;
1302 if (!m_pattern
.m_body
->m_hasFixedSize
)
1305 addPtr(Imm32(frameSize
* sizeof(void*)), stackPointerRegister
);
1307 move(Imm32(-1), returnRegister
);
1312 void generateEnter()
1315 push(X86Registers::ebp
);
1316 move(stackPointerRegister
, X86Registers::ebp
);
1317 push(X86Registers::ebx
);
1319 push(X86Registers::ebp
);
1320 move(stackPointerRegister
, X86Registers::ebp
);
1321 // TODO: do we need spill registers to fill the output pointer if there are no sub captures?
1322 push(X86Registers::ebx
);
1323 push(X86Registers::edi
);
1324 push(X86Registers::esi
);
1325 // load output into edi (2 = saved ebp + return address).
1327 loadPtr(Address(X86Registers::ebp
, 2 * sizeof(void*)), input
);
1328 loadPtr(Address(X86Registers::ebp
, 3 * sizeof(void*)), index
);
1329 loadPtr(Address(X86Registers::ebp
, 4 * sizeof(void*)), length
);
1330 loadPtr(Address(X86Registers::ebp
, 5 * sizeof(void*)), output
);
1332 loadPtr(Address(X86Registers::ebp
, 2 * sizeof(void*)), output
);
1335 push(ARMRegisters::r4
);
1336 push(ARMRegisters::r5
);
1337 push(ARMRegisters::r6
);
1338 move(ARMRegisters::r3
, output
);
1344 void generateReturn()
1347 pop(X86Registers::ebx
);
1348 pop(X86Registers::ebp
);
1350 pop(X86Registers::esi
);
1351 pop(X86Registers::edi
);
1352 pop(X86Registers::ebx
);
1353 pop(X86Registers::ebp
);
1355 pop(ARMRegisters::r6
);
1356 pop(ARMRegisters::r5
);
1357 pop(ARMRegisters::r4
);
1365 RegexGenerator(RegexPattern
& pattern
)
1366 : m_pattern(pattern
)
1374 // TODO: do I really want this on the stack?
1375 if (!m_pattern
.m_body
->m_hasFixedSize
)
1378 if (m_pattern
.m_body
->m_callFrameSize
)
1379 subPtr(Imm32(m_pattern
.m_body
->m_callFrameSize
* sizeof(void*)), stackPointerRegister
);
1381 generateDisjunction(m_pattern
.m_body
);
1384 void compile(JSGlobalData
* globalData
, RegexCodeBlock
& jitObject
)
1388 LinkBuffer
patchBuffer(this, globalData
->regexAllocator
.poolForSize(size()), 0);
1390 for (unsigned i
= 0; i
< m_backtrackRecords
.size(); ++i
)
1391 patchBuffer
.patch(m_backtrackRecords
[i
].dataLabel
, patchBuffer
.locationOf(m_backtrackRecords
[i
].backtrackLocation
));
1393 jitObject
.set(patchBuffer
.finalizeCode());
1397 RegexPattern
& m_pattern
;
1398 Vector
<AlternativeBacktrackRecord
> m_backtrackRecords
;
1401 void jitCompileRegex(JSGlobalData
* globalData
, RegexCodeBlock
& jitObject
, const UString
& patternString
, unsigned& numSubpatterns
, const char*& error
, bool ignoreCase
, bool multiline
)
1403 RegexPattern
pattern(ignoreCase
, multiline
);
1404 if ((error
= compileRegex(patternString
, pattern
)))
1406 numSubpatterns
= pattern
.m_numSubpatterns
;
1408 if (!pattern
.m_shouldFallBack
&& globalData
->canUseJIT() && RegExpCache::isCacheable(patternString
)) {
1409 RegexGenerator
generator(pattern
);
1410 generator
.compile(globalData
, jitObject
);
1414 JSRegExpIgnoreCaseOption ignoreCaseOption
= ignoreCase
? JSRegExpIgnoreCase
: JSRegExpDoNotIgnoreCase
;
1415 JSRegExpMultilineOption multilineOption
= multiline
? JSRegExpMultiline
: JSRegExpSingleLine
;
1416 jitObject
.setFallback(jsRegExpCompile(reinterpret_cast<const UChar
*>(patternString
.data()), patternString
.size(), ignoreCaseOption
, multilineOption
, &numSubpatterns
, &error
));