2 * Copyright (C) 2009 Apple Inc. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 #include "ASCIICType.h"
30 #include "JSGlobalData.h"
31 #include "LinkBuffer.h"
32 #include "MacroAssembler.h"
33 #include "RegexCompiler.h"
35 #include "pcre.h" // temporary, remove when fallback is removed.
41 namespace JSC
{ namespace Yarr
{
44 class RegexGenerator
: private MacroAssembler
{
45 friend void jitCompileRegex(JSGlobalData
* globalData
, RegexCodeBlock
& jitObject
, const UString
& pattern
, unsigned& numSubpatterns
, const char*& error
, bool ignoreCase
, bool multiline
);
48 static const RegisterID input
= ARMRegisters::r0
;
49 static const RegisterID index
= ARMRegisters::r1
;
50 static const RegisterID length
= ARMRegisters::r2
;
51 static const RegisterID output
= ARMRegisters::r4
;
53 static const RegisterID regT0
= ARMRegisters::r5
;
54 static const RegisterID regT1
= ARMRegisters::r6
;
56 static const RegisterID returnRegister
= ARMRegisters::r0
;
58 static const RegisterID input
= X86Registers::eax
;
59 static const RegisterID index
= X86Registers::edx
;
60 static const RegisterID length
= X86Registers::ecx
;
61 static const RegisterID output
= X86Registers::edi
;
63 static const RegisterID regT0
= X86Registers::ebx
;
64 static const RegisterID regT1
= X86Registers::esi
;
66 static const RegisterID returnRegister
= X86Registers::eax
;
68 static const RegisterID input
= X86Registers::edi
;
69 static const RegisterID index
= X86Registers::esi
;
70 static const RegisterID length
= X86Registers::edx
;
71 static const RegisterID output
= X86Registers::ecx
;
73 static const RegisterID regT0
= X86Registers::eax
;
74 static const RegisterID regT1
= X86Registers::ebx
;
76 static const RegisterID returnRegister
= X86Registers::eax
;
79 void optimizeAlternative(PatternAlternative
* alternative
)
81 if (!alternative
->m_terms
.size())
84 for (unsigned i
= 0; i
< alternative
->m_terms
.size() - 1; ++i
) {
85 PatternTerm
& term
= alternative
->m_terms
[i
];
86 PatternTerm
& nextTerm
= alternative
->m_terms
[i
+ 1];
88 if ((term
.type
== PatternTerm::TypeCharacterClass
)
89 && (term
.quantityType
== QuantifierFixedCount
)
90 && (nextTerm
.type
== PatternTerm::TypePatternCharacter
)
91 && (nextTerm
.quantityType
== QuantifierFixedCount
)) {
92 PatternTerm termCopy
= term
;
93 alternative
->m_terms
[i
] = nextTerm
;
94 alternative
->m_terms
[i
+ 1] = termCopy
;
99 void matchCharacterClassRange(RegisterID character
, JumpList
& failures
, JumpList
& matchDest
, const CharacterRange
* ranges
, unsigned count
, unsigned* matchIndex
, const UChar
* matches
, unsigned matchCount
)
102 // pick which range we're going to generate
103 int which
= count
>> 1;
104 char lo
= ranges
[which
].begin
;
105 char hi
= ranges
[which
].end
;
107 // check if there are any ranges or matches below lo. If not, just jl to failure -
108 // if there is anything else to check, check that first, if it falls through jmp to failure.
109 if ((*matchIndex
< matchCount
) && (matches
[*matchIndex
] < lo
)) {
110 Jump loOrAbove
= branch32(GreaterThanOrEqual
, character
, Imm32((unsigned short)lo
));
112 // generate code for all ranges before this one
114 matchCharacterClassRange(character
, failures
, matchDest
, ranges
, which
, matchIndex
, matches
, matchCount
);
116 while ((*matchIndex
< matchCount
) && (matches
[*matchIndex
] < lo
)) {
117 matchDest
.append(branch32(Equal
, character
, Imm32((unsigned short)matches
[*matchIndex
])));
120 failures
.append(jump());
122 loOrAbove
.link(this);
124 Jump loOrAbove
= branch32(GreaterThanOrEqual
, character
, Imm32((unsigned short)lo
));
126 matchCharacterClassRange(character
, failures
, matchDest
, ranges
, which
, matchIndex
, matches
, matchCount
);
127 failures
.append(jump());
129 loOrAbove
.link(this);
131 failures
.append(branch32(LessThan
, character
, Imm32((unsigned short)lo
)));
133 while ((*matchIndex
< matchCount
) && (matches
[*matchIndex
] <= hi
))
136 matchDest
.append(branch32(LessThanOrEqual
, character
, Imm32((unsigned short)hi
)));
137 // fall through to here, the value is above hi.
139 // shuffle along & loop around if there are any more matches to handle.
140 unsigned next
= which
+ 1;
146 void matchCharacterClass(RegisterID character
, JumpList
& matchDest
, const CharacterClass
* charClass
)
149 if (charClass
->m_matchesUnicode
.size() || charClass
->m_rangesUnicode
.size()) {
150 Jump isAscii
= branch32(LessThanOrEqual
, character
, Imm32(0x7f));
152 if (charClass
->m_matchesUnicode
.size()) {
153 for (unsigned i
= 0; i
< charClass
->m_matchesUnicode
.size(); ++i
) {
154 UChar ch
= charClass
->m_matchesUnicode
[i
];
155 matchDest
.append(branch32(Equal
, character
, Imm32(ch
)));
159 if (charClass
->m_rangesUnicode
.size()) {
160 for (unsigned i
= 0; i
< charClass
->m_rangesUnicode
.size(); ++i
) {
161 UChar lo
= charClass
->m_rangesUnicode
[i
].begin
;
162 UChar hi
= charClass
->m_rangesUnicode
[i
].end
;
164 Jump below
= branch32(LessThan
, character
, Imm32(lo
));
165 matchDest
.append(branch32(LessThanOrEqual
, character
, Imm32(hi
)));
170 unicodeFail
= jump();
174 if (charClass
->m_ranges
.size()) {
175 unsigned matchIndex
= 0;
177 matchCharacterClassRange(character
, failures
, matchDest
, charClass
->m_ranges
.begin(), charClass
->m_ranges
.size(), &matchIndex
, charClass
->m_matches
.begin(), charClass
->m_matches
.size());
178 while (matchIndex
< charClass
->m_matches
.size())
179 matchDest
.append(branch32(Equal
, character
, Imm32((unsigned short)charClass
->m_matches
[matchIndex
++])));
182 } else if (charClass
->m_matches
.size()) {
183 // optimization: gather 'a','A' etc back together, can mask & test once.
184 Vector
<char> matchesAZaz
;
186 for (unsigned i
= 0; i
< charClass
->m_matches
.size(); ++i
) {
187 char ch
= charClass
->m_matches
[i
];
188 if (m_pattern
.m_ignoreCase
) {
189 if (isASCIILower(ch
)) {
190 matchesAZaz
.append(ch
);
193 if (isASCIIUpper(ch
))
196 matchDest
.append(branch32(Equal
, character
, Imm32((unsigned short)ch
)));
199 if (unsigned countAZaz
= matchesAZaz
.size()) {
200 or32(Imm32(32), character
);
201 for (unsigned i
= 0; i
< countAZaz
; ++i
)
202 matchDest
.append(branch32(Equal
, character
, Imm32(matchesAZaz
[i
])));
206 if (charClass
->m_matchesUnicode
.size() || charClass
->m_rangesUnicode
.size())
207 unicodeFail
.link(this);
210 // Jumps if input not available; will have (incorrectly) incremented already!
211 Jump
jumpIfNoAvailableInput(unsigned countToCheck
)
213 add32(Imm32(countToCheck
), index
);
214 return branch32(Above
, index
, length
);
217 Jump
jumpIfAvailableInput(unsigned countToCheck
)
219 add32(Imm32(countToCheck
), index
);
220 return branch32(BelowOrEqual
, index
, length
);
225 return branch32(BelowOrEqual
, index
, length
);
230 return branch32(Equal
, index
, length
);
233 Jump
notAtEndOfInput()
235 return branch32(NotEqual
, index
, length
);
238 Jump
jumpIfCharEquals(UChar ch
, int inputPosition
)
240 return branch16(Equal
, BaseIndex(input
, index
, TimesTwo
, inputPosition
* sizeof(UChar
)), Imm32(ch
));
243 Jump
jumpIfCharNotEquals(UChar ch
, int inputPosition
)
245 return branch16(NotEqual
, BaseIndex(input
, index
, TimesTwo
, inputPosition
* sizeof(UChar
)), Imm32(ch
));
248 void readCharacter(int inputPosition
, RegisterID reg
)
250 load16(BaseIndex(input
, index
, TimesTwo
, inputPosition
* sizeof(UChar
)), reg
);
253 void storeToFrame(RegisterID reg
, unsigned frameLocation
)
255 poke(reg
, frameLocation
);
258 void storeToFrame(Imm32 imm
, unsigned frameLocation
)
260 poke(imm
, frameLocation
);
263 DataLabelPtr
storeToFrameWithPatch(unsigned frameLocation
)
265 return storePtrWithPatch(ImmPtr(0), Address(stackPointerRegister
, frameLocation
* sizeof(void*)));
268 void loadFromFrame(unsigned frameLocation
, RegisterID reg
)
270 peek(reg
, frameLocation
);
273 void loadFromFrameAndJump(unsigned frameLocation
)
275 jump(Address(stackPointerRegister
, frameLocation
* sizeof(void*)));
278 struct AlternativeBacktrackRecord
{
279 DataLabelPtr dataLabel
;
280 Label backtrackLocation
;
282 AlternativeBacktrackRecord(DataLabelPtr dataLabel
, Label backtrackLocation
)
283 : dataLabel(dataLabel
)
284 , backtrackLocation(backtrackLocation
)
289 struct TermGenerationState
{
290 TermGenerationState(PatternDisjunction
* disjunction
, unsigned checkedTotal
)
291 : disjunction(disjunction
)
292 , checkedTotal(checkedTotal
)
296 void resetAlternative()
298 isBackTrackGenerated
= false;
301 bool alternativeValid()
303 return alt
< disjunction
->m_alternatives
.size();
305 void nextAlternative()
309 PatternAlternative
* alternative()
311 return disjunction
->m_alternatives
[alt
];
316 ASSERT(alternativeValid());
321 ASSERT(alternativeValid());
322 return t
< alternative()->m_terms
.size();
326 ASSERT(alternativeValid());
331 ASSERT(alternativeValid());
332 return alternative()->m_terms
[t
];
335 PatternTerm
& lookaheadTerm()
337 ASSERT(alternativeValid());
338 ASSERT((t
+ 1) < alternative()->m_terms
.size());
339 return alternative()->m_terms
[t
+ 1];
341 bool isSinglePatternCharacterLookaheadTerm()
343 ASSERT(alternativeValid());
344 return ((t
+ 1) < alternative()->m_terms
.size())
345 && (lookaheadTerm().type
== PatternTerm::TypePatternCharacter
)
346 && (lookaheadTerm().quantityType
== QuantifierFixedCount
)
347 && (lookaheadTerm().quantityCount
== 1);
352 return term().inputPosition
- checkedTotal
;
355 void jumpToBacktrack(Jump jump
, MacroAssembler
* masm
)
357 if (isBackTrackGenerated
)
358 jump
.linkTo(backtrackLabel
, masm
);
360 backTrackJumps
.append(jump
);
362 void jumpToBacktrack(JumpList
& jumps
, MacroAssembler
* masm
)
364 if (isBackTrackGenerated
)
365 jumps
.linkTo(backtrackLabel
, masm
);
367 backTrackJumps
.append(jumps
);
369 bool plantJumpToBacktrackIfExists(MacroAssembler
* masm
)
371 if (isBackTrackGenerated
) {
372 masm
->jump(backtrackLabel
);
377 void addBacktrackJump(Jump jump
)
379 backTrackJumps
.append(jump
);
381 void setBacktrackGenerated(Label label
)
383 isBackTrackGenerated
= true;
384 backtrackLabel
= label
;
386 void linkAlternativeBacktracks(MacroAssembler
* masm
)
388 isBackTrackGenerated
= false;
389 backTrackJumps
.link(masm
);
391 void linkAlternativeBacktracksTo(Label label
, MacroAssembler
* masm
)
393 isBackTrackGenerated
= false;
394 backTrackJumps
.linkTo(label
, masm
);
396 void propagateBacktrackingFrom(TermGenerationState
& nestedParenthesesState
, MacroAssembler
* masm
)
398 jumpToBacktrack(nestedParenthesesState
.backTrackJumps
, masm
);
399 if (nestedParenthesesState
.isBackTrackGenerated
)
400 setBacktrackGenerated(nestedParenthesesState
.backtrackLabel
);
403 PatternDisjunction
* disjunction
;
408 JumpList backTrackJumps
;
409 Label backtrackLabel
;
410 bool isBackTrackGenerated
;
413 void generateAssertionBOL(TermGenerationState
& state
)
415 PatternTerm
& term
= state
.term();
417 if (m_pattern
.m_multiline
) {
418 const RegisterID character
= regT0
;
421 if (!term
.inputPosition
)
422 matchDest
.append(branch32(Equal
, index
, Imm32(state
.checkedTotal
)));
424 readCharacter(state
.inputOffset() - 1, character
);
425 matchCharacterClass(character
, matchDest
, m_pattern
.newlineCharacterClass());
426 state
.jumpToBacktrack(jump(), this);
428 matchDest
.link(this);
430 // Erk, really should poison out these alternatives early. :-/
431 if (term
.inputPosition
)
432 state
.jumpToBacktrack(jump(), this);
434 state
.jumpToBacktrack(branch32(NotEqual
, index
, Imm32(state
.checkedTotal
)), this);
438 void generateAssertionEOL(TermGenerationState
& state
)
440 PatternTerm
& term
= state
.term();
442 if (m_pattern
.m_multiline
) {
443 const RegisterID character
= regT0
;
446 if (term
.inputPosition
== state
.checkedTotal
)
447 matchDest
.append(atEndOfInput());
449 readCharacter(state
.inputOffset(), character
);
450 matchCharacterClass(character
, matchDest
, m_pattern
.newlineCharacterClass());
451 state
.jumpToBacktrack(jump(), this);
453 matchDest
.link(this);
455 if (term
.inputPosition
== state
.checkedTotal
)
456 state
.jumpToBacktrack(notAtEndOfInput(), this);
457 // Erk, really should poison out these alternatives early. :-/
459 state
.jumpToBacktrack(jump(), this);
463 // Also falls though on nextIsNotWordChar.
464 void matchAssertionWordchar(TermGenerationState
& state
, JumpList
& nextIsWordChar
, JumpList
& nextIsNotWordChar
)
466 const RegisterID character
= regT0
;
467 PatternTerm
& term
= state
.term();
469 if (term
.inputPosition
== state
.checkedTotal
)
470 nextIsNotWordChar
.append(atEndOfInput());
472 readCharacter(state
.inputOffset(), character
);
473 matchCharacterClass(character
, nextIsWordChar
, m_pattern
.wordcharCharacterClass());
476 void generateAssertionWordBoundary(TermGenerationState
& state
)
478 const RegisterID character
= regT0
;
479 PatternTerm
& term
= state
.term();
483 if (!term
.inputPosition
)
484 atBegin
= branch32(Equal
, index
, Imm32(state
.checkedTotal
));
485 readCharacter(state
.inputOffset() - 1, character
);
486 matchCharacterClass(character
, matchDest
, m_pattern
.wordcharCharacterClass());
487 if (!term
.inputPosition
)
490 // We fall through to here if the last character was not a wordchar.
491 JumpList nonWordCharThenWordChar
;
492 JumpList nonWordCharThenNonWordChar
;
493 if (term
.invertOrCapture
) {
494 matchAssertionWordchar(state
, nonWordCharThenNonWordChar
, nonWordCharThenWordChar
);
495 nonWordCharThenWordChar
.append(jump());
497 matchAssertionWordchar(state
, nonWordCharThenWordChar
, nonWordCharThenNonWordChar
);
498 nonWordCharThenNonWordChar
.append(jump());
500 state
.jumpToBacktrack(nonWordCharThenNonWordChar
, this);
502 // We jump here if the last character was a wordchar.
503 matchDest
.link(this);
504 JumpList wordCharThenWordChar
;
505 JumpList wordCharThenNonWordChar
;
506 if (term
.invertOrCapture
) {
507 matchAssertionWordchar(state
, wordCharThenNonWordChar
, wordCharThenWordChar
);
508 wordCharThenWordChar
.append(jump());
510 matchAssertionWordchar(state
, wordCharThenWordChar
, wordCharThenNonWordChar
);
511 // This can fall-though!
514 state
.jumpToBacktrack(wordCharThenWordChar
, this);
516 nonWordCharThenWordChar
.link(this);
517 wordCharThenNonWordChar
.link(this);
520 void generatePatternCharacterSingle(TermGenerationState
& state
)
522 const RegisterID character
= regT0
;
523 UChar ch
= state
.term().patternCharacter
;
525 if (m_pattern
.m_ignoreCase
&& isASCIIAlpha(ch
)) {
526 readCharacter(state
.inputOffset(), character
);
527 or32(Imm32(32), character
);
528 state
.jumpToBacktrack(branch32(NotEqual
, character
, Imm32(Unicode::toLower(ch
))), this);
530 ASSERT(!m_pattern
.m_ignoreCase
|| (Unicode::toLower(ch
) == Unicode::toUpper(ch
)));
531 state
.jumpToBacktrack(jumpIfCharNotEquals(ch
, state
.inputOffset()), this);
535 void generatePatternCharacterPair(TermGenerationState
& state
)
537 const RegisterID character
= regT0
;
538 UChar ch1
= state
.term().patternCharacter
;
539 UChar ch2
= state
.lookaheadTerm().patternCharacter
;
542 int chPair
= ch1
| (ch2
<< 16);
544 if (m_pattern
.m_ignoreCase
) {
545 if (isASCIIAlpha(ch1
))
547 if (isASCIIAlpha(ch2
))
552 load32WithUnalignedHalfWords(BaseIndex(input
, index
, TimesTwo
, state
.inputOffset() * sizeof(UChar
)), character
);
553 or32(Imm32(mask
), character
);
554 state
.jumpToBacktrack(branch32(NotEqual
, character
, Imm32(chPair
| mask
)), this);
556 state
.jumpToBacktrack(branch32WithUnalignedHalfWords(NotEqual
, BaseIndex(input
, index
, TimesTwo
, state
.inputOffset() * sizeof(UChar
)), Imm32(chPair
)), this);
559 void generatePatternCharacterFixed(TermGenerationState
& state
)
561 const RegisterID character
= regT0
;
562 const RegisterID countRegister
= regT1
;
563 PatternTerm
& term
= state
.term();
564 UChar ch
= term
.patternCharacter
;
566 move(index
, countRegister
);
567 sub32(Imm32(term
.quantityCount
), countRegister
);
570 if (m_pattern
.m_ignoreCase
&& isASCIIAlpha(ch
)) {
571 load16(BaseIndex(input
, countRegister
, TimesTwo
, (state
.inputOffset() + term
.quantityCount
) * sizeof(UChar
)), character
);
572 or32(Imm32(32), character
);
573 state
.jumpToBacktrack(branch32(NotEqual
, character
, Imm32(Unicode::toLower(ch
))), this);
575 ASSERT(!m_pattern
.m_ignoreCase
|| (Unicode::toLower(ch
) == Unicode::toUpper(ch
)));
576 state
.jumpToBacktrack(branch16(NotEqual
, BaseIndex(input
, countRegister
, TimesTwo
, (state
.inputOffset() + term
.quantityCount
) * sizeof(UChar
)), Imm32(ch
)), this);
578 add32(Imm32(1), countRegister
);
579 branch32(NotEqual
, countRegister
, index
).linkTo(loop
, this);
582 void generatePatternCharacterGreedy(TermGenerationState
& state
)
584 const RegisterID character
= regT0
;
585 const RegisterID countRegister
= regT1
;
586 PatternTerm
& term
= state
.term();
587 UChar ch
= term
.patternCharacter
;
589 move(Imm32(0), countRegister
);
593 failures
.append(atEndOfInput());
594 if (m_pattern
.m_ignoreCase
&& isASCIIAlpha(ch
)) {
595 readCharacter(state
.inputOffset(), character
);
596 or32(Imm32(32), character
);
597 failures
.append(branch32(NotEqual
, character
, Imm32(Unicode::toLower(ch
))));
599 ASSERT(!m_pattern
.m_ignoreCase
|| (Unicode::toLower(ch
) == Unicode::toUpper(ch
)));
600 failures
.append(jumpIfCharNotEquals(ch
, state
.inputOffset()));
602 add32(Imm32(1), countRegister
);
603 add32(Imm32(1), index
);
604 branch32(NotEqual
, countRegister
, Imm32(term
.quantityCount
)).linkTo(loop
, this);
605 failures
.append(jump());
607 Label
backtrackBegin(this);
608 loadFromFrame(term
.frameLocation
, countRegister
);
609 state
.jumpToBacktrack(branchTest32(Zero
, countRegister
), this);
610 sub32(Imm32(1), countRegister
);
611 sub32(Imm32(1), index
);
615 storeToFrame(countRegister
, term
.frameLocation
);
617 state
.setBacktrackGenerated(backtrackBegin
);
620 void generatePatternCharacterNonGreedy(TermGenerationState
& state
)
622 const RegisterID character
= regT0
;
623 const RegisterID countRegister
= regT1
;
624 PatternTerm
& term
= state
.term();
625 UChar ch
= term
.patternCharacter
;
627 move(Imm32(0), countRegister
);
629 Jump firstTimeDoNothing
= jump();
631 Label
hardFail(this);
632 sub32(countRegister
, index
);
633 state
.jumpToBacktrack(jump(), this);
635 Label
backtrackBegin(this);
636 loadFromFrame(term
.frameLocation
, countRegister
);
638 atEndOfInput().linkTo(hardFail
, this);
639 branch32(Equal
, countRegister
, Imm32(term
.quantityCount
), hardFail
);
640 if (m_pattern
.m_ignoreCase
&& isASCIIAlpha(ch
)) {
641 readCharacter(state
.inputOffset(), character
);
642 or32(Imm32(32), character
);
643 branch32(NotEqual
, character
, Imm32(Unicode::toLower(ch
))).linkTo(hardFail
, this);
645 ASSERT(!m_pattern
.m_ignoreCase
|| (Unicode::toLower(ch
) == Unicode::toUpper(ch
)));
646 jumpIfCharNotEquals(ch
, state
.inputOffset()).linkTo(hardFail
, this);
649 add32(Imm32(1), countRegister
);
650 add32(Imm32(1), index
);
652 firstTimeDoNothing
.link(this);
653 storeToFrame(countRegister
, term
.frameLocation
);
655 state
.setBacktrackGenerated(backtrackBegin
);
658 void generateCharacterClassSingle(TermGenerationState
& state
)
660 const RegisterID character
= regT0
;
661 PatternTerm
& term
= state
.term();
664 readCharacter(state
.inputOffset(), character
);
665 matchCharacterClass(character
, matchDest
, term
.characterClass
);
667 if (term
.invertOrCapture
)
668 state
.jumpToBacktrack(matchDest
, this);
670 state
.jumpToBacktrack(jump(), this);
671 matchDest
.link(this);
675 void generateCharacterClassFixed(TermGenerationState
& state
)
677 const RegisterID character
= regT0
;
678 const RegisterID countRegister
= regT1
;
679 PatternTerm
& term
= state
.term();
681 move(index
, countRegister
);
682 sub32(Imm32(term
.quantityCount
), countRegister
);
686 load16(BaseIndex(input
, countRegister
, TimesTwo
, (state
.inputOffset() + term
.quantityCount
) * sizeof(UChar
)), character
);
687 matchCharacterClass(character
, matchDest
, term
.characterClass
);
689 if (term
.invertOrCapture
)
690 state
.jumpToBacktrack(matchDest
, this);
692 state
.jumpToBacktrack(jump(), this);
693 matchDest
.link(this);
696 add32(Imm32(1), countRegister
);
697 branch32(NotEqual
, countRegister
, index
).linkTo(loop
, this);
700 void generateCharacterClassGreedy(TermGenerationState
& state
)
702 const RegisterID character
= regT0
;
703 const RegisterID countRegister
= regT1
;
704 PatternTerm
& term
= state
.term();
706 move(Imm32(0), countRegister
);
710 failures
.append(atEndOfInput());
712 if (term
.invertOrCapture
) {
713 readCharacter(state
.inputOffset(), character
);
714 matchCharacterClass(character
, failures
, term
.characterClass
);
717 readCharacter(state
.inputOffset(), character
);
718 matchCharacterClass(character
, matchDest
, term
.characterClass
);
719 failures
.append(jump());
720 matchDest
.link(this);
723 add32(Imm32(1), countRegister
);
724 add32(Imm32(1), index
);
725 branch32(NotEqual
, countRegister
, Imm32(term
.quantityCount
)).linkTo(loop
, this);
726 failures
.append(jump());
728 Label
backtrackBegin(this);
729 loadFromFrame(term
.frameLocation
, countRegister
);
730 state
.jumpToBacktrack(branchTest32(Zero
, countRegister
), this);
731 sub32(Imm32(1), countRegister
);
732 sub32(Imm32(1), index
);
736 storeToFrame(countRegister
, term
.frameLocation
);
738 state
.setBacktrackGenerated(backtrackBegin
);
741 void generateCharacterClassNonGreedy(TermGenerationState
& state
)
743 const RegisterID character
= regT0
;
744 const RegisterID countRegister
= regT1
;
745 PatternTerm
& term
= state
.term();
747 move(Imm32(0), countRegister
);
749 Jump firstTimeDoNothing
= jump();
751 Label
hardFail(this);
752 sub32(countRegister
, index
);
753 state
.jumpToBacktrack(jump(), this);
755 Label
backtrackBegin(this);
756 loadFromFrame(term
.frameLocation
, countRegister
);
758 atEndOfInput().linkTo(hardFail
, this);
759 branch32(Equal
, countRegister
, Imm32(term
.quantityCount
), hardFail
);
762 readCharacter(state
.inputOffset(), character
);
763 matchCharacterClass(character
, matchDest
, term
.characterClass
);
765 if (term
.invertOrCapture
)
766 matchDest
.linkTo(hardFail
, this);
769 matchDest
.link(this);
772 add32(Imm32(1), countRegister
);
773 add32(Imm32(1), index
);
775 firstTimeDoNothing
.link(this);
776 storeToFrame(countRegister
, term
.frameLocation
);
778 state
.setBacktrackGenerated(backtrackBegin
);
781 void generateParenthesesDisjunction(PatternTerm
& parenthesesTerm
, TermGenerationState
& state
, unsigned alternativeFrameLocation
)
783 ASSERT((parenthesesTerm
.type
== PatternTerm::TypeParenthesesSubpattern
) || (parenthesesTerm
.type
== PatternTerm::TypeParentheticalAssertion
));
784 ASSERT(parenthesesTerm
.quantityCount
== 1);
786 PatternDisjunction
* disjunction
= parenthesesTerm
.parentheses
.disjunction
;
787 unsigned preCheckedCount
= ((parenthesesTerm
.quantityType
== QuantifierFixedCount
) && (parenthesesTerm
.type
!= PatternTerm::TypeParentheticalAssertion
)) ? disjunction
->m_minimumSize
: 0;
789 if (disjunction
->m_alternatives
.size() == 1) {
790 state
.resetAlternative();
791 ASSERT(state
.alternativeValid());
792 PatternAlternative
* alternative
= state
.alternative();
793 optimizeAlternative(alternative
);
795 int countToCheck
= alternative
->m_minimumSize
- preCheckedCount
;
797 ASSERT((parenthesesTerm
.type
== PatternTerm::TypeParentheticalAssertion
) || (parenthesesTerm
.quantityType
!= QuantifierFixedCount
));
799 // FIXME: This is quite horrible. The call to 'plantJumpToBacktrackIfExists'
800 // will be forced to always trampoline into here, just to decrement the index.
804 Label
backtrackBegin(this);
805 sub32(Imm32(countToCheck
), index
);
806 state
.addBacktrackJump(jump());
810 state
.setBacktrackGenerated(backtrackBegin
);
812 state
.jumpToBacktrack(jumpIfNoAvailableInput(countToCheck
), this);
813 state
.checkedTotal
+= countToCheck
;
816 for (state
.resetTerm(); state
.termValid(); state
.nextTerm())
819 state
.checkedTotal
-= countToCheck
;
823 for (state
.resetAlternative(); state
.alternativeValid(); state
.nextAlternative()) {
825 PatternAlternative
* alternative
= state
.alternative();
826 optimizeAlternative(alternative
);
828 ASSERT(alternative
->m_minimumSize
>= preCheckedCount
);
829 int countToCheck
= alternative
->m_minimumSize
- preCheckedCount
;
831 state
.addBacktrackJump(jumpIfNoAvailableInput(countToCheck
));
832 state
.checkedTotal
+= countToCheck
;
835 for (state
.resetTerm(); state
.termValid(); state
.nextTerm())
838 // Matched an alternative.
839 DataLabelPtr dataLabel
= storeToFrameWithPatch(alternativeFrameLocation
);
840 successes
.append(jump());
842 // Alternative did not match.
843 Label
backtrackLocation(this);
845 // Can we backtrack the alternative? - if so, do so. If not, just fall through to the next one.
846 state
.plantJumpToBacktrackIfExists(this);
848 state
.linkAlternativeBacktracks(this);
851 sub32(Imm32(countToCheck
), index
);
852 state
.checkedTotal
-= countToCheck
;
855 m_backtrackRecords
.append(AlternativeBacktrackRecord(dataLabel
, backtrackLocation
));
857 // We fall through to here when the last alternative fails.
858 // Add a backtrack out of here for the parenthese handling code to link up.
859 state
.addBacktrackJump(jump());
861 // Generate a trampoline for the parens code to backtrack to, to retry the
863 state
.setBacktrackGenerated(label());
864 loadFromFrameAndJump(alternativeFrameLocation
);
866 // FIXME: both of the above hooks are a little inefficient, in that you
867 // may end up trampolining here, just to trampoline back out to the
868 // parentheses code, or vice versa. We can probably eliminate a jump
869 // by restructuring, but coding this way for now for simplicity during
872 successes
.link(this);
876 void generateParenthesesSingle(TermGenerationState
& state
)
878 const RegisterID indexTemporary
= regT0
;
879 PatternTerm
& term
= state
.term();
880 PatternDisjunction
* disjunction
= term
.parentheses
.disjunction
;
881 ASSERT(term
.quantityCount
== 1);
883 unsigned preCheckedCount
= ((term
.quantityCount
== 1) && (term
.quantityType
== QuantifierFixedCount
)) ? disjunction
->m_minimumSize
: 0;
885 unsigned parenthesesFrameLocation
= term
.frameLocation
;
886 unsigned alternativeFrameLocation
= parenthesesFrameLocation
;
887 if (term
.quantityType
!= QuantifierFixedCount
)
888 alternativeFrameLocation
+= RegexStackSpaceForBackTrackInfoParenthesesOnce
;
890 // optimized case - no capture & no quantifier can be handled in a light-weight manner.
891 if (!term
.invertOrCapture
&& (term
.quantityType
== QuantifierFixedCount
)) {
892 TermGenerationState
parenthesesState(disjunction
, state
.checkedTotal
);
893 generateParenthesesDisjunction(state
.term(), parenthesesState
, alternativeFrameLocation
);
894 // this expects that any backtracks back out of the parentheses will be in the
895 // parenthesesState's backTrackJumps vector, and that if they need backtracking
896 // they will have set an entry point on the parenthesesState's backtrackLabel.
897 state
.propagateBacktrackingFrom(parenthesesState
, this);
899 Jump nonGreedySkipParentheses
;
900 Label nonGreedyTryParentheses
;
901 if (term
.quantityType
== QuantifierGreedy
)
902 storeToFrame(Imm32(1), parenthesesFrameLocation
);
903 else if (term
.quantityType
== QuantifierNonGreedy
) {
904 storeToFrame(Imm32(0), parenthesesFrameLocation
);
905 nonGreedySkipParentheses
= jump();
906 nonGreedyTryParentheses
= label();
907 storeToFrame(Imm32(1), parenthesesFrameLocation
);
910 // store the match start index
911 if (term
.invertOrCapture
) {
912 int inputOffset
= state
.inputOffset() - preCheckedCount
;
914 move(index
, indexTemporary
);
915 add32(Imm32(inputOffset
), indexTemporary
);
916 store32(indexTemporary
, Address(output
, (term
.parentheses
.subpatternId
<< 1) * sizeof(int)));
918 store32(index
, Address(output
, (term
.parentheses
.subpatternId
<< 1) * sizeof(int)));
921 // generate the body of the parentheses
922 TermGenerationState
parenthesesState(disjunction
, state
.checkedTotal
);
923 generateParenthesesDisjunction(state
.term(), parenthesesState
, alternativeFrameLocation
);
925 // store the match end index
926 if (term
.invertOrCapture
) {
927 int inputOffset
= state
.inputOffset();
929 move(index
, indexTemporary
);
930 add32(Imm32(state
.inputOffset()), indexTemporary
);
931 store32(indexTemporary
, Address(output
, ((term
.parentheses
.subpatternId
<< 1) + 1) * sizeof(int)));
933 store32(index
, Address(output
, ((term
.parentheses
.subpatternId
<< 1) + 1) * sizeof(int)));
935 Jump success
= jump();
937 // A failure AFTER the parens jumps here
938 Label
backtrackFromAfterParens(this);
940 if (term
.quantityType
== QuantifierGreedy
) {
941 // If this is zero we have now tested with both with and without the parens.
942 loadFromFrame(parenthesesFrameLocation
, indexTemporary
);
943 state
.jumpToBacktrack(branchTest32(Zero
, indexTemporary
), this);
944 } else if (term
.quantityType
== QuantifierNonGreedy
) {
945 // If this is zero we have now tested with both with and without the parens.
946 loadFromFrame(parenthesesFrameLocation
, indexTemporary
);
947 branchTest32(Zero
, indexTemporary
).linkTo(nonGreedyTryParentheses
, this);
950 parenthesesState
.plantJumpToBacktrackIfExists(this);
951 // A failure WITHIN the parens jumps here
952 parenthesesState
.linkAlternativeBacktracks(this);
953 if (term
.invertOrCapture
) {
954 store32(Imm32(-1), Address(output
, (term
.parentheses
.subpatternId
<< 1) * sizeof(int)));
955 store32(Imm32(-1), Address(output
, ((term
.parentheses
.subpatternId
<< 1) + 1) * sizeof(int)));
958 if (term
.quantityType
== QuantifierGreedy
)
959 storeToFrame(Imm32(0), parenthesesFrameLocation
);
961 state
.jumpToBacktrack(jump(), this);
963 state
.setBacktrackGenerated(backtrackFromAfterParens
);
964 if (term
.quantityType
== QuantifierNonGreedy
)
965 nonGreedySkipParentheses
.link(this);
970 void generateParentheticalAssertion(TermGenerationState
& state
)
972 PatternTerm
& term
= state
.term();
973 PatternDisjunction
* disjunction
= term
.parentheses
.disjunction
;
974 ASSERT(term
.quantityCount
== 1);
975 ASSERT(term
.quantityType
== QuantifierFixedCount
);
977 unsigned parenthesesFrameLocation
= term
.frameLocation
;
978 unsigned alternativeFrameLocation
= parenthesesFrameLocation
+ RegexStackSpaceForBackTrackInfoParentheticalAssertion
;
980 int countCheckedAfterAssertion
= state
.checkedTotal
- term
.inputPosition
;
982 if (term
.invertOrCapture
) {
984 storeToFrame(index
, parenthesesFrameLocation
);
986 state
.checkedTotal
-= countCheckedAfterAssertion
;
987 if (countCheckedAfterAssertion
)
988 sub32(Imm32(countCheckedAfterAssertion
), index
);
990 TermGenerationState
parenthesesState(disjunction
, state
.checkedTotal
);
991 generateParenthesesDisjunction(state
.term(), parenthesesState
, alternativeFrameLocation
);
992 // Success! - which means - Fail!
993 loadFromFrame(parenthesesFrameLocation
, index
);
994 state
.jumpToBacktrack(jump(), this);
996 // And fail means success.
997 parenthesesState
.linkAlternativeBacktracks(this);
998 loadFromFrame(parenthesesFrameLocation
, index
);
1000 state
.checkedTotal
+= countCheckedAfterAssertion
;
1003 storeToFrame(index
, parenthesesFrameLocation
);
1005 state
.checkedTotal
-= countCheckedAfterAssertion
;
1006 if (countCheckedAfterAssertion
)
1007 sub32(Imm32(countCheckedAfterAssertion
), index
);
1009 TermGenerationState
parenthesesState(disjunction
, state
.checkedTotal
);
1010 generateParenthesesDisjunction(state
.term(), parenthesesState
, alternativeFrameLocation
);
1011 // Success! - which means - Success!
1012 loadFromFrame(parenthesesFrameLocation
, index
);
1013 Jump success
= jump();
1015 parenthesesState
.linkAlternativeBacktracks(this);
1016 loadFromFrame(parenthesesFrameLocation
, index
);
1017 state
.jumpToBacktrack(jump(), this);
1021 state
.checkedTotal
+= countCheckedAfterAssertion
;
1025 void generateTerm(TermGenerationState
& state
)
1027 PatternTerm
& term
= state
.term();
1029 switch (term
.type
) {
1030 case PatternTerm::TypeAssertionBOL
:
1031 generateAssertionBOL(state
);
1034 case PatternTerm::TypeAssertionEOL
:
1035 generateAssertionEOL(state
);
1038 case PatternTerm::TypeAssertionWordBoundary
:
1039 generateAssertionWordBoundary(state
);
1042 case PatternTerm::TypePatternCharacter
:
1043 switch (term
.quantityType
) {
1044 case QuantifierFixedCount
:
1045 if (term
.quantityCount
== 1) {
1046 if (state
.isSinglePatternCharacterLookaheadTerm() && (state
.lookaheadTerm().inputPosition
== (term
.inputPosition
+ 1))) {
1047 generatePatternCharacterPair(state
);
1050 generatePatternCharacterSingle(state
);
1052 generatePatternCharacterFixed(state
);
1054 case QuantifierGreedy
:
1055 generatePatternCharacterGreedy(state
);
1057 case QuantifierNonGreedy
:
1058 generatePatternCharacterNonGreedy(state
);
1063 case PatternTerm::TypeCharacterClass
:
1064 switch (term
.quantityType
) {
1065 case QuantifierFixedCount
:
1066 if (term
.quantityCount
== 1)
1067 generateCharacterClassSingle(state
);
1069 generateCharacterClassFixed(state
);
1071 case QuantifierGreedy
:
1072 generateCharacterClassGreedy(state
);
1074 case QuantifierNonGreedy
:
1075 generateCharacterClassNonGreedy(state
);
1080 case PatternTerm::TypeBackReference
:
1081 m_generationFailed
= true;
1084 case PatternTerm::TypeForwardReference
:
1087 case PatternTerm::TypeParenthesesSubpattern
:
1088 if ((term
.quantityCount
== 1) && !term
.parentheses
.isCopy
)
1089 generateParenthesesSingle(state
);
1091 m_generationFailed
= true;
1094 case PatternTerm::TypeParentheticalAssertion
:
1095 generateParentheticalAssertion(state
);
1100 void generateDisjunction(PatternDisjunction
* disjunction
)
1102 TermGenerationState
state(disjunction
, 0);
1103 state
.resetAlternative();
1105 // Plant a check to see if there is sufficient input available to run the first alternative.
1106 // Jumping back to the label 'firstAlternative' will get to this check, jumping to
1107 // 'firstAlternativeInputChecked' will jump directly to matching the alternative having
1108 // skipped this check.
1110 Label
firstAlternative(this);
1112 // check availability for the next alternative
1113 int countCheckedForCurrentAlternative
= 0;
1114 int countToCheckForFirstAlternative
= 0;
1115 bool hasShorterAlternatives
= false;
1116 JumpList notEnoughInputForPreviousAlternative
;
1118 if (state
.alternativeValid()) {
1119 PatternAlternative
* alternative
= state
.alternative();
1120 countToCheckForFirstAlternative
= alternative
->m_minimumSize
;
1121 state
.checkedTotal
+= countToCheckForFirstAlternative
;
1122 if (countToCheckForFirstAlternative
)
1123 notEnoughInputForPreviousAlternative
.append(jumpIfNoAvailableInput(countToCheckForFirstAlternative
));
1124 countCheckedForCurrentAlternative
= countToCheckForFirstAlternative
;
1127 Label
firstAlternativeInputChecked(this);
1129 while (state
.alternativeValid()) {
1130 // Track whether any alternatives are shorter than the first one.
1131 hasShorterAlternatives
= hasShorterAlternatives
|| (countCheckedForCurrentAlternative
< countToCheckForFirstAlternative
);
1133 PatternAlternative
* alternative
= state
.alternative();
1134 optimizeAlternative(alternative
);
1136 for (state
.resetTerm(); state
.termValid(); state
.nextTerm())
1137 generateTerm(state
);
1139 // If we get here, the alternative matched.
1140 if (m_pattern
.m_body
->m_callFrameSize
)
1141 addPtr(Imm32(m_pattern
.m_body
->m_callFrameSize
* sizeof(void*)), stackPointerRegister
);
1143 ASSERT(index
!= returnRegister
);
1144 if (m_pattern
.m_body
->m_hasFixedSize
) {
1145 move(index
, returnRegister
);
1146 if (alternative
->m_minimumSize
)
1147 sub32(Imm32(alternative
->m_minimumSize
), returnRegister
);
1149 pop(returnRegister
);
1150 store32(index
, Address(output
, 4));
1151 store32(returnRegister
, output
);
1155 state
.nextAlternative();
1157 // if there are any more alternatives, plant the check for input before looping.
1158 if (state
.alternativeValid()) {
1159 PatternAlternative
* nextAlternative
= state
.alternative();
1160 int countToCheckForNextAlternative
= nextAlternative
->m_minimumSize
;
1162 if (countCheckedForCurrentAlternative
> countToCheckForNextAlternative
) { // CASE 1: current alternative was longer than the next one.
1163 // If we get here, there the last input checked failed.
1164 notEnoughInputForPreviousAlternative
.link(this);
1166 // Check if sufficent input available to run the next alternative
1167 notEnoughInputForPreviousAlternative
.append(jumpIfNoAvailableInput(countToCheckForNextAlternative
- countCheckedForCurrentAlternative
));
1168 // We are now in the correct state to enter the next alternative; this add is only required
1169 // to mirror and revert operation of the sub32, just below.
1170 add32(Imm32(countCheckedForCurrentAlternative
- countToCheckForNextAlternative
), index
);
1172 // If we get here, there the last input checked passed.
1173 state
.linkAlternativeBacktracks(this);
1174 // No need to check if we can run the next alternative, since it is shorter -
1175 // just update index.
1176 sub32(Imm32(countCheckedForCurrentAlternative
- countToCheckForNextAlternative
), index
);
1177 } else if (countCheckedForCurrentAlternative
< countToCheckForNextAlternative
) { // CASE 2: next alternative is longer than the current one.
1178 // If we get here, there the last input checked failed.
1179 // If there is insufficient input to run the current alternative, and the next alternative is longer,
1180 // then there is definitely not enough input to run it - don't even check. Just adjust index, as if
1182 notEnoughInputForPreviousAlternative
.link(this);
1183 add32(Imm32(countToCheckForNextAlternative
- countCheckedForCurrentAlternative
), index
);
1184 notEnoughInputForPreviousAlternative
.append(jump());
1186 // The next alternative is longer than the current one; check the difference.
1187 state
.linkAlternativeBacktracks(this);
1188 notEnoughInputForPreviousAlternative
.append(jumpIfNoAvailableInput(countToCheckForNextAlternative
- countCheckedForCurrentAlternative
));
1189 } else { // CASE 3: Both alternatives are the same length.
1190 ASSERT(countCheckedForCurrentAlternative
== countToCheckForNextAlternative
);
1192 // If the next alterative is the same length as this one, then no need to check the input -
1193 // if there was sufficent input to run the current alternative then there is sufficient
1194 // input to run the next one; if not, there isn't.
1195 state
.linkAlternativeBacktracks(this);
1198 state
.checkedTotal
-= countCheckedForCurrentAlternative
;
1199 countCheckedForCurrentAlternative
= countToCheckForNextAlternative
;
1200 state
.checkedTotal
+= countCheckedForCurrentAlternative
;
1204 // If we get here, all Alternatives failed...
1206 state
.checkedTotal
-= countCheckedForCurrentAlternative
;
1208 // How much more input need there be to be able to retry from the first alternative?
1210 // /yarr_jit/ or /wrec|pcre/
1211 // In these examples we need check for one more input before looping.
1213 // In this case we need check for 5 more input to loop (+4 to allow for the first alterative
1214 // being four longer than the last alternative checked, and another +1 to effectively move
1215 // the start position along by one).
1216 // /yarr|rules/ or /wrec|notsomuch/
1217 // In these examples, provided that there was sufficient input to have just been matching for
1218 // the second alternative we can loop without checking for available input (since the second
1219 // alternative is longer than the first). In the latter example we need to decrement index
1220 // (by 4) so the start position is only progressed by 1 from the last iteration.
1221 int incrementForNextIter
= (countToCheckForFirstAlternative
- countCheckedForCurrentAlternative
) + 1;
1223 // First, deal with the cases where there was sufficient input to try the last alternative.
1224 if (incrementForNextIter
> 0) // We need to check for more input anyway, fall through to the checking below.
1225 state
.linkAlternativeBacktracks(this);
1226 else if (m_pattern
.m_body
->m_hasFixedSize
&& !incrementForNextIter
) // No need to update anything, link these backtracks straight to the to pof the loop!
1227 state
.linkAlternativeBacktracksTo(firstAlternativeInputChecked
, this);
1228 else { // no need to check the input, but we do have some bookkeeping to do first.
1229 state
.linkAlternativeBacktracks(this);
1231 // Where necessary update our preserved start position.
1232 if (!m_pattern
.m_body
->m_hasFixedSize
) {
1234 sub32(Imm32(countCheckedForCurrentAlternative
- 1), regT0
);
1235 poke(regT0
, m_pattern
.m_body
->m_callFrameSize
);
1238 // Update index if necessary, and loop (without checking).
1239 if (incrementForNextIter
)
1240 add32(Imm32(incrementForNextIter
), index
);
1241 jump().linkTo(firstAlternativeInputChecked
, this);
1244 notEnoughInputForPreviousAlternative
.link(this);
1245 // Update our idea of the start position, if we're tracking this.
1246 if (!m_pattern
.m_body
->m_hasFixedSize
) {
1247 if (countCheckedForCurrentAlternative
- 1) {
1249 sub32(Imm32(countCheckedForCurrentAlternative
- 1), regT0
);
1250 poke(regT0
, m_pattern
.m_body
->m_callFrameSize
);
1252 poke(index
, m_pattern
.m_body
->m_callFrameSize
);
1254 // Check if there is sufficent input to run the first alternative again.
1255 jumpIfAvailableInput(incrementForNextIter
).linkTo(firstAlternativeInputChecked
, this);
1256 // No - insufficent input to run the first alteranative, are there any other alternatives we
1257 // might need to check? If so, the last check will have left the index incremented by
1258 // (countToCheckForFirstAlternative + 1), so we need test whether countToCheckForFirstAlternative
1259 // LESS input is available, to have the effect of just progressing the start position by 1
1260 // from the last iteration. If this check passes we can just jump up to the check associated
1261 // with the first alternative in the loop. This is a bit sad, since we'll end up trying the
1262 // first alternative again, and this check will fail (otherwise the check planted just above
1263 // here would have passed). This is a bit sad, however it saves trying to do something more
1264 // complex here in compilation, and in the common case we should end up coallescing the checks.
1266 // FIXME: a nice improvement here may be to stop trying to match sooner, based on the least
1267 // of the minimum-alternative-lengths. E.g. if I have two alternatives of length 200 and 150,
1268 // and a string of length 100, we'll end up looping index from 0 to 100, checking whether there
1269 // is sufficient input to run either alternative (constantly failing). If there had been only
1270 // one alternative, or if the shorter alternative had come first, we would have terminated
1272 if (hasShorterAlternatives
)
1273 jumpIfAvailableInput(-countToCheckForFirstAlternative
).linkTo(firstAlternative
, this);
1274 // index will now be a bit garbled (depending on whether 'hasShorterAlternatives' is true,
1275 // it has either been incremented by 1 or by (countToCheckForFirstAlternative + 1) ...
1276 // but since we're about to return a failure this doesn't really matter!)
1278 unsigned frameSize
= m_pattern
.m_body
->m_callFrameSize
;
1279 if (!m_pattern
.m_body
->m_hasFixedSize
)
1282 addPtr(Imm32(frameSize
* sizeof(void*)), stackPointerRegister
);
1284 move(Imm32(-1), returnRegister
);
1289 void generateEnter()
1292 push(X86Registers::ebp
);
1293 move(stackPointerRegister
, X86Registers::ebp
);
1294 push(X86Registers::ebx
);
1296 push(X86Registers::ebp
);
1297 move(stackPointerRegister
, X86Registers::ebp
);
1298 // TODO: do we need spill registers to fill the output pointer if there are no sub captures?
1299 push(X86Registers::ebx
);
1300 push(X86Registers::edi
);
1301 push(X86Registers::esi
);
1302 // load output into edi (2 = saved ebp + return address).
1304 loadPtr(Address(X86Registers::ebp
, 2 * sizeof(void*)), input
);
1305 loadPtr(Address(X86Registers::ebp
, 3 * sizeof(void*)), index
);
1306 loadPtr(Address(X86Registers::ebp
, 4 * sizeof(void*)), length
);
1307 loadPtr(Address(X86Registers::ebp
, 5 * sizeof(void*)), output
);
1309 loadPtr(Address(X86Registers::ebp
, 2 * sizeof(void*)), output
);
1312 push(ARMRegisters::r4
);
1313 push(ARMRegisters::r5
);
1314 push(ARMRegisters::r6
);
1315 move(ARMRegisters::r3
, output
);
1319 void generateReturn()
1322 pop(X86Registers::ebx
);
1323 pop(X86Registers::ebp
);
1325 pop(X86Registers::esi
);
1326 pop(X86Registers::edi
);
1327 pop(X86Registers::ebx
);
1328 pop(X86Registers::ebp
);
1330 pop(ARMRegisters::r6
);
1331 pop(ARMRegisters::r5
);
1332 pop(ARMRegisters::r4
);
1338 RegexGenerator(RegexPattern
& pattern
)
1339 : m_pattern(pattern
)
1340 , m_generationFailed(false)
1348 // TODO: do I really want this on the stack?
1349 if (!m_pattern
.m_body
->m_hasFixedSize
)
1352 if (m_pattern
.m_body
->m_callFrameSize
)
1353 subPtr(Imm32(m_pattern
.m_body
->m_callFrameSize
* sizeof(void*)), stackPointerRegister
);
1355 generateDisjunction(m_pattern
.m_body
);
1358 void compile(JSGlobalData
* globalData
, RegexCodeBlock
& jitObject
)
1362 LinkBuffer
patchBuffer(this, globalData
->executableAllocator
.poolForSize(size()));
1364 for (unsigned i
= 0; i
< m_backtrackRecords
.size(); ++i
)
1365 patchBuffer
.patch(m_backtrackRecords
[i
].dataLabel
, patchBuffer
.locationOf(m_backtrackRecords
[i
].backtrackLocation
));
1367 jitObject
.set(patchBuffer
.finalizeCode());
1370 bool generationFailed()
1372 return m_generationFailed
;
1376 RegexPattern
& m_pattern
;
1377 Vector
<AlternativeBacktrackRecord
> m_backtrackRecords
;
1378 bool m_generationFailed
;
1381 void jitCompileRegex(JSGlobalData
* globalData
, RegexCodeBlock
& jitObject
, const UString
& patternString
, unsigned& numSubpatterns
, const char*& error
, bool ignoreCase
, bool multiline
)
1383 RegexPattern
pattern(ignoreCase
, multiline
);
1385 if ((error
= compileRegex(patternString
, pattern
)))
1388 numSubpatterns
= pattern
.m_numSubpatterns
;
1390 RegexGenerator
generator(pattern
);
1391 generator
.compile(globalData
, jitObject
);
1393 if (generator
.generationFailed()) {
1394 JSRegExpIgnoreCaseOption ignoreCaseOption
= ignoreCase
? JSRegExpIgnoreCase
: JSRegExpDoNotIgnoreCase
;
1395 JSRegExpMultilineOption multilineOption
= multiline
? JSRegExpMultiline
: JSRegExpSingleLine
;
1396 jitObject
.setFallback(jsRegExpCompile(reinterpret_cast<const UChar
*>(patternString
.data()), patternString
.size(), ignoreCaseOption
, multilineOption
, &numSubpatterns
, &error
));