2 * Copyright (C) 2009 Apple Inc. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 #include "ASCIICType.h"
30 #include "JSGlobalData.h"
31 #include "LinkBuffer.h"
32 #include "MacroAssembler.h"
33 #include "RegexCompiler.h"
35 #include "pcre.h" // temporary, remove when fallback is removed.
41 namespace JSC
{ namespace Yarr
{
44 class RegexGenerator
: private MacroAssembler
{
45 friend void jitCompileRegex(JSGlobalData
* globalData
, RegexCodeBlock
& jitObject
, const UString
& pattern
, unsigned& numSubpatterns
, const char*& error
, bool ignoreCase
, bool multiline
);
47 #if PLATFORM_ARM_ARCH(7)
48 static const RegisterID input
= ARM::r0
;
49 static const RegisterID index
= ARM::r1
;
50 static const RegisterID length
= ARM::r2
;
52 static const RegisterID output
= ARM::r4
;
53 static const RegisterID regT0
= ARM::r5
;
54 static const RegisterID regT1
= ARM::r6
;
56 static const RegisterID returnRegister
= ARM::r0
;
59 static const RegisterID input
= X86::eax
;
60 static const RegisterID index
= X86::edx
;
61 static const RegisterID length
= X86::ecx
;
62 static const RegisterID output
= X86::edi
;
64 static const RegisterID regT0
= X86::ebx
;
65 static const RegisterID regT1
= X86::esi
;
67 static const RegisterID returnRegister
= X86::eax
;
70 static const RegisterID input
= X86::edi
;
71 static const RegisterID index
= X86::esi
;
72 static const RegisterID length
= X86::edx
;
73 static const RegisterID output
= X86::ecx
;
75 static const RegisterID regT0
= X86::eax
;
76 static const RegisterID regT1
= X86::ebx
;
78 static const RegisterID returnRegister
= X86::eax
;
81 void optimizeAlternative(PatternAlternative
* alternative
)
83 if (!alternative
->m_terms
.size())
86 for (unsigned i
= 0; i
< alternative
->m_terms
.size() - 1; ++i
) {
87 PatternTerm
& term
= alternative
->m_terms
[i
];
88 PatternTerm
& nextTerm
= alternative
->m_terms
[i
+ 1];
90 if ((term
.type
== PatternTerm::TypeCharacterClass
)
91 && (term
.quantityType
== QuantifierFixedCount
)
92 && (nextTerm
.type
== PatternTerm::TypePatternCharacter
)
93 && (nextTerm
.quantityType
== QuantifierFixedCount
)) {
94 PatternTerm termCopy
= term
;
95 alternative
->m_terms
[i
] = nextTerm
;
96 alternative
->m_terms
[i
+ 1] = termCopy
;
101 void matchCharacterClassRange(RegisterID character
, JumpList
& failures
, JumpList
& matchDest
, const CharacterRange
* ranges
, unsigned count
, unsigned* matchIndex
, const UChar
* matches
, unsigned matchCount
)
104 // pick which range we're going to generate
105 int which
= count
>> 1;
106 char lo
= ranges
[which
].begin
;
107 char hi
= ranges
[which
].end
;
109 // check if there are any ranges or matches below lo. If not, just jl to failure -
110 // if there is anything else to check, check that first, if it falls through jmp to failure.
111 if ((*matchIndex
< matchCount
) && (matches
[*matchIndex
] < lo
)) {
112 Jump loOrAbove
= branch32(GreaterThanOrEqual
, character
, Imm32((unsigned short)lo
));
114 // generate code for all ranges before this one
116 matchCharacterClassRange(character
, failures
, matchDest
, ranges
, which
, matchIndex
, matches
, matchCount
);
118 while ((*matchIndex
< matchCount
) && (matches
[*matchIndex
] < lo
)) {
119 matchDest
.append(branch32(Equal
, character
, Imm32((unsigned short)matches
[*matchIndex
])));
122 failures
.append(jump());
124 loOrAbove
.link(this);
126 Jump loOrAbove
= branch32(GreaterThanOrEqual
, character
, Imm32((unsigned short)lo
));
128 matchCharacterClassRange(character
, failures
, matchDest
, ranges
, which
, matchIndex
, matches
, matchCount
);
129 failures
.append(jump());
131 loOrAbove
.link(this);
133 failures
.append(branch32(LessThan
, character
, Imm32((unsigned short)lo
)));
135 while ((*matchIndex
< matchCount
) && (matches
[*matchIndex
] <= hi
))
138 matchDest
.append(branch32(LessThanOrEqual
, character
, Imm32((unsigned short)hi
)));
139 // fall through to here, the value is above hi.
141 // shuffle along & loop around if there are any more matches to handle.
142 unsigned next
= which
+ 1;
148 void matchCharacterClass(RegisterID character
, JumpList
& matchDest
, const CharacterClass
* charClass
)
151 if (charClass
->m_matchesUnicode
.size() || charClass
->m_rangesUnicode
.size()) {
152 Jump isAscii
= branch32(LessThanOrEqual
, character
, Imm32(0x7f));
154 if (charClass
->m_matchesUnicode
.size()) {
155 for (unsigned i
= 0; i
< charClass
->m_matchesUnicode
.size(); ++i
) {
156 UChar ch
= charClass
->m_matchesUnicode
[i
];
157 matchDest
.append(branch32(Equal
, character
, Imm32(ch
)));
161 if (charClass
->m_rangesUnicode
.size()) {
162 for (unsigned i
= 0; i
< charClass
->m_rangesUnicode
.size(); ++i
) {
163 UChar lo
= charClass
->m_rangesUnicode
[i
].begin
;
164 UChar hi
= charClass
->m_rangesUnicode
[i
].end
;
166 Jump below
= branch32(LessThan
, character
, Imm32(lo
));
167 matchDest
.append(branch32(LessThanOrEqual
, character
, Imm32(hi
)));
172 unicodeFail
= jump();
176 if (charClass
->m_ranges
.size()) {
177 unsigned matchIndex
= 0;
179 matchCharacterClassRange(character
, failures
, matchDest
, charClass
->m_ranges
.begin(), charClass
->m_ranges
.size(), &matchIndex
, charClass
->m_matches
.begin(), charClass
->m_matches
.size());
180 while (matchIndex
< charClass
->m_matches
.size())
181 matchDest
.append(branch32(Equal
, character
, Imm32((unsigned short)charClass
->m_matches
[matchIndex
++])));
184 } else if (charClass
->m_matches
.size()) {
185 // optimization: gather 'a','A' etc back together, can mask & test once.
186 Vector
<char> matchesAZaz
;
188 for (unsigned i
= 0; i
< charClass
->m_matches
.size(); ++i
) {
189 char ch
= charClass
->m_matches
[i
];
190 if (m_pattern
.m_ignoreCase
) {
191 if (isASCIILower(ch
)) {
192 matchesAZaz
.append(ch
);
195 if (isASCIIUpper(ch
))
198 matchDest
.append(branch32(Equal
, character
, Imm32((unsigned short)ch
)));
201 if (unsigned countAZaz
= matchesAZaz
.size()) {
202 or32(Imm32(32), character
);
203 for (unsigned i
= 0; i
< countAZaz
; ++i
)
204 matchDest
.append(branch32(Equal
, character
, Imm32(matchesAZaz
[i
])));
208 if (charClass
->m_matchesUnicode
.size() || charClass
->m_rangesUnicode
.size())
209 unicodeFail
.link(this);
212 // Jumps if input not available; will have (incorrectly) incremented already!
213 Jump
jumpIfNoAvailableInput(unsigned countToCheck
)
215 add32(Imm32(countToCheck
), index
);
216 return branch32(Above
, index
, length
);
219 Jump
jumpIfAvailableInput(unsigned countToCheck
)
221 add32(Imm32(countToCheck
), index
);
222 return branch32(BelowOrEqual
, index
, length
);
227 return branch32(BelowOrEqual
, index
, length
);
232 return branch32(Equal
, index
, length
);
235 Jump
notAtEndOfInput()
237 return branch32(NotEqual
, index
, length
);
240 Jump
jumpIfCharEquals(UChar ch
, int inputPosition
)
242 return branch16(Equal
, BaseIndex(input
, index
, TimesTwo
, inputPosition
* sizeof(UChar
)), Imm32(ch
));
245 Jump
jumpIfCharNotEquals(UChar ch
, int inputPosition
)
247 return branch16(NotEqual
, BaseIndex(input
, index
, TimesTwo
, inputPosition
* sizeof(UChar
)), Imm32(ch
));
250 void readCharacter(int inputPosition
, RegisterID reg
)
252 load16(BaseIndex(input
, index
, TimesTwo
, inputPosition
* sizeof(UChar
)), reg
);
255 void storeToFrame(RegisterID reg
, unsigned frameLocation
)
257 poke(reg
, frameLocation
);
260 void storeToFrame(Imm32 imm
, unsigned frameLocation
)
262 poke(imm
, frameLocation
);
265 DataLabelPtr
storeToFrameWithPatch(unsigned frameLocation
)
267 return storePtrWithPatch(ImmPtr(0), Address(stackPointerRegister
, frameLocation
* sizeof(void*)));
270 void loadFromFrame(unsigned frameLocation
, RegisterID reg
)
272 peek(reg
, frameLocation
);
275 void loadFromFrameAndJump(unsigned frameLocation
)
277 jump(Address(stackPointerRegister
, frameLocation
* sizeof(void*)));
280 struct AlternativeBacktrackRecord
{
281 DataLabelPtr dataLabel
;
282 Label backtrackLocation
;
284 AlternativeBacktrackRecord(DataLabelPtr dataLabel
, Label backtrackLocation
)
285 : dataLabel(dataLabel
)
286 , backtrackLocation(backtrackLocation
)
291 struct TermGenerationState
{
292 TermGenerationState(PatternDisjunction
* disjunction
, unsigned checkedTotal
)
293 : disjunction(disjunction
)
294 , checkedTotal(checkedTotal
)
298 void resetAlternative()
300 isBackTrackGenerated
= false;
303 bool alternativeValid()
305 return alt
< disjunction
->m_alternatives
.size();
307 void nextAlternative()
311 PatternAlternative
* alternative()
313 return disjunction
->m_alternatives
[alt
];
318 ASSERT(alternativeValid());
323 ASSERT(alternativeValid());
324 return t
< alternative()->m_terms
.size();
328 ASSERT(alternativeValid());
333 ASSERT(alternativeValid());
334 return alternative()->m_terms
[t
];
337 PatternTerm
& lookaheadTerm()
339 ASSERT(alternativeValid());
340 ASSERT((t
+ 1) < alternative()->m_terms
.size());
341 return alternative()->m_terms
[t
+ 1];
343 bool isSinglePatternCharacterLookaheadTerm()
345 ASSERT(alternativeValid());
346 return ((t
+ 1) < alternative()->m_terms
.size())
347 && (lookaheadTerm().type
== PatternTerm::TypePatternCharacter
)
348 && (lookaheadTerm().quantityType
== QuantifierFixedCount
)
349 && (lookaheadTerm().quantityCount
== 1);
354 return term().inputPosition
- checkedTotal
;
357 void jumpToBacktrack(Jump jump
, MacroAssembler
* masm
)
359 if (isBackTrackGenerated
)
360 jump
.linkTo(backtrackLabel
, masm
);
362 backTrackJumps
.append(jump
);
364 void jumpToBacktrack(JumpList
& jumps
, MacroAssembler
* masm
)
366 if (isBackTrackGenerated
)
367 jumps
.linkTo(backtrackLabel
, masm
);
369 backTrackJumps
.append(jumps
);
371 bool plantJumpToBacktrackIfExists(MacroAssembler
* masm
)
373 if (isBackTrackGenerated
) {
374 masm
->jump(backtrackLabel
);
379 void addBacktrackJump(Jump jump
)
381 backTrackJumps
.append(jump
);
383 void setBacktrackGenerated(Label label
)
385 isBackTrackGenerated
= true;
386 backtrackLabel
= label
;
388 void linkAlternativeBacktracks(MacroAssembler
* masm
)
390 isBackTrackGenerated
= false;
391 backTrackJumps
.link(masm
);
393 void linkAlternativeBacktracksTo(Label label
, MacroAssembler
* masm
)
395 isBackTrackGenerated
= false;
396 backTrackJumps
.linkTo(label
, masm
);
398 void propagateBacktrackingFrom(TermGenerationState
& nestedParenthesesState
, MacroAssembler
* masm
)
400 jumpToBacktrack(nestedParenthesesState
.backTrackJumps
, masm
);
401 if (nestedParenthesesState
.isBackTrackGenerated
)
402 setBacktrackGenerated(nestedParenthesesState
.backtrackLabel
);
405 PatternDisjunction
* disjunction
;
410 JumpList backTrackJumps
;
411 Label backtrackLabel
;
412 bool isBackTrackGenerated
;
415 void generateAssertionBOL(TermGenerationState
& state
)
417 PatternTerm
& term
= state
.term();
419 if (m_pattern
.m_multiline
) {
420 const RegisterID character
= regT0
;
423 if (!term
.inputPosition
)
424 matchDest
.append(branch32(Equal
, index
, Imm32(state
.checkedTotal
)));
426 readCharacter(state
.inputOffset() - 1, character
);
427 matchCharacterClass(character
, matchDest
, m_pattern
.newlineCharacterClass());
428 state
.jumpToBacktrack(jump(), this);
430 matchDest
.link(this);
432 // Erk, really should poison out these alternatives early. :-/
433 if (term
.inputPosition
)
434 state
.jumpToBacktrack(jump(), this);
436 state
.jumpToBacktrack(branch32(NotEqual
, index
, Imm32(state
.checkedTotal
)), this);
440 void generateAssertionEOL(TermGenerationState
& state
)
442 PatternTerm
& term
= state
.term();
444 if (m_pattern
.m_multiline
) {
445 const RegisterID character
= regT0
;
448 if (term
.inputPosition
== state
.checkedTotal
)
449 matchDest
.append(atEndOfInput());
451 readCharacter(state
.inputOffset(), character
);
452 matchCharacterClass(character
, matchDest
, m_pattern
.newlineCharacterClass());
453 state
.jumpToBacktrack(jump(), this);
455 matchDest
.link(this);
457 if (term
.inputPosition
== state
.checkedTotal
)
458 state
.jumpToBacktrack(notAtEndOfInput(), this);
459 // Erk, really should poison out these alternatives early. :-/
461 state
.jumpToBacktrack(jump(), this);
465 // Also falls though on nextIsNotWordChar.
466 void matchAssertionWordchar(TermGenerationState
& state
, JumpList
& nextIsWordChar
, JumpList
& nextIsNotWordChar
)
468 const RegisterID character
= regT0
;
469 PatternTerm
& term
= state
.term();
471 if (term
.inputPosition
== state
.checkedTotal
)
472 nextIsNotWordChar
.append(atEndOfInput());
474 readCharacter(state
.inputOffset(), character
);
475 matchCharacterClass(character
, nextIsWordChar
, m_pattern
.wordcharCharacterClass());
478 void generateAssertionWordBoundary(TermGenerationState
& state
)
480 const RegisterID character
= regT0
;
481 PatternTerm
& term
= state
.term();
485 if (!term
.inputPosition
)
486 atBegin
= branch32(Equal
, index
, Imm32(state
.checkedTotal
));
487 readCharacter(state
.inputOffset() - 1, character
);
488 matchCharacterClass(character
, matchDest
, m_pattern
.wordcharCharacterClass());
489 if (!term
.inputPosition
)
492 // We fall through to here if the last character was not a wordchar.
493 JumpList nonWordCharThenWordChar
;
494 JumpList nonWordCharThenNonWordChar
;
495 if (term
.invertOrCapture
) {
496 matchAssertionWordchar(state
, nonWordCharThenNonWordChar
, nonWordCharThenWordChar
);
497 nonWordCharThenWordChar
.append(jump());
499 matchAssertionWordchar(state
, nonWordCharThenWordChar
, nonWordCharThenNonWordChar
);
500 nonWordCharThenNonWordChar
.append(jump());
502 state
.jumpToBacktrack(nonWordCharThenNonWordChar
, this);
504 // We jump here if the last character was a wordchar.
505 matchDest
.link(this);
506 JumpList wordCharThenWordChar
;
507 JumpList wordCharThenNonWordChar
;
508 if (term
.invertOrCapture
) {
509 matchAssertionWordchar(state
, wordCharThenNonWordChar
, wordCharThenWordChar
);
510 wordCharThenWordChar
.append(jump());
512 matchAssertionWordchar(state
, wordCharThenWordChar
, wordCharThenNonWordChar
);
513 // This can fall-though!
516 state
.jumpToBacktrack(wordCharThenWordChar
, this);
518 nonWordCharThenWordChar
.link(this);
519 wordCharThenNonWordChar
.link(this);
522 void generatePatternCharacterSingle(TermGenerationState
& state
)
524 const RegisterID character
= regT0
;
525 UChar ch
= state
.term().patternCharacter
;
527 if (m_pattern
.m_ignoreCase
&& isASCIIAlpha(ch
)) {
528 readCharacter(state
.inputOffset(), character
);
529 or32(Imm32(32), character
);
530 state
.jumpToBacktrack(branch32(NotEqual
, character
, Imm32(Unicode::toLower(ch
))), this);
532 ASSERT(!m_pattern
.m_ignoreCase
|| (Unicode::toLower(ch
) == Unicode::toUpper(ch
)));
533 state
.jumpToBacktrack(jumpIfCharNotEquals(ch
, state
.inputOffset()), this);
537 void generatePatternCharacterPair(TermGenerationState
& state
)
539 const RegisterID character
= regT0
;
540 UChar ch1
= state
.term().patternCharacter
;
541 UChar ch2
= state
.lookaheadTerm().patternCharacter
;
544 int chPair
= ch1
| (ch2
<< 16);
546 if (m_pattern
.m_ignoreCase
) {
547 if (isASCIIAlpha(ch1
))
549 if (isASCIIAlpha(ch2
))
554 load32(BaseIndex(input
, index
, TimesTwo
, state
.inputOffset() * sizeof(UChar
)), character
);
555 or32(Imm32(mask
), character
);
556 state
.jumpToBacktrack(branch32(NotEqual
, character
, Imm32(chPair
| mask
)), this);
558 state
.jumpToBacktrack(branch32(NotEqual
, BaseIndex(input
, index
, TimesTwo
, state
.inputOffset() * sizeof(UChar
)), Imm32(chPair
)), this);
561 void generatePatternCharacterFixed(TermGenerationState
& state
)
563 const RegisterID character
= regT0
;
564 const RegisterID countRegister
= regT1
;
565 PatternTerm
& term
= state
.term();
566 UChar ch
= term
.patternCharacter
;
568 move(index
, countRegister
);
569 sub32(Imm32(term
.quantityCount
), countRegister
);
572 if (m_pattern
.m_ignoreCase
&& isASCIIAlpha(ch
)) {
573 load16(BaseIndex(input
, countRegister
, TimesTwo
, (state
.inputOffset() + term
.quantityCount
) * sizeof(UChar
)), character
);
574 or32(Imm32(32), character
);
575 state
.jumpToBacktrack(branch32(NotEqual
, character
, Imm32(Unicode::toLower(ch
))), this);
577 ASSERT(!m_pattern
.m_ignoreCase
|| (Unicode::toLower(ch
) == Unicode::toUpper(ch
)));
578 state
.jumpToBacktrack(branch16(NotEqual
, BaseIndex(input
, countRegister
, TimesTwo
, (state
.inputOffset() + term
.quantityCount
) * sizeof(UChar
)), Imm32(ch
)), this);
580 add32(Imm32(1), countRegister
);
581 branch32(NotEqual
, countRegister
, index
).linkTo(loop
, this);
584 void generatePatternCharacterGreedy(TermGenerationState
& state
)
586 const RegisterID character
= regT0
;
587 const RegisterID countRegister
= regT1
;
588 PatternTerm
& term
= state
.term();
589 UChar ch
= term
.patternCharacter
;
591 move(Imm32(0), countRegister
);
595 failures
.append(atEndOfInput());
596 if (m_pattern
.m_ignoreCase
&& isASCIIAlpha(ch
)) {
597 readCharacter(state
.inputOffset(), character
);
598 or32(Imm32(32), character
);
599 failures
.append(branch32(NotEqual
, character
, Imm32(Unicode::toLower(ch
))));
601 ASSERT(!m_pattern
.m_ignoreCase
|| (Unicode::toLower(ch
) == Unicode::toUpper(ch
)));
602 failures
.append(jumpIfCharNotEquals(ch
, state
.inputOffset()));
604 add32(Imm32(1), countRegister
);
605 add32(Imm32(1), index
);
606 branch32(NotEqual
, countRegister
, Imm32(term
.quantityCount
)).linkTo(loop
, this);
607 failures
.append(jump());
609 Label
backtrackBegin(this);
610 loadFromFrame(term
.frameLocation
, countRegister
);
611 state
.jumpToBacktrack(branchTest32(Zero
, countRegister
), this);
612 sub32(Imm32(1), countRegister
);
613 sub32(Imm32(1), index
);
617 storeToFrame(countRegister
, term
.frameLocation
);
619 state
.setBacktrackGenerated(backtrackBegin
);
622 void generatePatternCharacterNonGreedy(TermGenerationState
& state
)
624 const RegisterID character
= regT0
;
625 const RegisterID countRegister
= regT1
;
626 PatternTerm
& term
= state
.term();
627 UChar ch
= term
.patternCharacter
;
629 move(Imm32(0), countRegister
);
631 Jump firstTimeDoNothing
= jump();
633 Label
hardFail(this);
634 sub32(countRegister
, index
);
635 state
.jumpToBacktrack(jump(), this);
637 Label
backtrackBegin(this);
638 loadFromFrame(term
.frameLocation
, countRegister
);
640 atEndOfInput().linkTo(hardFail
, this);
641 branch32(Equal
, countRegister
, Imm32(term
.quantityCount
), hardFail
);
642 if (m_pattern
.m_ignoreCase
&& isASCIIAlpha(ch
)) {
643 readCharacter(state
.inputOffset(), character
);
644 or32(Imm32(32), character
);
645 branch32(NotEqual
, character
, Imm32(Unicode::toLower(ch
))).linkTo(hardFail
, this);
647 ASSERT(!m_pattern
.m_ignoreCase
|| (Unicode::toLower(ch
) == Unicode::toUpper(ch
)));
648 jumpIfCharNotEquals(ch
, state
.inputOffset()).linkTo(hardFail
, this);
651 add32(Imm32(1), countRegister
);
652 add32(Imm32(1), index
);
654 firstTimeDoNothing
.link(this);
655 storeToFrame(countRegister
, term
.frameLocation
);
657 state
.setBacktrackGenerated(backtrackBegin
);
660 void generateCharacterClassSingle(TermGenerationState
& state
)
662 const RegisterID character
= regT0
;
663 PatternTerm
& term
= state
.term();
666 readCharacter(state
.inputOffset(), character
);
667 matchCharacterClass(character
, matchDest
, term
.characterClass
);
669 if (term
.invertOrCapture
)
670 state
.jumpToBacktrack(matchDest
, this);
672 state
.jumpToBacktrack(jump(), this);
673 matchDest
.link(this);
677 void generateCharacterClassFixed(TermGenerationState
& state
)
679 const RegisterID character
= regT0
;
680 const RegisterID countRegister
= regT1
;
681 PatternTerm
& term
= state
.term();
683 move(index
, countRegister
);
684 sub32(Imm32(term
.quantityCount
), countRegister
);
688 load16(BaseIndex(input
, countRegister
, TimesTwo
, (state
.inputOffset() + term
.quantityCount
) * sizeof(UChar
)), character
);
689 matchCharacterClass(character
, matchDest
, term
.characterClass
);
691 if (term
.invertOrCapture
)
692 state
.jumpToBacktrack(matchDest
, this);
694 state
.jumpToBacktrack(jump(), this);
695 matchDest
.link(this);
698 add32(Imm32(1), countRegister
);
699 branch32(NotEqual
, countRegister
, index
).linkTo(loop
, this);
702 void generateCharacterClassGreedy(TermGenerationState
& state
)
704 const RegisterID character
= regT0
;
705 const RegisterID countRegister
= regT1
;
706 PatternTerm
& term
= state
.term();
708 move(Imm32(0), countRegister
);
712 failures
.append(atEndOfInput());
714 if (term
.invertOrCapture
) {
715 readCharacter(state
.inputOffset(), character
);
716 matchCharacterClass(character
, failures
, term
.characterClass
);
719 readCharacter(state
.inputOffset(), character
);
720 matchCharacterClass(character
, matchDest
, term
.characterClass
);
721 failures
.append(jump());
722 matchDest
.link(this);
725 add32(Imm32(1), countRegister
);
726 add32(Imm32(1), index
);
727 branch32(NotEqual
, countRegister
, Imm32(term
.quantityCount
)).linkTo(loop
, this);
728 failures
.append(jump());
730 Label
backtrackBegin(this);
731 loadFromFrame(term
.frameLocation
, countRegister
);
732 state
.jumpToBacktrack(branchTest32(Zero
, countRegister
), this);
733 sub32(Imm32(1), countRegister
);
734 sub32(Imm32(1), index
);
738 storeToFrame(countRegister
, term
.frameLocation
);
740 state
.setBacktrackGenerated(backtrackBegin
);
743 void generateCharacterClassNonGreedy(TermGenerationState
& state
)
745 const RegisterID character
= regT0
;
746 const RegisterID countRegister
= regT1
;
747 PatternTerm
& term
= state
.term();
749 move(Imm32(0), countRegister
);
751 Jump firstTimeDoNothing
= jump();
753 Label
hardFail(this);
754 sub32(countRegister
, index
);
755 state
.jumpToBacktrack(jump(), this);
757 Label
backtrackBegin(this);
758 loadFromFrame(term
.frameLocation
, countRegister
);
760 atEndOfInput().linkTo(hardFail
, this);
761 branch32(Equal
, countRegister
, Imm32(term
.quantityCount
), hardFail
);
764 readCharacter(state
.inputOffset(), character
);
765 matchCharacterClass(character
, matchDest
, term
.characterClass
);
767 if (term
.invertOrCapture
)
768 matchDest
.linkTo(hardFail
, this);
771 matchDest
.link(this);
774 add32(Imm32(1), countRegister
);
775 add32(Imm32(1), index
);
777 firstTimeDoNothing
.link(this);
778 storeToFrame(countRegister
, term
.frameLocation
);
780 state
.setBacktrackGenerated(backtrackBegin
);
783 void generateParenthesesDisjunction(PatternTerm
& parenthesesTerm
, TermGenerationState
& state
, unsigned alternativeFrameLocation
)
785 ASSERT((parenthesesTerm
.type
== PatternTerm::TypeParenthesesSubpattern
) || (parenthesesTerm
.type
== PatternTerm::TypeParentheticalAssertion
));
786 ASSERT(parenthesesTerm
.quantityCount
== 1);
788 PatternDisjunction
* disjunction
= parenthesesTerm
.parentheses
.disjunction
;
789 unsigned preCheckedCount
= ((parenthesesTerm
.quantityType
== QuantifierFixedCount
) && (parenthesesTerm
.type
!= PatternTerm::TypeParentheticalAssertion
)) ? disjunction
->m_minimumSize
: 0;
791 if (disjunction
->m_alternatives
.size() == 1) {
792 state
.resetAlternative();
793 ASSERT(state
.alternativeValid());
794 PatternAlternative
* alternative
= state
.alternative();
795 optimizeAlternative(alternative
);
797 int countToCheck
= alternative
->m_minimumSize
- preCheckedCount
;
799 ASSERT((parenthesesTerm
.type
== PatternTerm::TypeParentheticalAssertion
) || (parenthesesTerm
.quantityType
!= QuantifierFixedCount
));
801 // FIXME: This is quite horrible. The call to 'plantJumpToBacktrackIfExists'
802 // will be forced to always trampoline into here, just to decrement the index.
806 Label
backtrackBegin(this);
807 sub32(Imm32(countToCheck
), index
);
808 state
.addBacktrackJump(jump());
812 state
.setBacktrackGenerated(backtrackBegin
);
814 state
.jumpToBacktrack(jumpIfNoAvailableInput(countToCheck
), this);
815 state
.checkedTotal
+= countToCheck
;
818 for (state
.resetTerm(); state
.termValid(); state
.nextTerm())
821 state
.checkedTotal
-= countToCheck
;
825 for (state
.resetAlternative(); state
.alternativeValid(); state
.nextAlternative()) {
827 PatternAlternative
* alternative
= state
.alternative();
828 optimizeAlternative(alternative
);
830 ASSERT(alternative
->m_minimumSize
>= preCheckedCount
);
831 int countToCheck
= alternative
->m_minimumSize
- preCheckedCount
;
833 state
.addBacktrackJump(jumpIfNoAvailableInput(countToCheck
));
834 state
.checkedTotal
+= countToCheck
;
837 for (state
.resetTerm(); state
.termValid(); state
.nextTerm())
840 // Matched an alternative.
841 DataLabelPtr dataLabel
= storeToFrameWithPatch(alternativeFrameLocation
);
842 successes
.append(jump());
844 // Alternative did not match.
845 Label
backtrackLocation(this);
847 // Can we backtrack the alternative? - if so, do so. If not, just fall through to the next one.
848 state
.plantJumpToBacktrackIfExists(this);
850 state
.linkAlternativeBacktracks(this);
853 sub32(Imm32(countToCheck
), index
);
854 state
.checkedTotal
-= countToCheck
;
857 m_backtrackRecords
.append(AlternativeBacktrackRecord(dataLabel
, backtrackLocation
));
859 // We fall through to here when the last alternative fails.
860 // Add a backtrack out of here for the parenthese handling code to link up.
861 state
.addBacktrackJump(jump());
863 // Generate a trampoline for the parens code to backtrack to, to retry the
865 state
.setBacktrackGenerated(label());
866 loadFromFrameAndJump(alternativeFrameLocation
);
868 // FIXME: both of the above hooks are a little inefficient, in that you
869 // may end up trampolining here, just to trampoline back out to the
870 // parentheses code, or vice versa. We can probably eliminate a jump
871 // by restructuring, but coding this way for now for simplicity during
874 successes
.link(this);
878 void generateParenthesesSingle(TermGenerationState
& state
)
880 const RegisterID indexTemporary
= regT0
;
881 PatternTerm
& term
= state
.term();
882 PatternDisjunction
* disjunction
= term
.parentheses
.disjunction
;
883 ASSERT(term
.quantityCount
== 1);
885 unsigned preCheckedCount
= ((term
.quantityCount
== 1) && (term
.quantityType
== QuantifierFixedCount
)) ? disjunction
->m_minimumSize
: 0;
887 unsigned parenthesesFrameLocation
= term
.frameLocation
;
888 unsigned alternativeFrameLocation
= parenthesesFrameLocation
;
889 if (term
.quantityType
!= QuantifierFixedCount
)
890 alternativeFrameLocation
+= RegexStackSpaceForBackTrackInfoParenthesesOnce
;
892 // optimized case - no capture & no quantifier can be handled in a light-weight manner.
893 if (!term
.invertOrCapture
&& (term
.quantityType
== QuantifierFixedCount
)) {
894 TermGenerationState
parenthesesState(disjunction
, state
.checkedTotal
);
895 generateParenthesesDisjunction(state
.term(), parenthesesState
, alternativeFrameLocation
);
896 // this expects that any backtracks back out of the parentheses will be in the
897 // parenthesesState's backTrackJumps vector, and that if they need backtracking
898 // they will have set an entry point on the parenthesesState's backtrackLabel.
899 state
.propagateBacktrackingFrom(parenthesesState
, this);
901 Jump nonGreedySkipParentheses
;
902 Label nonGreedyTryParentheses
;
903 if (term
.quantityType
== QuantifierGreedy
)
904 storeToFrame(Imm32(1), parenthesesFrameLocation
);
905 else if (term
.quantityType
== QuantifierNonGreedy
) {
906 storeToFrame(Imm32(0), parenthesesFrameLocation
);
907 nonGreedySkipParentheses
= jump();
908 nonGreedyTryParentheses
= label();
909 storeToFrame(Imm32(1), parenthesesFrameLocation
);
912 // store the match start index
913 if (term
.invertOrCapture
) {
914 int inputOffset
= state
.inputOffset() - preCheckedCount
;
916 move(index
, indexTemporary
);
917 add32(Imm32(inputOffset
), indexTemporary
);
918 store32(indexTemporary
, Address(output
, (term
.parentheses
.subpatternId
<< 1) * sizeof(int)));
920 store32(index
, Address(output
, (term
.parentheses
.subpatternId
<< 1) * sizeof(int)));
923 // generate the body of the parentheses
924 TermGenerationState
parenthesesState(disjunction
, state
.checkedTotal
);
925 generateParenthesesDisjunction(state
.term(), parenthesesState
, alternativeFrameLocation
);
927 // store the match end index
928 if (term
.invertOrCapture
) {
929 int inputOffset
= state
.inputOffset();
931 move(index
, indexTemporary
);
932 add32(Imm32(state
.inputOffset()), indexTemporary
);
933 store32(indexTemporary
, Address(output
, ((term
.parentheses
.subpatternId
<< 1) + 1) * sizeof(int)));
935 store32(index
, Address(output
, ((term
.parentheses
.subpatternId
<< 1) + 1) * sizeof(int)));
937 Jump success
= jump();
939 // A failure AFTER the parens jumps here
940 Label
backtrackFromAfterParens(this);
942 if (term
.quantityType
== QuantifierGreedy
) {
943 // If this is zero we have now tested with both with and without the parens.
944 loadFromFrame(parenthesesFrameLocation
, indexTemporary
);
945 state
.jumpToBacktrack(branchTest32(Zero
, indexTemporary
), this);
946 } else if (term
.quantityType
== QuantifierNonGreedy
) {
947 // If this is zero we have now tested with both with and without the parens.
948 loadFromFrame(parenthesesFrameLocation
, indexTemporary
);
949 branchTest32(Zero
, indexTemporary
).linkTo(nonGreedyTryParentheses
, this);
952 parenthesesState
.plantJumpToBacktrackIfExists(this);
953 // A failure WITHIN the parens jumps here
954 parenthesesState
.linkAlternativeBacktracks(this);
955 if (term
.invertOrCapture
) {
956 store32(Imm32(-1), Address(output
, (term
.parentheses
.subpatternId
<< 1) * sizeof(int)));
957 store32(Imm32(-1), Address(output
, ((term
.parentheses
.subpatternId
<< 1) + 1) * sizeof(int)));
960 if (term
.quantityType
== QuantifierGreedy
)
961 storeToFrame(Imm32(0), parenthesesFrameLocation
);
963 state
.jumpToBacktrack(jump(), this);
965 state
.setBacktrackGenerated(backtrackFromAfterParens
);
966 if (term
.quantityType
== QuantifierNonGreedy
)
967 nonGreedySkipParentheses
.link(this);
972 void generateParentheticalAssertion(TermGenerationState
& state
)
974 PatternTerm
& term
= state
.term();
975 PatternDisjunction
* disjunction
= term
.parentheses
.disjunction
;
976 ASSERT(term
.quantityCount
== 1);
977 ASSERT(term
.quantityType
== QuantifierFixedCount
);
979 unsigned parenthesesFrameLocation
= term
.frameLocation
;
980 unsigned alternativeFrameLocation
= parenthesesFrameLocation
+ RegexStackSpaceForBackTrackInfoParentheticalAssertion
;
982 int countCheckedAfterAssertion
= state
.checkedTotal
- term
.inputPosition
;
984 if (term
.invertOrCapture
) {
986 storeToFrame(index
, parenthesesFrameLocation
);
988 state
.checkedTotal
-= countCheckedAfterAssertion
;
989 if (countCheckedAfterAssertion
)
990 sub32(Imm32(countCheckedAfterAssertion
), index
);
992 TermGenerationState
parenthesesState(disjunction
, state
.checkedTotal
);
993 generateParenthesesDisjunction(state
.term(), parenthesesState
, alternativeFrameLocation
);
994 // Success! - which means - Fail!
995 loadFromFrame(parenthesesFrameLocation
, index
);
996 state
.jumpToBacktrack(jump(), this);
998 // And fail means success.
999 parenthesesState
.linkAlternativeBacktracks(this);
1000 loadFromFrame(parenthesesFrameLocation
, index
);
1002 state
.checkedTotal
+= countCheckedAfterAssertion
;
1005 storeToFrame(index
, parenthesesFrameLocation
);
1007 state
.checkedTotal
-= countCheckedAfterAssertion
;
1008 if (countCheckedAfterAssertion
)
1009 sub32(Imm32(countCheckedAfterAssertion
), index
);
1011 TermGenerationState
parenthesesState(disjunction
, state
.checkedTotal
);
1012 generateParenthesesDisjunction(state
.term(), parenthesesState
, alternativeFrameLocation
);
1013 // Success! - which means - Success!
1014 loadFromFrame(parenthesesFrameLocation
, index
);
1015 Jump success
= jump();
1017 parenthesesState
.linkAlternativeBacktracks(this);
1018 loadFromFrame(parenthesesFrameLocation
, index
);
1019 state
.jumpToBacktrack(jump(), this);
1023 state
.checkedTotal
+= countCheckedAfterAssertion
;
1027 void generateTerm(TermGenerationState
& state
)
1029 PatternTerm
& term
= state
.term();
1031 switch (term
.type
) {
1032 case PatternTerm::TypeAssertionBOL
:
1033 generateAssertionBOL(state
);
1036 case PatternTerm::TypeAssertionEOL
:
1037 generateAssertionEOL(state
);
1040 case PatternTerm::TypeAssertionWordBoundary
:
1041 generateAssertionWordBoundary(state
);
1044 case PatternTerm::TypePatternCharacter
:
1045 switch (term
.quantityType
) {
1046 case QuantifierFixedCount
:
1047 if (term
.quantityCount
== 1) {
1048 if (state
.isSinglePatternCharacterLookaheadTerm() && (state
.lookaheadTerm().inputPosition
== (term
.inputPosition
+ 1))) {
1049 generatePatternCharacterPair(state
);
1052 generatePatternCharacterSingle(state
);
1054 generatePatternCharacterFixed(state
);
1056 case QuantifierGreedy
:
1057 generatePatternCharacterGreedy(state
);
1059 case QuantifierNonGreedy
:
1060 generatePatternCharacterNonGreedy(state
);
1065 case PatternTerm::TypeCharacterClass
:
1066 switch (term
.quantityType
) {
1067 case QuantifierFixedCount
:
1068 if (term
.quantityCount
== 1)
1069 generateCharacterClassSingle(state
);
1071 generateCharacterClassFixed(state
);
1073 case QuantifierGreedy
:
1074 generateCharacterClassGreedy(state
);
1076 case QuantifierNonGreedy
:
1077 generateCharacterClassNonGreedy(state
);
1082 case PatternTerm::TypeBackReference
:
1083 m_generationFailed
= true;
1086 case PatternTerm::TypeForwardReference
:
1089 case PatternTerm::TypeParenthesesSubpattern
:
1090 if ((term
.quantityCount
== 1) && !term
.parentheses
.isCopy
)
1091 generateParenthesesSingle(state
);
1093 m_generationFailed
= true;
1096 case PatternTerm::TypeParentheticalAssertion
:
1097 generateParentheticalAssertion(state
);
1102 void generateDisjunction(PatternDisjunction
* disjunction
)
1104 TermGenerationState
state(disjunction
, 0);
1105 state
.resetAlternative();
1107 // Plant a check to see if there is sufficient input available to run the first alternative.
1108 // Jumping back to the label 'firstAlternative' will get to this check, jumping to
1109 // 'firstAlternativeInputChecked' will jump directly to matching the alternative having
1110 // skipped this check.
1112 Label
firstAlternative(this);
1114 // check availability for the next alternative
1115 int countCheckedForCurrentAlternative
= 0;
1116 int countToCheckForFirstAlternative
= 0;
1117 bool hasShorterAlternatives
= false;
1118 JumpList notEnoughInputForPreviousAlternative
;
1120 if (state
.alternativeValid()) {
1121 PatternAlternative
* alternative
= state
.alternative();
1122 countToCheckForFirstAlternative
= alternative
->m_minimumSize
;
1123 state
.checkedTotal
+= countToCheckForFirstAlternative
;
1124 if (countToCheckForFirstAlternative
)
1125 notEnoughInputForPreviousAlternative
.append(jumpIfNoAvailableInput(countToCheckForFirstAlternative
));
1126 countCheckedForCurrentAlternative
= countToCheckForFirstAlternative
;
1129 Label
firstAlternativeInputChecked(this);
1131 while (state
.alternativeValid()) {
1132 // Track whether any alternatives are shorter than the first one.
1133 hasShorterAlternatives
= hasShorterAlternatives
|| (countCheckedForCurrentAlternative
< countToCheckForFirstAlternative
);
1135 PatternAlternative
* alternative
= state
.alternative();
1136 optimizeAlternative(alternative
);
1138 for (state
.resetTerm(); state
.termValid(); state
.nextTerm())
1139 generateTerm(state
);
1141 // If we get here, the alternative matched.
1142 if (m_pattern
.m_body
->m_callFrameSize
)
1143 addPtr(Imm32(m_pattern
.m_body
->m_callFrameSize
* sizeof(void*)), stackPointerRegister
);
1145 ASSERT(index
!= returnRegister
);
1146 if (m_pattern
.m_body
->m_hasFixedSize
) {
1147 move(index
, returnRegister
);
1148 if (alternative
->m_minimumSize
)
1149 sub32(Imm32(alternative
->m_minimumSize
), returnRegister
);
1151 pop(returnRegister
);
1152 store32(index
, Address(output
, 4));
1153 store32(returnRegister
, output
);
1157 state
.nextAlternative();
1159 // if there are any more alternatives, plant the check for input before looping.
1160 if (state
.alternativeValid()) {
1161 PatternAlternative
* nextAlternative
= state
.alternative();
1162 int countToCheckForNextAlternative
= nextAlternative
->m_minimumSize
;
1164 if (countCheckedForCurrentAlternative
> countToCheckForNextAlternative
) { // CASE 1: current alternative was longer than the next one.
1165 // If we get here, there the last input checked failed.
1166 notEnoughInputForPreviousAlternative
.link(this);
1168 // Check if sufficent input available to run the next alternative
1169 notEnoughInputForPreviousAlternative
.append(jumpIfNoAvailableInput(countToCheckForNextAlternative
- countCheckedForCurrentAlternative
));
1170 // We are now in the correct state to enter the next alternative; this add is only required
1171 // to mirror and revert operation of the sub32, just below.
1172 add32(Imm32(countCheckedForCurrentAlternative
- countToCheckForNextAlternative
), index
);
1174 // If we get here, there the last input checked passed.
1175 state
.linkAlternativeBacktracks(this);
1176 // No need to check if we can run the next alternative, since it is shorter -
1177 // just update index.
1178 sub32(Imm32(countCheckedForCurrentAlternative
- countToCheckForNextAlternative
), index
);
1179 } else if (countCheckedForCurrentAlternative
< countToCheckForNextAlternative
) { // CASE 2: next alternative is longer than the current one.
1180 // If we get here, there the last input checked failed.
1181 // If there is insufficient input to run the current alternative, and the next alternative is longer,
1182 // then there is definitely not enough input to run it - don't even check. Just adjust index, as if
1184 notEnoughInputForPreviousAlternative
.link(this);
1185 add32(Imm32(countToCheckForNextAlternative
- countCheckedForCurrentAlternative
), index
);
1186 notEnoughInputForPreviousAlternative
.append(jump());
1188 // The next alternative is longer than the current one; check the difference.
1189 state
.linkAlternativeBacktracks(this);
1190 notEnoughInputForPreviousAlternative
.append(jumpIfNoAvailableInput(countToCheckForNextAlternative
- countCheckedForCurrentAlternative
));
1191 } else { // CASE 3: Both alternatives are the same length.
1192 ASSERT(countCheckedForCurrentAlternative
== countToCheckForNextAlternative
);
1194 // If the next alterative is the same length as this one, then no need to check the input -
1195 // if there was sufficent input to run the current alternative then there is sufficient
1196 // input to run the next one; if not, there isn't.
1197 state
.linkAlternativeBacktracks(this);
1200 state
.checkedTotal
-= countCheckedForCurrentAlternative
;
1201 countCheckedForCurrentAlternative
= countToCheckForNextAlternative
;
1202 state
.checkedTotal
+= countCheckedForCurrentAlternative
;
1206 // If we get here, all Alternatives failed...
1208 state
.checkedTotal
-= countCheckedForCurrentAlternative
;
1210 // How much more input need there be to be able to retry from the first alternative?
1212 // /yarr_jit/ or /wrec|pcre/
1213 // In these examples we need check for one more input before looping.
1215 // In this case we need check for 5 more input to loop (+4 to allow for the first alterative
1216 // being four longer than the last alternative checked, and another +1 to effectively move
1217 // the start position along by one).
1218 // /yarr|rules/ or /wrec|notsomuch/
1219 // In these examples, provided that there was sufficient input to have just been matching for
1220 // the second alternative we can loop without checking for available input (since the second
1221 // alternative is longer than the first). In the latter example we need to decrement index
1222 // (by 4) so the start position is only progressed by 1 from the last iteration.
1223 int incrementForNextIter
= (countToCheckForFirstAlternative
- countCheckedForCurrentAlternative
) + 1;
1225 // First, deal with the cases where there was sufficient input to try the last alternative.
1226 if (incrementForNextIter
> 0) // We need to check for more input anyway, fall through to the checking below.
1227 state
.linkAlternativeBacktracks(this);
1228 else if (m_pattern
.m_body
->m_hasFixedSize
&& !incrementForNextIter
) // No need to update anything, link these backtracks straight to the to pof the loop!
1229 state
.linkAlternativeBacktracksTo(firstAlternativeInputChecked
, this);
1230 else { // no need to check the input, but we do have some bookkeeping to do first.
1231 state
.linkAlternativeBacktracks(this);
1233 // Where necessary update our preserved start position.
1234 if (!m_pattern
.m_body
->m_hasFixedSize
) {
1236 sub32(Imm32(countCheckedForCurrentAlternative
- 1), regT0
);
1237 poke(regT0
, m_pattern
.m_body
->m_callFrameSize
);
1240 // Update index if necessary, and loop (without checking).
1241 if (incrementForNextIter
)
1242 add32(Imm32(incrementForNextIter
), index
);
1243 jump().linkTo(firstAlternativeInputChecked
, this);
1246 notEnoughInputForPreviousAlternative
.link(this);
1247 // Update our idea of the start position, if we're tracking this.
1248 if (!m_pattern
.m_body
->m_hasFixedSize
) {
1249 if (countCheckedForCurrentAlternative
- 1) {
1251 sub32(Imm32(countCheckedForCurrentAlternative
- 1), regT0
);
1252 poke(regT0
, m_pattern
.m_body
->m_callFrameSize
);
1254 poke(index
, m_pattern
.m_body
->m_callFrameSize
);
1256 // Check if there is sufficent input to run the first alternative again.
1257 jumpIfAvailableInput(incrementForNextIter
).linkTo(firstAlternativeInputChecked
, this);
1258 // No - insufficent input to run the first alteranative, are there any other alternatives we
1259 // might need to check? If so, the last check will have left the index incremented by
1260 // (countToCheckForFirstAlternative + 1), so we need test whether countToCheckForFirstAlternative
1261 // LESS input is available, to have the effect of just progressing the start position by 1
1262 // from the last iteration. If this check passes we can just jump up to the check associated
1263 // with the first alternative in the loop. This is a bit sad, since we'll end up trying the
1264 // first alternative again, and this check will fail (otherwise the check planted just above
1265 // here would have passed). This is a bit sad, however it saves trying to do something more
1266 // complex here in compilation, and in the common case we should end up coallescing the checks.
1268 // FIXME: a nice improvement here may be to stop trying to match sooner, based on the least
1269 // of the minimum-alterantive-lengths. E.g. if I have two alternatives of length 200 and 150,
1270 // and a string of length 100, we'll end up looping index from 0 to 100, checking whether there
1271 // is sufficient input to run either alternative (constantly failing). If there had been only
1272 // one alternative, or if the shorter alternative had come first, we would have terminated
1274 if (hasShorterAlternatives
)
1275 jumpIfAvailableInput(-countToCheckForFirstAlternative
).linkTo(firstAlternative
, this);
1276 // index will now be a bit garbled (depending on whether 'hasShorterAlternatives' is true,
1277 // it has either been incremented by 1 or by (countToCheckForFirstAlternative + 1) ...
1278 // but since we're about to return a failure this doesn't really matter!)
1280 unsigned frameSize
= m_pattern
.m_body
->m_callFrameSize
;
1281 if (!m_pattern
.m_body
->m_hasFixedSize
)
1284 addPtr(Imm32(frameSize
* sizeof(void*)), stackPointerRegister
);
1286 move(Imm32(-1), returnRegister
);
1291 void generateEnter()
1293 #if PLATFORM(X86_64)
1295 move(stackPointerRegister
, X86::ebp
);
1299 move(stackPointerRegister
, X86::ebp
);
1300 // TODO: do we need spill registers to fill the output pointer if there are no sub captures?
1304 // load output into edi (2 = saved ebp + return address).
1306 loadPtr(Address(X86::ebp
, 2 * sizeof(void*)), input
);
1307 loadPtr(Address(X86::ebp
, 3 * sizeof(void*)), index
);
1308 loadPtr(Address(X86::ebp
, 4 * sizeof(void*)), length
);
1309 loadPtr(Address(X86::ebp
, 5 * sizeof(void*)), output
);
1311 loadPtr(Address(X86::ebp
, 2 * sizeof(void*)), output
);
1313 #elif PLATFORM_ARM_ARCH(7)
1317 move(ARM::r3
, output
);
1321 void generateReturn()
1323 #if PLATFORM(X86_64)
1331 #elif PLATFORM_ARM_ARCH(7)
1340 RegexGenerator(RegexPattern
& pattern
)
1341 : m_pattern(pattern
)
1342 , m_generationFailed(false)
1350 // TODO: do I really want this on the stack?
1351 if (!m_pattern
.m_body
->m_hasFixedSize
)
1354 if (m_pattern
.m_body
->m_callFrameSize
)
1355 subPtr(Imm32(m_pattern
.m_body
->m_callFrameSize
* sizeof(void*)), stackPointerRegister
);
1357 generateDisjunction(m_pattern
.m_body
);
1360 void compile(JSGlobalData
* globalData
, RegexCodeBlock
& jitObject
)
1364 LinkBuffer
patchBuffer(this, globalData
->executableAllocator
.poolForSize(size()));
1366 for (unsigned i
= 0; i
< m_backtrackRecords
.size(); ++i
)
1367 patchBuffer
.patch(m_backtrackRecords
[i
].dataLabel
, patchBuffer
.locationOf(m_backtrackRecords
[i
].backtrackLocation
));
1369 jitObject
.set(patchBuffer
.finalizeCode());
1372 bool generationFailed()
1374 return m_generationFailed
;
1378 RegexPattern
& m_pattern
;
1379 Vector
<AlternativeBacktrackRecord
> m_backtrackRecords
;
1380 bool m_generationFailed
;
1383 void jitCompileRegex(JSGlobalData
* globalData
, RegexCodeBlock
& jitObject
, const UString
& patternString
, unsigned& numSubpatterns
, const char*& error
, bool ignoreCase
, bool multiline
)
1385 RegexPattern
pattern(ignoreCase
, multiline
);
1387 if ((error
= compileRegex(patternString
, pattern
)))
1390 numSubpatterns
= pattern
.m_numSubpatterns
;
1392 RegexGenerator
generator(pattern
);
1393 generator
.compile(globalData
, jitObject
);
1395 if (generator
.generationFailed()) {
1396 JSRegExpIgnoreCaseOption ignoreCaseOption
= ignoreCase
? JSRegExpIgnoreCase
: JSRegExpDoNotIgnoreCase
;
1397 JSRegExpMultilineOption multilineOption
= multiline
? JSRegExpMultiline
: JSRegExpSingleLine
;
1398 jitObject
.setFallback(jsRegExpCompile(reinterpret_cast<const UChar
*>(patternString
.data()), patternString
.size(), ignoreCaseOption
, multilineOption
, &numSubpatterns
, &error
));
1402 int executeRegex(RegexCodeBlock
& jitObject
, const UChar
* input
, unsigned start
, unsigned length
, int* output
, int outputArraySize
)
1404 if (JSRegExp
* fallback
= jitObject
.getFallback())
1405 return (jsRegExpExecute(fallback
, input
, length
, start
, output
, outputArraySize
) < 0) ? -1 : output
[0];
1407 return jitObject
.execute(input
, start
, length
, output
);