]> git.saurik.com Git - apple/javascriptcore.git/blob - yarr/RegexJIT.cpp
8e3640ac0d780c35c82d7223a81e12c4504f7db0
[apple/javascriptcore.git] / yarr / RegexJIT.cpp
1 /*
2 * Copyright (C) 2009 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26 #include "config.h"
27 #include "RegexJIT.h"
28
29 #include "ASCIICType.h"
30 #include "JSGlobalData.h"
31 #include "LinkBuffer.h"
32 #include "MacroAssembler.h"
33 #include "RegexCompiler.h"
34
35 #include "pcre.h" // temporary, remove when fallback is removed.
36
37 #if ENABLE(YARR_JIT)
38
39 using namespace WTF;
40
41 namespace JSC { namespace Yarr {
42
43
44 class RegexGenerator : private MacroAssembler {
45 friend void jitCompileRegex(JSGlobalData* globalData, RegexCodeBlock& jitObject, const UString& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline);
46
47 #if PLATFORM_ARM_ARCH(7)
48 static const RegisterID input = ARM::r0;
49 static const RegisterID index = ARM::r1;
50 static const RegisterID length = ARM::r2;
51
52 static const RegisterID output = ARM::r4;
53 static const RegisterID regT0 = ARM::r5;
54 static const RegisterID regT1 = ARM::r6;
55
56 static const RegisterID returnRegister = ARM::r0;
57 #endif
58 #if PLATFORM(X86)
59 static const RegisterID input = X86::eax;
60 static const RegisterID index = X86::edx;
61 static const RegisterID length = X86::ecx;
62 static const RegisterID output = X86::edi;
63
64 static const RegisterID regT0 = X86::ebx;
65 static const RegisterID regT1 = X86::esi;
66
67 static const RegisterID returnRegister = X86::eax;
68 #endif
69 #if PLATFORM(X86_64)
70 static const RegisterID input = X86::edi;
71 static const RegisterID index = X86::esi;
72 static const RegisterID length = X86::edx;
73 static const RegisterID output = X86::ecx;
74
75 static const RegisterID regT0 = X86::eax;
76 static const RegisterID regT1 = X86::ebx;
77
78 static const RegisterID returnRegister = X86::eax;
79 #endif
80
81 void optimizeAlternative(PatternAlternative* alternative)
82 {
83 if (!alternative->m_terms.size())
84 return;
85
86 for (unsigned i = 0; i < alternative->m_terms.size() - 1; ++i) {
87 PatternTerm& term = alternative->m_terms[i];
88 PatternTerm& nextTerm = alternative->m_terms[i + 1];
89
90 if ((term.type == PatternTerm::TypeCharacterClass)
91 && (term.quantityType == QuantifierFixedCount)
92 && (nextTerm.type == PatternTerm::TypePatternCharacter)
93 && (nextTerm.quantityType == QuantifierFixedCount)) {
94 PatternTerm termCopy = term;
95 alternative->m_terms[i] = nextTerm;
96 alternative->m_terms[i + 1] = termCopy;
97 }
98 }
99 }
100
101 void matchCharacterClassRange(RegisterID character, JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar* matches, unsigned matchCount)
102 {
103 do {
104 // pick which range we're going to generate
105 int which = count >> 1;
106 char lo = ranges[which].begin;
107 char hi = ranges[which].end;
108
109 // check if there are any ranges or matches below lo. If not, just jl to failure -
110 // if there is anything else to check, check that first, if it falls through jmp to failure.
111 if ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
112 Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));
113
114 // generate code for all ranges before this one
115 if (which)
116 matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
117
118 while ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
119 matchDest.append(branch32(Equal, character, Imm32((unsigned short)matches[*matchIndex])));
120 ++*matchIndex;
121 }
122 failures.append(jump());
123
124 loOrAbove.link(this);
125 } else if (which) {
126 Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));
127
128 matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
129 failures.append(jump());
130
131 loOrAbove.link(this);
132 } else
133 failures.append(branch32(LessThan, character, Imm32((unsigned short)lo)));
134
135 while ((*matchIndex < matchCount) && (matches[*matchIndex] <= hi))
136 ++*matchIndex;
137
138 matchDest.append(branch32(LessThanOrEqual, character, Imm32((unsigned short)hi)));
139 // fall through to here, the value is above hi.
140
141 // shuffle along & loop around if there are any more matches to handle.
142 unsigned next = which + 1;
143 ranges += next;
144 count -= next;
145 } while (count);
146 }
147
148 void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass)
149 {
150 Jump unicodeFail;
151 if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) {
152 Jump isAscii = branch32(LessThanOrEqual, character, Imm32(0x7f));
153
154 if (charClass->m_matchesUnicode.size()) {
155 for (unsigned i = 0; i < charClass->m_matchesUnicode.size(); ++i) {
156 UChar ch = charClass->m_matchesUnicode[i];
157 matchDest.append(branch32(Equal, character, Imm32(ch)));
158 }
159 }
160
161 if (charClass->m_rangesUnicode.size()) {
162 for (unsigned i = 0; i < charClass->m_rangesUnicode.size(); ++i) {
163 UChar lo = charClass->m_rangesUnicode[i].begin;
164 UChar hi = charClass->m_rangesUnicode[i].end;
165
166 Jump below = branch32(LessThan, character, Imm32(lo));
167 matchDest.append(branch32(LessThanOrEqual, character, Imm32(hi)));
168 below.link(this);
169 }
170 }
171
172 unicodeFail = jump();
173 isAscii.link(this);
174 }
175
176 if (charClass->m_ranges.size()) {
177 unsigned matchIndex = 0;
178 JumpList failures;
179 matchCharacterClassRange(character, failures, matchDest, charClass->m_ranges.begin(), charClass->m_ranges.size(), &matchIndex, charClass->m_matches.begin(), charClass->m_matches.size());
180 while (matchIndex < charClass->m_matches.size())
181 matchDest.append(branch32(Equal, character, Imm32((unsigned short)charClass->m_matches[matchIndex++])));
182
183 failures.link(this);
184 } else if (charClass->m_matches.size()) {
185 // optimization: gather 'a','A' etc back together, can mask & test once.
186 Vector<char> matchesAZaz;
187
188 for (unsigned i = 0; i < charClass->m_matches.size(); ++i) {
189 char ch = charClass->m_matches[i];
190 if (m_pattern.m_ignoreCase) {
191 if (isASCIILower(ch)) {
192 matchesAZaz.append(ch);
193 continue;
194 }
195 if (isASCIIUpper(ch))
196 continue;
197 }
198 matchDest.append(branch32(Equal, character, Imm32((unsigned short)ch)));
199 }
200
201 if (unsigned countAZaz = matchesAZaz.size()) {
202 or32(Imm32(32), character);
203 for (unsigned i = 0; i < countAZaz; ++i)
204 matchDest.append(branch32(Equal, character, Imm32(matchesAZaz[i])));
205 }
206 }
207
208 if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size())
209 unicodeFail.link(this);
210 }
211
212 // Jumps if input not available; will have (incorrectly) incremented already!
213 Jump jumpIfNoAvailableInput(unsigned countToCheck)
214 {
215 add32(Imm32(countToCheck), index);
216 return branch32(Above, index, length);
217 }
218
219 Jump jumpIfAvailableInput(unsigned countToCheck)
220 {
221 add32(Imm32(countToCheck), index);
222 return branch32(BelowOrEqual, index, length);
223 }
224
225 Jump checkInput()
226 {
227 return branch32(BelowOrEqual, index, length);
228 }
229
230 Jump atEndOfInput()
231 {
232 return branch32(Equal, index, length);
233 }
234
235 Jump notAtEndOfInput()
236 {
237 return branch32(NotEqual, index, length);
238 }
239
240 Jump jumpIfCharEquals(UChar ch, int inputPosition)
241 {
242 return branch16(Equal, BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), Imm32(ch));
243 }
244
245 Jump jumpIfCharNotEquals(UChar ch, int inputPosition)
246 {
247 return branch16(NotEqual, BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), Imm32(ch));
248 }
249
250 void readCharacter(int inputPosition, RegisterID reg)
251 {
252 load16(BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), reg);
253 }
254
255 void storeToFrame(RegisterID reg, unsigned frameLocation)
256 {
257 poke(reg, frameLocation);
258 }
259
260 void storeToFrame(Imm32 imm, unsigned frameLocation)
261 {
262 poke(imm, frameLocation);
263 }
264
265 DataLabelPtr storeToFrameWithPatch(unsigned frameLocation)
266 {
267 return storePtrWithPatch(ImmPtr(0), Address(stackPointerRegister, frameLocation * sizeof(void*)));
268 }
269
270 void loadFromFrame(unsigned frameLocation, RegisterID reg)
271 {
272 peek(reg, frameLocation);
273 }
274
275 void loadFromFrameAndJump(unsigned frameLocation)
276 {
277 jump(Address(stackPointerRegister, frameLocation * sizeof(void*)));
278 }
279
280 struct AlternativeBacktrackRecord {
281 DataLabelPtr dataLabel;
282 Label backtrackLocation;
283
284 AlternativeBacktrackRecord(DataLabelPtr dataLabel, Label backtrackLocation)
285 : dataLabel(dataLabel)
286 , backtrackLocation(backtrackLocation)
287 {
288 }
289 };
290
291 struct TermGenerationState {
292 TermGenerationState(PatternDisjunction* disjunction, unsigned checkedTotal)
293 : disjunction(disjunction)
294 , checkedTotal(checkedTotal)
295 {
296 }
297
298 void resetAlternative()
299 {
300 isBackTrackGenerated = false;
301 alt = 0;
302 }
303 bool alternativeValid()
304 {
305 return alt < disjunction->m_alternatives.size();
306 }
307 void nextAlternative()
308 {
309 ++alt;
310 }
311 PatternAlternative* alternative()
312 {
313 return disjunction->m_alternatives[alt];
314 }
315
316 void resetTerm()
317 {
318 ASSERT(alternativeValid());
319 t = 0;
320 }
321 bool termValid()
322 {
323 ASSERT(alternativeValid());
324 return t < alternative()->m_terms.size();
325 }
326 void nextTerm()
327 {
328 ASSERT(alternativeValid());
329 ++t;
330 }
331 PatternTerm& term()
332 {
333 ASSERT(alternativeValid());
334 return alternative()->m_terms[t];
335 }
336
337 PatternTerm& lookaheadTerm()
338 {
339 ASSERT(alternativeValid());
340 ASSERT((t + 1) < alternative()->m_terms.size());
341 return alternative()->m_terms[t + 1];
342 }
343 bool isSinglePatternCharacterLookaheadTerm()
344 {
345 ASSERT(alternativeValid());
346 return ((t + 1) < alternative()->m_terms.size())
347 && (lookaheadTerm().type == PatternTerm::TypePatternCharacter)
348 && (lookaheadTerm().quantityType == QuantifierFixedCount)
349 && (lookaheadTerm().quantityCount == 1);
350 }
351
352 int inputOffset()
353 {
354 return term().inputPosition - checkedTotal;
355 }
356
357 void jumpToBacktrack(Jump jump, MacroAssembler* masm)
358 {
359 if (isBackTrackGenerated)
360 jump.linkTo(backtrackLabel, masm);
361 else
362 backTrackJumps.append(jump);
363 }
364 void jumpToBacktrack(JumpList& jumps, MacroAssembler* masm)
365 {
366 if (isBackTrackGenerated)
367 jumps.linkTo(backtrackLabel, masm);
368 else
369 backTrackJumps.append(jumps);
370 }
371 bool plantJumpToBacktrackIfExists(MacroAssembler* masm)
372 {
373 if (isBackTrackGenerated) {
374 masm->jump(backtrackLabel);
375 return true;
376 }
377 return false;
378 }
379 void addBacktrackJump(Jump jump)
380 {
381 backTrackJumps.append(jump);
382 }
383 void setBacktrackGenerated(Label label)
384 {
385 isBackTrackGenerated = true;
386 backtrackLabel = label;
387 }
388 void linkAlternativeBacktracks(MacroAssembler* masm)
389 {
390 isBackTrackGenerated = false;
391 backTrackJumps.link(masm);
392 }
393 void linkAlternativeBacktracksTo(Label label, MacroAssembler* masm)
394 {
395 isBackTrackGenerated = false;
396 backTrackJumps.linkTo(label, masm);
397 }
398 void propagateBacktrackingFrom(TermGenerationState& nestedParenthesesState, MacroAssembler* masm)
399 {
400 jumpToBacktrack(nestedParenthesesState.backTrackJumps, masm);
401 if (nestedParenthesesState.isBackTrackGenerated)
402 setBacktrackGenerated(nestedParenthesesState.backtrackLabel);
403 }
404
405 PatternDisjunction* disjunction;
406 int checkedTotal;
407 private:
408 unsigned alt;
409 unsigned t;
410 JumpList backTrackJumps;
411 Label backtrackLabel;
412 bool isBackTrackGenerated;
413 };
414
415 void generateAssertionBOL(TermGenerationState& state)
416 {
417 PatternTerm& term = state.term();
418
419 if (m_pattern.m_multiline) {
420 const RegisterID character = regT0;
421
422 JumpList matchDest;
423 if (!term.inputPosition)
424 matchDest.append(branch32(Equal, index, Imm32(state.checkedTotal)));
425
426 readCharacter(state.inputOffset() - 1, character);
427 matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
428 state.jumpToBacktrack(jump(), this);
429
430 matchDest.link(this);
431 } else {
432 // Erk, really should poison out these alternatives early. :-/
433 if (term.inputPosition)
434 state.jumpToBacktrack(jump(), this);
435 else
436 state.jumpToBacktrack(branch32(NotEqual, index, Imm32(state.checkedTotal)), this);
437 }
438 }
439
440 void generateAssertionEOL(TermGenerationState& state)
441 {
442 PatternTerm& term = state.term();
443
444 if (m_pattern.m_multiline) {
445 const RegisterID character = regT0;
446
447 JumpList matchDest;
448 if (term.inputPosition == state.checkedTotal)
449 matchDest.append(atEndOfInput());
450
451 readCharacter(state.inputOffset(), character);
452 matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
453 state.jumpToBacktrack(jump(), this);
454
455 matchDest.link(this);
456 } else {
457 if (term.inputPosition == state.checkedTotal)
458 state.jumpToBacktrack(notAtEndOfInput(), this);
459 // Erk, really should poison out these alternatives early. :-/
460 else
461 state.jumpToBacktrack(jump(), this);
462 }
463 }
464
465 // Also falls though on nextIsNotWordChar.
466 void matchAssertionWordchar(TermGenerationState& state, JumpList& nextIsWordChar, JumpList& nextIsNotWordChar)
467 {
468 const RegisterID character = regT0;
469 PatternTerm& term = state.term();
470
471 if (term.inputPosition == state.checkedTotal)
472 nextIsNotWordChar.append(atEndOfInput());
473
474 readCharacter(state.inputOffset(), character);
475 matchCharacterClass(character, nextIsWordChar, m_pattern.wordcharCharacterClass());
476 }
477
478 void generateAssertionWordBoundary(TermGenerationState& state)
479 {
480 const RegisterID character = regT0;
481 PatternTerm& term = state.term();
482
483 Jump atBegin;
484 JumpList matchDest;
485 if (!term.inputPosition)
486 atBegin = branch32(Equal, index, Imm32(state.checkedTotal));
487 readCharacter(state.inputOffset() - 1, character);
488 matchCharacterClass(character, matchDest, m_pattern.wordcharCharacterClass());
489 if (!term.inputPosition)
490 atBegin.link(this);
491
492 // We fall through to here if the last character was not a wordchar.
493 JumpList nonWordCharThenWordChar;
494 JumpList nonWordCharThenNonWordChar;
495 if (term.invertOrCapture) {
496 matchAssertionWordchar(state, nonWordCharThenNonWordChar, nonWordCharThenWordChar);
497 nonWordCharThenWordChar.append(jump());
498 } else {
499 matchAssertionWordchar(state, nonWordCharThenWordChar, nonWordCharThenNonWordChar);
500 nonWordCharThenNonWordChar.append(jump());
501 }
502 state.jumpToBacktrack(nonWordCharThenNonWordChar, this);
503
504 // We jump here if the last character was a wordchar.
505 matchDest.link(this);
506 JumpList wordCharThenWordChar;
507 JumpList wordCharThenNonWordChar;
508 if (term.invertOrCapture) {
509 matchAssertionWordchar(state, wordCharThenNonWordChar, wordCharThenWordChar);
510 wordCharThenWordChar.append(jump());
511 } else {
512 matchAssertionWordchar(state, wordCharThenWordChar, wordCharThenNonWordChar);
513 // This can fall-though!
514 }
515
516 state.jumpToBacktrack(wordCharThenWordChar, this);
517
518 nonWordCharThenWordChar.link(this);
519 wordCharThenNonWordChar.link(this);
520 }
521
522 void generatePatternCharacterSingle(TermGenerationState& state)
523 {
524 const RegisterID character = regT0;
525 UChar ch = state.term().patternCharacter;
526
527 if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
528 readCharacter(state.inputOffset(), character);
529 or32(Imm32(32), character);
530 state.jumpToBacktrack(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))), this);
531 } else {
532 ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
533 state.jumpToBacktrack(jumpIfCharNotEquals(ch, state.inputOffset()), this);
534 }
535 }
536
537 void generatePatternCharacterPair(TermGenerationState& state)
538 {
539 const RegisterID character = regT0;
540 UChar ch1 = state.term().patternCharacter;
541 UChar ch2 = state.lookaheadTerm().patternCharacter;
542
543 int mask = 0;
544 int chPair = ch1 | (ch2 << 16);
545
546 if (m_pattern.m_ignoreCase) {
547 if (isASCIIAlpha(ch1))
548 mask |= 32;
549 if (isASCIIAlpha(ch2))
550 mask |= 32 << 16;
551 }
552
553 if (mask) {
554 load32(BaseIndex(input, index, TimesTwo, state.inputOffset() * sizeof(UChar)), character);
555 or32(Imm32(mask), character);
556 state.jumpToBacktrack(branch32(NotEqual, character, Imm32(chPair | mask)), this);
557 } else
558 state.jumpToBacktrack(branch32(NotEqual, BaseIndex(input, index, TimesTwo, state.inputOffset() * sizeof(UChar)), Imm32(chPair)), this);
559 }
560
561 void generatePatternCharacterFixed(TermGenerationState& state)
562 {
563 const RegisterID character = regT0;
564 const RegisterID countRegister = regT1;
565 PatternTerm& term = state.term();
566 UChar ch = term.patternCharacter;
567
568 move(index, countRegister);
569 sub32(Imm32(term.quantityCount), countRegister);
570
571 Label loop(this);
572 if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
573 load16(BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), character);
574 or32(Imm32(32), character);
575 state.jumpToBacktrack(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))), this);
576 } else {
577 ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
578 state.jumpToBacktrack(branch16(NotEqual, BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), Imm32(ch)), this);
579 }
580 add32(Imm32(1), countRegister);
581 branch32(NotEqual, countRegister, index).linkTo(loop, this);
582 }
583
584 void generatePatternCharacterGreedy(TermGenerationState& state)
585 {
586 const RegisterID character = regT0;
587 const RegisterID countRegister = regT1;
588 PatternTerm& term = state.term();
589 UChar ch = term.patternCharacter;
590
591 move(Imm32(0), countRegister);
592
593 JumpList failures;
594 Label loop(this);
595 failures.append(atEndOfInput());
596 if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
597 readCharacter(state.inputOffset(), character);
598 or32(Imm32(32), character);
599 failures.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));
600 } else {
601 ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
602 failures.append(jumpIfCharNotEquals(ch, state.inputOffset()));
603 }
604 add32(Imm32(1), countRegister);
605 add32(Imm32(1), index);
606 branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this);
607 failures.append(jump());
608
609 Label backtrackBegin(this);
610 loadFromFrame(term.frameLocation, countRegister);
611 state.jumpToBacktrack(branchTest32(Zero, countRegister), this);
612 sub32(Imm32(1), countRegister);
613 sub32(Imm32(1), index);
614
615 failures.link(this);
616
617 storeToFrame(countRegister, term.frameLocation);
618
619 state.setBacktrackGenerated(backtrackBegin);
620 }
621
622 void generatePatternCharacterNonGreedy(TermGenerationState& state)
623 {
624 const RegisterID character = regT0;
625 const RegisterID countRegister = regT1;
626 PatternTerm& term = state.term();
627 UChar ch = term.patternCharacter;
628
629 move(Imm32(0), countRegister);
630
631 Jump firstTimeDoNothing = jump();
632
633 Label hardFail(this);
634 sub32(countRegister, index);
635 state.jumpToBacktrack(jump(), this);
636
637 Label backtrackBegin(this);
638 loadFromFrame(term.frameLocation, countRegister);
639
640 atEndOfInput().linkTo(hardFail, this);
641 branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail);
642 if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
643 readCharacter(state.inputOffset(), character);
644 or32(Imm32(32), character);
645 branch32(NotEqual, character, Imm32(Unicode::toLower(ch))).linkTo(hardFail, this);
646 } else {
647 ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
648 jumpIfCharNotEquals(ch, state.inputOffset()).linkTo(hardFail, this);
649 }
650
651 add32(Imm32(1), countRegister);
652 add32(Imm32(1), index);
653
654 firstTimeDoNothing.link(this);
655 storeToFrame(countRegister, term.frameLocation);
656
657 state.setBacktrackGenerated(backtrackBegin);
658 }
659
660 void generateCharacterClassSingle(TermGenerationState& state)
661 {
662 const RegisterID character = regT0;
663 PatternTerm& term = state.term();
664
665 JumpList matchDest;
666 readCharacter(state.inputOffset(), character);
667 matchCharacterClass(character, matchDest, term.characterClass);
668
669 if (term.invertOrCapture)
670 state.jumpToBacktrack(matchDest, this);
671 else {
672 state.jumpToBacktrack(jump(), this);
673 matchDest.link(this);
674 }
675 }
676
677 void generateCharacterClassFixed(TermGenerationState& state)
678 {
679 const RegisterID character = regT0;
680 const RegisterID countRegister = regT1;
681 PatternTerm& term = state.term();
682
683 move(index, countRegister);
684 sub32(Imm32(term.quantityCount), countRegister);
685
686 Label loop(this);
687 JumpList matchDest;
688 load16(BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), character);
689 matchCharacterClass(character, matchDest, term.characterClass);
690
691 if (term.invertOrCapture)
692 state.jumpToBacktrack(matchDest, this);
693 else {
694 state.jumpToBacktrack(jump(), this);
695 matchDest.link(this);
696 }
697
698 add32(Imm32(1), countRegister);
699 branch32(NotEqual, countRegister, index).linkTo(loop, this);
700 }
701
702 void generateCharacterClassGreedy(TermGenerationState& state)
703 {
704 const RegisterID character = regT0;
705 const RegisterID countRegister = regT1;
706 PatternTerm& term = state.term();
707
708 move(Imm32(0), countRegister);
709
710 JumpList failures;
711 Label loop(this);
712 failures.append(atEndOfInput());
713
714 if (term.invertOrCapture) {
715 readCharacter(state.inputOffset(), character);
716 matchCharacterClass(character, failures, term.characterClass);
717 } else {
718 JumpList matchDest;
719 readCharacter(state.inputOffset(), character);
720 matchCharacterClass(character, matchDest, term.characterClass);
721 failures.append(jump());
722 matchDest.link(this);
723 }
724
725 add32(Imm32(1), countRegister);
726 add32(Imm32(1), index);
727 branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this);
728 failures.append(jump());
729
730 Label backtrackBegin(this);
731 loadFromFrame(term.frameLocation, countRegister);
732 state.jumpToBacktrack(branchTest32(Zero, countRegister), this);
733 sub32(Imm32(1), countRegister);
734 sub32(Imm32(1), index);
735
736 failures.link(this);
737
738 storeToFrame(countRegister, term.frameLocation);
739
740 state.setBacktrackGenerated(backtrackBegin);
741 }
742
743 void generateCharacterClassNonGreedy(TermGenerationState& state)
744 {
745 const RegisterID character = regT0;
746 const RegisterID countRegister = regT1;
747 PatternTerm& term = state.term();
748
749 move(Imm32(0), countRegister);
750
751 Jump firstTimeDoNothing = jump();
752
753 Label hardFail(this);
754 sub32(countRegister, index);
755 state.jumpToBacktrack(jump(), this);
756
757 Label backtrackBegin(this);
758 loadFromFrame(term.frameLocation, countRegister);
759
760 atEndOfInput().linkTo(hardFail, this);
761 branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail);
762
763 JumpList matchDest;
764 readCharacter(state.inputOffset(), character);
765 matchCharacterClass(character, matchDest, term.characterClass);
766
767 if (term.invertOrCapture)
768 matchDest.linkTo(hardFail, this);
769 else {
770 jump(hardFail);
771 matchDest.link(this);
772 }
773
774 add32(Imm32(1), countRegister);
775 add32(Imm32(1), index);
776
777 firstTimeDoNothing.link(this);
778 storeToFrame(countRegister, term.frameLocation);
779
780 state.setBacktrackGenerated(backtrackBegin);
781 }
782
783 void generateParenthesesDisjunction(PatternTerm& parenthesesTerm, TermGenerationState& state, unsigned alternativeFrameLocation)
784 {
785 ASSERT((parenthesesTerm.type == PatternTerm::TypeParenthesesSubpattern) || (parenthesesTerm.type == PatternTerm::TypeParentheticalAssertion));
786 ASSERT(parenthesesTerm.quantityCount == 1);
787
788 PatternDisjunction* disjunction = parenthesesTerm.parentheses.disjunction;
789 unsigned preCheckedCount = ((parenthesesTerm.quantityType == QuantifierFixedCount) && (parenthesesTerm.type != PatternTerm::TypeParentheticalAssertion)) ? disjunction->m_minimumSize : 0;
790
791 if (disjunction->m_alternatives.size() == 1) {
792 state.resetAlternative();
793 ASSERT(state.alternativeValid());
794 PatternAlternative* alternative = state.alternative();
795 optimizeAlternative(alternative);
796
797 int countToCheck = alternative->m_minimumSize - preCheckedCount;
798 if (countToCheck) {
799 ASSERT((parenthesesTerm.type == PatternTerm::TypeParentheticalAssertion) || (parenthesesTerm.quantityType != QuantifierFixedCount));
800
801 // FIXME: This is quite horrible. The call to 'plantJumpToBacktrackIfExists'
802 // will be forced to always trampoline into here, just to decrement the index.
803 // Ick.
804 Jump skip = jump();
805
806 Label backtrackBegin(this);
807 sub32(Imm32(countToCheck), index);
808 state.addBacktrackJump(jump());
809
810 skip.link(this);
811
812 state.setBacktrackGenerated(backtrackBegin);
813
814 state.jumpToBacktrack(jumpIfNoAvailableInput(countToCheck), this);
815 state.checkedTotal += countToCheck;
816 }
817
818 for (state.resetTerm(); state.termValid(); state.nextTerm())
819 generateTerm(state);
820
821 state.checkedTotal -= countToCheck;
822 } else {
823 JumpList successes;
824
825 for (state.resetAlternative(); state.alternativeValid(); state.nextAlternative()) {
826
827 PatternAlternative* alternative = state.alternative();
828 optimizeAlternative(alternative);
829
830 ASSERT(alternative->m_minimumSize >= preCheckedCount);
831 int countToCheck = alternative->m_minimumSize - preCheckedCount;
832 if (countToCheck) {
833 state.addBacktrackJump(jumpIfNoAvailableInput(countToCheck));
834 state.checkedTotal += countToCheck;
835 }
836
837 for (state.resetTerm(); state.termValid(); state.nextTerm())
838 generateTerm(state);
839
840 // Matched an alternative.
841 DataLabelPtr dataLabel = storeToFrameWithPatch(alternativeFrameLocation);
842 successes.append(jump());
843
844 // Alternative did not match.
845 Label backtrackLocation(this);
846
847 // Can we backtrack the alternative? - if so, do so. If not, just fall through to the next one.
848 state.plantJumpToBacktrackIfExists(this);
849
850 state.linkAlternativeBacktracks(this);
851
852 if (countToCheck) {
853 sub32(Imm32(countToCheck), index);
854 state.checkedTotal -= countToCheck;
855 }
856
857 m_backtrackRecords.append(AlternativeBacktrackRecord(dataLabel, backtrackLocation));
858 }
859 // We fall through to here when the last alternative fails.
860 // Add a backtrack out of here for the parenthese handling code to link up.
861 state.addBacktrackJump(jump());
862
863 // Generate a trampoline for the parens code to backtrack to, to retry the
864 // next alternative.
865 state.setBacktrackGenerated(label());
866 loadFromFrameAndJump(alternativeFrameLocation);
867
868 // FIXME: both of the above hooks are a little inefficient, in that you
869 // may end up trampolining here, just to trampoline back out to the
870 // parentheses code, or vice versa. We can probably eliminate a jump
871 // by restructuring, but coding this way for now for simplicity during
872 // development.
873
874 successes.link(this);
875 }
876 }
877
878 void generateParenthesesSingle(TermGenerationState& state)
879 {
880 const RegisterID indexTemporary = regT0;
881 PatternTerm& term = state.term();
882 PatternDisjunction* disjunction = term.parentheses.disjunction;
883 ASSERT(term.quantityCount == 1);
884
885 unsigned preCheckedCount = ((term.quantityCount == 1) && (term.quantityType == QuantifierFixedCount)) ? disjunction->m_minimumSize : 0;
886
887 unsigned parenthesesFrameLocation = term.frameLocation;
888 unsigned alternativeFrameLocation = parenthesesFrameLocation;
889 if (term.quantityType != QuantifierFixedCount)
890 alternativeFrameLocation += RegexStackSpaceForBackTrackInfoParenthesesOnce;
891
892 // optimized case - no capture & no quantifier can be handled in a light-weight manner.
893 if (!term.invertOrCapture && (term.quantityType == QuantifierFixedCount)) {
894 TermGenerationState parenthesesState(disjunction, state.checkedTotal);
895 generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
896 // this expects that any backtracks back out of the parentheses will be in the
897 // parenthesesState's backTrackJumps vector, and that if they need backtracking
898 // they will have set an entry point on the parenthesesState's backtrackLabel.
899 state.propagateBacktrackingFrom(parenthesesState, this);
900 } else {
901 Jump nonGreedySkipParentheses;
902 Label nonGreedyTryParentheses;
903 if (term.quantityType == QuantifierGreedy)
904 storeToFrame(Imm32(1), parenthesesFrameLocation);
905 else if (term.quantityType == QuantifierNonGreedy) {
906 storeToFrame(Imm32(0), parenthesesFrameLocation);
907 nonGreedySkipParentheses = jump();
908 nonGreedyTryParentheses = label();
909 storeToFrame(Imm32(1), parenthesesFrameLocation);
910 }
911
912 // store the match start index
913 if (term.invertOrCapture) {
914 int inputOffset = state.inputOffset() - preCheckedCount;
915 if (inputOffset) {
916 move(index, indexTemporary);
917 add32(Imm32(inputOffset), indexTemporary);
918 store32(indexTemporary, Address(output, (term.parentheses.subpatternId << 1) * sizeof(int)));
919 } else
920 store32(index, Address(output, (term.parentheses.subpatternId << 1) * sizeof(int)));
921 }
922
923 // generate the body of the parentheses
924 TermGenerationState parenthesesState(disjunction, state.checkedTotal);
925 generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
926
927 // store the match end index
928 if (term.invertOrCapture) {
929 int inputOffset = state.inputOffset();
930 if (inputOffset) {
931 move(index, indexTemporary);
932 add32(Imm32(state.inputOffset()), indexTemporary);
933 store32(indexTemporary, Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int)));
934 } else
935 store32(index, Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int)));
936 }
937 Jump success = jump();
938
939 // A failure AFTER the parens jumps here
940 Label backtrackFromAfterParens(this);
941
942 if (term.quantityType == QuantifierGreedy) {
943 // If this is zero we have now tested with both with and without the parens.
944 loadFromFrame(parenthesesFrameLocation, indexTemporary);
945 state.jumpToBacktrack(branchTest32(Zero, indexTemporary), this);
946 } else if (term.quantityType == QuantifierNonGreedy) {
947 // If this is zero we have now tested with both with and without the parens.
948 loadFromFrame(parenthesesFrameLocation, indexTemporary);
949 branchTest32(Zero, indexTemporary).linkTo(nonGreedyTryParentheses, this);
950 }
951
952 parenthesesState.plantJumpToBacktrackIfExists(this);
953 // A failure WITHIN the parens jumps here
954 parenthesesState.linkAlternativeBacktracks(this);
955 if (term.invertOrCapture) {
956 store32(Imm32(-1), Address(output, (term.parentheses.subpatternId << 1) * sizeof(int)));
957 store32(Imm32(-1), Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int)));
958 }
959
960 if (term.quantityType == QuantifierGreedy)
961 storeToFrame(Imm32(0), parenthesesFrameLocation);
962 else
963 state.jumpToBacktrack(jump(), this);
964
965 state.setBacktrackGenerated(backtrackFromAfterParens);
966 if (term.quantityType == QuantifierNonGreedy)
967 nonGreedySkipParentheses.link(this);
968 success.link(this);
969 }
970 }
971
972 void generateParentheticalAssertion(TermGenerationState& state)
973 {
974 PatternTerm& term = state.term();
975 PatternDisjunction* disjunction = term.parentheses.disjunction;
976 ASSERT(term.quantityCount == 1);
977 ASSERT(term.quantityType == QuantifierFixedCount);
978
979 unsigned parenthesesFrameLocation = term.frameLocation;
980 unsigned alternativeFrameLocation = parenthesesFrameLocation + RegexStackSpaceForBackTrackInfoParentheticalAssertion;
981
982 int countCheckedAfterAssertion = state.checkedTotal - term.inputPosition;
983
984 if (term.invertOrCapture) {
985 // Inverted case
986 storeToFrame(index, parenthesesFrameLocation);
987
988 state.checkedTotal -= countCheckedAfterAssertion;
989 if (countCheckedAfterAssertion)
990 sub32(Imm32(countCheckedAfterAssertion), index);
991
992 TermGenerationState parenthesesState(disjunction, state.checkedTotal);
993 generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
994 // Success! - which means - Fail!
995 loadFromFrame(parenthesesFrameLocation, index);
996 state.jumpToBacktrack(jump(), this);
997
998 // And fail means success.
999 parenthesesState.linkAlternativeBacktracks(this);
1000 loadFromFrame(parenthesesFrameLocation, index);
1001
1002 state.checkedTotal += countCheckedAfterAssertion;
1003 } else {
1004 // Normal case
1005 storeToFrame(index, parenthesesFrameLocation);
1006
1007 state.checkedTotal -= countCheckedAfterAssertion;
1008 if (countCheckedAfterAssertion)
1009 sub32(Imm32(countCheckedAfterAssertion), index);
1010
1011 TermGenerationState parenthesesState(disjunction, state.checkedTotal);
1012 generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
1013 // Success! - which means - Success!
1014 loadFromFrame(parenthesesFrameLocation, index);
1015 Jump success = jump();
1016
1017 parenthesesState.linkAlternativeBacktracks(this);
1018 loadFromFrame(parenthesesFrameLocation, index);
1019 state.jumpToBacktrack(jump(), this);
1020
1021 success.link(this);
1022
1023 state.checkedTotal += countCheckedAfterAssertion;
1024 }
1025 }
1026
1027 void generateTerm(TermGenerationState& state)
1028 {
1029 PatternTerm& term = state.term();
1030
1031 switch (term.type) {
1032 case PatternTerm::TypeAssertionBOL:
1033 generateAssertionBOL(state);
1034 break;
1035
1036 case PatternTerm::TypeAssertionEOL:
1037 generateAssertionEOL(state);
1038 break;
1039
1040 case PatternTerm::TypeAssertionWordBoundary:
1041 generateAssertionWordBoundary(state);
1042 break;
1043
1044 case PatternTerm::TypePatternCharacter:
1045 switch (term.quantityType) {
1046 case QuantifierFixedCount:
1047 if (term.quantityCount == 1) {
1048 if (state.isSinglePatternCharacterLookaheadTerm() && (state.lookaheadTerm().inputPosition == (term.inputPosition + 1))) {
1049 generatePatternCharacterPair(state);
1050 state.nextTerm();
1051 } else
1052 generatePatternCharacterSingle(state);
1053 } else
1054 generatePatternCharacterFixed(state);
1055 break;
1056 case QuantifierGreedy:
1057 generatePatternCharacterGreedy(state);
1058 break;
1059 case QuantifierNonGreedy:
1060 generatePatternCharacterNonGreedy(state);
1061 break;
1062 }
1063 break;
1064
1065 case PatternTerm::TypeCharacterClass:
1066 switch (term.quantityType) {
1067 case QuantifierFixedCount:
1068 if (term.quantityCount == 1)
1069 generateCharacterClassSingle(state);
1070 else
1071 generateCharacterClassFixed(state);
1072 break;
1073 case QuantifierGreedy:
1074 generateCharacterClassGreedy(state);
1075 break;
1076 case QuantifierNonGreedy:
1077 generateCharacterClassNonGreedy(state);
1078 break;
1079 }
1080 break;
1081
1082 case PatternTerm::TypeBackReference:
1083 m_generationFailed = true;
1084 break;
1085
1086 case PatternTerm::TypeForwardReference:
1087 break;
1088
1089 case PatternTerm::TypeParenthesesSubpattern:
1090 if ((term.quantityCount == 1) && !term.parentheses.isCopy)
1091 generateParenthesesSingle(state);
1092 else
1093 m_generationFailed = true;
1094 break;
1095
1096 case PatternTerm::TypeParentheticalAssertion:
1097 generateParentheticalAssertion(state);
1098 break;
1099 }
1100 }
1101
1102 void generateDisjunction(PatternDisjunction* disjunction)
1103 {
1104 TermGenerationState state(disjunction, 0);
1105 state.resetAlternative();
1106
1107 // Plant a check to see if there is sufficient input available to run the first alternative.
1108 // Jumping back to the label 'firstAlternative' will get to this check, jumping to
1109 // 'firstAlternativeInputChecked' will jump directly to matching the alternative having
1110 // skipped this check.
1111
1112 Label firstAlternative(this);
1113
1114 // check availability for the next alternative
1115 int countCheckedForCurrentAlternative = 0;
1116 int countToCheckForFirstAlternative = 0;
1117 bool hasShorterAlternatives = false;
1118 JumpList notEnoughInputForPreviousAlternative;
1119
1120 if (state.alternativeValid()) {
1121 PatternAlternative* alternative = state.alternative();
1122 countToCheckForFirstAlternative = alternative->m_minimumSize;
1123 state.checkedTotal += countToCheckForFirstAlternative;
1124 if (countToCheckForFirstAlternative)
1125 notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForFirstAlternative));
1126 countCheckedForCurrentAlternative = countToCheckForFirstAlternative;
1127 }
1128
1129 Label firstAlternativeInputChecked(this);
1130
1131 while (state.alternativeValid()) {
1132 // Track whether any alternatives are shorter than the first one.
1133 hasShorterAlternatives = hasShorterAlternatives || (countCheckedForCurrentAlternative < countToCheckForFirstAlternative);
1134
1135 PatternAlternative* alternative = state.alternative();
1136 optimizeAlternative(alternative);
1137
1138 for (state.resetTerm(); state.termValid(); state.nextTerm())
1139 generateTerm(state);
1140
1141 // If we get here, the alternative matched.
1142 if (m_pattern.m_body->m_callFrameSize)
1143 addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
1144
1145 ASSERT(index != returnRegister);
1146 if (m_pattern.m_body->m_hasFixedSize) {
1147 move(index, returnRegister);
1148 if (alternative->m_minimumSize)
1149 sub32(Imm32(alternative->m_minimumSize), returnRegister);
1150 } else
1151 pop(returnRegister);
1152 store32(index, Address(output, 4));
1153 store32(returnRegister, output);
1154
1155 generateReturn();
1156
1157 state.nextAlternative();
1158
1159 // if there are any more alternatives, plant the check for input before looping.
1160 if (state.alternativeValid()) {
1161 PatternAlternative* nextAlternative = state.alternative();
1162 int countToCheckForNextAlternative = nextAlternative->m_minimumSize;
1163
1164 if (countCheckedForCurrentAlternative > countToCheckForNextAlternative) { // CASE 1: current alternative was longer than the next one.
1165 // If we get here, there the last input checked failed.
1166 notEnoughInputForPreviousAlternative.link(this);
1167
1168 // Check if sufficent input available to run the next alternative
1169 notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForNextAlternative - countCheckedForCurrentAlternative));
1170 // We are now in the correct state to enter the next alternative; this add is only required
1171 // to mirror and revert operation of the sub32, just below.
1172 add32(Imm32(countCheckedForCurrentAlternative - countToCheckForNextAlternative), index);
1173
1174 // If we get here, there the last input checked passed.
1175 state.linkAlternativeBacktracks(this);
1176 // No need to check if we can run the next alternative, since it is shorter -
1177 // just update index.
1178 sub32(Imm32(countCheckedForCurrentAlternative - countToCheckForNextAlternative), index);
1179 } else if (countCheckedForCurrentAlternative < countToCheckForNextAlternative) { // CASE 2: next alternative is longer than the current one.
1180 // If we get here, there the last input checked failed.
1181 // If there is insufficient input to run the current alternative, and the next alternative is longer,
1182 // then there is definitely not enough input to run it - don't even check. Just adjust index, as if
1183 // we had checked.
1184 notEnoughInputForPreviousAlternative.link(this);
1185 add32(Imm32(countToCheckForNextAlternative - countCheckedForCurrentAlternative), index);
1186 notEnoughInputForPreviousAlternative.append(jump());
1187
1188 // The next alternative is longer than the current one; check the difference.
1189 state.linkAlternativeBacktracks(this);
1190 notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForNextAlternative - countCheckedForCurrentAlternative));
1191 } else { // CASE 3: Both alternatives are the same length.
1192 ASSERT(countCheckedForCurrentAlternative == countToCheckForNextAlternative);
1193
1194 // If the next alterative is the same length as this one, then no need to check the input -
1195 // if there was sufficent input to run the current alternative then there is sufficient
1196 // input to run the next one; if not, there isn't.
1197 state.linkAlternativeBacktracks(this);
1198 }
1199
1200 state.checkedTotal -= countCheckedForCurrentAlternative;
1201 countCheckedForCurrentAlternative = countToCheckForNextAlternative;
1202 state.checkedTotal += countCheckedForCurrentAlternative;
1203 }
1204 }
1205
1206 // If we get here, all Alternatives failed...
1207
1208 state.checkedTotal -= countCheckedForCurrentAlternative;
1209
1210 // How much more input need there be to be able to retry from the first alternative?
1211 // examples:
1212 // /yarr_jit/ or /wrec|pcre/
1213 // In these examples we need check for one more input before looping.
1214 // /yarr_jit|pcre/
1215 // In this case we need check for 5 more input to loop (+4 to allow for the first alterative
1216 // being four longer than the last alternative checked, and another +1 to effectively move
1217 // the start position along by one).
1218 // /yarr|rules/ or /wrec|notsomuch/
1219 // In these examples, provided that there was sufficient input to have just been matching for
1220 // the second alternative we can loop without checking for available input (since the second
1221 // alternative is longer than the first). In the latter example we need to decrement index
1222 // (by 4) so the start position is only progressed by 1 from the last iteration.
1223 int incrementForNextIter = (countToCheckForFirstAlternative - countCheckedForCurrentAlternative) + 1;
1224
1225 // First, deal with the cases where there was sufficient input to try the last alternative.
1226 if (incrementForNextIter > 0) // We need to check for more input anyway, fall through to the checking below.
1227 state.linkAlternativeBacktracks(this);
1228 else if (m_pattern.m_body->m_hasFixedSize && !incrementForNextIter) // No need to update anything, link these backtracks straight to the to pof the loop!
1229 state.linkAlternativeBacktracksTo(firstAlternativeInputChecked, this);
1230 else { // no need to check the input, but we do have some bookkeeping to do first.
1231 state.linkAlternativeBacktracks(this);
1232
1233 // Where necessary update our preserved start position.
1234 if (!m_pattern.m_body->m_hasFixedSize) {
1235 move(index, regT0);
1236 sub32(Imm32(countCheckedForCurrentAlternative - 1), regT0);
1237 poke(regT0, m_pattern.m_body->m_callFrameSize);
1238 }
1239
1240 // Update index if necessary, and loop (without checking).
1241 if (incrementForNextIter)
1242 add32(Imm32(incrementForNextIter), index);
1243 jump().linkTo(firstAlternativeInputChecked, this);
1244 }
1245
1246 notEnoughInputForPreviousAlternative.link(this);
1247 // Update our idea of the start position, if we're tracking this.
1248 if (!m_pattern.m_body->m_hasFixedSize) {
1249 if (countCheckedForCurrentAlternative - 1) {
1250 move(index, regT0);
1251 sub32(Imm32(countCheckedForCurrentAlternative - 1), regT0);
1252 poke(regT0, m_pattern.m_body->m_callFrameSize);
1253 } else
1254 poke(index, m_pattern.m_body->m_callFrameSize);
1255 }
1256 // Check if there is sufficent input to run the first alternative again.
1257 jumpIfAvailableInput(incrementForNextIter).linkTo(firstAlternativeInputChecked, this);
1258 // No - insufficent input to run the first alteranative, are there any other alternatives we
1259 // might need to check? If so, the last check will have left the index incremented by
1260 // (countToCheckForFirstAlternative + 1), so we need test whether countToCheckForFirstAlternative
1261 // LESS input is available, to have the effect of just progressing the start position by 1
1262 // from the last iteration. If this check passes we can just jump up to the check associated
1263 // with the first alternative in the loop. This is a bit sad, since we'll end up trying the
1264 // first alternative again, and this check will fail (otherwise the check planted just above
1265 // here would have passed). This is a bit sad, however it saves trying to do something more
1266 // complex here in compilation, and in the common case we should end up coallescing the checks.
1267 //
1268 // FIXME: a nice improvement here may be to stop trying to match sooner, based on the least
1269 // of the minimum-alterantive-lengths. E.g. if I have two alternatives of length 200 and 150,
1270 // and a string of length 100, we'll end up looping index from 0 to 100, checking whether there
1271 // is sufficient input to run either alternative (constantly failing). If there had been only
1272 // one alternative, or if the shorter alternative had come first, we would have terminated
1273 // immediately. :-/
1274 if (hasShorterAlternatives)
1275 jumpIfAvailableInput(-countToCheckForFirstAlternative).linkTo(firstAlternative, this);
1276 // index will now be a bit garbled (depending on whether 'hasShorterAlternatives' is true,
1277 // it has either been incremented by 1 or by (countToCheckForFirstAlternative + 1) ...
1278 // but since we're about to return a failure this doesn't really matter!)
1279
1280 unsigned frameSize = m_pattern.m_body->m_callFrameSize;
1281 if (!m_pattern.m_body->m_hasFixedSize)
1282 ++frameSize;
1283 if (frameSize)
1284 addPtr(Imm32(frameSize * sizeof(void*)), stackPointerRegister);
1285
1286 move(Imm32(-1), returnRegister);
1287
1288 generateReturn();
1289 }
1290
1291 void generateEnter()
1292 {
1293 #if PLATFORM(X86_64)
1294 push(X86::ebp);
1295 move(stackPointerRegister, X86::ebp);
1296 push(X86::ebx);
1297 #elif PLATFORM(X86)
1298 push(X86::ebp);
1299 move(stackPointerRegister, X86::ebp);
1300 // TODO: do we need spill registers to fill the output pointer if there are no sub captures?
1301 push(X86::ebx);
1302 push(X86::edi);
1303 push(X86::esi);
1304 // load output into edi (2 = saved ebp + return address).
1305 #if COMPILER(MSVC)
1306 loadPtr(Address(X86::ebp, 2 * sizeof(void*)), input);
1307 loadPtr(Address(X86::ebp, 3 * sizeof(void*)), index);
1308 loadPtr(Address(X86::ebp, 4 * sizeof(void*)), length);
1309 loadPtr(Address(X86::ebp, 5 * sizeof(void*)), output);
1310 #else
1311 loadPtr(Address(X86::ebp, 2 * sizeof(void*)), output);
1312 #endif
1313 #elif PLATFORM_ARM_ARCH(7)
1314 push(ARM::r4);
1315 push(ARM::r5);
1316 push(ARM::r6);
1317 move(ARM::r3, output);
1318 #endif
1319 }
1320
1321 void generateReturn()
1322 {
1323 #if PLATFORM(X86_64)
1324 pop(X86::ebx);
1325 pop(X86::ebp);
1326 #elif PLATFORM(X86)
1327 pop(X86::esi);
1328 pop(X86::edi);
1329 pop(X86::ebx);
1330 pop(X86::ebp);
1331 #elif PLATFORM_ARM_ARCH(7)
1332 pop(ARM::r6);
1333 pop(ARM::r5);
1334 pop(ARM::r4);
1335 #endif
1336 ret();
1337 }
1338
1339 public:
1340 RegexGenerator(RegexPattern& pattern)
1341 : m_pattern(pattern)
1342 , m_generationFailed(false)
1343 {
1344 }
1345
1346 void generate()
1347 {
1348 generateEnter();
1349
1350 // TODO: do I really want this on the stack?
1351 if (!m_pattern.m_body->m_hasFixedSize)
1352 push(index);
1353
1354 if (m_pattern.m_body->m_callFrameSize)
1355 subPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
1356
1357 generateDisjunction(m_pattern.m_body);
1358 }
1359
1360 void compile(JSGlobalData* globalData, RegexCodeBlock& jitObject)
1361 {
1362 generate();
1363
1364 LinkBuffer patchBuffer(this, globalData->executableAllocator.poolForSize(size()));
1365
1366 for (unsigned i = 0; i < m_backtrackRecords.size(); ++i)
1367 patchBuffer.patch(m_backtrackRecords[i].dataLabel, patchBuffer.locationOf(m_backtrackRecords[i].backtrackLocation));
1368
1369 jitObject.set(patchBuffer.finalizeCode());
1370 }
1371
1372 bool generationFailed()
1373 {
1374 return m_generationFailed;
1375 }
1376
1377 private:
1378 RegexPattern& m_pattern;
1379 Vector<AlternativeBacktrackRecord> m_backtrackRecords;
1380 bool m_generationFailed;
1381 };
1382
1383 void jitCompileRegex(JSGlobalData* globalData, RegexCodeBlock& jitObject, const UString& patternString, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline)
1384 {
1385 RegexPattern pattern(ignoreCase, multiline);
1386
1387 if ((error = compileRegex(patternString, pattern)))
1388 return;
1389
1390 numSubpatterns = pattern.m_numSubpatterns;
1391
1392 RegexGenerator generator(pattern);
1393 generator.compile(globalData, jitObject);
1394
1395 if (generator.generationFailed()) {
1396 JSRegExpIgnoreCaseOption ignoreCaseOption = ignoreCase ? JSRegExpIgnoreCase : JSRegExpDoNotIgnoreCase;
1397 JSRegExpMultilineOption multilineOption = multiline ? JSRegExpMultiline : JSRegExpSingleLine;
1398 jitObject.setFallback(jsRegExpCompile(reinterpret_cast<const UChar*>(patternString.data()), patternString.size(), ignoreCaseOption, multilineOption, &numSubpatterns, &error));
1399 }
1400 }
1401
1402 int executeRegex(RegexCodeBlock& jitObject, const UChar* input, unsigned start, unsigned length, int* output, int outputArraySize)
1403 {
1404 if (JSRegExp* fallback = jitObject.getFallback())
1405 return (jsRegExpExecute(fallback, input, length, start, output, outputArraySize) < 0) ? -1 : output[0];
1406
1407 return jitObject.execute(input, start, length, output);
1408 }
1409
1410 }}
1411
1412 #endif
1413
1414
1415
1416
1417