]> git.saurik.com Git - apple/javascriptcore.git/blob - yarr/RegexJIT.cpp
b954b1c6b9f91985d6b34fd63f853618665d017d
[apple/javascriptcore.git] / yarr / RegexJIT.cpp
1 /*
2 * Copyright (C) 2009 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26 #include "config.h"
27 #include "RegexJIT.h"
28
29 #include "ASCIICType.h"
30 #include "JSGlobalData.h"
31 #include "LinkBuffer.h"
32 #include "MacroAssembler.h"
33 #include "RegExpCache.h"
34 #include "RegexCompiler.h"
35
36 #include "pcre.h" // temporary, remove when fallback is removed.
37
38 #if ENABLE(YARR_JIT)
39
40 using namespace WTF;
41
42 namespace JSC { namespace Yarr {
43
44 class RegexGenerator : private MacroAssembler {
45 friend void jitCompileRegex(JSGlobalData* globalData, RegexCodeBlock& jitObject, const UString& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline);
46
47 #if CPU(ARM)
48 static const RegisterID input = ARMRegisters::r0;
49 static const RegisterID index = ARMRegisters::r1;
50 static const RegisterID length = ARMRegisters::r2;
51 static const RegisterID output = ARMRegisters::r4;
52
53 static const RegisterID regT0 = ARMRegisters::r5;
54 static const RegisterID regT1 = ARMRegisters::r6;
55
56 static const RegisterID returnRegister = ARMRegisters::r0;
57 #elif CPU(MIPS)
58 static const RegisterID input = MIPSRegisters::a0;
59 static const RegisterID index = MIPSRegisters::a1;
60 static const RegisterID length = MIPSRegisters::a2;
61 static const RegisterID output = MIPSRegisters::a3;
62
63 static const RegisterID regT0 = MIPSRegisters::t4;
64 static const RegisterID regT1 = MIPSRegisters::t5;
65
66 static const RegisterID returnRegister = MIPSRegisters::v0;
67 #elif CPU(X86)
68 static const RegisterID input = X86Registers::eax;
69 static const RegisterID index = X86Registers::edx;
70 static const RegisterID length = X86Registers::ecx;
71 static const RegisterID output = X86Registers::edi;
72
73 static const RegisterID regT0 = X86Registers::ebx;
74 static const RegisterID regT1 = X86Registers::esi;
75
76 static const RegisterID returnRegister = X86Registers::eax;
77 #elif CPU(X86_64)
78 static const RegisterID input = X86Registers::edi;
79 static const RegisterID index = X86Registers::esi;
80 static const RegisterID length = X86Registers::edx;
81 static const RegisterID output = X86Registers::ecx;
82
83 static const RegisterID regT0 = X86Registers::eax;
84 static const RegisterID regT1 = X86Registers::ebx;
85
86 static const RegisterID returnRegister = X86Registers::eax;
87 #endif
88
89 void optimizeAlternative(PatternAlternative* alternative)
90 {
91 if (!alternative->m_terms.size())
92 return;
93
94 for (unsigned i = 0; i < alternative->m_terms.size() - 1; ++i) {
95 PatternTerm& term = alternative->m_terms[i];
96 PatternTerm& nextTerm = alternative->m_terms[i + 1];
97
98 if ((term.type == PatternTerm::TypeCharacterClass)
99 && (term.quantityType == QuantifierFixedCount)
100 && (nextTerm.type == PatternTerm::TypePatternCharacter)
101 && (nextTerm.quantityType == QuantifierFixedCount)) {
102 PatternTerm termCopy = term;
103 alternative->m_terms[i] = nextTerm;
104 alternative->m_terms[i + 1] = termCopy;
105 }
106 }
107 }
108
109 void matchCharacterClassRange(RegisterID character, JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar* matches, unsigned matchCount)
110 {
111 do {
112 // pick which range we're going to generate
113 int which = count >> 1;
114 char lo = ranges[which].begin;
115 char hi = ranges[which].end;
116
117 // check if there are any ranges or matches below lo. If not, just jl to failure -
118 // if there is anything else to check, check that first, if it falls through jmp to failure.
119 if ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
120 Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));
121
122 // generate code for all ranges before this one
123 if (which)
124 matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
125
126 while ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
127 matchDest.append(branch32(Equal, character, Imm32((unsigned short)matches[*matchIndex])));
128 ++*matchIndex;
129 }
130 failures.append(jump());
131
132 loOrAbove.link(this);
133 } else if (which) {
134 Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));
135
136 matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
137 failures.append(jump());
138
139 loOrAbove.link(this);
140 } else
141 failures.append(branch32(LessThan, character, Imm32((unsigned short)lo)));
142
143 while ((*matchIndex < matchCount) && (matches[*matchIndex] <= hi))
144 ++*matchIndex;
145
146 matchDest.append(branch32(LessThanOrEqual, character, Imm32((unsigned short)hi)));
147 // fall through to here, the value is above hi.
148
149 // shuffle along & loop around if there are any more matches to handle.
150 unsigned next = which + 1;
151 ranges += next;
152 count -= next;
153 } while (count);
154 }
155
156 void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass)
157 {
158 if (charClass->m_table) {
159 ExtendedAddress tableEntry(character, reinterpret_cast<intptr_t>(charClass->m_table->m_table));
160 matchDest.append(branchTest8(charClass->m_table->m_inverted ? Zero : NonZero, tableEntry));
161 return;
162 }
163 Jump unicodeFail;
164 if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) {
165 Jump isAscii = branch32(LessThanOrEqual, character, Imm32(0x7f));
166
167 if (charClass->m_matchesUnicode.size()) {
168 for (unsigned i = 0; i < charClass->m_matchesUnicode.size(); ++i) {
169 UChar ch = charClass->m_matchesUnicode[i];
170 matchDest.append(branch32(Equal, character, Imm32(ch)));
171 }
172 }
173
174 if (charClass->m_rangesUnicode.size()) {
175 for (unsigned i = 0; i < charClass->m_rangesUnicode.size(); ++i) {
176 UChar lo = charClass->m_rangesUnicode[i].begin;
177 UChar hi = charClass->m_rangesUnicode[i].end;
178
179 Jump below = branch32(LessThan, character, Imm32(lo));
180 matchDest.append(branch32(LessThanOrEqual, character, Imm32(hi)));
181 below.link(this);
182 }
183 }
184
185 unicodeFail = jump();
186 isAscii.link(this);
187 }
188
189 if (charClass->m_ranges.size()) {
190 unsigned matchIndex = 0;
191 JumpList failures;
192 matchCharacterClassRange(character, failures, matchDest, charClass->m_ranges.begin(), charClass->m_ranges.size(), &matchIndex, charClass->m_matches.begin(), charClass->m_matches.size());
193 while (matchIndex < charClass->m_matches.size())
194 matchDest.append(branch32(Equal, character, Imm32((unsigned short)charClass->m_matches[matchIndex++])));
195
196 failures.link(this);
197 } else if (charClass->m_matches.size()) {
198 // optimization: gather 'a','A' etc back together, can mask & test once.
199 Vector<char> matchesAZaz;
200
201 for (unsigned i = 0; i < charClass->m_matches.size(); ++i) {
202 char ch = charClass->m_matches[i];
203 if (m_pattern.m_ignoreCase) {
204 if (isASCIILower(ch)) {
205 matchesAZaz.append(ch);
206 continue;
207 }
208 if (isASCIIUpper(ch))
209 continue;
210 }
211 matchDest.append(branch32(Equal, character, Imm32((unsigned short)ch)));
212 }
213
214 if (unsigned countAZaz = matchesAZaz.size()) {
215 or32(Imm32(32), character);
216 for (unsigned i = 0; i < countAZaz; ++i)
217 matchDest.append(branch32(Equal, character, Imm32(matchesAZaz[i])));
218 }
219 }
220
221 if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size())
222 unicodeFail.link(this);
223 }
224
225 // Jumps if input not available; will have (incorrectly) incremented already!
226 Jump jumpIfNoAvailableInput(unsigned countToCheck)
227 {
228 add32(Imm32(countToCheck), index);
229 return branch32(Above, index, length);
230 }
231
232 Jump jumpIfAvailableInput(unsigned countToCheck)
233 {
234 add32(Imm32(countToCheck), index);
235 return branch32(BelowOrEqual, index, length);
236 }
237
238 Jump checkInput()
239 {
240 return branch32(BelowOrEqual, index, length);
241 }
242
243 Jump atEndOfInput()
244 {
245 return branch32(Equal, index, length);
246 }
247
248 Jump notAtEndOfInput()
249 {
250 return branch32(NotEqual, index, length);
251 }
252
253 Jump jumpIfCharEquals(UChar ch, int inputPosition)
254 {
255 return branch16(Equal, BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), Imm32(ch));
256 }
257
258 Jump jumpIfCharNotEquals(UChar ch, int inputPosition)
259 {
260 return branch16(NotEqual, BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), Imm32(ch));
261 }
262
263 void readCharacter(int inputPosition, RegisterID reg)
264 {
265 load16(BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), reg);
266 }
267
268 void storeToFrame(RegisterID reg, unsigned frameLocation)
269 {
270 poke(reg, frameLocation);
271 }
272
273 void storeToFrame(Imm32 imm, unsigned frameLocation)
274 {
275 poke(imm, frameLocation);
276 }
277
278 DataLabelPtr storeToFrameWithPatch(unsigned frameLocation)
279 {
280 return storePtrWithPatch(ImmPtr(0), Address(stackPointerRegister, frameLocation * sizeof(void*)));
281 }
282
283 void loadFromFrame(unsigned frameLocation, RegisterID reg)
284 {
285 peek(reg, frameLocation);
286 }
287
288 void loadFromFrameAndJump(unsigned frameLocation)
289 {
290 jump(Address(stackPointerRegister, frameLocation * sizeof(void*)));
291 }
292
293 struct AlternativeBacktrackRecord {
294 DataLabelPtr dataLabel;
295 Label backtrackLocation;
296
297 AlternativeBacktrackRecord(DataLabelPtr dataLabel, Label backtrackLocation)
298 : dataLabel(dataLabel)
299 , backtrackLocation(backtrackLocation)
300 {
301 }
302 };
303
304 struct TermGenerationState {
305 TermGenerationState(PatternDisjunction* disjunction, unsigned checkedTotal)
306 : disjunction(disjunction)
307 , checkedTotal(checkedTotal)
308 {
309 }
310
311 void resetAlternative()
312 {
313 isBackTrackGenerated = false;
314 alt = 0;
315 }
316 bool alternativeValid()
317 {
318 return alt < disjunction->m_alternatives.size();
319 }
320 void nextAlternative()
321 {
322 ++alt;
323 }
324 PatternAlternative* alternative()
325 {
326 return disjunction->m_alternatives[alt];
327 }
328
329 void resetTerm()
330 {
331 ASSERT(alternativeValid());
332 t = 0;
333 }
334 bool termValid()
335 {
336 ASSERT(alternativeValid());
337 return t < alternative()->m_terms.size();
338 }
339 void nextTerm()
340 {
341 ASSERT(alternativeValid());
342 ++t;
343 }
344 PatternTerm& term()
345 {
346 ASSERT(alternativeValid());
347 return alternative()->m_terms[t];
348 }
349
350 PatternTerm& lookaheadTerm()
351 {
352 ASSERT(alternativeValid());
353 ASSERT((t + 1) < alternative()->m_terms.size());
354 return alternative()->m_terms[t + 1];
355 }
356 bool isSinglePatternCharacterLookaheadTerm()
357 {
358 ASSERT(alternativeValid());
359 return ((t + 1) < alternative()->m_terms.size())
360 && (lookaheadTerm().type == PatternTerm::TypePatternCharacter)
361 && (lookaheadTerm().quantityType == QuantifierFixedCount)
362 && (lookaheadTerm().quantityCount == 1);
363 }
364
365 int inputOffset()
366 {
367 return term().inputPosition - checkedTotal;
368 }
369
370 void jumpToBacktrack(Jump jump, MacroAssembler* masm)
371 {
372 if (isBackTrackGenerated)
373 jump.linkTo(backtrackLabel, masm);
374 else
375 backTrackJumps.append(jump);
376 }
377 void jumpToBacktrack(JumpList& jumps, MacroAssembler* masm)
378 {
379 if (isBackTrackGenerated)
380 jumps.linkTo(backtrackLabel, masm);
381 else
382 backTrackJumps.append(jumps);
383 }
384 bool plantJumpToBacktrackIfExists(MacroAssembler* masm)
385 {
386 if (isBackTrackGenerated) {
387 masm->jump(backtrackLabel);
388 return true;
389 }
390 return false;
391 }
392 void addBacktrackJump(Jump jump)
393 {
394 backTrackJumps.append(jump);
395 }
396 void setBacktrackGenerated(Label label)
397 {
398 isBackTrackGenerated = true;
399 backtrackLabel = label;
400 }
401 void linkAlternativeBacktracks(MacroAssembler* masm)
402 {
403 isBackTrackGenerated = false;
404 backTrackJumps.link(masm);
405 }
406 void linkAlternativeBacktracksTo(Label label, MacroAssembler* masm)
407 {
408 isBackTrackGenerated = false;
409 backTrackJumps.linkTo(label, masm);
410 }
411 void propagateBacktrackingFrom(TermGenerationState& nestedParenthesesState, MacroAssembler* masm)
412 {
413 jumpToBacktrack(nestedParenthesesState.backTrackJumps, masm);
414 if (nestedParenthesesState.isBackTrackGenerated)
415 setBacktrackGenerated(nestedParenthesesState.backtrackLabel);
416 }
417
418 PatternDisjunction* disjunction;
419 int checkedTotal;
420 private:
421 unsigned alt;
422 unsigned t;
423 JumpList backTrackJumps;
424 Label backtrackLabel;
425 bool isBackTrackGenerated;
426 };
427
428 void generateAssertionBOL(TermGenerationState& state)
429 {
430 PatternTerm& term = state.term();
431
432 if (m_pattern.m_multiline) {
433 const RegisterID character = regT0;
434
435 JumpList matchDest;
436 if (!term.inputPosition)
437 matchDest.append(branch32(Equal, index, Imm32(state.checkedTotal)));
438
439 readCharacter(state.inputOffset() - 1, character);
440 matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
441 state.jumpToBacktrack(jump(), this);
442
443 matchDest.link(this);
444 } else {
445 // Erk, really should poison out these alternatives early. :-/
446 if (term.inputPosition)
447 state.jumpToBacktrack(jump(), this);
448 else
449 state.jumpToBacktrack(branch32(NotEqual, index, Imm32(state.checkedTotal)), this);
450 }
451 }
452
453 void generateAssertionEOL(TermGenerationState& state)
454 {
455 PatternTerm& term = state.term();
456
457 if (m_pattern.m_multiline) {
458 const RegisterID character = regT0;
459
460 JumpList matchDest;
461 if (term.inputPosition == state.checkedTotal)
462 matchDest.append(atEndOfInput());
463
464 readCharacter(state.inputOffset(), character);
465 matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
466 state.jumpToBacktrack(jump(), this);
467
468 matchDest.link(this);
469 } else {
470 if (term.inputPosition == state.checkedTotal)
471 state.jumpToBacktrack(notAtEndOfInput(), this);
472 // Erk, really should poison out these alternatives early. :-/
473 else
474 state.jumpToBacktrack(jump(), this);
475 }
476 }
477
478 // Also falls though on nextIsNotWordChar.
479 void matchAssertionWordchar(TermGenerationState& state, JumpList& nextIsWordChar, JumpList& nextIsNotWordChar)
480 {
481 const RegisterID character = regT0;
482 PatternTerm& term = state.term();
483
484 if (term.inputPosition == state.checkedTotal)
485 nextIsNotWordChar.append(atEndOfInput());
486
487 readCharacter(state.inputOffset(), character);
488 matchCharacterClass(character, nextIsWordChar, m_pattern.wordcharCharacterClass());
489 }
490
491 void generateAssertionWordBoundary(TermGenerationState& state)
492 {
493 const RegisterID character = regT0;
494 PatternTerm& term = state.term();
495
496 Jump atBegin;
497 JumpList matchDest;
498 if (!term.inputPosition)
499 atBegin = branch32(Equal, index, Imm32(state.checkedTotal));
500 readCharacter(state.inputOffset() - 1, character);
501 matchCharacterClass(character, matchDest, m_pattern.wordcharCharacterClass());
502 if (!term.inputPosition)
503 atBegin.link(this);
504
505 // We fall through to here if the last character was not a wordchar.
506 JumpList nonWordCharThenWordChar;
507 JumpList nonWordCharThenNonWordChar;
508 if (term.invertOrCapture) {
509 matchAssertionWordchar(state, nonWordCharThenNonWordChar, nonWordCharThenWordChar);
510 nonWordCharThenWordChar.append(jump());
511 } else {
512 matchAssertionWordchar(state, nonWordCharThenWordChar, nonWordCharThenNonWordChar);
513 nonWordCharThenNonWordChar.append(jump());
514 }
515 state.jumpToBacktrack(nonWordCharThenNonWordChar, this);
516
517 // We jump here if the last character was a wordchar.
518 matchDest.link(this);
519 JumpList wordCharThenWordChar;
520 JumpList wordCharThenNonWordChar;
521 if (term.invertOrCapture) {
522 matchAssertionWordchar(state, wordCharThenNonWordChar, wordCharThenWordChar);
523 wordCharThenWordChar.append(jump());
524 } else {
525 matchAssertionWordchar(state, wordCharThenWordChar, wordCharThenNonWordChar);
526 // This can fall-though!
527 }
528
529 state.jumpToBacktrack(wordCharThenWordChar, this);
530
531 nonWordCharThenWordChar.link(this);
532 wordCharThenNonWordChar.link(this);
533 }
534
535 void generatePatternCharacterSingle(TermGenerationState& state)
536 {
537 const RegisterID character = regT0;
538 UChar ch = state.term().patternCharacter;
539
540 if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
541 readCharacter(state.inputOffset(), character);
542 or32(Imm32(32), character);
543 state.jumpToBacktrack(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))), this);
544 } else {
545 ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
546 state.jumpToBacktrack(jumpIfCharNotEquals(ch, state.inputOffset()), this);
547 }
548 }
549
550 void generatePatternCharacterPair(TermGenerationState& state)
551 {
552 const RegisterID character = regT0;
553 UChar ch1 = state.term().patternCharacter;
554 UChar ch2 = state.lookaheadTerm().patternCharacter;
555
556 int mask = 0;
557 int chPair = ch1 | (ch2 << 16);
558
559 if (m_pattern.m_ignoreCase) {
560 if (isASCIIAlpha(ch1))
561 mask |= 32;
562 if (isASCIIAlpha(ch2))
563 mask |= 32 << 16;
564 }
565
566 if (mask) {
567 load32WithUnalignedHalfWords(BaseIndex(input, index, TimesTwo, state.inputOffset() * sizeof(UChar)), character);
568 or32(Imm32(mask), character);
569 state.jumpToBacktrack(branch32(NotEqual, character, Imm32(chPair | mask)), this);
570 } else
571 state.jumpToBacktrack(branch32WithUnalignedHalfWords(NotEqual, BaseIndex(input, index, TimesTwo, state.inputOffset() * sizeof(UChar)), Imm32(chPair)), this);
572 }
573
574 void generatePatternCharacterFixed(TermGenerationState& state)
575 {
576 const RegisterID character = regT0;
577 const RegisterID countRegister = regT1;
578 PatternTerm& term = state.term();
579 UChar ch = term.patternCharacter;
580
581 move(index, countRegister);
582 sub32(Imm32(term.quantityCount), countRegister);
583
584 Label loop(this);
585 if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
586 load16(BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), character);
587 or32(Imm32(32), character);
588 state.jumpToBacktrack(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))), this);
589 } else {
590 ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
591 state.jumpToBacktrack(branch16(NotEqual, BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), Imm32(ch)), this);
592 }
593 add32(Imm32(1), countRegister);
594 branch32(NotEqual, countRegister, index).linkTo(loop, this);
595 }
596
597 void generatePatternCharacterGreedy(TermGenerationState& state)
598 {
599 const RegisterID character = regT0;
600 const RegisterID countRegister = regT1;
601 PatternTerm& term = state.term();
602 UChar ch = term.patternCharacter;
603
604 move(Imm32(0), countRegister);
605
606 JumpList failures;
607 Label loop(this);
608 failures.append(atEndOfInput());
609 if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
610 readCharacter(state.inputOffset(), character);
611 or32(Imm32(32), character);
612 failures.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));
613 } else {
614 ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
615 failures.append(jumpIfCharNotEquals(ch, state.inputOffset()));
616 }
617
618 add32(Imm32(1), countRegister);
619 add32(Imm32(1), index);
620 if (term.quantityCount != 0xffffffff)
621 branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this);
622 else
623 jump(loop);
624
625 failures.append(jump());
626
627 Label backtrackBegin(this);
628 loadFromFrame(term.frameLocation, countRegister);
629 state.jumpToBacktrack(branchTest32(Zero, countRegister), this);
630 sub32(Imm32(1), countRegister);
631 sub32(Imm32(1), index);
632
633 failures.link(this);
634
635 storeToFrame(countRegister, term.frameLocation);
636
637 state.setBacktrackGenerated(backtrackBegin);
638 }
639
640 void generatePatternCharacterNonGreedy(TermGenerationState& state)
641 {
642 const RegisterID character = regT0;
643 const RegisterID countRegister = regT1;
644 PatternTerm& term = state.term();
645 UChar ch = term.patternCharacter;
646
647 move(Imm32(0), countRegister);
648
649 Jump firstTimeDoNothing = jump();
650
651 Label hardFail(this);
652 sub32(countRegister, index);
653 state.jumpToBacktrack(jump(), this);
654
655 Label backtrackBegin(this);
656 loadFromFrame(term.frameLocation, countRegister);
657
658 atEndOfInput().linkTo(hardFail, this);
659 if (term.quantityCount != 0xffffffff)
660 branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail);
661 if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
662 readCharacter(state.inputOffset(), character);
663 or32(Imm32(32), character);
664 branch32(NotEqual, character, Imm32(Unicode::toLower(ch))).linkTo(hardFail, this);
665 } else {
666 ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
667 jumpIfCharNotEquals(ch, state.inputOffset()).linkTo(hardFail, this);
668 }
669
670 add32(Imm32(1), countRegister);
671 add32(Imm32(1), index);
672
673 firstTimeDoNothing.link(this);
674 storeToFrame(countRegister, term.frameLocation);
675
676 state.setBacktrackGenerated(backtrackBegin);
677 }
678
679 void generateCharacterClassSingle(TermGenerationState& state)
680 {
681 const RegisterID character = regT0;
682 PatternTerm& term = state.term();
683
684 JumpList matchDest;
685 readCharacter(state.inputOffset(), character);
686 matchCharacterClass(character, matchDest, term.characterClass);
687
688 if (term.invertOrCapture)
689 state.jumpToBacktrack(matchDest, this);
690 else {
691 state.jumpToBacktrack(jump(), this);
692 matchDest.link(this);
693 }
694 }
695
696 void generateCharacterClassFixed(TermGenerationState& state)
697 {
698 const RegisterID character = regT0;
699 const RegisterID countRegister = regT1;
700 PatternTerm& term = state.term();
701
702 move(index, countRegister);
703 sub32(Imm32(term.quantityCount), countRegister);
704
705 Label loop(this);
706 JumpList matchDest;
707 load16(BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), character);
708 matchCharacterClass(character, matchDest, term.characterClass);
709
710 if (term.invertOrCapture)
711 state.jumpToBacktrack(matchDest, this);
712 else {
713 state.jumpToBacktrack(jump(), this);
714 matchDest.link(this);
715 }
716
717 add32(Imm32(1), countRegister);
718 branch32(NotEqual, countRegister, index).linkTo(loop, this);
719 }
720
721 void generateCharacterClassGreedy(TermGenerationState& state)
722 {
723 const RegisterID character = regT0;
724 const RegisterID countRegister = regT1;
725 PatternTerm& term = state.term();
726
727 move(Imm32(0), countRegister);
728
729 JumpList failures;
730 Label loop(this);
731 failures.append(atEndOfInput());
732
733 if (term.invertOrCapture) {
734 readCharacter(state.inputOffset(), character);
735 matchCharacterClass(character, failures, term.characterClass);
736 } else {
737 JumpList matchDest;
738 readCharacter(state.inputOffset(), character);
739 matchCharacterClass(character, matchDest, term.characterClass);
740 failures.append(jump());
741 matchDest.link(this);
742 }
743
744 add32(Imm32(1), countRegister);
745 add32(Imm32(1), index);
746 if (term.quantityCount != 0xffffffff)
747 branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this);
748 else
749 jump(loop);
750
751 failures.append(jump());
752
753 Label backtrackBegin(this);
754 loadFromFrame(term.frameLocation, countRegister);
755 state.jumpToBacktrack(branchTest32(Zero, countRegister), this);
756 sub32(Imm32(1), countRegister);
757 sub32(Imm32(1), index);
758
759 failures.link(this);
760
761 storeToFrame(countRegister, term.frameLocation);
762
763 state.setBacktrackGenerated(backtrackBegin);
764 }
765
766 void generateCharacterClassNonGreedy(TermGenerationState& state)
767 {
768 const RegisterID character = regT0;
769 const RegisterID countRegister = regT1;
770 PatternTerm& term = state.term();
771
772 move(Imm32(0), countRegister);
773
774 Jump firstTimeDoNothing = jump();
775
776 Label hardFail(this);
777 sub32(countRegister, index);
778 state.jumpToBacktrack(jump(), this);
779
780 Label backtrackBegin(this);
781 loadFromFrame(term.frameLocation, countRegister);
782
783 atEndOfInput().linkTo(hardFail, this);
784 branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail);
785
786 JumpList matchDest;
787 readCharacter(state.inputOffset(), character);
788 matchCharacterClass(character, matchDest, term.characterClass);
789
790 if (term.invertOrCapture)
791 matchDest.linkTo(hardFail, this);
792 else {
793 jump(hardFail);
794 matchDest.link(this);
795 }
796
797 add32(Imm32(1), countRegister);
798 add32(Imm32(1), index);
799
800 firstTimeDoNothing.link(this);
801 storeToFrame(countRegister, term.frameLocation);
802
803 state.setBacktrackGenerated(backtrackBegin);
804 }
805
806 void generateParenthesesDisjunction(PatternTerm& parenthesesTerm, TermGenerationState& state, unsigned alternativeFrameLocation)
807 {
808 ASSERT((parenthesesTerm.type == PatternTerm::TypeParenthesesSubpattern) || (parenthesesTerm.type == PatternTerm::TypeParentheticalAssertion));
809 ASSERT(parenthesesTerm.quantityCount == 1);
810
811 PatternDisjunction* disjunction = parenthesesTerm.parentheses.disjunction;
812 unsigned preCheckedCount = ((parenthesesTerm.quantityType == QuantifierFixedCount) && (parenthesesTerm.type != PatternTerm::TypeParentheticalAssertion)) ? disjunction->m_minimumSize : 0;
813
814 if (disjunction->m_alternatives.size() == 1) {
815 state.resetAlternative();
816 ASSERT(state.alternativeValid());
817 PatternAlternative* alternative = state.alternative();
818 optimizeAlternative(alternative);
819
820 int countToCheck = alternative->m_minimumSize - preCheckedCount;
821 if (countToCheck) {
822 ASSERT((parenthesesTerm.type == PatternTerm::TypeParentheticalAssertion) || (parenthesesTerm.quantityType != QuantifierFixedCount));
823
824 // FIXME: This is quite horrible. The call to 'plantJumpToBacktrackIfExists'
825 // will be forced to always trampoline into here, just to decrement the index.
826 // Ick.
827 Jump skip = jump();
828
829 Label backtrackBegin(this);
830 sub32(Imm32(countToCheck), index);
831 state.addBacktrackJump(jump());
832
833 skip.link(this);
834
835 state.setBacktrackGenerated(backtrackBegin);
836
837 state.jumpToBacktrack(jumpIfNoAvailableInput(countToCheck), this);
838 state.checkedTotal += countToCheck;
839 }
840
841 for (state.resetTerm(); state.termValid(); state.nextTerm())
842 generateTerm(state);
843
844 state.checkedTotal -= countToCheck;
845 } else {
846 JumpList successes;
847
848 for (state.resetAlternative(); state.alternativeValid(); state.nextAlternative()) {
849
850 PatternAlternative* alternative = state.alternative();
851 optimizeAlternative(alternative);
852
853 ASSERT(alternative->m_minimumSize >= preCheckedCount);
854 int countToCheck = alternative->m_minimumSize - preCheckedCount;
855 if (countToCheck) {
856 state.addBacktrackJump(jumpIfNoAvailableInput(countToCheck));
857 state.checkedTotal += countToCheck;
858 }
859
860 for (state.resetTerm(); state.termValid(); state.nextTerm())
861 generateTerm(state);
862
863 // Matched an alternative.
864 DataLabelPtr dataLabel = storeToFrameWithPatch(alternativeFrameLocation);
865 successes.append(jump());
866
867 // Alternative did not match.
868 Label backtrackLocation(this);
869
870 // Can we backtrack the alternative? - if so, do so. If not, just fall through to the next one.
871 state.plantJumpToBacktrackIfExists(this);
872
873 state.linkAlternativeBacktracks(this);
874
875 if (countToCheck) {
876 sub32(Imm32(countToCheck), index);
877 state.checkedTotal -= countToCheck;
878 }
879
880 m_backtrackRecords.append(AlternativeBacktrackRecord(dataLabel, backtrackLocation));
881 }
882 // We fall through to here when the last alternative fails.
883 // Add a backtrack out of here for the parenthese handling code to link up.
884 state.addBacktrackJump(jump());
885
886 // Generate a trampoline for the parens code to backtrack to, to retry the
887 // next alternative.
888 state.setBacktrackGenerated(label());
889 loadFromFrameAndJump(alternativeFrameLocation);
890
891 // FIXME: both of the above hooks are a little inefficient, in that you
892 // may end up trampolining here, just to trampoline back out to the
893 // parentheses code, or vice versa. We can probably eliminate a jump
894 // by restructuring, but coding this way for now for simplicity during
895 // development.
896
897 successes.link(this);
898 }
899 }
900
901 void generateParenthesesSingle(TermGenerationState& state)
902 {
903 const RegisterID indexTemporary = regT0;
904 PatternTerm& term = state.term();
905 PatternDisjunction* disjunction = term.parentheses.disjunction;
906 ASSERT(term.quantityCount == 1);
907
908 unsigned preCheckedCount = ((term.quantityCount == 1) && (term.quantityType == QuantifierFixedCount)) ? disjunction->m_minimumSize : 0;
909
910 unsigned parenthesesFrameLocation = term.frameLocation;
911 unsigned alternativeFrameLocation = parenthesesFrameLocation;
912 if (term.quantityType != QuantifierFixedCount)
913 alternativeFrameLocation += RegexStackSpaceForBackTrackInfoParenthesesOnce;
914
915 // optimized case - no capture & no quantifier can be handled in a light-weight manner.
916 if (!term.invertOrCapture && (term.quantityType == QuantifierFixedCount)) {
917 TermGenerationState parenthesesState(disjunction, state.checkedTotal);
918 generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
919 // this expects that any backtracks back out of the parentheses will be in the
920 // parenthesesState's backTrackJumps vector, and that if they need backtracking
921 // they will have set an entry point on the parenthesesState's backtrackLabel.
922 state.propagateBacktrackingFrom(parenthesesState, this);
923 } else {
924 Jump nonGreedySkipParentheses;
925 Label nonGreedyTryParentheses;
926 if (term.quantityType == QuantifierGreedy)
927 storeToFrame(Imm32(1), parenthesesFrameLocation);
928 else if (term.quantityType == QuantifierNonGreedy) {
929 storeToFrame(Imm32(0), parenthesesFrameLocation);
930 nonGreedySkipParentheses = jump();
931 nonGreedyTryParentheses = label();
932 storeToFrame(Imm32(1), parenthesesFrameLocation);
933 }
934
935 // store the match start index
936 if (term.invertOrCapture) {
937 int inputOffset = state.inputOffset() - preCheckedCount;
938 if (inputOffset) {
939 move(index, indexTemporary);
940 add32(Imm32(inputOffset), indexTemporary);
941 store32(indexTemporary, Address(output, (term.parentheses.subpatternId << 1) * sizeof(int)));
942 } else
943 store32(index, Address(output, (term.parentheses.subpatternId << 1) * sizeof(int)));
944 }
945
946 // generate the body of the parentheses
947 TermGenerationState parenthesesState(disjunction, state.checkedTotal);
948 generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
949
950 // store the match end index
951 if (term.invertOrCapture) {
952 int inputOffset = state.inputOffset();
953 if (inputOffset) {
954 move(index, indexTemporary);
955 add32(Imm32(state.inputOffset()), indexTemporary);
956 store32(indexTemporary, Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int)));
957 } else
958 store32(index, Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int)));
959 }
960 Jump success = jump();
961
962 // A failure AFTER the parens jumps here
963 Label backtrackFromAfterParens(this);
964
965 if (term.quantityType == QuantifierGreedy) {
966 // If this is zero we have now tested with both with and without the parens.
967 loadFromFrame(parenthesesFrameLocation, indexTemporary);
968 state.jumpToBacktrack(branchTest32(Zero, indexTemporary), this);
969 } else if (term.quantityType == QuantifierNonGreedy) {
970 // If this is zero we have now tested with both with and without the parens.
971 loadFromFrame(parenthesesFrameLocation, indexTemporary);
972 branchTest32(Zero, indexTemporary).linkTo(nonGreedyTryParentheses, this);
973 }
974
975 parenthesesState.plantJumpToBacktrackIfExists(this);
976 // A failure WITHIN the parens jumps here
977 parenthesesState.linkAlternativeBacktracks(this);
978 if (term.invertOrCapture) {
979 store32(Imm32(-1), Address(output, (term.parentheses.subpatternId << 1) * sizeof(int)));
980 store32(Imm32(-1), Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int)));
981 }
982
983 if (term.quantityType == QuantifierGreedy)
984 storeToFrame(Imm32(0), parenthesesFrameLocation);
985 else
986 state.jumpToBacktrack(jump(), this);
987
988 state.setBacktrackGenerated(backtrackFromAfterParens);
989 if (term.quantityType == QuantifierNonGreedy)
990 nonGreedySkipParentheses.link(this);
991 success.link(this);
992 }
993 }
994
995 void generateParentheticalAssertion(TermGenerationState& state)
996 {
997 PatternTerm& term = state.term();
998 PatternDisjunction* disjunction = term.parentheses.disjunction;
999 ASSERT(term.quantityCount == 1);
1000 ASSERT(term.quantityType == QuantifierFixedCount);
1001
1002 unsigned parenthesesFrameLocation = term.frameLocation;
1003 unsigned alternativeFrameLocation = parenthesesFrameLocation + RegexStackSpaceForBackTrackInfoParentheticalAssertion;
1004
1005 int countCheckedAfterAssertion = state.checkedTotal - term.inputPosition;
1006
1007 if (term.invertOrCapture) {
1008 // Inverted case
1009 storeToFrame(index, parenthesesFrameLocation);
1010
1011 state.checkedTotal -= countCheckedAfterAssertion;
1012 if (countCheckedAfterAssertion)
1013 sub32(Imm32(countCheckedAfterAssertion), index);
1014
1015 TermGenerationState parenthesesState(disjunction, state.checkedTotal);
1016 generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
1017 // Success! - which means - Fail!
1018 loadFromFrame(parenthesesFrameLocation, index);
1019 state.jumpToBacktrack(jump(), this);
1020
1021 // And fail means success.
1022 parenthesesState.linkAlternativeBacktracks(this);
1023 loadFromFrame(parenthesesFrameLocation, index);
1024
1025 state.checkedTotal += countCheckedAfterAssertion;
1026 } else {
1027 // Normal case
1028 storeToFrame(index, parenthesesFrameLocation);
1029
1030 state.checkedTotal -= countCheckedAfterAssertion;
1031 if (countCheckedAfterAssertion)
1032 sub32(Imm32(countCheckedAfterAssertion), index);
1033
1034 TermGenerationState parenthesesState(disjunction, state.checkedTotal);
1035 generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
1036 // Success! - which means - Success!
1037 loadFromFrame(parenthesesFrameLocation, index);
1038 Jump success = jump();
1039
1040 parenthesesState.linkAlternativeBacktracks(this);
1041 loadFromFrame(parenthesesFrameLocation, index);
1042 state.jumpToBacktrack(jump(), this);
1043
1044 success.link(this);
1045
1046 state.checkedTotal += countCheckedAfterAssertion;
1047 }
1048 }
1049
1050 void generateTerm(TermGenerationState& state)
1051 {
1052 PatternTerm& term = state.term();
1053
1054 switch (term.type) {
1055 case PatternTerm::TypeAssertionBOL:
1056 generateAssertionBOL(state);
1057 break;
1058
1059 case PatternTerm::TypeAssertionEOL:
1060 generateAssertionEOL(state);
1061 break;
1062
1063 case PatternTerm::TypeAssertionWordBoundary:
1064 generateAssertionWordBoundary(state);
1065 break;
1066
1067 case PatternTerm::TypePatternCharacter:
1068 switch (term.quantityType) {
1069 case QuantifierFixedCount:
1070 if (term.quantityCount == 1) {
1071 if (state.isSinglePatternCharacterLookaheadTerm() && (state.lookaheadTerm().inputPosition == (term.inputPosition + 1))) {
1072 generatePatternCharacterPair(state);
1073 state.nextTerm();
1074 } else
1075 generatePatternCharacterSingle(state);
1076 } else
1077 generatePatternCharacterFixed(state);
1078 break;
1079 case QuantifierGreedy:
1080 generatePatternCharacterGreedy(state);
1081 break;
1082 case QuantifierNonGreedy:
1083 generatePatternCharacterNonGreedy(state);
1084 break;
1085 }
1086 break;
1087
1088 case PatternTerm::TypeCharacterClass:
1089 switch (term.quantityType) {
1090 case QuantifierFixedCount:
1091 if (term.quantityCount == 1)
1092 generateCharacterClassSingle(state);
1093 else
1094 generateCharacterClassFixed(state);
1095 break;
1096 case QuantifierGreedy:
1097 generateCharacterClassGreedy(state);
1098 break;
1099 case QuantifierNonGreedy:
1100 generateCharacterClassNonGreedy(state);
1101 break;
1102 }
1103 break;
1104
1105 case PatternTerm::TypeBackReference:
1106 ASSERT_NOT_REACHED();
1107 break;
1108
1109 case PatternTerm::TypeForwardReference:
1110 break;
1111
1112 case PatternTerm::TypeParenthesesSubpattern:
1113 ASSERT((term.quantityCount == 1) && !term.parentheses.isCopy); // must fallback to pcre before this point
1114 generateParenthesesSingle(state);
1115 break;
1116
1117 case PatternTerm::TypeParentheticalAssertion:
1118 generateParentheticalAssertion(state);
1119 break;
1120 }
1121 }
1122
1123 void generateDisjunction(PatternDisjunction* disjunction)
1124 {
1125 TermGenerationState state(disjunction, 0);
1126 state.resetAlternative();
1127
1128 // Plant a check to see if there is sufficient input available to run the first alternative.
1129 // Jumping back to the label 'firstAlternative' will get to this check, jumping to
1130 // 'firstAlternativeInputChecked' will jump directly to matching the alternative having
1131 // skipped this check.
1132
1133 Label firstAlternative(this);
1134
1135 // check availability for the next alternative
1136 int countCheckedForCurrentAlternative = 0;
1137 int countToCheckForFirstAlternative = 0;
1138 bool hasShorterAlternatives = false;
1139 JumpList notEnoughInputForPreviousAlternative;
1140
1141 if (state.alternativeValid()) {
1142 PatternAlternative* alternative = state.alternative();
1143 countToCheckForFirstAlternative = alternative->m_minimumSize;
1144 state.checkedTotal += countToCheckForFirstAlternative;
1145 if (countToCheckForFirstAlternative)
1146 notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForFirstAlternative));
1147 countCheckedForCurrentAlternative = countToCheckForFirstAlternative;
1148 }
1149
1150 Label firstAlternativeInputChecked(this);
1151
1152 while (state.alternativeValid()) {
1153 // Track whether any alternatives are shorter than the first one.
1154 hasShorterAlternatives = hasShorterAlternatives || (countCheckedForCurrentAlternative < countToCheckForFirstAlternative);
1155
1156 PatternAlternative* alternative = state.alternative();
1157 optimizeAlternative(alternative);
1158
1159 for (state.resetTerm(); state.termValid(); state.nextTerm())
1160 generateTerm(state);
1161
1162 // If we get here, the alternative matched.
1163 if (m_pattern.m_body->m_callFrameSize)
1164 addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
1165
1166 ASSERT(index != returnRegister);
1167 if (m_pattern.m_body->m_hasFixedSize) {
1168 move(index, returnRegister);
1169 if (alternative->m_minimumSize)
1170 sub32(Imm32(alternative->m_minimumSize), returnRegister);
1171 } else
1172 pop(returnRegister);
1173 store32(index, Address(output, 4));
1174 store32(returnRegister, output);
1175
1176 generateReturn();
1177
1178 state.nextAlternative();
1179
1180 // if there are any more alternatives, plant the check for input before looping.
1181 if (state.alternativeValid()) {
1182 PatternAlternative* nextAlternative = state.alternative();
1183 int countToCheckForNextAlternative = nextAlternative->m_minimumSize;
1184
1185 if (countCheckedForCurrentAlternative > countToCheckForNextAlternative) { // CASE 1: current alternative was longer than the next one.
1186 // If we get here, there the last input checked failed.
1187 notEnoughInputForPreviousAlternative.link(this);
1188
1189 // Check if sufficent input available to run the next alternative
1190 notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForNextAlternative - countCheckedForCurrentAlternative));
1191 // We are now in the correct state to enter the next alternative; this add is only required
1192 // to mirror and revert operation of the sub32, just below.
1193 add32(Imm32(countCheckedForCurrentAlternative - countToCheckForNextAlternative), index);
1194
1195 // If we get here, there the last input checked passed.
1196 state.linkAlternativeBacktracks(this);
1197 // No need to check if we can run the next alternative, since it is shorter -
1198 // just update index.
1199 sub32(Imm32(countCheckedForCurrentAlternative - countToCheckForNextAlternative), index);
1200 } else if (countCheckedForCurrentAlternative < countToCheckForNextAlternative) { // CASE 2: next alternative is longer than the current one.
1201 // If we get here, there the last input checked failed.
1202 // If there is insufficient input to run the current alternative, and the next alternative is longer,
1203 // then there is definitely not enough input to run it - don't even check. Just adjust index, as if
1204 // we had checked.
1205 notEnoughInputForPreviousAlternative.link(this);
1206 add32(Imm32(countToCheckForNextAlternative - countCheckedForCurrentAlternative), index);
1207 notEnoughInputForPreviousAlternative.append(jump());
1208
1209 // The next alternative is longer than the current one; check the difference.
1210 state.linkAlternativeBacktracks(this);
1211 notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForNextAlternative - countCheckedForCurrentAlternative));
1212 } else { // CASE 3: Both alternatives are the same length.
1213 ASSERT(countCheckedForCurrentAlternative == countToCheckForNextAlternative);
1214
1215 // If the next alterative is the same length as this one, then no need to check the input -
1216 // if there was sufficent input to run the current alternative then there is sufficient
1217 // input to run the next one; if not, there isn't.
1218 state.linkAlternativeBacktracks(this);
1219 }
1220
1221 state.checkedTotal -= countCheckedForCurrentAlternative;
1222 countCheckedForCurrentAlternative = countToCheckForNextAlternative;
1223 state.checkedTotal += countCheckedForCurrentAlternative;
1224 }
1225 }
1226
1227 // If we get here, all Alternatives failed...
1228
1229 state.checkedTotal -= countCheckedForCurrentAlternative;
1230
1231 // How much more input need there be to be able to retry from the first alternative?
1232 // examples:
1233 // /yarr_jit/ or /wrec|pcre/
1234 // In these examples we need check for one more input before looping.
1235 // /yarr_jit|pcre/
1236 // In this case we need check for 5 more input to loop (+4 to allow for the first alterative
1237 // being four longer than the last alternative checked, and another +1 to effectively move
1238 // the start position along by one).
1239 // /yarr|rules/ or /wrec|notsomuch/
1240 // In these examples, provided that there was sufficient input to have just been matching for
1241 // the second alternative we can loop without checking for available input (since the second
1242 // alternative is longer than the first). In the latter example we need to decrement index
1243 // (by 4) so the start position is only progressed by 1 from the last iteration.
1244 int incrementForNextIter = (countToCheckForFirstAlternative - countCheckedForCurrentAlternative) + 1;
1245
1246 // First, deal with the cases where there was sufficient input to try the last alternative.
1247 if (incrementForNextIter > 0) // We need to check for more input anyway, fall through to the checking below.
1248 state.linkAlternativeBacktracks(this);
1249 else if (m_pattern.m_body->m_hasFixedSize && !incrementForNextIter) // No need to update anything, link these backtracks straight to the to pof the loop!
1250 state.linkAlternativeBacktracksTo(firstAlternativeInputChecked, this);
1251 else { // no need to check the input, but we do have some bookkeeping to do first.
1252 state.linkAlternativeBacktracks(this);
1253
1254 // Where necessary update our preserved start position.
1255 if (!m_pattern.m_body->m_hasFixedSize) {
1256 move(index, regT0);
1257 sub32(Imm32(countCheckedForCurrentAlternative - 1), regT0);
1258 poke(regT0, m_pattern.m_body->m_callFrameSize);
1259 }
1260
1261 // Update index if necessary, and loop (without checking).
1262 if (incrementForNextIter)
1263 add32(Imm32(incrementForNextIter), index);
1264 jump().linkTo(firstAlternativeInputChecked, this);
1265 }
1266
1267 notEnoughInputForPreviousAlternative.link(this);
1268 // Update our idea of the start position, if we're tracking this.
1269 if (!m_pattern.m_body->m_hasFixedSize) {
1270 if (countCheckedForCurrentAlternative - 1) {
1271 move(index, regT0);
1272 sub32(Imm32(countCheckedForCurrentAlternative - 1), regT0);
1273 poke(regT0, m_pattern.m_body->m_callFrameSize);
1274 } else
1275 poke(index, m_pattern.m_body->m_callFrameSize);
1276 }
1277 // Check if there is sufficent input to run the first alternative again.
1278 jumpIfAvailableInput(incrementForNextIter).linkTo(firstAlternativeInputChecked, this);
1279 // No - insufficent input to run the first alteranative, are there any other alternatives we
1280 // might need to check? If so, the last check will have left the index incremented by
1281 // (countToCheckForFirstAlternative + 1), so we need test whether countToCheckForFirstAlternative
1282 // LESS input is available, to have the effect of just progressing the start position by 1
1283 // from the last iteration. If this check passes we can just jump up to the check associated
1284 // with the first alternative in the loop. This is a bit sad, since we'll end up trying the
1285 // first alternative again, and this check will fail (otherwise the check planted just above
1286 // here would have passed). This is a bit sad, however it saves trying to do something more
1287 // complex here in compilation, and in the common case we should end up coallescing the checks.
1288 //
1289 // FIXME: a nice improvement here may be to stop trying to match sooner, based on the least
1290 // of the minimum-alternative-lengths. E.g. if I have two alternatives of length 200 and 150,
1291 // and a string of length 100, we'll end up looping index from 0 to 100, checking whether there
1292 // is sufficient input to run either alternative (constantly failing). If there had been only
1293 // one alternative, or if the shorter alternative had come first, we would have terminated
1294 // immediately. :-/
1295 if (hasShorterAlternatives)
1296 jumpIfAvailableInput(-countToCheckForFirstAlternative).linkTo(firstAlternative, this);
1297 // index will now be a bit garbled (depending on whether 'hasShorterAlternatives' is true,
1298 // it has either been incremented by 1 or by (countToCheckForFirstAlternative + 1) ...
1299 // but since we're about to return a failure this doesn't really matter!)
1300
1301 unsigned frameSize = m_pattern.m_body->m_callFrameSize;
1302 if (!m_pattern.m_body->m_hasFixedSize)
1303 ++frameSize;
1304 if (frameSize)
1305 addPtr(Imm32(frameSize * sizeof(void*)), stackPointerRegister);
1306
1307 move(Imm32(-1), returnRegister);
1308
1309 generateReturn();
1310 }
1311
1312 void generateEnter()
1313 {
1314 #if CPU(X86_64)
1315 push(X86Registers::ebp);
1316 move(stackPointerRegister, X86Registers::ebp);
1317 push(X86Registers::ebx);
1318 #elif CPU(X86)
1319 push(X86Registers::ebp);
1320 move(stackPointerRegister, X86Registers::ebp);
1321 // TODO: do we need spill registers to fill the output pointer if there are no sub captures?
1322 push(X86Registers::ebx);
1323 push(X86Registers::edi);
1324 push(X86Registers::esi);
1325 // load output into edi (2 = saved ebp + return address).
1326 #if COMPILER(MSVC)
1327 loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), input);
1328 loadPtr(Address(X86Registers::ebp, 3 * sizeof(void*)), index);
1329 loadPtr(Address(X86Registers::ebp, 4 * sizeof(void*)), length);
1330 loadPtr(Address(X86Registers::ebp, 5 * sizeof(void*)), output);
1331 #else
1332 loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), output);
1333 #endif
1334 #elif CPU(ARM)
1335 push(ARMRegisters::r4);
1336 push(ARMRegisters::r5);
1337 push(ARMRegisters::r6);
1338 move(ARMRegisters::r3, output);
1339 #elif CPU(MIPS)
1340 // Do nothing.
1341 #endif
1342 }
1343
1344 void generateReturn()
1345 {
1346 #if CPU(X86_64)
1347 pop(X86Registers::ebx);
1348 pop(X86Registers::ebp);
1349 #elif CPU(X86)
1350 pop(X86Registers::esi);
1351 pop(X86Registers::edi);
1352 pop(X86Registers::ebx);
1353 pop(X86Registers::ebp);
1354 #elif CPU(ARM)
1355 pop(ARMRegisters::r6);
1356 pop(ARMRegisters::r5);
1357 pop(ARMRegisters::r4);
1358 #elif CPU(MIPS)
1359 // Do nothing
1360 #endif
1361 ret();
1362 }
1363
1364 public:
1365 RegexGenerator(RegexPattern& pattern)
1366 : m_pattern(pattern)
1367 {
1368 }
1369
1370 void generate()
1371 {
1372 generateEnter();
1373
1374 // TODO: do I really want this on the stack?
1375 if (!m_pattern.m_body->m_hasFixedSize)
1376 push(index);
1377
1378 if (m_pattern.m_body->m_callFrameSize)
1379 subPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
1380
1381 generateDisjunction(m_pattern.m_body);
1382 }
1383
1384 void compile(JSGlobalData* globalData, RegexCodeBlock& jitObject)
1385 {
1386 generate();
1387
1388 LinkBuffer patchBuffer(this, globalData->regexAllocator.poolForSize(size()), 0);
1389
1390 for (unsigned i = 0; i < m_backtrackRecords.size(); ++i)
1391 patchBuffer.patch(m_backtrackRecords[i].dataLabel, patchBuffer.locationOf(m_backtrackRecords[i].backtrackLocation));
1392
1393 jitObject.set(patchBuffer.finalizeCode());
1394 }
1395
1396 private:
1397 RegexPattern& m_pattern;
1398 Vector<AlternativeBacktrackRecord> m_backtrackRecords;
1399 };
1400
1401 void jitCompileRegex(JSGlobalData* globalData, RegexCodeBlock& jitObject, const UString& patternString, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline)
1402 {
1403 RegexPattern pattern(ignoreCase, multiline);
1404 if ((error = compileRegex(patternString, pattern)))
1405 return;
1406 numSubpatterns = pattern.m_numSubpatterns;
1407
1408 if (!pattern.m_shouldFallBack && globalData->canUseJIT() && RegExpCache::isCacheable(patternString)) {
1409 RegexGenerator generator(pattern);
1410 generator.compile(globalData, jitObject);
1411 return;
1412 }
1413
1414 JSRegExpIgnoreCaseOption ignoreCaseOption = ignoreCase ? JSRegExpIgnoreCase : JSRegExpDoNotIgnoreCase;
1415 JSRegExpMultilineOption multilineOption = multiline ? JSRegExpMultiline : JSRegExpSingleLine;
1416 jitObject.setFallback(jsRegExpCompile(reinterpret_cast<const UChar*>(patternString.data()), patternString.size(), ignoreCaseOption, multilineOption, &numSubpatterns, &error));
1417 }
1418
1419 }}
1420
1421 #endif
1422
1423
1424
1425
1426