]> git.saurik.com Git - apple/javascriptcore.git/blob - yarr/RegexJIT.cpp
JavaScriptCore-621.1.tar.gz
[apple/javascriptcore.git] / yarr / RegexJIT.cpp
1 /*
2 * Copyright (C) 2009 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26 #include "config.h"
27 #include "RegexJIT.h"
28
29 #include "ASCIICType.h"
30 #include "JSGlobalData.h"
31 #include "LinkBuffer.h"
32 #include "MacroAssembler.h"
33 #include "RegexCompiler.h"
34
35 #include "pcre.h" // temporary, remove when fallback is removed.
36
37 #if ENABLE(YARR_JIT)
38
39 using namespace WTF;
40
41 namespace JSC { namespace Yarr {
42
43 class RegexGenerator : private MacroAssembler {
44 friend void jitCompileRegex(JSGlobalData* globalData, RegexCodeBlock& jitObject, const UString& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline);
45
46 #if CPU(ARM)
47 static const RegisterID input = ARMRegisters::r0;
48 static const RegisterID index = ARMRegisters::r1;
49 static const RegisterID length = ARMRegisters::r2;
50 static const RegisterID output = ARMRegisters::r4;
51
52 static const RegisterID regT0 = ARMRegisters::r5;
53 static const RegisterID regT1 = ARMRegisters::r6;
54
55 static const RegisterID returnRegister = ARMRegisters::r0;
56 #elif CPU(MIPS)
57 static const RegisterID input = MIPSRegisters::a0;
58 static const RegisterID index = MIPSRegisters::a1;
59 static const RegisterID length = MIPSRegisters::a2;
60 static const RegisterID output = MIPSRegisters::a3;
61
62 static const RegisterID regT0 = MIPSRegisters::t4;
63 static const RegisterID regT1 = MIPSRegisters::t5;
64
65 static const RegisterID returnRegister = MIPSRegisters::v0;
66 #elif CPU(X86)
67 static const RegisterID input = X86Registers::eax;
68 static const RegisterID index = X86Registers::edx;
69 static const RegisterID length = X86Registers::ecx;
70 static const RegisterID output = X86Registers::edi;
71
72 static const RegisterID regT0 = X86Registers::ebx;
73 static const RegisterID regT1 = X86Registers::esi;
74
75 static const RegisterID returnRegister = X86Registers::eax;
76 #elif CPU(X86_64)
77 static const RegisterID input = X86Registers::edi;
78 static const RegisterID index = X86Registers::esi;
79 static const RegisterID length = X86Registers::edx;
80 static const RegisterID output = X86Registers::ecx;
81
82 static const RegisterID regT0 = X86Registers::eax;
83 static const RegisterID regT1 = X86Registers::ebx;
84
85 static const RegisterID returnRegister = X86Registers::eax;
86 #endif
87
88 void optimizeAlternative(PatternAlternative* alternative)
89 {
90 if (!alternative->m_terms.size())
91 return;
92
93 for (unsigned i = 0; i < alternative->m_terms.size() - 1; ++i) {
94 PatternTerm& term = alternative->m_terms[i];
95 PatternTerm& nextTerm = alternative->m_terms[i + 1];
96
97 if ((term.type == PatternTerm::TypeCharacterClass)
98 && (term.quantityType == QuantifierFixedCount)
99 && (nextTerm.type == PatternTerm::TypePatternCharacter)
100 && (nextTerm.quantityType == QuantifierFixedCount)) {
101 PatternTerm termCopy = term;
102 alternative->m_terms[i] = nextTerm;
103 alternative->m_terms[i + 1] = termCopy;
104 }
105 }
106 }
107
108 void matchCharacterClassRange(RegisterID character, JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar* matches, unsigned matchCount)
109 {
110 do {
111 // pick which range we're going to generate
112 int which = count >> 1;
113 char lo = ranges[which].begin;
114 char hi = ranges[which].end;
115
116 // check if there are any ranges or matches below lo. If not, just jl to failure -
117 // if there is anything else to check, check that first, if it falls through jmp to failure.
118 if ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
119 Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));
120
121 // generate code for all ranges before this one
122 if (which)
123 matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
124
125 while ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
126 matchDest.append(branch32(Equal, character, Imm32((unsigned short)matches[*matchIndex])));
127 ++*matchIndex;
128 }
129 failures.append(jump());
130
131 loOrAbove.link(this);
132 } else if (which) {
133 Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));
134
135 matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
136 failures.append(jump());
137
138 loOrAbove.link(this);
139 } else
140 failures.append(branch32(LessThan, character, Imm32((unsigned short)lo)));
141
142 while ((*matchIndex < matchCount) && (matches[*matchIndex] <= hi))
143 ++*matchIndex;
144
145 matchDest.append(branch32(LessThanOrEqual, character, Imm32((unsigned short)hi)));
146 // fall through to here, the value is above hi.
147
148 // shuffle along & loop around if there are any more matches to handle.
149 unsigned next = which + 1;
150 ranges += next;
151 count -= next;
152 } while (count);
153 }
154
155 void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass)
156 {
157 if (charClass->m_table) {
158 ExtendedAddress tableEntry(character, reinterpret_cast<intptr_t>(charClass->m_table->m_table));
159 matchDest.append(branchTest8(charClass->m_table->m_inverted ? Zero : NonZero, tableEntry));
160 return;
161 }
162 Jump unicodeFail;
163 if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) {
164 Jump isAscii = branch32(LessThanOrEqual, character, Imm32(0x7f));
165
166 if (charClass->m_matchesUnicode.size()) {
167 for (unsigned i = 0; i < charClass->m_matchesUnicode.size(); ++i) {
168 UChar ch = charClass->m_matchesUnicode[i];
169 matchDest.append(branch32(Equal, character, Imm32(ch)));
170 }
171 }
172
173 if (charClass->m_rangesUnicode.size()) {
174 for (unsigned i = 0; i < charClass->m_rangesUnicode.size(); ++i) {
175 UChar lo = charClass->m_rangesUnicode[i].begin;
176 UChar hi = charClass->m_rangesUnicode[i].end;
177
178 Jump below = branch32(LessThan, character, Imm32(lo));
179 matchDest.append(branch32(LessThanOrEqual, character, Imm32(hi)));
180 below.link(this);
181 }
182 }
183
184 unicodeFail = jump();
185 isAscii.link(this);
186 }
187
188 if (charClass->m_ranges.size()) {
189 unsigned matchIndex = 0;
190 JumpList failures;
191 matchCharacterClassRange(character, failures, matchDest, charClass->m_ranges.begin(), charClass->m_ranges.size(), &matchIndex, charClass->m_matches.begin(), charClass->m_matches.size());
192 while (matchIndex < charClass->m_matches.size())
193 matchDest.append(branch32(Equal, character, Imm32((unsigned short)charClass->m_matches[matchIndex++])));
194
195 failures.link(this);
196 } else if (charClass->m_matches.size()) {
197 // optimization: gather 'a','A' etc back together, can mask & test once.
198 Vector<char> matchesAZaz;
199
200 for (unsigned i = 0; i < charClass->m_matches.size(); ++i) {
201 char ch = charClass->m_matches[i];
202 if (m_pattern.m_ignoreCase) {
203 if (isASCIILower(ch)) {
204 matchesAZaz.append(ch);
205 continue;
206 }
207 if (isASCIIUpper(ch))
208 continue;
209 }
210 matchDest.append(branch32(Equal, character, Imm32((unsigned short)ch)));
211 }
212
213 if (unsigned countAZaz = matchesAZaz.size()) {
214 or32(Imm32(32), character);
215 for (unsigned i = 0; i < countAZaz; ++i)
216 matchDest.append(branch32(Equal, character, Imm32(matchesAZaz[i])));
217 }
218 }
219
220 if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size())
221 unicodeFail.link(this);
222 }
223
224 // Jumps if input not available; will have (incorrectly) incremented already!
225 Jump jumpIfNoAvailableInput(unsigned countToCheck)
226 {
227 add32(Imm32(countToCheck), index);
228 return branch32(Above, index, length);
229 }
230
231 Jump jumpIfAvailableInput(unsigned countToCheck)
232 {
233 add32(Imm32(countToCheck), index);
234 return branch32(BelowOrEqual, index, length);
235 }
236
237 Jump checkInput()
238 {
239 return branch32(BelowOrEqual, index, length);
240 }
241
242 Jump atEndOfInput()
243 {
244 return branch32(Equal, index, length);
245 }
246
247 Jump notAtEndOfInput()
248 {
249 return branch32(NotEqual, index, length);
250 }
251
252 Jump jumpIfCharEquals(UChar ch, int inputPosition)
253 {
254 return branch16(Equal, BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), Imm32(ch));
255 }
256
257 Jump jumpIfCharNotEquals(UChar ch, int inputPosition)
258 {
259 return branch16(NotEqual, BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), Imm32(ch));
260 }
261
262 void readCharacter(int inputPosition, RegisterID reg)
263 {
264 load16(BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), reg);
265 }
266
267 void storeToFrame(RegisterID reg, unsigned frameLocation)
268 {
269 poke(reg, frameLocation);
270 }
271
272 void storeToFrame(Imm32 imm, unsigned frameLocation)
273 {
274 poke(imm, frameLocation);
275 }
276
277 DataLabelPtr storeToFrameWithPatch(unsigned frameLocation)
278 {
279 return storePtrWithPatch(ImmPtr(0), Address(stackPointerRegister, frameLocation * sizeof(void*)));
280 }
281
282 void loadFromFrame(unsigned frameLocation, RegisterID reg)
283 {
284 peek(reg, frameLocation);
285 }
286
287 void loadFromFrameAndJump(unsigned frameLocation)
288 {
289 jump(Address(stackPointerRegister, frameLocation * sizeof(void*)));
290 }
291
292 struct AlternativeBacktrackRecord {
293 DataLabelPtr dataLabel;
294 Label backtrackLocation;
295
296 AlternativeBacktrackRecord(DataLabelPtr dataLabel, Label backtrackLocation)
297 : dataLabel(dataLabel)
298 , backtrackLocation(backtrackLocation)
299 {
300 }
301 };
302
303 struct TermGenerationState {
304 TermGenerationState(PatternDisjunction* disjunction, unsigned checkedTotal)
305 : disjunction(disjunction)
306 , checkedTotal(checkedTotal)
307 {
308 }
309
310 void resetAlternative()
311 {
312 isBackTrackGenerated = false;
313 alt = 0;
314 }
315 bool alternativeValid()
316 {
317 return alt < disjunction->m_alternatives.size();
318 }
319 void nextAlternative()
320 {
321 ++alt;
322 }
323 PatternAlternative* alternative()
324 {
325 return disjunction->m_alternatives[alt];
326 }
327
328 void resetTerm()
329 {
330 ASSERT(alternativeValid());
331 t = 0;
332 }
333 bool termValid()
334 {
335 ASSERT(alternativeValid());
336 return t < alternative()->m_terms.size();
337 }
338 void nextTerm()
339 {
340 ASSERT(alternativeValid());
341 ++t;
342 }
343 PatternTerm& term()
344 {
345 ASSERT(alternativeValid());
346 return alternative()->m_terms[t];
347 }
348
349 PatternTerm& lookaheadTerm()
350 {
351 ASSERT(alternativeValid());
352 ASSERT((t + 1) < alternative()->m_terms.size());
353 return alternative()->m_terms[t + 1];
354 }
355 bool isSinglePatternCharacterLookaheadTerm()
356 {
357 ASSERT(alternativeValid());
358 return ((t + 1) < alternative()->m_terms.size())
359 && (lookaheadTerm().type == PatternTerm::TypePatternCharacter)
360 && (lookaheadTerm().quantityType == QuantifierFixedCount)
361 && (lookaheadTerm().quantityCount == 1);
362 }
363
364 int inputOffset()
365 {
366 return term().inputPosition - checkedTotal;
367 }
368
369 void jumpToBacktrack(Jump jump, MacroAssembler* masm)
370 {
371 if (isBackTrackGenerated)
372 jump.linkTo(backtrackLabel, masm);
373 else
374 backTrackJumps.append(jump);
375 }
376 void jumpToBacktrack(JumpList& jumps, MacroAssembler* masm)
377 {
378 if (isBackTrackGenerated)
379 jumps.linkTo(backtrackLabel, masm);
380 else
381 backTrackJumps.append(jumps);
382 }
383 bool plantJumpToBacktrackIfExists(MacroAssembler* masm)
384 {
385 if (isBackTrackGenerated) {
386 masm->jump(backtrackLabel);
387 return true;
388 }
389 return false;
390 }
391 void addBacktrackJump(Jump jump)
392 {
393 backTrackJumps.append(jump);
394 }
395 void setBacktrackGenerated(Label label)
396 {
397 isBackTrackGenerated = true;
398 backtrackLabel = label;
399 }
400 void linkAlternativeBacktracks(MacroAssembler* masm)
401 {
402 isBackTrackGenerated = false;
403 backTrackJumps.link(masm);
404 }
405 void linkAlternativeBacktracksTo(Label label, MacroAssembler* masm)
406 {
407 isBackTrackGenerated = false;
408 backTrackJumps.linkTo(label, masm);
409 }
410 void propagateBacktrackingFrom(TermGenerationState& nestedParenthesesState, MacroAssembler* masm)
411 {
412 jumpToBacktrack(nestedParenthesesState.backTrackJumps, masm);
413 if (nestedParenthesesState.isBackTrackGenerated)
414 setBacktrackGenerated(nestedParenthesesState.backtrackLabel);
415 }
416
417 PatternDisjunction* disjunction;
418 int checkedTotal;
419 private:
420 unsigned alt;
421 unsigned t;
422 JumpList backTrackJumps;
423 Label backtrackLabel;
424 bool isBackTrackGenerated;
425 };
426
427 void generateAssertionBOL(TermGenerationState& state)
428 {
429 PatternTerm& term = state.term();
430
431 if (m_pattern.m_multiline) {
432 const RegisterID character = regT0;
433
434 JumpList matchDest;
435 if (!term.inputPosition)
436 matchDest.append(branch32(Equal, index, Imm32(state.checkedTotal)));
437
438 readCharacter(state.inputOffset() - 1, character);
439 matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
440 state.jumpToBacktrack(jump(), this);
441
442 matchDest.link(this);
443 } else {
444 // Erk, really should poison out these alternatives early. :-/
445 if (term.inputPosition)
446 state.jumpToBacktrack(jump(), this);
447 else
448 state.jumpToBacktrack(branch32(NotEqual, index, Imm32(state.checkedTotal)), this);
449 }
450 }
451
452 void generateAssertionEOL(TermGenerationState& state)
453 {
454 PatternTerm& term = state.term();
455
456 if (m_pattern.m_multiline) {
457 const RegisterID character = regT0;
458
459 JumpList matchDest;
460 if (term.inputPosition == state.checkedTotal)
461 matchDest.append(atEndOfInput());
462
463 readCharacter(state.inputOffset(), character);
464 matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
465 state.jumpToBacktrack(jump(), this);
466
467 matchDest.link(this);
468 } else {
469 if (term.inputPosition == state.checkedTotal)
470 state.jumpToBacktrack(notAtEndOfInput(), this);
471 // Erk, really should poison out these alternatives early. :-/
472 else
473 state.jumpToBacktrack(jump(), this);
474 }
475 }
476
477 // Also falls though on nextIsNotWordChar.
478 void matchAssertionWordchar(TermGenerationState& state, JumpList& nextIsWordChar, JumpList& nextIsNotWordChar)
479 {
480 const RegisterID character = regT0;
481 PatternTerm& term = state.term();
482
483 if (term.inputPosition == state.checkedTotal)
484 nextIsNotWordChar.append(atEndOfInput());
485
486 readCharacter(state.inputOffset(), character);
487 matchCharacterClass(character, nextIsWordChar, m_pattern.wordcharCharacterClass());
488 }
489
490 void generateAssertionWordBoundary(TermGenerationState& state)
491 {
492 const RegisterID character = regT0;
493 PatternTerm& term = state.term();
494
495 Jump atBegin;
496 JumpList matchDest;
497 if (!term.inputPosition)
498 atBegin = branch32(Equal, index, Imm32(state.checkedTotal));
499 readCharacter(state.inputOffset() - 1, character);
500 matchCharacterClass(character, matchDest, m_pattern.wordcharCharacterClass());
501 if (!term.inputPosition)
502 atBegin.link(this);
503
504 // We fall through to here if the last character was not a wordchar.
505 JumpList nonWordCharThenWordChar;
506 JumpList nonWordCharThenNonWordChar;
507 if (term.invertOrCapture) {
508 matchAssertionWordchar(state, nonWordCharThenNonWordChar, nonWordCharThenWordChar);
509 nonWordCharThenWordChar.append(jump());
510 } else {
511 matchAssertionWordchar(state, nonWordCharThenWordChar, nonWordCharThenNonWordChar);
512 nonWordCharThenNonWordChar.append(jump());
513 }
514 state.jumpToBacktrack(nonWordCharThenNonWordChar, this);
515
516 // We jump here if the last character was a wordchar.
517 matchDest.link(this);
518 JumpList wordCharThenWordChar;
519 JumpList wordCharThenNonWordChar;
520 if (term.invertOrCapture) {
521 matchAssertionWordchar(state, wordCharThenNonWordChar, wordCharThenWordChar);
522 wordCharThenWordChar.append(jump());
523 } else {
524 matchAssertionWordchar(state, wordCharThenWordChar, wordCharThenNonWordChar);
525 // This can fall-though!
526 }
527
528 state.jumpToBacktrack(wordCharThenWordChar, this);
529
530 nonWordCharThenWordChar.link(this);
531 wordCharThenNonWordChar.link(this);
532 }
533
534 void generatePatternCharacterSingle(TermGenerationState& state)
535 {
536 const RegisterID character = regT0;
537 UChar ch = state.term().patternCharacter;
538
539 if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
540 readCharacter(state.inputOffset(), character);
541 or32(Imm32(32), character);
542 state.jumpToBacktrack(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))), this);
543 } else {
544 ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
545 state.jumpToBacktrack(jumpIfCharNotEquals(ch, state.inputOffset()), this);
546 }
547 }
548
549 void generatePatternCharacterPair(TermGenerationState& state)
550 {
551 const RegisterID character = regT0;
552 UChar ch1 = state.term().patternCharacter;
553 UChar ch2 = state.lookaheadTerm().patternCharacter;
554
555 int mask = 0;
556 int chPair = ch1 | (ch2 << 16);
557
558 if (m_pattern.m_ignoreCase) {
559 if (isASCIIAlpha(ch1))
560 mask |= 32;
561 if (isASCIIAlpha(ch2))
562 mask |= 32 << 16;
563 }
564
565 if (mask) {
566 load32WithUnalignedHalfWords(BaseIndex(input, index, TimesTwo, state.inputOffset() * sizeof(UChar)), character);
567 or32(Imm32(mask), character);
568 state.jumpToBacktrack(branch32(NotEqual, character, Imm32(chPair | mask)), this);
569 } else
570 state.jumpToBacktrack(branch32WithUnalignedHalfWords(NotEqual, BaseIndex(input, index, TimesTwo, state.inputOffset() * sizeof(UChar)), Imm32(chPair)), this);
571 }
572
573 void generatePatternCharacterFixed(TermGenerationState& state)
574 {
575 const RegisterID character = regT0;
576 const RegisterID countRegister = regT1;
577 PatternTerm& term = state.term();
578 UChar ch = term.patternCharacter;
579
580 move(index, countRegister);
581 sub32(Imm32(term.quantityCount), countRegister);
582
583 Label loop(this);
584 if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
585 load16(BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), character);
586 or32(Imm32(32), character);
587 state.jumpToBacktrack(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))), this);
588 } else {
589 ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
590 state.jumpToBacktrack(branch16(NotEqual, BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), Imm32(ch)), this);
591 }
592 add32(Imm32(1), countRegister);
593 branch32(NotEqual, countRegister, index).linkTo(loop, this);
594 }
595
596 void generatePatternCharacterGreedy(TermGenerationState& state)
597 {
598 const RegisterID character = regT0;
599 const RegisterID countRegister = regT1;
600 PatternTerm& term = state.term();
601 UChar ch = term.patternCharacter;
602
603 move(Imm32(0), countRegister);
604
605 JumpList failures;
606 Label loop(this);
607 failures.append(atEndOfInput());
608 if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
609 readCharacter(state.inputOffset(), character);
610 or32(Imm32(32), character);
611 failures.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));
612 } else {
613 ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
614 failures.append(jumpIfCharNotEquals(ch, state.inputOffset()));
615 }
616
617 add32(Imm32(1), countRegister);
618 add32(Imm32(1), index);
619 if (term.quantityCount != 0xffffffff)
620 branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this);
621 else
622 jump(loop);
623
624 failures.append(jump());
625
626 Label backtrackBegin(this);
627 loadFromFrame(term.frameLocation, countRegister);
628 state.jumpToBacktrack(branchTest32(Zero, countRegister), this);
629 sub32(Imm32(1), countRegister);
630 sub32(Imm32(1), index);
631
632 failures.link(this);
633
634 storeToFrame(countRegister, term.frameLocation);
635
636 state.setBacktrackGenerated(backtrackBegin);
637 }
638
639 void generatePatternCharacterNonGreedy(TermGenerationState& state)
640 {
641 const RegisterID character = regT0;
642 const RegisterID countRegister = regT1;
643 PatternTerm& term = state.term();
644 UChar ch = term.patternCharacter;
645
646 move(Imm32(0), countRegister);
647
648 Jump firstTimeDoNothing = jump();
649
650 Label hardFail(this);
651 sub32(countRegister, index);
652 state.jumpToBacktrack(jump(), this);
653
654 Label backtrackBegin(this);
655 loadFromFrame(term.frameLocation, countRegister);
656
657 atEndOfInput().linkTo(hardFail, this);
658 if (term.quantityCount != 0xffffffff)
659 branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail);
660 if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
661 readCharacter(state.inputOffset(), character);
662 or32(Imm32(32), character);
663 branch32(NotEqual, character, Imm32(Unicode::toLower(ch))).linkTo(hardFail, this);
664 } else {
665 ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
666 jumpIfCharNotEquals(ch, state.inputOffset()).linkTo(hardFail, this);
667 }
668
669 add32(Imm32(1), countRegister);
670 add32(Imm32(1), index);
671
672 firstTimeDoNothing.link(this);
673 storeToFrame(countRegister, term.frameLocation);
674
675 state.setBacktrackGenerated(backtrackBegin);
676 }
677
678 void generateCharacterClassSingle(TermGenerationState& state)
679 {
680 const RegisterID character = regT0;
681 PatternTerm& term = state.term();
682
683 JumpList matchDest;
684 readCharacter(state.inputOffset(), character);
685 matchCharacterClass(character, matchDest, term.characterClass);
686
687 if (term.invertOrCapture)
688 state.jumpToBacktrack(matchDest, this);
689 else {
690 state.jumpToBacktrack(jump(), this);
691 matchDest.link(this);
692 }
693 }
694
695 void generateCharacterClassFixed(TermGenerationState& state)
696 {
697 const RegisterID character = regT0;
698 const RegisterID countRegister = regT1;
699 PatternTerm& term = state.term();
700
701 move(index, countRegister);
702 sub32(Imm32(term.quantityCount), countRegister);
703
704 Label loop(this);
705 JumpList matchDest;
706 load16(BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), character);
707 matchCharacterClass(character, matchDest, term.characterClass);
708
709 if (term.invertOrCapture)
710 state.jumpToBacktrack(matchDest, this);
711 else {
712 state.jumpToBacktrack(jump(), this);
713 matchDest.link(this);
714 }
715
716 add32(Imm32(1), countRegister);
717 branch32(NotEqual, countRegister, index).linkTo(loop, this);
718 }
719
720 void generateCharacterClassGreedy(TermGenerationState& state)
721 {
722 const RegisterID character = regT0;
723 const RegisterID countRegister = regT1;
724 PatternTerm& term = state.term();
725
726 move(Imm32(0), countRegister);
727
728 JumpList failures;
729 Label loop(this);
730 failures.append(atEndOfInput());
731
732 if (term.invertOrCapture) {
733 readCharacter(state.inputOffset(), character);
734 matchCharacterClass(character, failures, term.characterClass);
735 } else {
736 JumpList matchDest;
737 readCharacter(state.inputOffset(), character);
738 matchCharacterClass(character, matchDest, term.characterClass);
739 failures.append(jump());
740 matchDest.link(this);
741 }
742
743 add32(Imm32(1), countRegister);
744 add32(Imm32(1), index);
745 if (term.quantityCount != 0xffffffff)
746 branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this);
747 else
748 jump(loop);
749
750 failures.append(jump());
751
752 Label backtrackBegin(this);
753 loadFromFrame(term.frameLocation, countRegister);
754 state.jumpToBacktrack(branchTest32(Zero, countRegister), this);
755 sub32(Imm32(1), countRegister);
756 sub32(Imm32(1), index);
757
758 failures.link(this);
759
760 storeToFrame(countRegister, term.frameLocation);
761
762 state.setBacktrackGenerated(backtrackBegin);
763 }
764
765 void generateCharacterClassNonGreedy(TermGenerationState& state)
766 {
767 const RegisterID character = regT0;
768 const RegisterID countRegister = regT1;
769 PatternTerm& term = state.term();
770
771 move(Imm32(0), countRegister);
772
773 Jump firstTimeDoNothing = jump();
774
775 Label hardFail(this);
776 sub32(countRegister, index);
777 state.jumpToBacktrack(jump(), this);
778
779 Label backtrackBegin(this);
780 loadFromFrame(term.frameLocation, countRegister);
781
782 atEndOfInput().linkTo(hardFail, this);
783 branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail);
784
785 JumpList matchDest;
786 readCharacter(state.inputOffset(), character);
787 matchCharacterClass(character, matchDest, term.characterClass);
788
789 if (term.invertOrCapture)
790 matchDest.linkTo(hardFail, this);
791 else {
792 jump(hardFail);
793 matchDest.link(this);
794 }
795
796 add32(Imm32(1), countRegister);
797 add32(Imm32(1), index);
798
799 firstTimeDoNothing.link(this);
800 storeToFrame(countRegister, term.frameLocation);
801
802 state.setBacktrackGenerated(backtrackBegin);
803 }
804
805 void generateParenthesesDisjunction(PatternTerm& parenthesesTerm, TermGenerationState& state, unsigned alternativeFrameLocation)
806 {
807 ASSERT((parenthesesTerm.type == PatternTerm::TypeParenthesesSubpattern) || (parenthesesTerm.type == PatternTerm::TypeParentheticalAssertion));
808 ASSERT(parenthesesTerm.quantityCount == 1);
809
810 PatternDisjunction* disjunction = parenthesesTerm.parentheses.disjunction;
811 unsigned preCheckedCount = ((parenthesesTerm.quantityType == QuantifierFixedCount) && (parenthesesTerm.type != PatternTerm::TypeParentheticalAssertion)) ? disjunction->m_minimumSize : 0;
812
813 if (disjunction->m_alternatives.size() == 1) {
814 state.resetAlternative();
815 ASSERT(state.alternativeValid());
816 PatternAlternative* alternative = state.alternative();
817 optimizeAlternative(alternative);
818
819 int countToCheck = alternative->m_minimumSize - preCheckedCount;
820 if (countToCheck) {
821 ASSERT((parenthesesTerm.type == PatternTerm::TypeParentheticalAssertion) || (parenthesesTerm.quantityType != QuantifierFixedCount));
822
823 // FIXME: This is quite horrible. The call to 'plantJumpToBacktrackIfExists'
824 // will be forced to always trampoline into here, just to decrement the index.
825 // Ick.
826 Jump skip = jump();
827
828 Label backtrackBegin(this);
829 sub32(Imm32(countToCheck), index);
830 state.addBacktrackJump(jump());
831
832 skip.link(this);
833
834 state.setBacktrackGenerated(backtrackBegin);
835
836 state.jumpToBacktrack(jumpIfNoAvailableInput(countToCheck), this);
837 state.checkedTotal += countToCheck;
838 }
839
840 for (state.resetTerm(); state.termValid(); state.nextTerm())
841 generateTerm(state);
842
843 state.checkedTotal -= countToCheck;
844 } else {
845 JumpList successes;
846
847 for (state.resetAlternative(); state.alternativeValid(); state.nextAlternative()) {
848
849 PatternAlternative* alternative = state.alternative();
850 optimizeAlternative(alternative);
851
852 ASSERT(alternative->m_minimumSize >= preCheckedCount);
853 int countToCheck = alternative->m_minimumSize - preCheckedCount;
854 if (countToCheck) {
855 state.addBacktrackJump(jumpIfNoAvailableInput(countToCheck));
856 state.checkedTotal += countToCheck;
857 }
858
859 for (state.resetTerm(); state.termValid(); state.nextTerm())
860 generateTerm(state);
861
862 // Matched an alternative.
863 DataLabelPtr dataLabel = storeToFrameWithPatch(alternativeFrameLocation);
864 successes.append(jump());
865
866 // Alternative did not match.
867 Label backtrackLocation(this);
868
869 // Can we backtrack the alternative? - if so, do so. If not, just fall through to the next one.
870 state.plantJumpToBacktrackIfExists(this);
871
872 state.linkAlternativeBacktracks(this);
873
874 if (countToCheck) {
875 sub32(Imm32(countToCheck), index);
876 state.checkedTotal -= countToCheck;
877 }
878
879 m_backtrackRecords.append(AlternativeBacktrackRecord(dataLabel, backtrackLocation));
880 }
881 // We fall through to here when the last alternative fails.
882 // Add a backtrack out of here for the parenthese handling code to link up.
883 state.addBacktrackJump(jump());
884
885 // Generate a trampoline for the parens code to backtrack to, to retry the
886 // next alternative.
887 state.setBacktrackGenerated(label());
888 loadFromFrameAndJump(alternativeFrameLocation);
889
890 // FIXME: both of the above hooks are a little inefficient, in that you
891 // may end up trampolining here, just to trampoline back out to the
892 // parentheses code, or vice versa. We can probably eliminate a jump
893 // by restructuring, but coding this way for now for simplicity during
894 // development.
895
896 successes.link(this);
897 }
898 }
899
900 void generateParenthesesSingle(TermGenerationState& state)
901 {
902 const RegisterID indexTemporary = regT0;
903 PatternTerm& term = state.term();
904 PatternDisjunction* disjunction = term.parentheses.disjunction;
905 ASSERT(term.quantityCount == 1);
906
907 unsigned preCheckedCount = ((term.quantityCount == 1) && (term.quantityType == QuantifierFixedCount)) ? disjunction->m_minimumSize : 0;
908
909 unsigned parenthesesFrameLocation = term.frameLocation;
910 unsigned alternativeFrameLocation = parenthesesFrameLocation;
911 if (term.quantityType != QuantifierFixedCount)
912 alternativeFrameLocation += RegexStackSpaceForBackTrackInfoParenthesesOnce;
913
914 // optimized case - no capture & no quantifier can be handled in a light-weight manner.
915 if (!term.invertOrCapture && (term.quantityType == QuantifierFixedCount)) {
916 TermGenerationState parenthesesState(disjunction, state.checkedTotal);
917 generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
918 // this expects that any backtracks back out of the parentheses will be in the
919 // parenthesesState's backTrackJumps vector, and that if they need backtracking
920 // they will have set an entry point on the parenthesesState's backtrackLabel.
921 state.propagateBacktrackingFrom(parenthesesState, this);
922 } else {
923 Jump nonGreedySkipParentheses;
924 Label nonGreedyTryParentheses;
925 if (term.quantityType == QuantifierGreedy)
926 storeToFrame(Imm32(1), parenthesesFrameLocation);
927 else if (term.quantityType == QuantifierNonGreedy) {
928 storeToFrame(Imm32(0), parenthesesFrameLocation);
929 nonGreedySkipParentheses = jump();
930 nonGreedyTryParentheses = label();
931 storeToFrame(Imm32(1), parenthesesFrameLocation);
932 }
933
934 // store the match start index
935 if (term.invertOrCapture) {
936 int inputOffset = state.inputOffset() - preCheckedCount;
937 if (inputOffset) {
938 move(index, indexTemporary);
939 add32(Imm32(inputOffset), indexTemporary);
940 store32(indexTemporary, Address(output, (term.parentheses.subpatternId << 1) * sizeof(int)));
941 } else
942 store32(index, Address(output, (term.parentheses.subpatternId << 1) * sizeof(int)));
943 }
944
945 // generate the body of the parentheses
946 TermGenerationState parenthesesState(disjunction, state.checkedTotal);
947 generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
948
949 // store the match end index
950 if (term.invertOrCapture) {
951 int inputOffset = state.inputOffset();
952 if (inputOffset) {
953 move(index, indexTemporary);
954 add32(Imm32(state.inputOffset()), indexTemporary);
955 store32(indexTemporary, Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int)));
956 } else
957 store32(index, Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int)));
958 }
959 Jump success = jump();
960
961 // A failure AFTER the parens jumps here
962 Label backtrackFromAfterParens(this);
963
964 if (term.quantityType == QuantifierGreedy) {
965 // If this is zero we have now tested with both with and without the parens.
966 loadFromFrame(parenthesesFrameLocation, indexTemporary);
967 state.jumpToBacktrack(branchTest32(Zero, indexTemporary), this);
968 } else if (term.quantityType == QuantifierNonGreedy) {
969 // If this is zero we have now tested with both with and without the parens.
970 loadFromFrame(parenthesesFrameLocation, indexTemporary);
971 branchTest32(Zero, indexTemporary).linkTo(nonGreedyTryParentheses, this);
972 }
973
974 parenthesesState.plantJumpToBacktrackIfExists(this);
975 // A failure WITHIN the parens jumps here
976 parenthesesState.linkAlternativeBacktracks(this);
977 if (term.invertOrCapture) {
978 store32(Imm32(-1), Address(output, (term.parentheses.subpatternId << 1) * sizeof(int)));
979 store32(Imm32(-1), Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int)));
980 }
981
982 if (term.quantityType == QuantifierGreedy)
983 storeToFrame(Imm32(0), parenthesesFrameLocation);
984 else
985 state.jumpToBacktrack(jump(), this);
986
987 state.setBacktrackGenerated(backtrackFromAfterParens);
988 if (term.quantityType == QuantifierNonGreedy)
989 nonGreedySkipParentheses.link(this);
990 success.link(this);
991 }
992 }
993
994 void generateParentheticalAssertion(TermGenerationState& state)
995 {
996 PatternTerm& term = state.term();
997 PatternDisjunction* disjunction = term.parentheses.disjunction;
998 ASSERT(term.quantityCount == 1);
999 ASSERT(term.quantityType == QuantifierFixedCount);
1000
1001 unsigned parenthesesFrameLocation = term.frameLocation;
1002 unsigned alternativeFrameLocation = parenthesesFrameLocation + RegexStackSpaceForBackTrackInfoParentheticalAssertion;
1003
1004 int countCheckedAfterAssertion = state.checkedTotal - term.inputPosition;
1005
1006 if (term.invertOrCapture) {
1007 // Inverted case
1008 storeToFrame(index, parenthesesFrameLocation);
1009
1010 state.checkedTotal -= countCheckedAfterAssertion;
1011 if (countCheckedAfterAssertion)
1012 sub32(Imm32(countCheckedAfterAssertion), index);
1013
1014 TermGenerationState parenthesesState(disjunction, state.checkedTotal);
1015 generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
1016 // Success! - which means - Fail!
1017 loadFromFrame(parenthesesFrameLocation, index);
1018 state.jumpToBacktrack(jump(), this);
1019
1020 // And fail means success.
1021 parenthesesState.linkAlternativeBacktracks(this);
1022 loadFromFrame(parenthesesFrameLocation, index);
1023
1024 state.checkedTotal += countCheckedAfterAssertion;
1025 } else {
1026 // Normal case
1027 storeToFrame(index, parenthesesFrameLocation);
1028
1029 state.checkedTotal -= countCheckedAfterAssertion;
1030 if (countCheckedAfterAssertion)
1031 sub32(Imm32(countCheckedAfterAssertion), index);
1032
1033 TermGenerationState parenthesesState(disjunction, state.checkedTotal);
1034 generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
1035 // Success! - which means - Success!
1036 loadFromFrame(parenthesesFrameLocation, index);
1037 Jump success = jump();
1038
1039 parenthesesState.linkAlternativeBacktracks(this);
1040 loadFromFrame(parenthesesFrameLocation, index);
1041 state.jumpToBacktrack(jump(), this);
1042
1043 success.link(this);
1044
1045 state.checkedTotal += countCheckedAfterAssertion;
1046 }
1047 }
1048
1049 void generateTerm(TermGenerationState& state)
1050 {
1051 PatternTerm& term = state.term();
1052
1053 switch (term.type) {
1054 case PatternTerm::TypeAssertionBOL:
1055 generateAssertionBOL(state);
1056 break;
1057
1058 case PatternTerm::TypeAssertionEOL:
1059 generateAssertionEOL(state);
1060 break;
1061
1062 case PatternTerm::TypeAssertionWordBoundary:
1063 generateAssertionWordBoundary(state);
1064 break;
1065
1066 case PatternTerm::TypePatternCharacter:
1067 switch (term.quantityType) {
1068 case QuantifierFixedCount:
1069 if (term.quantityCount == 1) {
1070 if (state.isSinglePatternCharacterLookaheadTerm() && (state.lookaheadTerm().inputPosition == (term.inputPosition + 1))) {
1071 generatePatternCharacterPair(state);
1072 state.nextTerm();
1073 } else
1074 generatePatternCharacterSingle(state);
1075 } else
1076 generatePatternCharacterFixed(state);
1077 break;
1078 case QuantifierGreedy:
1079 generatePatternCharacterGreedy(state);
1080 break;
1081 case QuantifierNonGreedy:
1082 generatePatternCharacterNonGreedy(state);
1083 break;
1084 }
1085 break;
1086
1087 case PatternTerm::TypeCharacterClass:
1088 switch (term.quantityType) {
1089 case QuantifierFixedCount:
1090 if (term.quantityCount == 1)
1091 generateCharacterClassSingle(state);
1092 else
1093 generateCharacterClassFixed(state);
1094 break;
1095 case QuantifierGreedy:
1096 generateCharacterClassGreedy(state);
1097 break;
1098 case QuantifierNonGreedy:
1099 generateCharacterClassNonGreedy(state);
1100 break;
1101 }
1102 break;
1103
1104 case PatternTerm::TypeBackReference:
1105 ASSERT_NOT_REACHED();
1106 break;
1107
1108 case PatternTerm::TypeForwardReference:
1109 break;
1110
1111 case PatternTerm::TypeParenthesesSubpattern:
1112 ASSERT((term.quantityCount == 1) && !term.parentheses.isCopy); // must fallback to pcre before this point
1113 generateParenthesesSingle(state);
1114 break;
1115
1116 case PatternTerm::TypeParentheticalAssertion:
1117 generateParentheticalAssertion(state);
1118 break;
1119 }
1120 }
1121
1122 void generateDisjunction(PatternDisjunction* disjunction)
1123 {
1124 TermGenerationState state(disjunction, 0);
1125 state.resetAlternative();
1126
1127 // Plant a check to see if there is sufficient input available to run the first alternative.
1128 // Jumping back to the label 'firstAlternative' will get to this check, jumping to
1129 // 'firstAlternativeInputChecked' will jump directly to matching the alternative having
1130 // skipped this check.
1131
1132 Label firstAlternative(this);
1133
1134 // check availability for the next alternative
1135 int countCheckedForCurrentAlternative = 0;
1136 int countToCheckForFirstAlternative = 0;
1137 bool hasShorterAlternatives = false;
1138 JumpList notEnoughInputForPreviousAlternative;
1139
1140 if (state.alternativeValid()) {
1141 PatternAlternative* alternative = state.alternative();
1142 countToCheckForFirstAlternative = alternative->m_minimumSize;
1143 state.checkedTotal += countToCheckForFirstAlternative;
1144 if (countToCheckForFirstAlternative)
1145 notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForFirstAlternative));
1146 countCheckedForCurrentAlternative = countToCheckForFirstAlternative;
1147 }
1148
1149 Label firstAlternativeInputChecked(this);
1150
1151 while (state.alternativeValid()) {
1152 // Track whether any alternatives are shorter than the first one.
1153 hasShorterAlternatives = hasShorterAlternatives || (countCheckedForCurrentAlternative < countToCheckForFirstAlternative);
1154
1155 PatternAlternative* alternative = state.alternative();
1156 optimizeAlternative(alternative);
1157
1158 for (state.resetTerm(); state.termValid(); state.nextTerm())
1159 generateTerm(state);
1160
1161 // If we get here, the alternative matched.
1162 if (m_pattern.m_body->m_callFrameSize)
1163 addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
1164
1165 ASSERT(index != returnRegister);
1166 if (m_pattern.m_body->m_hasFixedSize) {
1167 move(index, returnRegister);
1168 if (alternative->m_minimumSize)
1169 sub32(Imm32(alternative->m_minimumSize), returnRegister);
1170 } else
1171 pop(returnRegister);
1172 store32(index, Address(output, 4));
1173 store32(returnRegister, output);
1174
1175 generateReturn();
1176
1177 state.nextAlternative();
1178
1179 // if there are any more alternatives, plant the check for input before looping.
1180 if (state.alternativeValid()) {
1181 PatternAlternative* nextAlternative = state.alternative();
1182 int countToCheckForNextAlternative = nextAlternative->m_minimumSize;
1183
1184 if (countCheckedForCurrentAlternative > countToCheckForNextAlternative) { // CASE 1: current alternative was longer than the next one.
1185 // If we get here, there the last input checked failed.
1186 notEnoughInputForPreviousAlternative.link(this);
1187
1188 // Check if sufficent input available to run the next alternative
1189 notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForNextAlternative - countCheckedForCurrentAlternative));
1190 // We are now in the correct state to enter the next alternative; this add is only required
1191 // to mirror and revert operation of the sub32, just below.
1192 add32(Imm32(countCheckedForCurrentAlternative - countToCheckForNextAlternative), index);
1193
1194 // If we get here, there the last input checked passed.
1195 state.linkAlternativeBacktracks(this);
1196 // No need to check if we can run the next alternative, since it is shorter -
1197 // just update index.
1198 sub32(Imm32(countCheckedForCurrentAlternative - countToCheckForNextAlternative), index);
1199 } else if (countCheckedForCurrentAlternative < countToCheckForNextAlternative) { // CASE 2: next alternative is longer than the current one.
1200 // If we get here, there the last input checked failed.
1201 // If there is insufficient input to run the current alternative, and the next alternative is longer,
1202 // then there is definitely not enough input to run it - don't even check. Just adjust index, as if
1203 // we had checked.
1204 notEnoughInputForPreviousAlternative.link(this);
1205 add32(Imm32(countToCheckForNextAlternative - countCheckedForCurrentAlternative), index);
1206 notEnoughInputForPreviousAlternative.append(jump());
1207
1208 // The next alternative is longer than the current one; check the difference.
1209 state.linkAlternativeBacktracks(this);
1210 notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForNextAlternative - countCheckedForCurrentAlternative));
1211 } else { // CASE 3: Both alternatives are the same length.
1212 ASSERT(countCheckedForCurrentAlternative == countToCheckForNextAlternative);
1213
1214 // If the next alterative is the same length as this one, then no need to check the input -
1215 // if there was sufficent input to run the current alternative then there is sufficient
1216 // input to run the next one; if not, there isn't.
1217 state.linkAlternativeBacktracks(this);
1218 }
1219
1220 state.checkedTotal -= countCheckedForCurrentAlternative;
1221 countCheckedForCurrentAlternative = countToCheckForNextAlternative;
1222 state.checkedTotal += countCheckedForCurrentAlternative;
1223 }
1224 }
1225
1226 // If we get here, all Alternatives failed...
1227
1228 state.checkedTotal -= countCheckedForCurrentAlternative;
1229
1230 // How much more input need there be to be able to retry from the first alternative?
1231 // examples:
1232 // /yarr_jit/ or /wrec|pcre/
1233 // In these examples we need check for one more input before looping.
1234 // /yarr_jit|pcre/
1235 // In this case we need check for 5 more input to loop (+4 to allow for the first alterative
1236 // being four longer than the last alternative checked, and another +1 to effectively move
1237 // the start position along by one).
1238 // /yarr|rules/ or /wrec|notsomuch/
1239 // In these examples, provided that there was sufficient input to have just been matching for
1240 // the second alternative we can loop without checking for available input (since the second
1241 // alternative is longer than the first). In the latter example we need to decrement index
1242 // (by 4) so the start position is only progressed by 1 from the last iteration.
1243 int incrementForNextIter = (countToCheckForFirstAlternative - countCheckedForCurrentAlternative) + 1;
1244
1245 // First, deal with the cases where there was sufficient input to try the last alternative.
1246 if (incrementForNextIter > 0) // We need to check for more input anyway, fall through to the checking below.
1247 state.linkAlternativeBacktracks(this);
1248 else if (m_pattern.m_body->m_hasFixedSize && !incrementForNextIter) // No need to update anything, link these backtracks straight to the to pof the loop!
1249 state.linkAlternativeBacktracksTo(firstAlternativeInputChecked, this);
1250 else { // no need to check the input, but we do have some bookkeeping to do first.
1251 state.linkAlternativeBacktracks(this);
1252
1253 // Where necessary update our preserved start position.
1254 if (!m_pattern.m_body->m_hasFixedSize) {
1255 move(index, regT0);
1256 sub32(Imm32(countCheckedForCurrentAlternative - 1), regT0);
1257 poke(regT0, m_pattern.m_body->m_callFrameSize);
1258 }
1259
1260 // Update index if necessary, and loop (without checking).
1261 if (incrementForNextIter)
1262 add32(Imm32(incrementForNextIter), index);
1263 jump().linkTo(firstAlternativeInputChecked, this);
1264 }
1265
1266 notEnoughInputForPreviousAlternative.link(this);
1267 // Update our idea of the start position, if we're tracking this.
1268 if (!m_pattern.m_body->m_hasFixedSize) {
1269 if (countCheckedForCurrentAlternative - 1) {
1270 move(index, regT0);
1271 sub32(Imm32(countCheckedForCurrentAlternative - 1), regT0);
1272 poke(regT0, m_pattern.m_body->m_callFrameSize);
1273 } else
1274 poke(index, m_pattern.m_body->m_callFrameSize);
1275 }
1276 // Check if there is sufficent input to run the first alternative again.
1277 jumpIfAvailableInput(incrementForNextIter).linkTo(firstAlternativeInputChecked, this);
1278 // No - insufficent input to run the first alteranative, are there any other alternatives we
1279 // might need to check? If so, the last check will have left the index incremented by
1280 // (countToCheckForFirstAlternative + 1), so we need test whether countToCheckForFirstAlternative
1281 // LESS input is available, to have the effect of just progressing the start position by 1
1282 // from the last iteration. If this check passes we can just jump up to the check associated
1283 // with the first alternative in the loop. This is a bit sad, since we'll end up trying the
1284 // first alternative again, and this check will fail (otherwise the check planted just above
1285 // here would have passed). This is a bit sad, however it saves trying to do something more
1286 // complex here in compilation, and in the common case we should end up coallescing the checks.
1287 //
1288 // FIXME: a nice improvement here may be to stop trying to match sooner, based on the least
1289 // of the minimum-alternative-lengths. E.g. if I have two alternatives of length 200 and 150,
1290 // and a string of length 100, we'll end up looping index from 0 to 100, checking whether there
1291 // is sufficient input to run either alternative (constantly failing). If there had been only
1292 // one alternative, or if the shorter alternative had come first, we would have terminated
1293 // immediately. :-/
1294 if (hasShorterAlternatives)
1295 jumpIfAvailableInput(-countToCheckForFirstAlternative).linkTo(firstAlternative, this);
1296 // index will now be a bit garbled (depending on whether 'hasShorterAlternatives' is true,
1297 // it has either been incremented by 1 or by (countToCheckForFirstAlternative + 1) ...
1298 // but since we're about to return a failure this doesn't really matter!)
1299
1300 unsigned frameSize = m_pattern.m_body->m_callFrameSize;
1301 if (!m_pattern.m_body->m_hasFixedSize)
1302 ++frameSize;
1303 if (frameSize)
1304 addPtr(Imm32(frameSize * sizeof(void*)), stackPointerRegister);
1305
1306 move(Imm32(-1), returnRegister);
1307
1308 generateReturn();
1309 }
1310
1311 void generateEnter()
1312 {
1313 #if CPU(X86_64)
1314 push(X86Registers::ebp);
1315 move(stackPointerRegister, X86Registers::ebp);
1316 push(X86Registers::ebx);
1317 #elif CPU(X86)
1318 push(X86Registers::ebp);
1319 move(stackPointerRegister, X86Registers::ebp);
1320 // TODO: do we need spill registers to fill the output pointer if there are no sub captures?
1321 push(X86Registers::ebx);
1322 push(X86Registers::edi);
1323 push(X86Registers::esi);
1324 // load output into edi (2 = saved ebp + return address).
1325 #if COMPILER(MSVC)
1326 loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), input);
1327 loadPtr(Address(X86Registers::ebp, 3 * sizeof(void*)), index);
1328 loadPtr(Address(X86Registers::ebp, 4 * sizeof(void*)), length);
1329 loadPtr(Address(X86Registers::ebp, 5 * sizeof(void*)), output);
1330 #else
1331 loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), output);
1332 #endif
1333 #elif CPU(ARM)
1334 push(ARMRegisters::r4);
1335 push(ARMRegisters::r5);
1336 push(ARMRegisters::r6);
1337 move(ARMRegisters::r3, output);
1338 #elif CPU(MIPS)
1339 // Do nothing.
1340 #endif
1341 }
1342
1343 void generateReturn()
1344 {
1345 #if CPU(X86_64)
1346 pop(X86Registers::ebx);
1347 pop(X86Registers::ebp);
1348 #elif CPU(X86)
1349 pop(X86Registers::esi);
1350 pop(X86Registers::edi);
1351 pop(X86Registers::ebx);
1352 pop(X86Registers::ebp);
1353 #elif CPU(ARM)
1354 pop(ARMRegisters::r6);
1355 pop(ARMRegisters::r5);
1356 pop(ARMRegisters::r4);
1357 #elif CPU(MIPS)
1358 // Do nothing
1359 #endif
1360 ret();
1361 }
1362
1363 public:
1364 RegexGenerator(RegexPattern& pattern)
1365 : m_pattern(pattern)
1366 {
1367 }
1368
1369 void generate()
1370 {
1371 generateEnter();
1372
1373 // TODO: do I really want this on the stack?
1374 if (!m_pattern.m_body->m_hasFixedSize)
1375 push(index);
1376
1377 if (m_pattern.m_body->m_callFrameSize)
1378 subPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
1379
1380 generateDisjunction(m_pattern.m_body);
1381 }
1382
1383 void compile(JSGlobalData* globalData, RegexCodeBlock& jitObject)
1384 {
1385 generate();
1386
1387 LinkBuffer patchBuffer(this, globalData->executableAllocator.poolForSize(size()));
1388
1389 for (unsigned i = 0; i < m_backtrackRecords.size(); ++i)
1390 patchBuffer.patch(m_backtrackRecords[i].dataLabel, patchBuffer.locationOf(m_backtrackRecords[i].backtrackLocation));
1391
1392 jitObject.set(patchBuffer.finalizeCode());
1393 }
1394
1395 private:
1396 RegexPattern& m_pattern;
1397 Vector<AlternativeBacktrackRecord> m_backtrackRecords;
1398 };
1399
1400 void jitCompileRegex(JSGlobalData* globalData, RegexCodeBlock& jitObject, const UString& patternString, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline)
1401 {
1402 RegexPattern pattern(ignoreCase, multiline);
1403 if ((error = compileRegex(patternString, pattern)))
1404 return;
1405 numSubpatterns = pattern.m_numSubpatterns;
1406
1407 if (!pattern.m_shouldFallBack && globalData->canUseJIT()) {
1408 RegexGenerator generator(pattern);
1409 generator.compile(globalData, jitObject);
1410 return;
1411 }
1412
1413 JSRegExpIgnoreCaseOption ignoreCaseOption = ignoreCase ? JSRegExpIgnoreCase : JSRegExpDoNotIgnoreCase;
1414 JSRegExpMultilineOption multilineOption = multiline ? JSRegExpMultiline : JSRegExpSingleLine;
1415 jitObject.setFallback(jsRegExpCompile(reinterpret_cast<const UChar*>(patternString.data()), patternString.size(), ignoreCaseOption, multilineOption, &numSubpatterns, &error));
1416 }
1417
1418 }}
1419
1420 #endif
1421
1422
1423
1424
1425