]> git.saurik.com Git - apple/javascriptcore.git/blob - yarr/YarrJIT.cpp
JavaScriptCore-903.5.tar.gz
[apple/javascriptcore.git] / yarr / YarrJIT.cpp
1 /*
2 * Copyright (C) 2009 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26 #include "config.h"
27 #include "YarrJIT.h"
28
29 #include "ASCIICType.h"
30 #include "LinkBuffer.h"
31 #include "Yarr.h"
32
33 #if ENABLE(YARR_JIT)
34
35 using namespace WTF;
36
37 namespace JSC { namespace Yarr {
38
39 class YarrGenerator : private MacroAssembler {
40 friend void jitCompile(JSGlobalData*, YarrCodeBlock& jitObject, const UString& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline);
41
#if CPU(ARM)
    // Per-architecture register assignments for the generated matcher.
    // On every architecture:
    //  - 'input' is the pointer to the subject string's characters; it is
    //    always indexed with TimesTwo scaling, i.e. 16-bit UChar code units.
    //  - 'index' is the current match position and 'length' the subject
    //    length; the generators below compare index against length to detect
    //    end of input.
    //  - 'output' presumably points at the subpattern results array -- it is
    //    not used in the code visible here; confirm against the epilogue.
    //  - regT0/regT1 are scratch registers (character / loop counter).
    //  - 'returnRegister' carries the result back to the caller.
    static const RegisterID input = ARMRegisters::r0;
    static const RegisterID index = ARMRegisters::r1;
    static const RegisterID length = ARMRegisters::r2;
    static const RegisterID output = ARMRegisters::r4;

    static const RegisterID regT0 = ARMRegisters::r5;
    static const RegisterID regT1 = ARMRegisters::r6;

    static const RegisterID returnRegister = ARMRegisters::r0;
#elif CPU(MIPS)
    static const RegisterID input = MIPSRegisters::a0;
    static const RegisterID index = MIPSRegisters::a1;
    static const RegisterID length = MIPSRegisters::a2;
    static const RegisterID output = MIPSRegisters::a3;

    static const RegisterID regT0 = MIPSRegisters::t4;
    static const RegisterID regT1 = MIPSRegisters::t5;

    static const RegisterID returnRegister = MIPSRegisters::v0;
#elif CPU(SH4)
    static const RegisterID input = SH4Registers::r4;
    static const RegisterID index = SH4Registers::r5;
    static const RegisterID length = SH4Registers::r6;
    static const RegisterID output = SH4Registers::r7;

    static const RegisterID regT0 = SH4Registers::r0;
    static const RegisterID regT1 = SH4Registers::r1;

    static const RegisterID returnRegister = SH4Registers::r0;
#elif CPU(X86)
    static const RegisterID input = X86Registers::eax;
    static const RegisterID index = X86Registers::edx;
    static const RegisterID length = X86Registers::ecx;
    static const RegisterID output = X86Registers::edi;

    static const RegisterID regT0 = X86Registers::ebx;
    static const RegisterID regT1 = X86Registers::esi;

    static const RegisterID returnRegister = X86Registers::eax;
#elif CPU(X86_64)
    static const RegisterID input = X86Registers::edi;
    static const RegisterID index = X86Registers::esi;
    static const RegisterID length = X86Registers::edx;
    static const RegisterID output = X86Registers::ecx;

    static const RegisterID regT0 = X86Registers::eax;
    static const RegisterID regT1 = X86Registers::ebx;

    static const RegisterID returnRegister = X86Registers::eax;
#endif
93
94 void optimizeAlternative(PatternAlternative* alternative)
95 {
96 if (!alternative->m_terms.size())
97 return;
98
99 for (unsigned i = 0; i < alternative->m_terms.size() - 1; ++i) {
100 PatternTerm& term = alternative->m_terms[i];
101 PatternTerm& nextTerm = alternative->m_terms[i + 1];
102
103 if ((term.type == PatternTerm::TypeCharacterClass)
104 && (term.quantityType == QuantifierFixedCount)
105 && (nextTerm.type == PatternTerm::TypePatternCharacter)
106 && (nextTerm.quantityType == QuantifierFixedCount)) {
107 PatternTerm termCopy = term;
108 alternative->m_terms[i] = nextTerm;
109 alternative->m_terms[i + 1] = termCopy;
110 }
111 }
112 }
113
    // Emits a binary search over the sorted 'ranges' array, interleaving the
    // sorted singleton 'matches' so that each singleton is tested exactly
    // once, in order. Characters falling inside a range (or equal to a
    // singleton) branch to 'matchDest'; characters proven to match nothing
    // branch to 'failures'. *matchIndex is advanced past every singleton
    // consumed -- either emitted as an equality test or subsumed by a range.
    void matchCharacterClassRange(RegisterID character, JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar* matches, unsigned matchCount)
    {
        do {
            // pick which range we're going to generate
            int which = count >> 1;
            // NOTE(review): lo/hi are held in plain (possibly signed) char;
            // this is only safe if these non-unicode ranges never exceed 0x7f
            // (endpoints above that would sign-extend before the cast below).
            // Confirm the pattern compiler keeps >0x7f data in m_rangesUnicode.
            char lo = ranges[which].begin;
            char hi = ranges[which].end;

            // check if there are any ranges or matches below lo. If not, just jl to failure -
            // if there is anything else to check, check that first, if it falls through jmp to failure.
            if ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
                Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));

                // generate code for all ranges before this one
                if (which)
                    matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);

                // ... then equality tests for the singletons below lo.
                while ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
                    matchDest.append(branch32(Equal, character, Imm32((unsigned short)matches[*matchIndex])));
                    ++*matchIndex;
                }
                // Below lo and matched none of the smaller values: fail.
                failures.append(jump());

                loOrAbove.link(this);
            } else if (which) {
                // No singletons below lo, but there are lower ranges to emit.
                Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));

                matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
                failures.append(jump());

                loOrAbove.link(this);
            } else
                // Nothing at all below lo: anything smaller fails immediately.
                failures.append(branch32(LessThan, character, Imm32((unsigned short)lo)));

            // Singletons within [lo, hi] are subsumed by the range test; skip them.
            while ((*matchIndex < matchCount) && (matches[*matchIndex] <= hi))
                ++*matchIndex;

            matchDest.append(branch32(LessThanOrEqual, character, Imm32((unsigned short)hi)));
            // fall through to here, the value is above hi.

            // shuffle along & loop around if there are any more matches to handle.
            unsigned next = which + 1;
            ranges += next;
            count -= next;
        } while (count);
    }
160
    // Emits code testing whether the character in 'character' belongs to
    // charClass; on a match, control branches to 'matchDest', otherwise it
    // falls through. Fast path: if the class has a precomputed 256-entry
    // table, a single byte-load + test suffices. Otherwise unicode (>0x7f)
    // matches/ranges are tested first behind an ASCII guard, then the ASCII
    // ranges/singletons.
    void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass)
    {
        if (charClass->m_table) {
            // Table lookup: the character indexes directly into the table;
            // inverted tables branch on a zero entry instead.
            ExtendedAddress tableEntry(character, reinterpret_cast<intptr_t>(charClass->m_table->m_table));
            matchDest.append(branchTest8(charClass->m_table->m_inverted ? Zero : NonZero, tableEntry));
            return;
        }
        Jump unicodeFail;
        if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) {
            // Characters <= 0x7f skip the unicode tests entirely.
            Jump isAscii = branch32(LessThanOrEqual, character, TrustedImm32(0x7f));

            if (charClass->m_matchesUnicode.size()) {
                for (unsigned i = 0; i < charClass->m_matchesUnicode.size(); ++i) {
                    UChar ch = charClass->m_matchesUnicode[i];
                    matchDest.append(branch32(Equal, character, Imm32(ch)));
                }
            }

            if (charClass->m_rangesUnicode.size()) {
                for (unsigned i = 0; i < charClass->m_rangesUnicode.size(); ++i) {
                    UChar lo = charClass->m_rangesUnicode[i].begin;
                    UChar hi = charClass->m_rangesUnicode[i].end;

                    // In range iff not below lo and not above hi.
                    Jump below = branch32(LessThan, character, Imm32(lo));
                    matchDest.append(branch32(LessThanOrEqual, character, Imm32(hi)));
                    below.link(this);
                }
            }

            // A >0x7f character matching no unicode entry fails the class.
            unicodeFail = jump();
            isAscii.link(this);
        }

        if (charClass->m_ranges.size()) {
            unsigned matchIndex = 0;
            JumpList failures;
            matchCharacterClassRange(character, failures, matchDest, charClass->m_ranges.begin(), charClass->m_ranges.size(), &matchIndex, charClass->m_matches.begin(), charClass->m_matches.size());
            // Any singletons above the last range still need equality tests.
            while (matchIndex < charClass->m_matches.size())
                matchDest.append(branch32(Equal, character, Imm32((unsigned short)charClass->m_matches[matchIndex++])));

            failures.link(this);
        } else if (charClass->m_matches.size()) {
            // optimization: gather 'a','A' etc back together, can mask & test once.
            Vector<char> matchesAZaz;

            for (unsigned i = 0; i < charClass->m_matches.size(); ++i) {
                char ch = charClass->m_matches[i];
                if (m_pattern.m_ignoreCase) {
                    // Defer lowercase letters to the masked pass below; drop
                    // their uppercase twins (covered by the same masked test).
                    if (isASCIILower(ch)) {
                        matchesAZaz.append(ch);
                        continue;
                    }
                    if (isASCIIUpper(ch))
                        continue;
                }
                matchDest.append(branch32(Equal, character, Imm32((unsigned short)ch)));
            }

            if (unsigned countAZaz = matchesAZaz.size()) {
                // NOTE: this destructively ORs 0x20 into 'character', folding
                // 'A'..'Z' onto 'a'..'z' so one compare covers both cases.
                or32(TrustedImm32(32), character);
                for (unsigned i = 0; i < countAZaz; ++i)
                    matchDest.append(branch32(Equal, character, TrustedImm32(matchesAZaz[i])));
            }
        }

        if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size())
            unicodeFail.link(this);
    }
229
    // Jumps if input not available; will have (incorrectly) incremented already!
    // That is: 'index' is advanced by countToCheck unconditionally, so on the
    // failure path the caller must compensate for the increment.
    Jump jumpIfNoAvailableInput(unsigned countToCheck = 0)
    {
        if (countToCheck)
            add32(Imm32(countToCheck), index);
        return branch32(Above, index, length);
    }
237
    // Advances index by countToCheck (unconditionally) and jumps if the new
    // position is still within the input (index <= length).
    Jump jumpIfAvailableInput(unsigned countToCheck)
    {
        add32(Imm32(countToCheck), index);
        return branch32(BelowOrEqual, index, length);
    }
243
    // Jumps if the current position is within bounds (index <= length).
    Jump checkInput()
    {
        return branch32(BelowOrEqual, index, length);
    }
248
    // Jumps if the current position is exactly the end of input (index == length).
    Jump atEndOfInput()
    {
        return branch32(Equal, index, length);
    }
253
    // Jumps if the current position is not at the end of input (index != length).
    Jump notAtEndOfInput()
    {
        return branch32(NotEqual, index, length);
    }
258
    // Jumps if the 16-bit code unit at input[index + inputPosition] equals ch.
    Jump jumpIfCharEquals(UChar ch, int inputPosition)
    {
        return branch16(Equal, BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), Imm32(ch));
    }
263
    // Jumps if the 16-bit code unit at input[index + inputPosition] differs from ch.
    Jump jumpIfCharNotEquals(UChar ch, int inputPosition)
    {
        return branch16(NotEqual, BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), Imm32(ch));
    }
268
    // Loads the 16-bit code unit at input[index + inputPosition] into reg.
    void readCharacter(int inputPosition, RegisterID reg)
    {
        load16(BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), reg);
    }
273
    // Stores reg into the matcher's stack-frame slot 'frameLocation'
    // (a word offset from the stack pointer, via poke).
    void storeToFrame(RegisterID reg, unsigned frameLocation)
    {
        poke(reg, frameLocation);
    }
278
    // Stores an immediate into the matcher's stack-frame slot 'frameLocation'.
    void storeToFrame(TrustedImm32 imm, unsigned frameLocation)
    {
        poke(imm, frameLocation);
    }
283
    // Stores a placeholder pointer into frame slot 'frameLocation'; the
    // returned DataLabelPtr is patched later with a real code address
    // (used as a backtrack "return address" -- see loadFromFrameAndJump).
    DataLabelPtr storeToFrameWithPatch(unsigned frameLocation)
    {
        return storePtrWithPatch(TrustedImmPtr(0), Address(stackPointerRegister, frameLocation * sizeof(void*)));
    }
288
    // Loads frame slot 'frameLocation' into reg (inverse of storeToFrame).
    void loadFromFrame(unsigned frameLocation, RegisterID reg)
    {
        peek(reg, frameLocation);
    }
293
    // Indirect jump through frame slot 'frameLocation' -- dispatches to a
    // backtrack address previously planted by storeToFrameWithPatch.
    void loadFromFrameAndJump(unsigned frameLocation)
    {
        jump(Address(stackPointerRegister, frameLocation * sizeof(void*)));
    }
298
    enum YarrOpCode {
        // These nodes wrap body alternatives - those in the main disjunction,
        // rather than subpatterns or assertions. These are chained together in
        // a doubly linked list, with a 'begin' node for the first alternative,
        // a 'next' node for each subsequent alternative, and an 'end' node at
        // the end. In the case of repeating alternatives, the 'end' node also
        // has a reference back to 'begin'.
        OpBodyAlternativeBegin,
        OpBodyAlternativeNext,
        OpBodyAlternativeEnd,
        // Similar to the body alternatives, but used for subpatterns with two
        // or more alternatives.
        OpNestedAlternativeBegin,
        OpNestedAlternativeNext,
        OpNestedAlternativeEnd,
        // Used for alternatives in subpatterns where there is only a single
        // alternative (backtracking is easier in these cases), or for alternatives
        // which never need to be backtracked (those in parenthetical assertions,
        // terminal subpatterns).
        OpSimpleNestedAlternativeBegin,
        OpSimpleNestedAlternativeNext,
        OpSimpleNestedAlternativeEnd,
        // Used to wrap 'Once' subpattern matches (quantityCount == 1).
        OpParenthesesSubpatternOnceBegin,
        OpParenthesesSubpatternOnceEnd,
        // Used to wrap 'Terminal' subpattern matches (at the end of the regexp).
        OpParenthesesSubpatternTerminalBegin,
        OpParenthesesSubpatternTerminalEnd,
        // Used to wrap parenthetical assertions.
        OpParentheticalAssertionBegin,
        OpParentheticalAssertionEnd,
        // Wraps all simple terms (pattern characters, character classes).
        OpTerm,
        // Where an expression contains only 'once through' body alternatives
        // and no repeating ones, this op is used to return match failure.
        OpMatchFailed
    };
336
337 // This structure is used to hold the compiled opcode information,
338 // including reference back to the original PatternTerm/PatternAlternatives,
339 // and JIT compilation data structures.
340 struct YarrOp {
341 explicit YarrOp(PatternTerm* term)
342 : m_op(OpTerm)
343 , m_term(term)
344 , m_isDeadCode(false)
345 {
346 }
347
348 explicit YarrOp(YarrOpCode op)
349 : m_op(op)
350 , m_isDeadCode(false)
351 {
352 }
353
354 // The operation, as a YarrOpCode, and also a reference to the PatternTerm.
355 YarrOpCode m_op;
356 PatternTerm* m_term;
357
358 // For alternatives, this holds the PatternAlternative and doubly linked
359 // references to this alternative's siblings. In the case of the
360 // OpBodyAlternativeEnd node at the end of a section of repeating nodes,
361 // m_nextOp will reference the OpBodyAlternativeBegin node of the first
362 // repeating alternative.
363 PatternAlternative* m_alternative;
364 size_t m_previousOp;
365 size_t m_nextOp;
366
367 // Used to record a set of Jumps out of the generated code, typically
368 // used for jumps out to backtracking code, and a single reentry back
369 // into the code for a node (likely where a backtrack will trigger
370 // rematching).
371 Label m_reentry;
372 JumpList m_jumps;
373
374 // This flag is used to null out the second pattern character, when
375 // two are fused to match a pair together.
376 bool m_isDeadCode;
377
378 // Currently used in the case of some of the more complex management of
379 // 'm_checked', to cache the offset used in this alternative, to avoid
380 // recalculating it.
381 int m_checkAdjust;
382
383 // Used by OpNestedAlternativeNext/End to hold the pointer to the
384 // value that will be pushed into the pattern's frame to return to,
385 // upon backtracking back into the disjunction.
386 DataLabelPtr m_returnAddress;
387 };
388
    // BacktrackingState
    // This class encapsulates information about the state of code generation
    // whilst generating the code for backtracking, when a term fails to match.
    // Upon entry to code generation of the backtracking code for a given node,
    // the Backtracking state will hold references to all control flow sources
    // that are outputs in need of further backtracking from the prior node
    // generated (which is the subsequent operation in the regular expression,
    // and in the m_ops Vector, since we generated backtracking backwards).
    // These references to control flow take the form of:
    //  - A jump list of jumps, to be linked to code that will backtrack them
    //    further.
    //  - A set of DataLabelPtr values, to be populated with values to be
    //    treated effectively as return addresses backtracking into complex
    //    subpatterns.
    //  - A flag indicating that the current sequence of generated code up to
    //    this point requires backtracking.
    class BacktrackingState {
    public:
        BacktrackingState()
            : m_pendingFallthrough(false)
        {
        }

        // Add a jump or jumps, a return address, or set the flag indicating
        // that the current 'fallthrough' control flow requires backtracking.
        void append(const Jump& jump)
        {
            m_laterFailures.append(jump);
        }
        void append(JumpList& jumpList)
        {
            m_laterFailures.append(jumpList);
        }
        void append(const DataLabelPtr& returnAddress)
        {
            m_pendingReturns.append(returnAddress);
        }
        void fallthrough()
        {
            ASSERT(!m_pendingFallthrough);
            m_pendingFallthrough = true;
        }

        // These methods clear the backtracking state, either linking to the
        // current location, a provided label, or copying the backtracking out
        // to a JumpList. All actions may require code generation to take place,
        // and as such are passed a pointer to the assembler.

        // Resolve all pending backtracks to the current output location:
        // pending return addresses are recorded against 'here' (patched at
        // the end via linkDataLabels), failure jumps are linked here, and
        // fallthrough trivially lands here.
        void link(MacroAssembler* assembler)
        {
            if (m_pendingReturns.size()) {
                Label here(assembler);
                for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
                    m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], here));
                m_pendingReturns.clear();
            }
            m_laterFailures.link(assembler);
            m_laterFailures.clear();
            m_pendingFallthrough = false;
        }
        // As link(), but targets a previously-recorded label; a pending
        // fallthrough requires emitting an explicit jump to reach it.
        void linkTo(Label label, MacroAssembler* assembler)
        {
            if (m_pendingReturns.size()) {
                for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
                    m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], label));
                m_pendingReturns.clear();
            }
            if (m_pendingFallthrough)
                assembler->jump(label);
            m_laterFailures.linkTo(label, assembler);
            m_laterFailures.clear();
            m_pendingFallthrough = false;
        }
        // Drain all pending backtracks into jumpList. Note the deliberate
        // ordering: resolving pending returns plants a landing pad at the
        // current location, so m_pendingFallthrough is set to force the
        // jump below -- control arriving at that pad must also end up in
        // jumpList rather than falling through.
        void takeBacktracksToJumpList(JumpList& jumpList, MacroAssembler* assembler)
        {
            if (m_pendingReturns.size()) {
                Label here(assembler);
                for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
                    m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], here));
                m_pendingReturns.clear();
                m_pendingFallthrough = true;
            }
            if (m_pendingFallthrough)
                jumpList.append(assembler->jump());
            jumpList.append(m_laterFailures);
            m_laterFailures.clear();
            m_pendingFallthrough = false;
        }

        // True when no jumps, return addresses, or fallthrough remain pending.
        bool isEmpty()
        {
            return m_laterFailures.empty() && m_pendingReturns.isEmpty() && !m_pendingFallthrough;
        }

        // Called at the end of code generation to link all return addresses.
        void linkDataLabels(LinkBuffer& linkBuffer)
        {
            ASSERT(isEmpty());
            for (unsigned i = 0; i < m_backtrackRecords.size(); ++i)
                linkBuffer.patch(m_backtrackRecords[i].m_dataLabel, linkBuffer.locationOf(m_backtrackRecords[i].m_backtrackLocation));
        }

    private:
        // Pairs a patchable pointer with the label whose final address it
        // should receive once the code is linked.
        struct ReturnAddressRecord {
            ReturnAddressRecord(DataLabelPtr dataLabel, Label backtrackLocation)
                : m_dataLabel(dataLabel)
                , m_backtrackLocation(backtrackLocation)
            {
            }

            DataLabelPtr m_dataLabel;
            Label m_backtrackLocation;
        };

        JumpList m_laterFailures;
        bool m_pendingFallthrough;
        Vector<DataLabelPtr, 4> m_pendingReturns;
        Vector<ReturnAddressRecord, 4> m_backtrackRecords;
    };
507
508 // Generation methods:
509 // ===================
510
511 // This method provides a default implementation of backtracking common
512 // to many terms; terms commonly jump out of the forwards matching path
513 // on any failed conditions, and add these jumps to the m_jumps list. If
514 // no special handling is required we can often just backtrack to m_jumps.
515 void backtrackTermDefault(size_t opIndex)
516 {
517 YarrOp& op = m_ops[opIndex];
518 m_backtrackingState.append(op.m_jumps);
519 }
520
    // Asserts '^'. Input positions are relative to m_checked (appears to be
    // the count of input characters already verified available for this
    // alternative; maintained outside this method -- index == m_checked is
    // the alternative's start position). Failures go to op.m_jumps.
    void generateAssertionBOL(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        if (m_pattern.m_multiline) {
            const RegisterID character = regT0;

            // Multiline: match at the very start of input...
            JumpList matchDest;
            if (!term->inputPosition)
                matchDest.append(branch32(Equal, index, Imm32(m_checked)));

            // ...or immediately after a newline character.
            readCharacter((term->inputPosition - m_checked) - 1, character);
            matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
            op.m_jumps.append(jump());

            matchDest.link(this);
        } else {
            // Erk, really should poison out these alternatives early. :-/
            if (term->inputPosition)
                op.m_jumps.append(jump());
            else
                op.m_jumps.append(branch32(NotEqual, index, Imm32(m_checked)));
        }
    }
    // '^' consumes no input, so the default backtracking suffices.
    void backtrackAssertionBOL(size_t opIndex)
    {
        backtrackTermDefault(opIndex);
    }
550
    // Asserts '$'. Multiline: matches at end of input or when the next
    // character is a newline. Non-multiline: matches only at end of input.
    // Failures go to op.m_jumps.
    void generateAssertionEOL(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        if (m_pattern.m_multiline) {
            const RegisterID character = regT0;

            JumpList matchDest;
            // term->inputPosition == m_checked iff the assertion sits at the
            // current (end-of-checked-region) position.
            if (term->inputPosition == m_checked)
                matchDest.append(atEndOfInput());

            readCharacter(term->inputPosition - m_checked, character);
            matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
            op.m_jumps.append(jump());

            matchDest.link(this);
        } else {
            if (term->inputPosition == m_checked)
                op.m_jumps.append(notAtEndOfInput());
            // Erk, really should poison out these alternatives early. :-/
            else
                op.m_jumps.append(jump());
        }
    }
    // '$' consumes no input, so the default backtracking suffices.
    void backtrackAssertionEOL(size_t opIndex)
    {
        backtrackTermDefault(opIndex);
    }
580
    // Tests whether the character at the term's position is a word character:
    // branches to nextIsWordChar if so. Also falls through on nextIsNotWordChar
    // (the list only gains an explicit jump for the end-of-input case).
    void matchAssertionWordchar(size_t opIndex, JumpList& nextIsWordChar, JumpList& nextIsNotWordChar)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID character = regT0;

        // At the end of input there is no next character -- not a wordchar.
        if (term->inputPosition == m_checked)
            nextIsNotWordChar.append(atEndOfInput());

        readCharacter((term->inputPosition - m_checked), character);
        matchCharacterClass(character, nextIsWordChar, m_pattern.wordcharCharacterClass());
    }
595
    // Asserts '\b' (or '\B' when inverted): succeeds when exactly one of the
    // previous/next characters is a word character. Generated as a two-way
    // split on the previous character, each side re-testing the next
    // character via matchAssertionWordchar. Failures go to op.m_jumps.
    void generateAssertionWordBoundary(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID character = regT0;

        // First classify the PREVIOUS character. At the start of input there
        // is no previous character, which counts as a non-wordchar, so skip
        // the read (atBegin is only initialized/linked in that case).
        Jump atBegin;
        JumpList matchDest;
        if (!term->inputPosition)
            atBegin = branch32(Equal, index, Imm32(m_checked));
        readCharacter((term->inputPosition - m_checked) - 1, character);
        matchCharacterClass(character, matchDest, m_pattern.wordcharCharacterClass());
        if (!term->inputPosition)
            atBegin.link(this);

        // We fall through to here if the last character was not a wordchar.
        JumpList nonWordCharThenWordChar;
        JumpList nonWordCharThenNonWordChar;
        if (term->invert()) {
            matchAssertionWordchar(opIndex, nonWordCharThenNonWordChar, nonWordCharThenWordChar);
            nonWordCharThenWordChar.append(jump());
        } else {
            matchAssertionWordchar(opIndex, nonWordCharThenWordChar, nonWordCharThenNonWordChar);
            nonWordCharThenNonWordChar.append(jump());
        }
        // non-word -> non-word is a failure for '\b' (a success list for '\B'
        // was swapped into the other argument above).
        op.m_jumps.append(nonWordCharThenNonWordChar);

        // We jump here if the last character was a wordchar.
        matchDest.link(this);
        JumpList wordCharThenWordChar;
        JumpList wordCharThenNonWordChar;
        if (term->invert()) {
            matchAssertionWordchar(opIndex, wordCharThenNonWordChar, wordCharThenWordChar);
            wordCharThenWordChar.append(jump());
        } else {
            matchAssertionWordchar(opIndex, wordCharThenWordChar, wordCharThenNonWordChar);
            // This can fall-though!
        }

        // word -> word is likewise a failure.
        op.m_jumps.append(wordCharThenWordChar);

        // Both success paths converge here.
        nonWordCharThenWordChar.link(this);
        wordCharThenNonWordChar.link(this);
    }
    // '\b' consumes no input, so the default backtracking suffices.
    void backtrackAssertionWordBoundary(size_t opIndex)
    {
        backtrackTermDefault(opIndex);
    }
645
    // Matches a single fixed pattern character. Optimization: when the next
    // op is an adjacent single fixed character, both are checked with one
    // 32-bit unaligned load/compare (the pair packed low|high into chPair)
    // and the next op is marked dead. Case-insensitive ASCII letters are
    // handled by OR-ing in 0x20 (folds upper onto lower case).
    void generatePatternCharacterOnce(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];

        // m_ops always ends with a OpBodyAlternativeEnd or OpMatchFailed
        // node, so there must always be at least one more node.
        ASSERT(opIndex + 1 < m_ops.size());
        YarrOp& nextOp = m_ops[opIndex + 1];

        // This op was fused into its predecessor's pair check; emit nothing.
        if (op.m_isDeadCode)
            return;

        PatternTerm* term = op.m_term;
        UChar ch = term->patternCharacter;

        const RegisterID character = regT0;

        if (nextOp.m_op == OpTerm) {
            PatternTerm* nextTerm = nextOp.m_term;
            if (nextTerm->type == PatternTerm::TypePatternCharacter
                && nextTerm->quantityType == QuantifierFixedCount
                && nextTerm->quantityCount == 1
                && nextTerm->inputPosition == (term->inputPosition + 1)) {

                UChar ch2 = nextTerm->patternCharacter;

                // Pack both 16-bit characters into one 32-bit value; build a
                // case-folding mask covering whichever halves are ASCII letters.
                int mask = 0;
                int chPair = ch | (ch2 << 16);

                if (m_pattern.m_ignoreCase) {
                    if (isASCIIAlpha(ch))
                        mask |= 32;
                    if (isASCIIAlpha(ch2))
                        mask |= 32 << 16;
                }

                BaseIndex address(input, index, TimesTwo, (term->inputPosition - m_checked) * sizeof(UChar));
                if (mask) {
                    load32WithUnalignedHalfWords(address, character);
                    or32(Imm32(mask), character);
                    op.m_jumps.append(branch32(NotEqual, character, Imm32(chPair | mask)));
                } else
                    op.m_jumps.append(branch32WithUnalignedHalfWords(NotEqual, address, Imm32(chPair)));

                // The pair check covered the next character too.
                nextOp.m_isDeadCode = true;
                return;
            }
        }

        if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
            readCharacter(term->inputPosition - m_checked, character);
            or32(TrustedImm32(32), character);
            op.m_jumps.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));
        } else {
            // Non-alpha (or case-sensitive) characters need no folding; the
            // assert guards that ignore-case never needed folding here.
            ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
            op.m_jumps.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked));
        }
    }
    // A fixed single character has no choice points; default backtracking.
    void backtrackPatternCharacterOnce(size_t opIndex)
    {
        backtrackTermDefault(opIndex);
    }
708
    // Matches a fixed-count run of the same pattern character (e.g. /a{3}/).
    // Emits a loop: countRegister starts at index - quantityCount and walks up
    // to index, comparing each position (addressed relative to countRegister,
    // offset so the run ends at the term's input position).
    void generatePatternCharacterFixed(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;
        UChar ch = term->patternCharacter;

        const RegisterID character = regT0;
        const RegisterID countRegister = regT1;

        move(index, countRegister);
        sub32(Imm32(term->quantityCount.unsafeGet()), countRegister);

        Label loop(this);
        BaseIndex address(input, countRegister, TimesTwo, ((term->inputPosition - m_checked + Checked<int>(term->quantityCount)) * static_cast<int>(sizeof(UChar))).unsafeGet());

        if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
            // Case-fold ASCII letters by OR-ing in 0x20 before comparing.
            load16(address, character);
            or32(TrustedImm32(32), character);
            op.m_jumps.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));
        } else {
            ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
            op.m_jumps.append(branch16(NotEqual, address, Imm32(ch)));
        }
        add32(TrustedImm32(1), countRegister);
        branch32(NotEqual, countRegister, index).linkTo(loop, this);
    }
    // A fixed-count run has no choice points; default backtracking.
    void backtrackPatternCharacterFixed(size_t opIndex)
    {
        backtrackTermDefault(opIndex);
    }
739
    // Matches a greedy-quantified character (e.g. /a*/, /a+/, /a{0,5}/):
    // consumes as many occurrences as possible (up to quantityCount, or
    // unbounded for quantifyInfinite), advancing index and counting matches
    // in countRegister. The count is spilled to the term's frame slot so
    // backtracking can give matches back one at a time (see the backtrack
    // method, which re-enters at op.m_reentry).
    void generatePatternCharacterGreedy(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;
        UChar ch = term->patternCharacter;

        const RegisterID character = regT0;
        const RegisterID countRegister = regT1;

        move(TrustedImm32(0), countRegister);

        JumpList failures;
        Label loop(this);
        // Stop consuming at end of input or on the first non-matching character.
        failures.append(atEndOfInput());
        if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
            readCharacter(term->inputPosition - m_checked, character);
            or32(TrustedImm32(32), character);
            failures.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));
        } else {
            ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
            failures.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked));
        }

        add32(TrustedImm32(1), countRegister);
        add32(TrustedImm32(1), index);
        if (term->quantityCount == quantifyInfinite)
            jump(loop);
        else
            branch32(NotEqual, countRegister, Imm32(term->quantityCount.unsafeGet())).linkTo(loop, this);

        failures.link(this);
        op.m_reentry = label();

        storeToFrame(countRegister, term->frameLocation);

    }
    // Backtracks a greedy character match: gives back one consumed character
    // (decrement count and index) and re-enters the forward path at
    // op.m_reentry. If the count is already zero there is nothing to give
    // back, so backtracking propagates further.
    void backtrackPatternCharacterGreedy(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID countRegister = regT1;

        m_backtrackingState.link(this);

        loadFromFrame(term->frameLocation, countRegister);
        m_backtrackingState.append(branchTest32(Zero, countRegister));
        sub32(TrustedImm32(1), countRegister);
        sub32(TrustedImm32(1), index);
        jump(op.m_reentry);
    }
791
    // Non-greedy character match: the forward path consumes nothing -- it just
    // zeroes the match count and stores it to the frame. Actual consumption
    // happens lazily in the backtrack method, which re-enters at op.m_reentry.
    void generatePatternCharacterNonGreedy(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID countRegister = regT1;

        move(TrustedImm32(0), countRegister);
        op.m_reentry = label();
        storeToFrame(countRegister, term->frameLocation);
    }
    // Backtracks a non-greedy character match by trying to consume one more
    // occurrence: fails (propagating the backtrack) at end of input, at the
    // quantity limit, or on a non-matching character; otherwise increments
    // count and index and re-enters the forward path. On failure the index
    // is rewound by the consumed count before falling through.
    void backtrackPatternCharacterNonGreedy(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;
        UChar ch = term->patternCharacter;

        const RegisterID character = regT0;
        const RegisterID countRegister = regT1;

        JumpList nonGreedyFailures;

        m_backtrackingState.link(this);

        loadFromFrame(term->frameLocation, countRegister);

        nonGreedyFailures.append(atEndOfInput());
        if (term->quantityCount != quantifyInfinite)
            nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityCount.unsafeGet())));
        if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
            readCharacter(term->inputPosition - m_checked, character);
            or32(TrustedImm32(32), character);
            nonGreedyFailures.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));
        } else {
            ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
            nonGreedyFailures.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked));
        }

        add32(TrustedImm32(1), countRegister);
        add32(TrustedImm32(1), index);

        jump(op.m_reentry);

        nonGreedyFailures.link(this);
        // Give back everything this term consumed before backtracking further.
        sub32(countRegister, index);
        m_backtrackingState.fallthrough();
    }
839
    // Matches a single character-class term. For an inverted class a class
    // match is a failure; otherwise failing to match is the failure.
    // Failures go to op.m_jumps.
    void generateCharacterClassOnce(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID character = regT0;

        JumpList matchDest;
        readCharacter(term->inputPosition - m_checked, character);
        matchCharacterClass(character, matchDest, term->characterClass);

        if (term->invert())
            op.m_jumps.append(matchDest);
        else {
            // Fallthrough from matchCharacterClass means "no match" -> fail.
            op.m_jumps.append(jump());
            matchDest.link(this);
        }
    }
    // A single class match has no choice points; default backtracking.
    void backtrackCharacterClassOnce(size_t opIndex)
    {
        backtrackTermDefault(opIndex);
    }
862
    // Matches a fixed-count run of a character class (e.g. /[a-z]{3}/), using
    // the same count-up loop shape as generatePatternCharacterFixed.
    void generateCharacterClassFixed(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID character = regT0;
        const RegisterID countRegister = regT1;

        move(index, countRegister);
        sub32(Imm32(term->quantityCount.unsafeGet()), countRegister);

        Label loop(this);
        JumpList matchDest;
        load16(BaseIndex(input, countRegister, TimesTwo, ((term->inputPosition - m_checked + Checked<int>(term->quantityCount)) * static_cast<int>(sizeof(UChar))).unsafeGet()), character);
        matchCharacterClass(character, matchDest, term->characterClass);

        if (term->invert())
            op.m_jumps.append(matchDest);
        else {
            op.m_jumps.append(jump());
            matchDest.link(this);
        }

        add32(TrustedImm32(1), countRegister);
        branch32(NotEqual, countRegister, index).linkTo(loop, this);
    }
    // A fixed-count class run has no choice points; default backtracking.
    void backtrackCharacterClassFixed(size_t opIndex)
    {
        backtrackTermDefault(opIndex);
    }
893
    // Greedy class match (e.g. /[a-z]*/): consumes as many class members as
    // possible (bounded by quantityCount unless infinite), counting them in
    // countRegister and spilling the count to the frame for the backtrack
    // method (which re-enters at op.m_reentry).
    void generateCharacterClassGreedy(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID character = regT0;
        const RegisterID countRegister = regT1;

        move(TrustedImm32(0), countRegister);

        JumpList failures;
        Label loop(this);
        failures.append(atEndOfInput());

        if (term->invert()) {
            // Inverted: a class match ends consumption.
            readCharacter(term->inputPosition - m_checked, character);
            matchCharacterClass(character, failures, term->characterClass);
        } else {
            JumpList matchDest;
            readCharacter(term->inputPosition - m_checked, character);
            matchCharacterClass(character, matchDest, term->characterClass);
            failures.append(jump());
            matchDest.link(this);
        }

        add32(TrustedImm32(1), countRegister);
        add32(TrustedImm32(1), index);
        if (term->quantityCount != quantifyInfinite) {
            branch32(NotEqual, countRegister, Imm32(term->quantityCount.unsafeGet())).linkTo(loop, this);
            // Hit the quantity limit: stop consuming (this is not a failure).
            failures.append(jump());
        } else
            jump(loop);

        failures.link(this);
        op.m_reentry = label();

        storeToFrame(countRegister, term->frameLocation);
    }
    // Backtracks a greedy character class by giving back one matched character:
    // if no characters remain to give back, backtracking propagates onwards;
    // otherwise decrement the saved match count and the input position, and
    // retry the continuation via op.m_reentry.
    void backtrackCharacterClassGreedy(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID countRegister = regT1;

        m_backtrackingState.link(this);

        loadFromFrame(term->frameLocation, countRegister);
        // Count is zero - nothing left to give back, keep backtracking.
        m_backtrackingState.append(branchTest32(Zero, countRegister));
        sub32(TrustedImm32(1), countRegister);
        sub32(TrustedImm32(1), index);
        jump(op.m_reentry);
    }
947
    // Matches a character class quantified non-greedily (e.g. /[a-z]*?/).
    // Initially matches zero characters - just record a zero count in the
    // term's frame slot. Further characters are only consumed when the
    // backtracking path requests them (see backtrackCharacterClassNonGreedy).
    void generateCharacterClassNonGreedy(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID countRegister = regT1;

        move(TrustedImm32(0), countRegister);
        // Backtracking jumps back here after consuming one more character.
        op.m_reentry = label();
        storeToFrame(countRegister, term->frameLocation);
    }
959 void backtrackCharacterClassNonGreedy(size_t opIndex)
960 {
961 YarrOp& op = m_ops[opIndex];
962 PatternTerm* term = op.m_term;
963
964 const RegisterID character = regT0;
965 const RegisterID countRegister = regT1;
966
967 JumpList nonGreedyFailures;
968
969 m_backtrackingState.link(this);
970
971 Label backtrackBegin(this);
972 loadFromFrame(term->frameLocation, countRegister);
973
974 nonGreedyFailures.append(atEndOfInput());
975 nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityCount.unsafeGet())));
976
977 JumpList matchDest;
978 readCharacter(term->inputPosition - m_checked, character);
979 matchCharacterClass(character, matchDest, term->characterClass);
980
981 if (term->invert())
982 nonGreedyFailures.append(matchDest);
983 else {
984 nonGreedyFailures.append(jump());
985 matchDest.link(this);
986 }
987
988 add32(TrustedImm32(1), countRegister);
989 add32(TrustedImm32(1), index);
990
991 jump(op.m_reentry);
992
993 nonGreedyFailures.link(this);
994 sub32(countRegister, index);
995 m_backtrackingState.fallthrough();
996 }
997
    // Generates code for terms of the form /.*<body>.*/ (a body enclosed by
    // .* at both ends). Rather than matching the enclosing .* runs by
    // backtracking, directly scan backwards from the current position to find
    // the start of the line, and forwards to find the end, storing the
    // resulting match boundaries to the output array.
    void generateDotStarEnclosure(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID character = regT0;
        const RegisterID matchPos = regT1;

        JumpList foundBeginningNewLine;
        JumpList saveStartIndex;
        JumpList foundEndingNewLine;

        // Seed matchPos with the position at which the body matched: for
        // fixed-size patterns this is index adjusted by the checked count;
        // otherwise the match start was stored to the first output slot.
        if (m_pattern.m_body->m_hasFixedSize) {
            move(index, matchPos);
            sub32(Imm32(m_checked), matchPos);
        } else
            load32(Address(output), matchPos);

        // Scan backwards for a newline, or the start of input.
        saveStartIndex.append(branchTest32(Zero, matchPos));
        Label findBOLLoop(this);
        sub32(TrustedImm32(1), matchPos);
        load16(BaseIndex(input, matchPos, TimesTwo, 0), character);
        matchCharacterClass(character, foundBeginningNewLine, m_pattern.newlineCharacterClass());
        branchTest32(NonZero, matchPos).linkTo(findBOLLoop, this);
        saveStartIndex.append(jump());

        foundBeginningNewLine.link(this);
        add32(TrustedImm32(1), matchPos); // Advance past newline
        saveStartIndex.link(this);

        // A '^' anchor in non-multiline mode can only match at position 0.
        if (!m_pattern.m_multiline && term->anchors.bolAnchor)
            op.m_jumps.append(branchTest32(NonZero, matchPos));

        // Record the start of the match.
        store32(matchPos, Address(output));

        // Scan forwards for a newline, or the end of input.
        move(index, matchPos);

        Label findEOLLoop(this);
        foundEndingNewLine.append(branch32(Equal, matchPos, length));
        load16(BaseIndex(input, matchPos, TimesTwo, 0), character);
        matchCharacterClass(character, foundEndingNewLine, m_pattern.newlineCharacterClass());
        add32(TrustedImm32(1), matchPos);
        jump(findEOLLoop);

        foundEndingNewLine.link(this);

        // A '$' anchor in non-multiline mode can only match at end of input.
        if (!m_pattern.m_multiline && term->anchors.eolAnchor)
            op.m_jumps.append(branch32(NotEqual, matchPos, length));

        // Continue matching from the end of the line.
        move(matchPos, index);
    }
1049
    // Backtracking for a dot-star enclosure: the scans are deterministic, so
    // there is nothing to unwind - use the default (link pending backtracks
    // to this term's failure jumps).
    void backtrackDotStarEnclosure(size_t opIndex)
    {
        backtrackTermDefault(opIndex);
    }
1054
1055 // Code generation/backtracking for simple terms
1056 // (pattern characters, character classes, and assertions).
1057 // These methods farm out work to the set of functions above.
    // Forwards code generation for a single simple term (pattern character,
    // character class, or assertion): dispatches on the term's type and
    // quantifier to the appropriate generate* method above. Back references
    // are not JIT-compiled - they set m_shouldFallBack so the pattern is
    // handled by the interpreter instead.
    void generateTerm(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        switch (term->type) {
        case PatternTerm::TypePatternCharacter:
            switch (term->quantityType) {
            case QuantifierFixedCount:
                if (term->quantityCount == 1)
                    generatePatternCharacterOnce(opIndex);
                else
                    generatePatternCharacterFixed(opIndex);
                break;
            case QuantifierGreedy:
                generatePatternCharacterGreedy(opIndex);
                break;
            case QuantifierNonGreedy:
                generatePatternCharacterNonGreedy(opIndex);
                break;
            }
            break;

        case PatternTerm::TypeCharacterClass:
            switch (term->quantityType) {
            case QuantifierFixedCount:
                if (term->quantityCount == 1)
                    generateCharacterClassOnce(opIndex);
                else
                    generateCharacterClassFixed(opIndex);
                break;
            case QuantifierGreedy:
                generateCharacterClassGreedy(opIndex);
                break;
            case QuantifierNonGreedy:
                generateCharacterClassNonGreedy(opIndex);
                break;
            }
            break;

        case PatternTerm::TypeAssertionBOL:
            generateAssertionBOL(opIndex);
            break;

        case PatternTerm::TypeAssertionEOL:
            generateAssertionEOL(opIndex);
            break;

        case PatternTerm::TypeAssertionWordBoundary:
            generateAssertionWordBoundary(opIndex);
            break;

        case PatternTerm::TypeForwardReference:
            break;

        case PatternTerm::TypeParenthesesSubpattern:
        case PatternTerm::TypeParentheticalAssertion:
            // These term types are compiled via their own YarrOp nodes, never
            // as a plain OpTerm. In release builds (where the assert is a
            // no-op) deliberately fall through to the interpreter fallback.
            ASSERT_NOT_REACHED();
        case PatternTerm::TypeBackReference:
            m_shouldFallBack = true;
            break;
        case PatternTerm::TypeDotStarEnclosure:
            generateDotStarEnclosure(opIndex);
            break;
        }
    }
1124 void backtrackTerm(size_t opIndex)
1125 {
1126 YarrOp& op = m_ops[opIndex];
1127 PatternTerm* term = op.m_term;
1128
1129 switch (term->type) {
1130 case PatternTerm::TypePatternCharacter:
1131 switch (term->quantityType) {
1132 case QuantifierFixedCount:
1133 if (term->quantityCount == 1)
1134 backtrackPatternCharacterOnce(opIndex);
1135 else
1136 backtrackPatternCharacterFixed(opIndex);
1137 break;
1138 case QuantifierGreedy:
1139 backtrackPatternCharacterGreedy(opIndex);
1140 break;
1141 case QuantifierNonGreedy:
1142 backtrackPatternCharacterNonGreedy(opIndex);
1143 break;
1144 }
1145 break;
1146
1147 case PatternTerm::TypeCharacterClass:
1148 switch (term->quantityType) {
1149 case QuantifierFixedCount:
1150 if (term->quantityCount == 1)
1151 backtrackCharacterClassOnce(opIndex);
1152 else
1153 backtrackCharacterClassFixed(opIndex);
1154 break;
1155 case QuantifierGreedy:
1156 backtrackCharacterClassGreedy(opIndex);
1157 break;
1158 case QuantifierNonGreedy:
1159 backtrackCharacterClassNonGreedy(opIndex);
1160 break;
1161 }
1162 break;
1163
1164 case PatternTerm::TypeAssertionBOL:
1165 backtrackAssertionBOL(opIndex);
1166 break;
1167
1168 case PatternTerm::TypeAssertionEOL:
1169 backtrackAssertionEOL(opIndex);
1170 break;
1171
1172 case PatternTerm::TypeAssertionWordBoundary:
1173 backtrackAssertionWordBoundary(opIndex);
1174 break;
1175
1176 case PatternTerm::TypeForwardReference:
1177 break;
1178
1179 case PatternTerm::TypeParenthesesSubpattern:
1180 case PatternTerm::TypeParentheticalAssertion:
1181 ASSERT_NOT_REACHED();
1182
1183 case PatternTerm::TypeDotStarEnclosure:
1184 backtrackDotStarEnclosure(opIndex);
1185 break;
1186
1187 case PatternTerm::TypeBackReference:
1188 m_shouldFallBack = true;
1189 break;
1190 }
1191 }
1192
    // Generates the main forwards-matching code for the regular expression by
    // walking the m_ops list from first to last. Backtracking code is emitted
    // separately, by backtrack(). Throughout, m_checked tracks how much input
    // has been verified to be available ahead of the current position, so that
    // individual terms need not re-check.
    void generate()
    {
        // Forwards generate the matching code.
        ASSERT(m_ops.size());
        size_t opIndex = 0;

        do {
            YarrOp& op = m_ops[opIndex];
            switch (op.m_op) {

            case OpTerm:
                generateTerm(opIndex);
                break;

            // OpBodyAlternativeBegin/Next/End
            //
            // These nodes wrap the set of alternatives in the body of the regular expression.
            // There may be either one or two chains of OpBodyAlternative nodes, one representing
            // the 'once through' sequence of alternatives (if any exist), and one representing
            // the repeating alternatives (again, if any exist).
            //
            // Upon normal entry to the Begin alternative, we will check that input is available.
            // Reentry to the Begin alternative will take place after the check has taken place,
            // and will assume that the input position has already been progressed as appropriate.
            //
            // Entry to subsequent Next/End alternatives occurs when the prior alternative has
            // successfully completed a match - return a success state from JIT code.
            //
            // Next alternatives allow for reentry optimized to suit backtracking from its
            // preceding alternative. It expects the input position to still be set to a position
            // appropriate to its predecessor, and it will only perform an input check if the
            // predecessor had a minimum size less than its own.
            //
            // In the case of 'once through' expressions, the End node will also have a reentry
            // point to jump to when the last alternative fails. Again, this expects the input
            // position to still reflect that expected by the prior alternative.
            case OpBodyAlternativeBegin: {
                PatternAlternative* alternative = op.m_alternative;

                // Upon entry at the head of the set of alternatives, check if input is available
                // to run the first alternative. (This progresses the input position).
                op.m_jumps.append(jumpIfNoAvailableInput(alternative->m_minimumSize));
                // We will reenter after the check, and assume the input position to have been
                // set as appropriate to this alternative.
                op.m_reentry = label();

                m_checked += alternative->m_minimumSize;
                break;
            }
            case OpBodyAlternativeNext:
            case OpBodyAlternativeEnd: {
                PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
                PatternAlternative* alternative = op.m_alternative;

                // If we get here, the prior alternative matched - return success.

                // Adjust the stack pointer to remove the pattern's frame.
                if (m_pattern.m_body->m_callFrameSize)
                    addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);

                // Load appropriate values into the return register and the first output
                // slot, and return. In the case of a pattern with a fixed size, we will
                // not have yet set the value in the first output slot.
                ASSERT(index != returnRegister);
                if (m_pattern.m_body->m_hasFixedSize) {
                    move(index, returnRegister);
                    if (priorAlternative->m_minimumSize)
                        sub32(Imm32(priorAlternative->m_minimumSize), returnRegister);
                    store32(returnRegister, output);
                } else
                    load32(Address(output), returnRegister);
                store32(index, Address(output, 4));
                generateReturn();

                // This is the divide between the tail of the prior alternative, above, and
                // the head of the subsequent alternative, below.

                if (op.m_op == OpBodyAlternativeNext) {
                    // This is the reentry point for the Next alternative. We expect any code
                    // that jumps here to do so with the input position matching that of the
                    // PRIOR alternative, and we will only check input availability if we
                    // need to progress it forwards.
                    op.m_reentry = label();
                    if (alternative->m_minimumSize > priorAlternative->m_minimumSize) {
                        add32(Imm32(alternative->m_minimumSize - priorAlternative->m_minimumSize), index);
                        op.m_jumps.append(jumpIfNoAvailableInput());
                    } else if (priorAlternative->m_minimumSize > alternative->m_minimumSize)
                        sub32(Imm32(priorAlternative->m_minimumSize - alternative->m_minimumSize), index);
                } else if (op.m_nextOp == notFound) {
                    // This is the reentry point for the End of 'once through' alternatives,
                    // jumped to when the last alternative fails to match.
                    op.m_reentry = label();
                    sub32(Imm32(priorAlternative->m_minimumSize), index);
                }

                if (op.m_op == OpBodyAlternativeNext)
                    m_checked += alternative->m_minimumSize;
                m_checked -= priorAlternative->m_minimumSize;
                break;
            }

            // OpSimpleNestedAlternativeBegin/Next/End
            // OpNestedAlternativeBegin/Next/End
            //
            // These nodes are used to handle sets of alternatives that are nested within
            // subpatterns and parenthetical assertions. The 'simple' forms are used where
            // we do not need to be able to backtrack back into any alternative other than
            // the last, the normal forms allow backtracking into any alternative.
            //
            // Each Begin/Next node is responsible for planting an input check to ensure
            // sufficient input is available on entry. Next nodes additionally need to
            // jump to the end - Next nodes use the End node's m_jumps list to hold this
            // set of jumps.
            //
            // In the non-simple forms, successful alternative matches must store a
            // 'return address' using a DataLabelPtr, used to store the address to jump
            // to when backtracking, to get to the code for the appropriate alternative.
            case OpSimpleNestedAlternativeBegin:
            case OpNestedAlternativeBegin: {
                PatternTerm* term = op.m_term;
                PatternAlternative* alternative = op.m_alternative;
                PatternDisjunction* disjunction = term->parentheses.disjunction;

                // Calculate how much input we need to check for, and if non-zero check.
                op.m_checkAdjust = alternative->m_minimumSize;
                if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion))
                    op.m_checkAdjust -= disjunction->m_minimumSize;
                if (op.m_checkAdjust)
                    op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust));

                m_checked += op.m_checkAdjust;
                break;
            }
            case OpSimpleNestedAlternativeNext:
            case OpNestedAlternativeNext: {
                PatternTerm* term = op.m_term;
                PatternAlternative* alternative = op.m_alternative;
                PatternDisjunction* disjunction = term->parentheses.disjunction;

                // In the non-simple case, store a 'return address' so we can backtrack correctly.
                if (op.m_op == OpNestedAlternativeNext) {
                    unsigned parenthesesFrameLocation = term->frameLocation;
                    unsigned alternativeFrameLocation = parenthesesFrameLocation;
                    if (term->quantityType != QuantifierFixedCount)
                        alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
                    op.m_returnAddress = storeToFrameWithPatch(alternativeFrameLocation);
                }

                // If we reach here then the last alternative has matched - jump to the
                // End node, to skip over any further alternatives.
                //
                // FIXME: this is logically O(N^2) (though N can be expected to be very
                // small). We could avoid this either by adding an extra jump to the JIT
                // data structures, or by making backtracking code that jumps to Next
                // alternatives are responsible for checking that input is available (if
                // we didn't need to plant the input checks, then m_jumps would be free).
                YarrOp* endOp = &m_ops[op.m_nextOp];
                while (endOp->m_nextOp != notFound) {
                    ASSERT(endOp->m_op == OpSimpleNestedAlternativeNext || endOp->m_op == OpNestedAlternativeNext);
                    endOp = &m_ops[endOp->m_nextOp];
                }
                ASSERT(endOp->m_op == OpSimpleNestedAlternativeEnd || endOp->m_op == OpNestedAlternativeEnd);
                endOp->m_jumps.append(jump());

                // This is the entry point for the next alternative.
                op.m_reentry = label();

                // Calculate how much input we need to check for, and if non-zero check.
                op.m_checkAdjust = alternative->m_minimumSize;
                if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion))
                    op.m_checkAdjust -= disjunction->m_minimumSize;
                if (op.m_checkAdjust)
                    op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust));

                YarrOp& lastOp = m_ops[op.m_previousOp];
                m_checked -= lastOp.m_checkAdjust;
                m_checked += op.m_checkAdjust;
                break;
            }
            case OpSimpleNestedAlternativeEnd:
            case OpNestedAlternativeEnd: {
                PatternTerm* term = op.m_term;

                // In the non-simple case, store a 'return address' so we can backtrack correctly.
                if (op.m_op == OpNestedAlternativeEnd) {
                    unsigned parenthesesFrameLocation = term->frameLocation;
                    unsigned alternativeFrameLocation = parenthesesFrameLocation;
                    if (term->quantityType != QuantifierFixedCount)
                        alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
                    op.m_returnAddress = storeToFrameWithPatch(alternativeFrameLocation);
                }

                // If this set of alternatives contains more than one alternative,
                // then the Next nodes will have planted jumps to the End, and added
                // them to this node's m_jumps list.
                op.m_jumps.link(this);
                op.m_jumps.clear();

                YarrOp& lastOp = m_ops[op.m_previousOp];
                m_checked -= lastOp.m_checkAdjust;
                break;
            }

            // OpParenthesesSubpatternOnceBegin/End
            //
            // These nodes support (optionally) capturing subpatterns, that have a
            // quantity count of 1 (this covers fixed once, and ?/?? quantifiers).
            case OpParenthesesSubpatternOnceBegin: {
                PatternTerm* term = op.m_term;
                unsigned parenthesesFrameLocation = term->frameLocation;
                const RegisterID indexTemporary = regT0;
                ASSERT(term->quantityCount == 1);

                // Upon entry to a Greedy quantified set of parentheses, store the index.
                // We'll use this for two purposes:
                //  - To indicate which iteration we are on of matching the remainder of
                //    the expression after the parentheses - the first, including the
                //    match within the parentheses, or the second having skipped over them.
                //  - To check for empty matches, which must be rejected.
                //
                // At the head of a NonGreedy set of parentheses we'll immediately set the
                // value on the stack to -1 (indicating a match skipping the subpattern),
                // and plant a jump to the end. We'll also plant a label to backtrack to
                // to reenter the subpattern later, with a store to set up index on the
                // second iteration.
                //
                // FIXME: for capturing parens, could use the index in the capture array?
                if (term->quantityType == QuantifierGreedy)
                    storeToFrame(index, parenthesesFrameLocation);
                else if (term->quantityType == QuantifierNonGreedy) {
                    storeToFrame(TrustedImm32(-1), parenthesesFrameLocation);
                    op.m_jumps.append(jump());
                    op.m_reentry = label();
                    storeToFrame(index, parenthesesFrameLocation);
                }

                // If the parentheses are capturing, store the starting index value to the
                // captures array, offsetting as necessary.
                //
                // FIXME: could avoid offsetting this value in JIT code, apply
                // offsets only afterwards, at the point the results array is
                // being accessed.
                if (term->capture()) {
                    int offsetId = term->parentheses.subpatternId << 1;
                    int inputOffset = term->inputPosition - m_checked;
                    if (term->quantityType == QuantifierFixedCount)
                        inputOffset -= term->parentheses.disjunction->m_minimumSize;
                    if (inputOffset) {
                        move(index, indexTemporary);
                        add32(Imm32(inputOffset), indexTemporary);
                        store32(indexTemporary, Address(output, offsetId * sizeof(int)));
                    } else
                        store32(index, Address(output, offsetId * sizeof(int)));
                }
                break;
            }
            case OpParenthesesSubpatternOnceEnd: {
                PatternTerm* term = op.m_term;
                unsigned parenthesesFrameLocation = term->frameLocation;
                const RegisterID indexTemporary = regT0;
                ASSERT(term->quantityCount == 1);

                // For Greedy/NonGreedy quantified parentheses, we must reject zero length
                // matches. If the minimum size is known to be non-zero we need not check.
                if (term->quantityType != QuantifierFixedCount && !term->parentheses.disjunction->m_minimumSize)
                    op.m_jumps.append(branch32(Equal, index, Address(stackPointerRegister, parenthesesFrameLocation * sizeof(void*))));

                // If the parentheses are capturing, store the ending index value to the
                // captures array, offsetting as necessary.
                //
                // FIXME: could avoid offsetting this value in JIT code, apply
                // offsets only afterwards, at the point the results array is
                // being accessed.
                if (term->capture()) {
                    int offsetId = (term->parentheses.subpatternId << 1) + 1;
                    int inputOffset = term->inputPosition - m_checked;
                    if (inputOffset) {
                        move(index, indexTemporary);
                        add32(Imm32(inputOffset), indexTemporary);
                        store32(indexTemporary, Address(output, offsetId * sizeof(int)));
                    } else
                        store32(index, Address(output, offsetId * sizeof(int)));
                }

                // If the parentheses are quantified Greedy then add a label to jump back
                // to if we get a failed match from after the parentheses. For NonGreedy
                // parentheses, link the jump from before the subpattern to here.
                if (term->quantityType == QuantifierGreedy)
                    op.m_reentry = label();
                else if (term->quantityType == QuantifierNonGreedy) {
                    YarrOp& beginOp = m_ops[op.m_previousOp];
                    beginOp.m_jumps.link(this);
                }
                break;
            }

            // OpParenthesesSubpatternTerminalBegin/End
            case OpParenthesesSubpatternTerminalBegin: {
                PatternTerm* term = op.m_term;
                ASSERT(term->quantityType == QuantifierGreedy);
                ASSERT(term->quantityCount == quantifyInfinite);
                ASSERT(!term->capture());

                // Upon entry set a label to loop back to.
                op.m_reentry = label();

                // Store the start index of the current match; we need to reject zero
                // length matches.
                storeToFrame(index, term->frameLocation);
                break;
            }
            case OpParenthesesSubpatternTerminalEnd: {
                PatternTerm* term = op.m_term;

                // Check for zero length matches - if the match is non-zero, then we
                // can accept it & loop back up to the head of the subpattern.
                YarrOp& beginOp = m_ops[op.m_previousOp];
                branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)), beginOp.m_reentry);

                // Reject the match - backtrack back into the subpattern.
                op.m_jumps.append(jump());

                // This is the entry point to jump to when we stop matching - we will
                // do so once the subpattern cannot match any more.
                op.m_reentry = label();
                break;
            }

            // OpParentheticalAssertionBegin/End
            case OpParentheticalAssertionBegin: {
                PatternTerm* term = op.m_term;

                // Store the current index - assertions should not update index, so
                // we will need to restore it upon a successful match.
                unsigned parenthesesFrameLocation = term->frameLocation;
                storeToFrame(index, parenthesesFrameLocation);

                // Rewind the input position to the point at which the assertion
                // begins matching, relative to the checked position.
                op.m_checkAdjust = m_checked - term->inputPosition;
                if (op.m_checkAdjust)
                    sub32(Imm32(op.m_checkAdjust), index);

                m_checked -= op.m_checkAdjust;
                break;
            }
            case OpParentheticalAssertionEnd: {
                PatternTerm* term = op.m_term;

                // Restore the input index value.
                unsigned parenthesesFrameLocation = term->frameLocation;
                loadFromFrame(parenthesesFrameLocation, index);

                // If inverted, a successful match of the assertion must be treated
                // as a failure, so jump to backtracking.
                if (term->invert()) {
                    op.m_jumps.append(jump());
                    op.m_reentry = label();
                }

                YarrOp& lastOp = m_ops[op.m_previousOp];
                m_checked += lastOp.m_checkAdjust;
                break;
            }

            case OpMatchFailed:
                // Unwind the pattern's stack frame and return -1 (no match).
                if (m_pattern.m_body->m_callFrameSize)
                    addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
                move(TrustedImm32(-1), returnRegister);
                generateReturn();
                break;
            }

            ++opIndex;
        } while (opIndex < m_ops.size());
    }
1568
1569 void backtrack()
1570 {
1571 // Backwards generate the backtracking code.
1572 size_t opIndex = m_ops.size();
1573 ASSERT(opIndex);
1574
1575 do {
1576 --opIndex;
1577 YarrOp& op = m_ops[opIndex];
1578 switch (op.m_op) {
1579
1580 case OpTerm:
1581 backtrackTerm(opIndex);
1582 break;
1583
1584 // OpBodyAlternativeBegin/Next/End
1585 //
1586 // For each Begin/Next node representing an alternative, we need to decide what to do
1587 // in two circumstances:
1588 // - If we backtrack back into this node, from within the alternative.
1589 // - If the input check at the head of the alternative fails (if this exists).
1590 //
1591 // We treat these two cases differently since in the former case we have slightly
1592 // more information - since we are backtracking out of a prior alternative we know
1593 // that at least enough input was available to run it. For example, given the regular
1594 // expression /a|b/, if we backtrack out of the first alternative (a failed pattern
1595 // character match of 'a'), then we need not perform an additional input availability
1596 // check before running the second alternative.
1597 //
1598 // Backtracking required differs for the last alternative, which in the case of the
1599 // repeating set of alternatives must loop. The code generated for the last alternative
1600 // will also be used to handle all input check failures from any prior alternatives -
1601 // these require similar functionality, in seeking the next available alternative for
1602 // which there is sufficient input.
1603 //
1604 // Since backtracking of all other alternatives simply requires us to link backtracks
1605 // to the reentry point for the subsequent alternative, we will only be generating any
1606 // code when backtracking the last alternative.
1607 case OpBodyAlternativeBegin:
1608 case OpBodyAlternativeNext: {
1609 PatternAlternative* alternative = op.m_alternative;
1610
1611 if (op.m_op == OpBodyAlternativeNext) {
1612 PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
1613 m_checked += priorAlternative->m_minimumSize;
1614 }
1615 m_checked -= alternative->m_minimumSize;
1616
1617 // Is this the last alternative? If not, then if we backtrack to this point we just
1618 // need to jump to try to match the next alternative.
1619 if (m_ops[op.m_nextOp].m_op != OpBodyAlternativeEnd) {
1620 m_backtrackingState.linkTo(m_ops[op.m_nextOp].m_reentry, this);
1621 break;
1622 }
1623 YarrOp& endOp = m_ops[op.m_nextOp];
1624
1625 YarrOp* beginOp = &op;
1626 while (beginOp->m_op != OpBodyAlternativeBegin) {
1627 ASSERT(beginOp->m_op == OpBodyAlternativeNext);
1628 beginOp = &m_ops[beginOp->m_previousOp];
1629 }
1630
1631 bool onceThrough = endOp.m_nextOp == notFound;
1632
1633 // First, generate code to handle cases where we backtrack out of an attempted match
1634 // of the last alternative. If this is a 'once through' set of alternatives then we
1635 // have nothing to do - link this straight through to the End.
1636 if (onceThrough)
1637 m_backtrackingState.linkTo(endOp.m_reentry, this);
1638 else {
1639 // If we don't need to move the input poistion, and the pattern has a fixed size
1640 // (in which case we omit the store of the start index until the pattern has matched)
1641 // then we can just link the backtrack out of the last alternative straight to the
1642 // head of the first alternative.
1643 if (m_pattern.m_body->m_hasFixedSize
1644 && (alternative->m_minimumSize > beginOp->m_alternative->m_minimumSize)
1645 && (alternative->m_minimumSize - beginOp->m_alternative->m_minimumSize == 1))
1646 m_backtrackingState.linkTo(beginOp->m_reentry, this);
1647 else {
1648 // We need to generate a trampoline of code to execute before looping back
1649 // around to the first alternative.
1650 m_backtrackingState.link(this);
1651
1652 // If the pattern size is not fixed, then store the start index, for use if we match.
1653 if (!m_pattern.m_body->m_hasFixedSize) {
1654 if (alternative->m_minimumSize == 1)
1655 store32(index, Address(output));
1656 else {
1657 move(index, regT0);
1658 if (alternative->m_minimumSize)
1659 sub32(Imm32(alternative->m_minimumSize - 1), regT0);
1660 else
1661 add32(Imm32(1), regT0);
1662 store32(regT0, Address(output));
1663 }
1664 }
1665
1666 // Generate code to loop. Check whether the last alternative is longer than the
1667 // first (e.g. /a|xy/ or /a|xyz/).
1668 if (alternative->m_minimumSize > beginOp->m_alternative->m_minimumSize) {
1669 // We want to loop, and increment input position. If the delta is 1, it is
1670 // already correctly incremented, if more than one then decrement as appropriate.
1671 unsigned delta = alternative->m_minimumSize - beginOp->m_alternative->m_minimumSize;
1672 ASSERT(delta);
1673 if (delta != 1)
1674 sub32(Imm32(delta - 1), index);
1675 jump(beginOp->m_reentry);
1676 } else {
1677 // If the first alternative has minimum size 0xFFFFFFFFu, then there cannot
1678 // be sufficent input available to handle this, so just fall through.
1679 unsigned delta = beginOp->m_alternative->m_minimumSize - alternative->m_minimumSize;
1680 if (delta != 0xFFFFFFFFu) {
1681 // We need to check input because we are incrementing the input.
1682 add32(Imm32(delta + 1), index);
1683 checkInput().linkTo(beginOp->m_reentry, this);
1684 }
1685 }
1686 }
1687 }
1688
1689 // We can reach this point in the code in two ways:
1690 // - Fallthrough from the code above (a repeating alternative backtracked out of its
1691 // last alternative, and did not have sufficent input to run the first).
1692 // - We will loop back up to the following label when a releating alternative loops,
1693 // following a failed input check.
1694 //
1695 // Either way, we have just failed the input check for the first alternative.
1696 Label firstInputCheckFailed(this);
1697
1698 // Generate code to handle input check failures from alternatives except the last.
1699 // prevOp is the alternative we're handling a bail out from (initially Begin), and
1700 // nextOp is the alternative we will be attempting to reenter into.
1701 //
1702 // We will link input check failures from the forwards matching path back to the code
1703 // that can handle them.
1704 YarrOp* prevOp = beginOp;
1705 YarrOp* nextOp = &m_ops[beginOp->m_nextOp];
1706 while (nextOp->m_op != OpBodyAlternativeEnd) {
1707 prevOp->m_jumps.link(this);
1708
1709 // We only get here if an input check fails, it is only worth checking again
1710 // if the next alternative has a minimum size less than the last.
1711 if (prevOp->m_alternative->m_minimumSize > nextOp->m_alternative->m_minimumSize) {
1712 // FIXME: if we added an extra label to YarrOp, we could avoid needing to
1713 // subtract delta back out, and reduce this code. Should performance test
1714 // the benefit of this.
1715 unsigned delta = prevOp->m_alternative->m_minimumSize - nextOp->m_alternative->m_minimumSize;
1716 sub32(Imm32(delta), index);
1717 Jump fail = jumpIfNoAvailableInput();
1718 add32(Imm32(delta), index);
1719 jump(nextOp->m_reentry);
1720 fail.link(this);
1721 } else if (prevOp->m_alternative->m_minimumSize < nextOp->m_alternative->m_minimumSize)
1722 add32(Imm32(nextOp->m_alternative->m_minimumSize - prevOp->m_alternative->m_minimumSize), index);
1723 prevOp = nextOp;
1724 nextOp = &m_ops[nextOp->m_nextOp];
1725 }
1726
1727 // We fall through to here if there is insufficient input to run the last alternative.
1728
1729 // If there is insufficient input to run the last alternative, then for 'once through'
1730 // alternatives we are done - just jump back up into the forwards matching path at the End.
1731 if (onceThrough) {
1732 op.m_jumps.linkTo(endOp.m_reentry, this);
1733 jump(endOp.m_reentry);
1734 break;
1735 }
1736
1737 // For repeating alternatives, link any input check failure from the last alternative to
1738 // this point.
1739 op.m_jumps.link(this);
1740
1741 bool needsToUpdateMatchStart = !m_pattern.m_body->m_hasFixedSize;
1742
1743 // Check for cases where input position is already incremented by 1 for the last
1744 // alternative (this is particularly useful where the minimum size of the body
1745 // disjunction is 0, e.g. /a*|b/).
1746 if (needsToUpdateMatchStart && alternative->m_minimumSize == 1) {
1747 // index is already incremented by 1, so just store it now!
1748 store32(index, Address(output));
1749 needsToUpdateMatchStart = false;
1750 }
1751
1752 // Check whether there is sufficient input to loop. Increment the input position by
1753 // one, and check. Also add in the minimum disjunction size before checking - there
1754 // is no point in looping if we're just going to fail all the input checks around
1755 // the next iteration.
1756 ASSERT(alternative->m_minimumSize >= m_pattern.m_body->m_minimumSize);
1757 if (alternative->m_minimumSize == m_pattern.m_body->m_minimumSize) {
1758 // If the last alternative had the same minimum size as the disjunction,
1759 // just simply increment input pos by 1, no adjustment based on minimum size.
1760 add32(Imm32(1), index);
1761 } else {
                // If the minimum for the last alternative was one greater than that
1763 // for the disjunction, we're already progressed by 1, nothing to do!
1764 unsigned delta = (alternative->m_minimumSize - m_pattern.m_body->m_minimumSize) - 1;
1765 if (delta)
1766 sub32(Imm32(delta), index);
1767 }
1768 Jump matchFailed = jumpIfNoAvailableInput();
1769
1770 if (needsToUpdateMatchStart) {
1771 if (!m_pattern.m_body->m_minimumSize)
1772 store32(index, Address(output));
1773 else {
1774 move(index, regT0);
1775 sub32(Imm32(m_pattern.m_body->m_minimumSize), regT0);
1776 store32(regT0, Address(output));
1777 }
1778 }
1779
1780 // Calculate how much more input the first alternative requires than the minimum
1781 // for the body as a whole. If no more is needed then we dont need an additional
1782 // input check here - jump straight back up to the start of the first alternative.
1783 if (beginOp->m_alternative->m_minimumSize == m_pattern.m_body->m_minimumSize)
1784 jump(beginOp->m_reentry);
1785 else {
1786 if (beginOp->m_alternative->m_minimumSize > m_pattern.m_body->m_minimumSize)
1787 add32(Imm32(beginOp->m_alternative->m_minimumSize - m_pattern.m_body->m_minimumSize), index);
1788 else
1789 sub32(Imm32(m_pattern.m_body->m_minimumSize - beginOp->m_alternative->m_minimumSize), index);
1790 checkInput().linkTo(beginOp->m_reentry, this);
1791 jump(firstInputCheckFailed);
1792 }
1793
1794 // We jump to here if we iterate to the point that there is insufficient input to
1795 // run any matches, and need to return a failure state from JIT code.
1796 matchFailed.link(this);
1797
1798 if (m_pattern.m_body->m_callFrameSize)
1799 addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
1800 move(TrustedImm32(-1), returnRegister);
1801 generateReturn();
1802 break;
1803 }
1804 case OpBodyAlternativeEnd: {
1805 // We should never backtrack back into a body disjunction.
1806 ASSERT(m_backtrackingState.isEmpty());
1807
1808 PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
1809 m_checked += priorAlternative->m_minimumSize;
1810 break;
1811 }
1812
1813 // OpSimpleNestedAlternativeBegin/Next/End
1814 // OpNestedAlternativeBegin/Next/End
1815 //
1816 // Generate code for when we backtrack back out of an alternative into
1817 // a Begin or Next node, or when the entry input count check fails. If
1818 // there are more alternatives we need to jump to the next alternative,
1819 // if not we backtrack back out of the current set of parentheses.
1820 //
1821 // In the case of non-simple nested assertions we need to also link the
1822 // 'return address' appropriately to backtrack back out into the correct
1823 // alternative.
1824 case OpSimpleNestedAlternativeBegin:
1825 case OpSimpleNestedAlternativeNext:
1826 case OpNestedAlternativeBegin:
1827 case OpNestedAlternativeNext: {
1828 YarrOp& nextOp = m_ops[op.m_nextOp];
1829 bool isBegin = op.m_previousOp == notFound;
1830 bool isLastAlternative = nextOp.m_nextOp == notFound;
1831 ASSERT(isBegin == (op.m_op == OpSimpleNestedAlternativeBegin || op.m_op == OpNestedAlternativeBegin));
1832 ASSERT(isLastAlternative == (nextOp.m_op == OpSimpleNestedAlternativeEnd || nextOp.m_op == OpNestedAlternativeEnd));
1833
1834 // Treat an input check failure the same as a failed match.
1835 m_backtrackingState.append(op.m_jumps);
1836
1837 // Set the backtracks to jump to the appropriate place. We may need
1838 // to link the backtracks in one of three different way depending on
1839 // the type of alternative we are dealing with:
            // - A single alternative, with no siblings.
1841 // - The last alternative of a set of two or more.
1842 // - An alternative other than the last of a set of two or more.
1843 //
1844 // In the case of a single alternative on its own, we don't need to
1845 // jump anywhere - if the alternative fails to match we can just
1846 // continue to backtrack out of the parentheses without jumping.
1847 //
1848 // In the case of the last alternative in a set of more than one, we
1849 // need to jump to return back out to the beginning. We'll do so by
1850 // adding a jump to the End node's m_jumps list, and linking this
1851 // when we come to generate the Begin node. For alternatives other
1852 // than the last, we need to jump to the next alternative.
1853 //
1854 // If the alternative had adjusted the input position we must link
1855 // backtracking to here, correct, and then jump on. If not we can
1856 // link the backtracks directly to their destination.
1857 if (op.m_checkAdjust) {
1858 // Handle the cases where we need to link the backtracks here.
1859 m_backtrackingState.link(this);
1860 sub32(Imm32(op.m_checkAdjust), index);
1861 if (!isLastAlternative) {
1862 // An alternative that is not the last should jump to its successor.
1863 jump(nextOp.m_reentry);
1864 } else if (!isBegin) {
                    // The last of more than one alternatives must jump back to the beginning.
1866 nextOp.m_jumps.append(jump());
1867 } else {
1868 // A single alternative on its own can fall through.
1869 m_backtrackingState.fallthrough();
1870 }
1871 } else {
1872 // Handle the cases where we can link the backtracks directly to their destinations.
1873 if (!isLastAlternative) {
1874 // An alternative that is not the last should jump to its successor.
1875 m_backtrackingState.linkTo(nextOp.m_reentry, this);
1876 } else if (!isBegin) {
                    // The last of more than one alternatives must jump back to the beginning.
1878 m_backtrackingState.takeBacktracksToJumpList(nextOp.m_jumps, this);
1879 }
1880 // In the case of a single alternative on its own do nothing - it can fall through.
1881 }
1882
1883 // At this point we've handled the backtracking back into this node.
1884 // Now link any backtracks that need to jump to here.
1885
1886 // For non-simple alternatives, link the alternative's 'return address'
1887 // so that we backtrack back out into the previous alternative.
1888 if (op.m_op == OpNestedAlternativeNext)
1889 m_backtrackingState.append(op.m_returnAddress);
1890
1891 // If there is more than one alternative, then the last alternative will
1892 // have planted a jump to be linked to the end. This jump was added to the
1893 // End node's m_jumps list. If we are back at the beginning, link it here.
1894 if (isBegin) {
1895 YarrOp* endOp = &m_ops[op.m_nextOp];
1896 while (endOp->m_nextOp != notFound) {
1897 ASSERT(endOp->m_op == OpSimpleNestedAlternativeNext || endOp->m_op == OpNestedAlternativeNext);
1898 endOp = &m_ops[endOp->m_nextOp];
1899 }
1900 ASSERT(endOp->m_op == OpSimpleNestedAlternativeEnd || endOp->m_op == OpNestedAlternativeEnd);
1901 m_backtrackingState.append(endOp->m_jumps);
1902 }
1903
1904 if (!isBegin) {
1905 YarrOp& lastOp = m_ops[op.m_previousOp];
1906 m_checked += lastOp.m_checkAdjust;
1907 }
1908 m_checked -= op.m_checkAdjust;
1909 break;
1910 }
1911 case OpSimpleNestedAlternativeEnd:
1912 case OpNestedAlternativeEnd: {
1913 PatternTerm* term = op.m_term;
1914
1915 // If we backtrack into the end of a simple subpattern do nothing;
1916 // just continue through into the last alternative. If we backtrack
            // into the end of a non-simple set of alternatives we need to jump
1918 // to the backtracking return address set up during generation.
1919 if (op.m_op == OpNestedAlternativeEnd) {
1920 m_backtrackingState.link(this);
1921
1922 // Plant a jump to the return address.
1923 unsigned parenthesesFrameLocation = term->frameLocation;
1924 unsigned alternativeFrameLocation = parenthesesFrameLocation;
1925 if (term->quantityType != QuantifierFixedCount)
1926 alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
1927 loadFromFrameAndJump(alternativeFrameLocation);
1928
1929 // Link the DataLabelPtr associated with the end of the last
1930 // alternative to this point.
1931 m_backtrackingState.append(op.m_returnAddress);
1932 }
1933
1934 YarrOp& lastOp = m_ops[op.m_previousOp];
1935 m_checked += lastOp.m_checkAdjust;
1936 break;
1937 }
1938
1939 // OpParenthesesSubpatternOnceBegin/End
1940 //
1941 // When we are backtracking back out of a capturing subpattern we need
1942 // to clear the start index in the matches output array, to record that
1943 // this subpattern has not been captured.
1944 //
1945 // When backtracking back out of a Greedy quantified subpattern we need
1946 // to catch this, and try running the remainder of the alternative after
1947 // the subpattern again, skipping the parentheses.
1948 //
1949 // Upon backtracking back into a quantified set of parentheses we need to
1950 // check whether we were currently skipping the subpattern. If not, we
1951 // can backtrack into them, if we were we need to either backtrack back
1952 // out of the start of the parentheses, or jump back to the forwards
1953 // matching start, depending of whether the match is Greedy or NonGreedy.
1954 case OpParenthesesSubpatternOnceBegin: {
1955 PatternTerm* term = op.m_term;
1956 ASSERT(term->quantityCount == 1);
1957
            // We only need to backtrack to this point if capturing or greedy.
1959 if (term->capture() || term->quantityType == QuantifierGreedy) {
1960 m_backtrackingState.link(this);
1961
1962 // If capturing, clear the capture (we only need to reset start).
1963 if (term->capture())
1964 store32(TrustedImm32(-1), Address(output, (term->parentheses.subpatternId << 1) * sizeof(int)));
1965
1966 // If Greedy, jump to the end.
1967 if (term->quantityType == QuantifierGreedy) {
1968 // Clear the flag in the stackframe indicating we ran through the subpattern.
1969 unsigned parenthesesFrameLocation = term->frameLocation;
1970 storeToFrame(TrustedImm32(-1), parenthesesFrameLocation);
1971 // Jump to after the parentheses, skipping the subpattern.
1972 jump(m_ops[op.m_nextOp].m_reentry);
1973 // A backtrack from after the parentheses, when skipping the subpattern,
1974 // will jump back to here.
1975 op.m_jumps.link(this);
1976 }
1977
1978 m_backtrackingState.fallthrough();
1979 }
1980 break;
1981 }
1982 case OpParenthesesSubpatternOnceEnd: {
1983 PatternTerm* term = op.m_term;
1984
1985 if (term->quantityType != QuantifierFixedCount) {
1986 m_backtrackingState.link(this);
1987
1988 // Check whether we should backtrack back into the parentheses, or if we
1989 // are currently in a state where we had skipped over the subpattern
1990 // (in which case the flag value on the stack will be -1).
1991 unsigned parenthesesFrameLocation = term->frameLocation;
1992 Jump hadSkipped = branch32(Equal, Address(stackPointerRegister, parenthesesFrameLocation * sizeof(void*)), TrustedImm32(-1));
1993
1994 if (term->quantityType == QuantifierGreedy) {
1995 // For Greedy parentheses, we skip after having already tried going
1996 // through the subpattern, so if we get here we're done.
1997 YarrOp& beginOp = m_ops[op.m_previousOp];
1998 beginOp.m_jumps.append(hadSkipped);
1999 } else {
2000 // For NonGreedy parentheses, we try skipping the subpattern first,
2001 // so if we get here we need to try running through the subpattern
2002 // next. Jump back to the start of the parentheses in the forwards
2003 // matching path.
2004 ASSERT(term->quantityType == QuantifierNonGreedy);
2005 YarrOp& beginOp = m_ops[op.m_previousOp];
2006 hadSkipped.linkTo(beginOp.m_reentry, this);
2007 }
2008
2009 m_backtrackingState.fallthrough();
2010 }
2011
2012 m_backtrackingState.append(op.m_jumps);
2013 break;
2014 }
2015
2016 // OpParenthesesSubpatternTerminalBegin/End
2017 //
2018 // Terminal subpatterns will always match - there is nothing after them to
2019 // force a backtrack, and they have a minimum count of 0, and as such will
2020 // always produce an acceptable result.
2021 case OpParenthesesSubpatternTerminalBegin: {
2022 // We will backtrack to this point once the subpattern cannot match any
2023 // more. Since no match is accepted as a successful match (we are Greedy
2024 // quantified with a minimum of zero) jump back to the forwards matching
2025 // path at the end.
2026 YarrOp& endOp = m_ops[op.m_nextOp];
2027 m_backtrackingState.linkTo(endOp.m_reentry, this);
2028 break;
2029 }
2030 case OpParenthesesSubpatternTerminalEnd:
2031 // We should never be backtracking to here (hence the 'terminal' in the name).
2032 ASSERT(m_backtrackingState.isEmpty());
2033 m_backtrackingState.append(op.m_jumps);
2034 break;
2035
2036 // OpParentheticalAssertionBegin/End
2037 case OpParentheticalAssertionBegin: {
2038 PatternTerm* term = op.m_term;
2039 YarrOp& endOp = m_ops[op.m_nextOp];
2040
2041 // We need to handle the backtracks upon backtracking back out
2042 // of a parenthetical assertion if either we need to correct
2043 // the input index, or the assertion was inverted.
2044 if (op.m_checkAdjust || term->invert()) {
2045 m_backtrackingState.link(this);
2046
2047 if (op.m_checkAdjust)
2048 add32(Imm32(op.m_checkAdjust), index);
2049
2050 // In an inverted assertion failure to match the subpattern
2051 // is treated as a successful match - jump to the end of the
2052 // subpattern. We already have adjusted the input position
2053 // back to that before the assertion, which is correct.
2054 if (term->invert())
2055 jump(endOp.m_reentry);
2056
2057 m_backtrackingState.fallthrough();
2058 }
2059
2060 // The End node's jump list will contain any backtracks into
2061 // the end of the assertion. Also, if inverted, we will have
2062 // added the failure caused by a successful match to this.
2063 m_backtrackingState.append(endOp.m_jumps);
2064
2065 m_checked += op.m_checkAdjust;
2066 break;
2067 }
2068 case OpParentheticalAssertionEnd: {
2069 // FIXME: We should really be clearing any nested subpattern
2070 // matches on bailing out from after the pattern. Firefox has
2071 // this bug too (presumably because they use YARR!)
2072
2073 // Never backtrack into an assertion; later failures bail to before the begin.
2074 m_backtrackingState.takeBacktracksToJumpList(op.m_jumps, this);
2075
2076 YarrOp& lastOp = m_ops[op.m_previousOp];
2077 m_checked -= lastOp.m_checkAdjust;
2078 break;
2079 }
2080
2081 case OpMatchFailed:
2082 break;
2083 }
2084
2085 } while (opIndex);
2086 }
2087
2088 // Compilation methods:
2089 // ====================
2090
    // opCompileParenthesesSubpattern
    // Emits ops for a subpattern (set of parentheses). These consist
    // of a set of alternatives wrapped in an outer set of nodes for
    // the parentheses.
    // Supported types of parentheses are 'Once' (quantityCount == 1)
    // and 'Terminal' (non-capturing parentheses quantified as greedy
    // and infinite); anything else sets m_shouldFallBack so compilation
    // bails out to the interpreter.
    // Alternatives will use the 'Simple' set of ops if either the
    // subpattern is terminal (in which case we will never need to
    // backtrack), or if the subpattern only contains one alternative.
    void opCompileParenthesesSubpattern(PatternTerm* term)
    {
        YarrOpCode parenthesesBeginOpCode;
        YarrOpCode parenthesesEndOpCode;
        YarrOpCode alternativeBeginOpCode = OpSimpleNestedAlternativeBegin;
        YarrOpCode alternativeNextOpCode = OpSimpleNestedAlternativeNext;
        YarrOpCode alternativeEndOpCode = OpSimpleNestedAlternativeEnd;

        // We can currently only compile quantity 1 subpatterns that are
        // not copies. We generate a copy in the case of a range quantifier,
        // e.g. /(?:x){3,9}/, or /(?:x)+/ (These are effectively expanded to
        // /(?:x){3,3}(?:x){0,6}/ and /(?:x)(?:x)*/ respectively). The problem
        // comes where the subpattern is capturing, in which case we would
        // need to restore the capture from the first subpattern upon a
        // failure in the second.
        if (term->quantityCount == 1 && !term->parentheses.isCopy) {
            // Select the 'Once' nodes.
            parenthesesBeginOpCode = OpParenthesesSubpatternOnceBegin;
            parenthesesEndOpCode = OpParenthesesSubpatternOnceEnd;

            // If there is more than one alternative we cannot use the 'simple' nodes.
            if (term->parentheses.disjunction->m_alternatives.size() != 1) {
                alternativeBeginOpCode = OpNestedAlternativeBegin;
                alternativeNextOpCode = OpNestedAlternativeNext;
                alternativeEndOpCode = OpNestedAlternativeEnd;
            }
        } else if (term->parentheses.isTerminal) {
            // Select the 'Terminal' nodes.
            parenthesesBeginOpCode = OpParenthesesSubpatternTerminalBegin;
            parenthesesEndOpCode = OpParenthesesSubpatternTerminalEnd;
        } else {
            // This subpattern is not supported by the JIT.
            m_shouldFallBack = true;
            return;
        }

        // Record the index at which the Begin node will sit; the Begin/End
        // pair are linked to each other once both have been appended.
        size_t parenBegin = m_ops.size();
        m_ops.append(parenthesesBeginOpCode);

        m_ops.append(alternativeBeginOpCode);
        m_ops.last().m_previousOp = notFound;
        m_ops.last().m_term = term;
        Vector<PatternAlternative*>& alternatives = term->parentheses.disjunction->m_alternatives;
        for (unsigned i = 0; i < alternatives.size(); ++i) {
            // NOTE: indices (not references) are held across calls that
            // append to m_ops, since appending may reallocate the vector's
            // storage and invalidate any references into it.
            size_t lastOpIndex = m_ops.size() - 1;

            PatternAlternative* nestedAlternative = alternatives[i];
            opCompileAlternative(nestedAlternative);

            size_t thisOpIndex = m_ops.size();
            m_ops.append(YarrOp(alternativeNextOpCode));

            YarrOp& lastOp = m_ops[lastOpIndex];
            YarrOp& thisOp = m_ops[thisOpIndex];

            // Doubly-link this Next node to the Begin/Next node preceding it.
            lastOp.m_alternative = nestedAlternative;
            lastOp.m_nextOp = thisOpIndex;
            thisOp.m_previousOp = lastOpIndex;
            thisOp.m_term = term;
        }
        // The loop appends one Next node too many; retype the trailing Next
        // node as the End of the alternative chain.
        YarrOp& lastOp = m_ops.last();
        ASSERT(lastOp.m_op == alternativeNextOpCode);
        lastOp.m_op = alternativeEndOpCode;
        lastOp.m_alternative = 0;
        lastOp.m_nextOp = notFound;

        size_t parenEnd = m_ops.size();
        m_ops.append(parenthesesEndOpCode);

        // Link the outer Begin/End pair to each other.
        m_ops[parenBegin].m_term = term;
        m_ops[parenBegin].m_previousOp = notFound;
        m_ops[parenBegin].m_nextOp = parenEnd;
        m_ops[parenEnd].m_term = term;
        m_ops[parenEnd].m_previousOp = parenBegin;
        m_ops[parenEnd].m_nextOp = notFound;
    }
2177
2178 // opCompileParentheticalAssertion
2179 // Emits ops for a parenthetical assertion. These consist of an
2180 // OpSimpleNestedAlternativeBegin/Next/End set of nodes wrapping
2181 // the alternatives, with these wrapped by an outer pair of
2182 // OpParentheticalAssertionBegin/End nodes.
2183 // We can always use the OpSimpleNestedAlternative nodes in the
2184 // case of parenthetical assertions since these only ever match
2185 // once, and will never backtrack back into the assertion.
2186 void opCompileParentheticalAssertion(PatternTerm* term)
2187 {
2188 size_t parenBegin = m_ops.size();
2189 m_ops.append(OpParentheticalAssertionBegin);
2190
2191 m_ops.append(OpSimpleNestedAlternativeBegin);
2192 m_ops.last().m_previousOp = notFound;
2193 m_ops.last().m_term = term;
2194 Vector<PatternAlternative*>& alternatives = term->parentheses.disjunction->m_alternatives;
2195 for (unsigned i = 0; i < alternatives.size(); ++i) {
2196 size_t lastOpIndex = m_ops.size() - 1;
2197
2198 PatternAlternative* nestedAlternative = alternatives[i];
2199 opCompileAlternative(nestedAlternative);
2200
2201 size_t thisOpIndex = m_ops.size();
2202 m_ops.append(YarrOp(OpSimpleNestedAlternativeNext));
2203
2204 YarrOp& lastOp = m_ops[lastOpIndex];
2205 YarrOp& thisOp = m_ops[thisOpIndex];
2206
2207 lastOp.m_alternative = nestedAlternative;
2208 lastOp.m_nextOp = thisOpIndex;
2209 thisOp.m_previousOp = lastOpIndex;
2210 thisOp.m_term = term;
2211 }
2212 YarrOp& lastOp = m_ops.last();
2213 ASSERT(lastOp.m_op == OpSimpleNestedAlternativeNext);
2214 lastOp.m_op = OpSimpleNestedAlternativeEnd;
2215 lastOp.m_alternative = 0;
2216 lastOp.m_nextOp = notFound;
2217
2218 size_t parenEnd = m_ops.size();
2219 m_ops.append(OpParentheticalAssertionEnd);
2220
2221 m_ops[parenBegin].m_term = term;
2222 m_ops[parenBegin].m_previousOp = notFound;
2223 m_ops[parenBegin].m_nextOp = parenEnd;
2224 m_ops[parenEnd].m_term = term;
2225 m_ops[parenEnd].m_previousOp = parenBegin;
2226 m_ops[parenEnd].m_nextOp = notFound;
2227 }
2228
2229 // opCompileAlternative
2230 // Called to emit nodes for all terms in an alternative.
2231 void opCompileAlternative(PatternAlternative* alternative)
2232 {
2233 optimizeAlternative(alternative);
2234
2235 for (unsigned i = 0; i < alternative->m_terms.size(); ++i) {
2236 PatternTerm* term = &alternative->m_terms[i];
2237
2238 switch (term->type) {
2239 case PatternTerm::TypeParenthesesSubpattern:
2240 opCompileParenthesesSubpattern(term);
2241 break;
2242
2243 case PatternTerm::TypeParentheticalAssertion:
2244 opCompileParentheticalAssertion(term);
2245 break;
2246
2247 default:
2248 m_ops.append(term);
2249 }
2250 }
2251 }
2252
    // opCompileBody
    // This method compiles the body disjunction of the regular expression.
    // The body consists of two sets of alternatives - zero or more 'once
    // through' (BOL anchored) alternatives, followed by zero or more
    // repeated alternatives.
    // For each of these two sets of alternatives, if not empty they will be
    // wrapped in a set of OpBodyAlternativeBegin/Next/End nodes (with the
    // 'begin' node referencing the first alternative, and 'next' nodes
    // referencing any further alternatives. The begin/next/end nodes are
    // linked together in a doubly linked list. In the case of repeating
    // alternatives, the end node is also linked back to the beginning.
    // If no repeating alternatives exist, then a OpMatchFailed node exists
    // to return the failing result.
    void opCompileBody(PatternDisjunction* disjunction)
    {
        Vector<PatternAlternative*>& alternatives = disjunction->m_alternatives;
        size_t currentAlternativeIndex = 0;

        // Emit the 'once through' alternatives. This assumes all once-through
        // alternatives precede the repeating ones (the ASSERT in the second
        // loop below checks this).
        if (alternatives.size() && alternatives[0]->onceThrough()) {
            m_ops.append(YarrOp(OpBodyAlternativeBegin));
            m_ops.last().m_previousOp = notFound;

            do {
                // NOTE: indices (not references) are held across
                // opCompileAlternative, since appending to m_ops may
                // reallocate its storage and invalidate references.
                size_t lastOpIndex = m_ops.size() - 1;
                PatternAlternative* alternative = alternatives[currentAlternativeIndex];
                opCompileAlternative(alternative);

                size_t thisOpIndex = m_ops.size();
                m_ops.append(YarrOp(OpBodyAlternativeNext));

                YarrOp& lastOp = m_ops[lastOpIndex];
                YarrOp& thisOp = m_ops[thisOpIndex];

                // Doubly-link this Next node to the preceding Begin/Next node.
                lastOp.m_alternative = alternative;
                lastOp.m_nextOp = thisOpIndex;
                thisOp.m_previousOp = lastOpIndex;

                ++currentAlternativeIndex;
            } while (currentAlternativeIndex < alternatives.size() && alternatives[currentAlternativeIndex]->onceThrough());

            // The loop appends one Next node too many; retype the trailing
            // Next node as the End of the once-through chain.
            YarrOp& lastOp = m_ops.last();

            ASSERT(lastOp.m_op == OpBodyAlternativeNext);
            lastOp.m_op = OpBodyAlternativeEnd;
            lastOp.m_alternative = 0;
            lastOp.m_nextOp = notFound;
        }

        // If every alternative was once-through there is nothing to repeat;
        // emit the node that produces the failure result, and we're done.
        if (currentAlternativeIndex == alternatives.size()) {
            m_ops.append(YarrOp(OpMatchFailed));
            return;
        }

        // Emit the repeated alternatives.
        size_t repeatLoop = m_ops.size();
        m_ops.append(YarrOp(OpBodyAlternativeBegin));
        m_ops.last().m_previousOp = notFound;
        do {
            size_t lastOpIndex = m_ops.size() - 1;
            PatternAlternative* alternative = alternatives[currentAlternativeIndex];
            ASSERT(!alternative->onceThrough());
            opCompileAlternative(alternative);

            size_t thisOpIndex = m_ops.size();
            m_ops.append(YarrOp(OpBodyAlternativeNext));

            YarrOp& lastOp = m_ops[lastOpIndex];
            YarrOp& thisOp = m_ops[thisOpIndex];

            lastOp.m_alternative = alternative;
            lastOp.m_nextOp = thisOpIndex;
            thisOp.m_previousOp = lastOpIndex;

            ++currentAlternativeIndex;
        } while (currentAlternativeIndex < alternatives.size());
        YarrOp& lastOp = m_ops.last();
        ASSERT(lastOp.m_op == OpBodyAlternativeNext);
        lastOp.m_op = OpBodyAlternativeEnd;
        lastOp.m_alternative = 0;
        // Unlike the once-through set, the repeating set's End node links
        // back to its Begin node, forming the body's match loop.
        lastOp.m_nextOp = repeatLoop;
    }
2335
    // generateEnter
    // Emits the function prologue: saves the callee-saved registers the
    // generated code uses, and moves incoming arguments into the register
    // assignments (input/index/length/output) the matcher expects.
    // The pushes here must be mirrored, in reverse order, by the pops in
    // generateReturn().
    void generateEnter()
    {
#if CPU(X86_64)
        push(X86Registers::ebp);
        move(stackPointerRegister, X86Registers::ebp);
        push(X86Registers::ebx);
#elif CPU(X86)
        push(X86Registers::ebp);
        move(stackPointerRegister, X86Registers::ebp);
        // TODO: do we need spill registers to fill the output pointer if there are no sub captures?
        push(X86Registers::ebx);
        push(X86Registers::edi);
        push(X86Registers::esi);
        // load output into edi (2 = saved ebp + return address).
#if COMPILER(MSVC)
        // MSVC passes all four arguments on the stack; load each in turn.
        loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), input);
        loadPtr(Address(X86Registers::ebp, 3 * sizeof(void*)), index);
        loadPtr(Address(X86Registers::ebp, 4 * sizeof(void*)), length);
        loadPtr(Address(X86Registers::ebp, 5 * sizeof(void*)), output);
#else
        // Other compilers: only 'output' is loaded from the stack here, so the
        // first three arguments presumably arrive in registers (regparm-style
        // convention) - confirm against the generated function's declaration.
        loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), output);
#endif
#elif CPU(ARM)
        push(ARMRegisters::r4);
        push(ARMRegisters::r5);
        push(ARMRegisters::r6);
#if CPU(ARM_TRADITIONAL)
        push(ARMRegisters::r8); // scratch register
#endif
        // The fourth argument (output) arrives in r3.
        move(ARMRegisters::r3, output);
#elif CPU(SH4)
        push(SH4Registers::r11);
        push(SH4Registers::r13);
#elif CPU(MIPS)
        // Do nothing.
#endif
    }
2373
    // generateReturn
    // Emits the function epilogue: restores, in reverse order, the
    // callee-saved registers pushed by generateEnter(), then returns.
    // Callers place the match result in returnRegister before emitting this.
    void generateReturn()
    {
#if CPU(X86_64)
        pop(X86Registers::ebx);
        pop(X86Registers::ebp);
#elif CPU(X86)
        pop(X86Registers::esi);
        pop(X86Registers::edi);
        pop(X86Registers::ebx);
        pop(X86Registers::ebp);
#elif CPU(ARM)
#if CPU(ARM_TRADITIONAL)
        pop(ARMRegisters::r8); // scratch register
#endif
        pop(ARMRegisters::r6);
        pop(ARMRegisters::r5);
        pop(ARMRegisters::r4);
#elif CPU(SH4)
        pop(SH4Registers::r13);
        pop(SH4Registers::r11);
#elif CPU(MIPS)
        // Do nothing
#endif
        ret();
    }
2399
2400 public:
    // Construct a generator for the given (already parsed) pattern.
    // No code is emitted until compile() is called.
    YarrGenerator(YarrPattern& pattern)
        : m_pattern(pattern)
        , m_shouldFallBack(false)
        , m_checked(0)
    {
    }
2407
    // compile
    // Drives the whole JIT compilation: emits the prologue, lowers the
    // pattern into the linear YarrOp form, generates the forwards-matching
    // and backtracking code paths, and links the result into jitObject.
    // If an unsupported construct is found, jitObject is instead flagged
    // to fall back to the interpreter.
    // NOTE: each statement here emits code in sequence, so order matters.
    void compile(JSGlobalData* globalData, YarrCodeBlock& jitObject)
    {
        generateEnter();

        // For patterns without a fixed size the match start is tracked at
        // runtime; record the initial index into the first output slot.
        if (!m_pattern.m_body->m_hasFixedSize)
            store32(index, Address(output));

        // Reserve the stack space the generated code uses as its call frame.
        if (m_pattern.m_body->m_callFrameSize)
            subPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);

        // Compile the pattern to the internal 'YarrOp' representation.
        opCompileBody(m_pattern.m_body);

        // If we encountered anything we can't handle in the JIT code
        // (e.g. backreferences) then return early.
        if (m_shouldFallBack) {
            jitObject.setFallBack(true);
            return;
        }

        generate();
        backtrack();

        // Link & finalize the code.
        LinkBuffer linkBuffer(*globalData, this, globalData->regexAllocator);
        m_backtrackingState.linkDataLabels(linkBuffer);
        jitObject.set(linkBuffer.finalizeCode());
        jitObject.setFallBack(m_shouldFallBack);
    }
2437
2438 private:
    // The parsed pattern being compiled.
    YarrPattern& m_pattern;

    // Used to detect regular expression constructs that are not currently
    // supported in the JIT; fall back to the interpreter when this is detected.
    bool m_shouldFallBack;

    // The regular expression expressed as a linear sequence of operations.
    Vector<YarrOp, 128> m_ops;

    // This records the current input offset being applied due to the current
    // set of alternatives we are nested within. E.g. when matching the
    // character 'b' within the regular expression /abc/, we will know that
    // the minimum size for the alternative is 3, checked upon entry to the
    // alternative, and that 'b' is at offset 1 from the start, and as such
    // when matching 'b' we need to apply an offset of -2 to the load.
    //
    // FIXME: This should go away. Rather than tracking this value throughout
    // code generation, we should gather this information up front & store it
    // on the YarrOp structure.
    int m_checked;

    // This class records state whilst generating the backtracking path of code.
    BacktrackingState m_backtrackingState;
2462 };
2463
2464 void jitCompile(YarrPattern& pattern, JSGlobalData* globalData, YarrCodeBlock& jitObject)
2465 {
2466 YarrGenerator(pattern).compile(globalData, jitObject);
2467 }
2468
2469 int execute(YarrCodeBlock& jitObject, const UChar* input, unsigned start, unsigned length, int* output)
2470 {
2471 return jitObject.execute(input, start, length, output);
2472 }
2473
2474 }}
2475
2476 #endif