]> git.saurik.com Git - apple/javascriptcore.git/blob - yarr/YarrInterpreter.cpp
903c6922afc8b34e5bb0605d1fbb80dabfbb148c
[apple/javascriptcore.git] / yarr / YarrInterpreter.cpp
1 /*
2 * Copyright (C) 2009 Apple Inc. All rights reserved.
3 * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #include "config.h"
28 #include "YarrInterpreter.h"
29
30 #include "Yarr.h"
31 #include <wtf/BumpPointerAllocator.h>
32
33 #ifndef NDEBUG
34 #include <stdio.h>
35 #endif
36
37 using namespace WTF;
38
39 namespace JSC { namespace Yarr {
40
41 class Interpreter {
42 public:
43 struct ParenthesesDisjunctionContext;
44
45 struct BackTrackInfoPatternCharacter {
46 uintptr_t matchAmount;
47 };
48 struct BackTrackInfoCharacterClass {
49 uintptr_t matchAmount;
50 };
51 struct BackTrackInfoBackReference {
52 uintptr_t begin; // Not really needed for greedy quantifiers.
53 uintptr_t matchAmount; // Not really needed for fixed quantifiers.
54 };
55 struct BackTrackInfoAlternative {
56 uintptr_t offset;
57 };
58 struct BackTrackInfoParentheticalAssertion {
59 uintptr_t begin;
60 };
61 struct BackTrackInfoParenthesesOnce {
62 uintptr_t begin;
63 };
64 struct BackTrackInfoParenthesesTerminal {
65 uintptr_t begin;
66 };
67 struct BackTrackInfoParentheses {
68 uintptr_t matchAmount;
69 ParenthesesDisjunctionContext* lastContext;
70 };
71
72 static inline void appendParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack, ParenthesesDisjunctionContext* context)
73 {
74 context->next = backTrack->lastContext;
75 backTrack->lastContext = context;
76 ++backTrack->matchAmount;
77 }
78
79 static inline void popParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack)
80 {
81 ASSERT(backTrack->matchAmount);
82 ASSERT(backTrack->lastContext);
83 backTrack->lastContext = backTrack->lastContext->next;
84 --backTrack->matchAmount;
85 }
86
87 struct DisjunctionContext
88 {
89 DisjunctionContext()
90 : term(0)
91 {
92 }
93
94 void* operator new(size_t, void* where)
95 {
96 return where;
97 }
98
99 int term;
100 unsigned matchBegin;
101 unsigned matchEnd;
102 uintptr_t frame[1];
103 };
104
105 DisjunctionContext* allocDisjunctionContext(ByteDisjunction* disjunction)
106 {
107 size_t size = sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t);
108 allocatorPool = allocatorPool->ensureCapacity(size);
109 if (!allocatorPool)
110 CRASH();
111 return new(allocatorPool->alloc(size)) DisjunctionContext();
112 }
113
114 void freeDisjunctionContext(DisjunctionContext* context)
115 {
116 allocatorPool = allocatorPool->dealloc(context);
117 }
118
119 struct ParenthesesDisjunctionContext
120 {
121 ParenthesesDisjunctionContext(int* output, ByteTerm& term)
122 : next(0)
123 {
124 unsigned firstSubpatternId = term.atom.subpatternId;
125 unsigned numNestedSubpatterns = term.atom.parenthesesDisjunction->m_numSubpatterns;
126
127 for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i) {
128 subpatternBackup[i] = output[(firstSubpatternId << 1) + i];
129 output[(firstSubpatternId << 1) + i] = -1;
130 }
131
132 new(getDisjunctionContext(term)) DisjunctionContext();
133 }
134
135 void* operator new(size_t, void* where)
136 {
137 return where;
138 }
139
140 void restoreOutput(int* output, unsigned firstSubpatternId, unsigned numNestedSubpatterns)
141 {
142 for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i)
143 output[(firstSubpatternId << 1) + i] = subpatternBackup[i];
144 }
145
146 DisjunctionContext* getDisjunctionContext(ByteTerm& term)
147 {
148 return reinterpret_cast<DisjunctionContext*>(&(subpatternBackup[term.atom.parenthesesDisjunction->m_numSubpatterns << 1]));
149 }
150
151 ParenthesesDisjunctionContext* next;
152 int subpatternBackup[1];
153 };
154
155 ParenthesesDisjunctionContext* allocParenthesesDisjunctionContext(ByteDisjunction* disjunction, int* output, ByteTerm& term)
156 {
157 size_t size = sizeof(ParenthesesDisjunctionContext) - sizeof(int) + (term.atom.parenthesesDisjunction->m_numSubpatterns << 1) * sizeof(int) + sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t);
158 allocatorPool = allocatorPool->ensureCapacity(size);
159 if (!allocatorPool)
160 CRASH();
161 return new(allocatorPool->alloc(size)) ParenthesesDisjunctionContext(output, term);
162 }
163
164 void freeParenthesesDisjunctionContext(ParenthesesDisjunctionContext* context)
165 {
166 allocatorPool = allocatorPool->dealloc(context);
167 }
168
169 class InputStream {
170 public:
171 InputStream(const UChar* input, unsigned start, unsigned length)
172 : input(input)
173 , pos(start)
174 , length(length)
175 {
176 }
177
178 void next()
179 {
180 ++pos;
181 }
182
183 void rewind(unsigned amount)
184 {
185 ASSERT(pos >= amount);
186 pos -= amount;
187 }
188
189 int read()
190 {
191 ASSERT(pos < length);
192 if (pos < length)
193 return input[pos];
194 return -1;
195 }
196
197 int readPair()
198 {
199 ASSERT(pos + 1 < length);
200 return input[pos] | input[pos + 1] << 16;
201 }
202
203 int readChecked(int position)
204 {
205 ASSERT(position < 0);
206 ASSERT(static_cast<unsigned>(-position) <= pos);
207 unsigned p = pos + position;
208 ASSERT(p < length);
209 return input[p];
210 }
211
212 int reread(unsigned from)
213 {
214 ASSERT(from < length);
215 return input[from];
216 }
217
218 int prev()
219 {
220 ASSERT(!(pos > length));
221 if (pos && length)
222 return input[pos - 1];
223 return -1;
224 }
225
226 unsigned getPos()
227 {
228 return pos;
229 }
230
231 void setPos(unsigned p)
232 {
233 pos = p;
234 }
235
236 bool atStart()
237 {
238 return pos == 0;
239 }
240
241 bool atEnd()
242 {
243 return pos == length;
244 }
245
246 unsigned end()
247 {
248 return length;
249 }
250
251 bool checkInput(int count)
252 {
253 if ((pos + count) <= length) {
254 pos += count;
255 return true;
256 }
257 return false;
258 }
259
260 void uncheckInput(int count)
261 {
262 pos -= count;
263 }
264
265 bool atStart(int position)
266 {
267 return (pos + position) == 0;
268 }
269
270 bool atEnd(int position)
271 {
272 return (pos + position) == length;
273 }
274
275 bool isNotAvailableInput(int position)
276 {
277 return (pos + position) > length;
278 }
279
280 private:
281 const UChar* input;
282 unsigned pos;
283 unsigned length;
284 };
285
286 bool testCharacterClass(CharacterClass* characterClass, int ch)
287 {
288 if (ch & 0xFF80) {
289 for (unsigned i = 0; i < characterClass->m_matchesUnicode.size(); ++i)
290 if (ch == characterClass->m_matchesUnicode[i])
291 return true;
292 for (unsigned i = 0; i < characterClass->m_rangesUnicode.size(); ++i)
293 if ((ch >= characterClass->m_rangesUnicode[i].begin) && (ch <= characterClass->m_rangesUnicode[i].end))
294 return true;
295 } else {
296 for (unsigned i = 0; i < characterClass->m_matches.size(); ++i)
297 if (ch == characterClass->m_matches[i])
298 return true;
299 for (unsigned i = 0; i < characterClass->m_ranges.size(); ++i)
300 if ((ch >= characterClass->m_ranges[i].begin) && (ch <= characterClass->m_ranges[i].end))
301 return true;
302 }
303
304 return false;
305 }
306
307 bool checkCharacter(int testChar, int inputPosition)
308 {
309 return testChar == input.readChecked(inputPosition);
310 }
311
312 bool checkCasedCharacter(int loChar, int hiChar, int inputPosition)
313 {
314 int ch = input.readChecked(inputPosition);
315 return (loChar == ch) || (hiChar == ch);
316 }
317
318 bool checkCharacterClass(CharacterClass* characterClass, bool invert, int inputPosition)
319 {
320 bool match = testCharacterClass(characterClass, input.readChecked(inputPosition));
321 return invert ? !match : match;
322 }
323
324 bool tryConsumeBackReference(int matchBegin, int matchEnd, int inputOffset)
325 {
326 int matchSize = matchEnd - matchBegin;
327
328 if (!input.checkInput(matchSize))
329 return false;
330
331 if (pattern->m_ignoreCase) {
332 for (int i = 0; i < matchSize; ++i) {
333 int ch = input.reread(matchBegin + i);
334
335 int lo = Unicode::toLower(ch);
336 int hi = Unicode::toUpper(ch);
337
338 if ((lo != hi) ? (!checkCasedCharacter(lo, hi, inputOffset - matchSize + i)) : (!checkCharacter(ch, inputOffset - matchSize + i))) {
339 input.uncheckInput(matchSize);
340 return false;
341 }
342 }
343 } else {
344 for (int i = 0; i < matchSize; ++i) {
345 if (!checkCharacter(input.reread(matchBegin + i), inputOffset - matchSize + i)) {
346 input.uncheckInput(matchSize);
347 return false;
348 }
349 }
350 }
351
352 return true;
353 }
354
355 bool matchAssertionBOL(ByteTerm& term)
356 {
357 return (input.atStart(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition - 1)));
358 }
359
360 bool matchAssertionEOL(ByteTerm& term)
361 {
362 if (term.inputPosition)
363 return (input.atEnd(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition)));
364
365 return (input.atEnd()) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.read()));
366 }
367
368 bool matchAssertionWordBoundary(ByteTerm& term)
369 {
370 bool prevIsWordchar = !input.atStart(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition - 1));
371 bool readIsWordchar;
372 if (term.inputPosition)
373 readIsWordchar = !input.atEnd(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition));
374 else
375 readIsWordchar = !input.atEnd() && testCharacterClass(pattern->wordcharCharacterClass, input.read());
376
377 bool wordBoundary = prevIsWordchar != readIsWordchar;
378 return term.invert() ? !wordBoundary : wordBoundary;
379 }
380
381 bool backtrackPatternCharacter(ByteTerm& term, DisjunctionContext* context)
382 {
383 BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
384
385 switch (term.atom.quantityType) {
386 case QuantifierFixedCount:
387 break;
388
389 case QuantifierGreedy:
390 if (backTrack->matchAmount) {
391 --backTrack->matchAmount;
392 input.uncheckInput(1);
393 return true;
394 }
395 break;
396
397 case QuantifierNonGreedy:
398 if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
399 ++backTrack->matchAmount;
400 if (checkCharacter(term.atom.patternCharacter, term.inputPosition - 1))
401 return true;
402 }
403 input.uncheckInput(backTrack->matchAmount);
404 break;
405 }
406
407 return false;
408 }
409
410 bool backtrackPatternCasedCharacter(ByteTerm& term, DisjunctionContext* context)
411 {
412 BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
413
414 switch (term.atom.quantityType) {
415 case QuantifierFixedCount:
416 break;
417
418 case QuantifierGreedy:
419 if (backTrack->matchAmount) {
420 --backTrack->matchAmount;
421 input.uncheckInput(1);
422 return true;
423 }
424 break;
425
426 case QuantifierNonGreedy:
427 if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
428 ++backTrack->matchAmount;
429 if (checkCasedCharacter(term.atom.casedCharacter.lo, term.atom.casedCharacter.hi, term.inputPosition - 1))
430 return true;
431 }
432 input.uncheckInput(backTrack->matchAmount);
433 break;
434 }
435
436 return false;
437 }
438
439 bool matchCharacterClass(ByteTerm& term, DisjunctionContext* context)
440 {
441 ASSERT(term.type == ByteTerm::TypeCharacterClass);
442 BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
443
444 switch (term.atom.quantityType) {
445 case QuantifierFixedCount: {
446 for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) {
447 if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + matchAmount))
448 return false;
449 }
450 return true;
451 }
452
453 case QuantifierGreedy: {
454 unsigned matchAmount = 0;
455 while ((matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
456 if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - 1)) {
457 input.uncheckInput(1);
458 break;
459 }
460 ++matchAmount;
461 }
462 backTrack->matchAmount = matchAmount;
463
464 return true;
465 }
466
467 case QuantifierNonGreedy:
468 backTrack->matchAmount = 0;
469 return true;
470 }
471
472 ASSERT_NOT_REACHED();
473 return false;
474 }
475
476 bool backtrackCharacterClass(ByteTerm& term, DisjunctionContext* context)
477 {
478 ASSERT(term.type == ByteTerm::TypeCharacterClass);
479 BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
480
481 switch (term.atom.quantityType) {
482 case QuantifierFixedCount:
483 break;
484
485 case QuantifierGreedy:
486 if (backTrack->matchAmount) {
487 --backTrack->matchAmount;
488 input.uncheckInput(1);
489 return true;
490 }
491 break;
492
493 case QuantifierNonGreedy:
494 if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
495 ++backTrack->matchAmount;
496 if (checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - 1))
497 return true;
498 }
499 input.uncheckInput(backTrack->matchAmount);
500 break;
501 }
502
503 return false;
504 }
505
506 bool matchBackReference(ByteTerm& term, DisjunctionContext* context)
507 {
508 ASSERT(term.type == ByteTerm::TypeBackReference);
509 BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
510
511 int matchBegin = output[(term.atom.subpatternId << 1)];
512 int matchEnd = output[(term.atom.subpatternId << 1) + 1];
513
514 // If the end position of the referenced match hasn't set yet then the backreference in the same parentheses where it references to that.
515 // In this case the result of match is empty string like when it references to a parentheses with zero-width match.
516 // Eg.: /(a\1)/
517 if (matchEnd == -1)
518 return true;
519
520 ASSERT((matchBegin == -1) || (matchBegin <= matchEnd));
521
522 if (matchBegin == matchEnd)
523 return true;
524
525 switch (term.atom.quantityType) {
526 case QuantifierFixedCount: {
527 backTrack->begin = input.getPos();
528 for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) {
529 if (!tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
530 input.setPos(backTrack->begin);
531 return false;
532 }
533 }
534 return true;
535 }
536
537 case QuantifierGreedy: {
538 unsigned matchAmount = 0;
539 while ((matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition))
540 ++matchAmount;
541 backTrack->matchAmount = matchAmount;
542 return true;
543 }
544
545 case QuantifierNonGreedy:
546 backTrack->begin = input.getPos();
547 backTrack->matchAmount = 0;
548 return true;
549 }
550
551 ASSERT_NOT_REACHED();
552 return false;
553 }
554
555 bool backtrackBackReference(ByteTerm& term, DisjunctionContext* context)
556 {
557 ASSERT(term.type == ByteTerm::TypeBackReference);
558 BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
559
560 int matchBegin = output[(term.atom.subpatternId << 1)];
561 int matchEnd = output[(term.atom.subpatternId << 1) + 1];
562 ASSERT((matchBegin == -1) || (matchBegin <= matchEnd));
563
564 if (matchBegin == matchEnd)
565 return false;
566
567 switch (term.atom.quantityType) {
568 case QuantifierFixedCount:
569 // for quantityCount == 1, could rewind.
570 input.setPos(backTrack->begin);
571 break;
572
573 case QuantifierGreedy:
574 if (backTrack->matchAmount) {
575 --backTrack->matchAmount;
576 input.rewind(matchEnd - matchBegin);
577 return true;
578 }
579 break;
580
581 case QuantifierNonGreedy:
582 if ((backTrack->matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
583 ++backTrack->matchAmount;
584 return true;
585 }
586 input.setPos(backTrack->begin);
587 break;
588 }
589
590 return false;
591 }
592
593 void recordParenthesesMatch(ByteTerm& term, ParenthesesDisjunctionContext* context)
594 {
595 if (term.capture()) {
596 unsigned subpatternId = term.atom.subpatternId;
597 output[(subpatternId << 1)] = context->getDisjunctionContext(term)->matchBegin + term.inputPosition;
598 output[(subpatternId << 1) + 1] = context->getDisjunctionContext(term)->matchEnd + term.inputPosition;
599 }
600 }
601 void resetMatches(ByteTerm& term, ParenthesesDisjunctionContext* context)
602 {
603 unsigned firstSubpatternId = term.atom.subpatternId;
604 unsigned count = term.atom.parenthesesDisjunction->m_numSubpatterns;
605 context->restoreOutput(output, firstSubpatternId, count);
606 }
607 JSRegExpResult parenthesesDoBacktrack(ByteTerm& term, BackTrackInfoParentheses* backTrack)
608 {
609 while (backTrack->matchAmount) {
610 ParenthesesDisjunctionContext* context = backTrack->lastContext;
611
612 JSRegExpResult result = matchDisjunction(term.atom.parenthesesDisjunction, context->getDisjunctionContext(term), true);
613 if (result == JSRegExpMatch)
614 return JSRegExpMatch;
615
616 resetMatches(term, context);
617 popParenthesesDisjunctionContext(backTrack);
618 freeParenthesesDisjunctionContext(context);
619
620 if (result != JSRegExpNoMatch)
621 return result;
622 }
623
624 return JSRegExpNoMatch;
625 }
626
627 bool matchParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
628 {
629 ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
630 ASSERT(term.atom.quantityCount == 1);
631
632 BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
633
634 switch (term.atom.quantityType) {
635 case QuantifierGreedy: {
636 // set this speculatively; if we get to the parens end this will be true.
637 backTrack->begin = input.getPos();
638 break;
639 }
640 case QuantifierNonGreedy: {
641 backTrack->begin = notFound;
642 context->term += term.atom.parenthesesWidth;
643 return true;
644 }
645 case QuantifierFixedCount:
646 break;
647 }
648
649 if (term.capture()) {
650 unsigned subpatternId = term.atom.subpatternId;
651 output[(subpatternId << 1)] = input.getPos() + term.inputPosition;
652 }
653
654 return true;
655 }
656
657 bool matchParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
658 {
659 ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
660 ASSERT(term.atom.quantityCount == 1);
661
662 if (term.capture()) {
663 unsigned subpatternId = term.atom.subpatternId;
664 output[(subpatternId << 1) + 1] = input.getPos() + term.inputPosition;
665 }
666
667 if (term.atom.quantityType == QuantifierFixedCount)
668 return true;
669
670 BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
671 return backTrack->begin != input.getPos();
672 }
673
674 bool backtrackParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
675 {
676 ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
677 ASSERT(term.atom.quantityCount == 1);
678
679 BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
680
681 if (term.capture()) {
682 unsigned subpatternId = term.atom.subpatternId;
683 output[(subpatternId << 1)] = -1;
684 output[(subpatternId << 1) + 1] = -1;
685 }
686
687 switch (term.atom.quantityType) {
688 case QuantifierGreedy:
689 // if we backtrack to this point, there is another chance - try matching nothing.
690 ASSERT(backTrack->begin != notFound);
691 backTrack->begin = notFound;
692 context->term += term.atom.parenthesesWidth;
693 return true;
694 case QuantifierNonGreedy:
695 ASSERT(backTrack->begin != notFound);
696 case QuantifierFixedCount:
697 break;
698 }
699
700 return false;
701 }
702
703 bool backtrackParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
704 {
705 ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
706 ASSERT(term.atom.quantityCount == 1);
707
708 BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
709
710 switch (term.atom.quantityType) {
711 case QuantifierGreedy:
712 if (backTrack->begin == notFound) {
713 context->term -= term.atom.parenthesesWidth;
714 return false;
715 }
716 case QuantifierNonGreedy:
717 if (backTrack->begin == notFound) {
718 backTrack->begin = input.getPos();
719 if (term.capture()) {
720 // Technically this access to inputPosition should be accessing the begin term's
721 // inputPosition, but for repeats other than fixed these values should be
722 // the same anyway! (We don't pre-check for greedy or non-greedy matches.)
723 ASSERT((&term - term.atom.parenthesesWidth)->type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
724 ASSERT((&term - term.atom.parenthesesWidth)->inputPosition == term.inputPosition);
725 unsigned subpatternId = term.atom.subpatternId;
726 output[subpatternId << 1] = input.getPos() + term.inputPosition;
727 }
728 context->term -= term.atom.parenthesesWidth;
729 return true;
730 }
731 case QuantifierFixedCount:
732 break;
733 }
734
735 return false;
736 }
737
738 bool matchParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context)
739 {
740 ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
741 ASSERT(term.atom.quantityType == QuantifierGreedy);
742 ASSERT(term.atom.quantityCount == quantifyInfinite);
743 ASSERT(!term.capture());
744
745 BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
746 backTrack->begin = input.getPos();
747 return true;
748 }
749
750 bool matchParenthesesTerminalEnd(ByteTerm& term, DisjunctionContext* context)
751 {
752 ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalEnd);
753
754 BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
755 // Empty match is a failed match.
756 if (backTrack->begin == input.getPos())
757 return false;
758
759 // Successful match! Okay, what's next? - loop around and try to match moar!
760 context->term -= (term.atom.parenthesesWidth + 1);
761 return true;
762 }
763
764 bool backtrackParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context)
765 {
766 ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
767 ASSERT(term.atom.quantityType == QuantifierGreedy);
768 ASSERT(term.atom.quantityCount == quantifyInfinite);
769 ASSERT(!term.capture());
770
771 // If we backtrack to this point, we have failed to match this iteration of the parens.
772 // Since this is greedy / zero minimum a failed is also accepted as a match!
773 context->term += term.atom.parenthesesWidth;
774 return true;
775 }
776
777 bool backtrackParenthesesTerminalEnd(ByteTerm&, DisjunctionContext*)
778 {
779 // 'Terminal' parentheses are at the end of the regex, and as such a match past end
780 // should always be returned as a successful match - we should never backtrack to here.
781 ASSERT_NOT_REACHED();
782 return false;
783 }
784
785 bool matchParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
786 {
787 ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
788 ASSERT(term.atom.quantityCount == 1);
789
790 BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
791
792 backTrack->begin = input.getPos();
793 return true;
794 }
795
796 bool matchParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
797 {
798 ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
799 ASSERT(term.atom.quantityCount == 1);
800
801 BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
802
803 input.setPos(backTrack->begin);
804
805 // We've reached the end of the parens; if they are inverted, this is failure.
806 if (term.invert()) {
807 context->term -= term.atom.parenthesesWidth;
808 return false;
809 }
810
811 return true;
812 }
813
814 bool backtrackParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
815 {
816 ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
817 ASSERT(term.atom.quantityCount == 1);
818
819 // We've failed to match parens; if they are inverted, this is win!
820 if (term.invert()) {
821 context->term += term.atom.parenthesesWidth;
822 return true;
823 }
824
825 return false;
826 }
827
828 bool backtrackParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
829 {
830 ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
831 ASSERT(term.atom.quantityCount == 1);
832
833 BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
834
835 input.setPos(backTrack->begin);
836
837 context->term -= term.atom.parenthesesWidth;
838 return false;
839 }
840
841 JSRegExpResult matchParentheses(ByteTerm& term, DisjunctionContext* context)
842 {
843 ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
844
845 BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
846 ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
847
848 backTrack->matchAmount = 0;
849 backTrack->lastContext = 0;
850
851 switch (term.atom.quantityType) {
852 case QuantifierFixedCount: {
853 // While we haven't yet reached our fixed limit,
854 while (backTrack->matchAmount < term.atom.quantityCount) {
855 // Try to do a match, and it it succeeds, add it to the list.
856 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
857 JSRegExpResult result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
858 if (result == JSRegExpMatch)
859 appendParenthesesDisjunctionContext(backTrack, context);
860 else {
861 // The match failed; try to find an alternate point to carry on from.
862 resetMatches(term, context);
863 freeParenthesesDisjunctionContext(context);
864
865 if (result == JSRegExpNoMatch) {
866 JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
867 if (backtrackResult != JSRegExpMatch)
868 return backtrackResult;
869 } else
870 return result;
871 }
872 }
873
874 ASSERT(backTrack->matchAmount == term.atom.quantityCount);
875 ParenthesesDisjunctionContext* context = backTrack->lastContext;
876 recordParenthesesMatch(term, context);
877 return JSRegExpMatch;
878 }
879
880 case QuantifierGreedy: {
881 while (backTrack->matchAmount < term.atom.quantityCount) {
882 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
883 JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
884 if (result == JSRegExpMatch)
885 appendParenthesesDisjunctionContext(backTrack, context);
886 else {
887 resetMatches(term, context);
888 freeParenthesesDisjunctionContext(context);
889
890 if (result != JSRegExpNoMatch)
891 return result;
892
893 break;
894 }
895 }
896
897 if (backTrack->matchAmount) {
898 ParenthesesDisjunctionContext* context = backTrack->lastContext;
899 recordParenthesesMatch(term, context);
900 }
901 return JSRegExpMatch;
902 }
903
904 case QuantifierNonGreedy:
905 return JSRegExpMatch;
906 }
907
908 ASSERT_NOT_REACHED();
909 return JSRegExpErrorNoMatch;
910 }
911
912 // Rules for backtracking differ depending on whether this is greedy or non-greedy.
913 //
914 // Greedy matches never should try just adding more - you should already have done
915 // the 'more' cases. Always backtrack, at least a leetle bit. However cases where
916 // you backtrack an item off the list needs checking, since we'll never have matched
917 // the one less case. Tracking forwards, still add as much as possible.
918 //
919 // Non-greedy, we've already done the one less case, so don't match on popping.
920 // We haven't done the one more case, so always try to add that.
921 //
922 JSRegExpResult backtrackParentheses(ByteTerm& term, DisjunctionContext* context)
923 {
924 ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
925
926 BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
927 ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
928
929 switch (term.atom.quantityType) {
930 case QuantifierFixedCount: {
931 ASSERT(backTrack->matchAmount == term.atom.quantityCount);
932
933 ParenthesesDisjunctionContext* context = 0;
934 JSRegExpResult result = parenthesesDoBacktrack(term, backTrack);
935
936 if (result != JSRegExpMatch)
937 return result;
938
939 // While we haven't yet reached our fixed limit,
940 while (backTrack->matchAmount < term.atom.quantityCount) {
941 // Try to do a match, and it it succeeds, add it to the list.
942 context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
943 result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
944
945 if (result == JSRegExpMatch)
946 appendParenthesesDisjunctionContext(backTrack, context);
947 else {
948 // The match failed; try to find an alternate point to carry on from.
949 resetMatches(term, context);
950 freeParenthesesDisjunctionContext(context);
951
952 if (result == JSRegExpNoMatch) {
953 JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
954 if (backtrackResult != JSRegExpMatch)
955 return backtrackResult;
956 } else
957 return result;
958 }
959 }
960
961 ASSERT(backTrack->matchAmount == term.atom.quantityCount);
962 context = backTrack->lastContext;
963 recordParenthesesMatch(term, context);
964 return JSRegExpMatch;
965 }
966
967 case QuantifierGreedy: {
968 if (!backTrack->matchAmount)
969 return JSRegExpNoMatch;
970
971 ParenthesesDisjunctionContext* context = backTrack->lastContext;
972 JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
973 if (result == JSRegExpMatch) {
974 while (backTrack->matchAmount < term.atom.quantityCount) {
975 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
976 JSRegExpResult parenthesesResult = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
977 if (parenthesesResult == JSRegExpMatch)
978 appendParenthesesDisjunctionContext(backTrack, context);
979 else {
980 resetMatches(term, context);
981 freeParenthesesDisjunctionContext(context);
982
983 if (parenthesesResult != JSRegExpNoMatch)
984 return parenthesesResult;
985
986 break;
987 }
988 }
989 } else {
990 resetMatches(term, context);
991 popParenthesesDisjunctionContext(backTrack);
992 freeParenthesesDisjunctionContext(context);
993
994 if (result != JSRegExpNoMatch)
995 return result;
996 }
997
998 if (backTrack->matchAmount) {
999 ParenthesesDisjunctionContext* context = backTrack->lastContext;
1000 recordParenthesesMatch(term, context);
1001 }
1002 return JSRegExpMatch;
1003 }
1004
1005 case QuantifierNonGreedy: {
1006 // If we've not reached the limit, try to add one more match.
1007 if (backTrack->matchAmount < term.atom.quantityCount) {
1008 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
1009 JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
1010 if (result == JSRegExpMatch) {
1011 appendParenthesesDisjunctionContext(backTrack, context);
1012 recordParenthesesMatch(term, context);
1013 return JSRegExpMatch;
1014 }
1015
1016 resetMatches(term, context);
1017 freeParenthesesDisjunctionContext(context);
1018
1019 if (result != JSRegExpNoMatch)
1020 return result;
1021 }
1022
1023 // Nope - okay backtrack looking for an alternative.
1024 while (backTrack->matchAmount) {
1025 ParenthesesDisjunctionContext* context = backTrack->lastContext;
1026 JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
1027 if (result == JSRegExpMatch) {
1028 // successful backtrack! we're back in the game!
1029 if (backTrack->matchAmount) {
1030 context = backTrack->lastContext;
1031 recordParenthesesMatch(term, context);
1032 }
1033 return JSRegExpMatch;
1034 }
1035
1036 // pop a match off the stack
1037 resetMatches(term, context);
1038 popParenthesesDisjunctionContext(backTrack);
1039 freeParenthesesDisjunctionContext(context);
1040
1041 return result;
1042 }
1043
1044 return JSRegExpNoMatch;
1045 }
1046 }
1047
1048 ASSERT_NOT_REACHED();
1049 return JSRegExpErrorNoMatch;
1050 }
1051
1052 bool matchDotStarEnclosure(ByteTerm& term, DisjunctionContext* context)
1053 {
1054 UNUSED_PARAM(term);
1055 unsigned matchBegin = context->matchBegin;
1056
1057 if (matchBegin) {
1058 for (matchBegin--; true; matchBegin--) {
1059 if (testCharacterClass(pattern->newlineCharacterClass, input.reread(matchBegin))) {
1060 ++matchBegin;
1061 break;
1062 }
1063
1064 if (!matchBegin)
1065 break;
1066 }
1067 }
1068
1069 unsigned matchEnd = input.getPos();
1070
1071 for (; (matchEnd != input.end())
1072 && (!testCharacterClass(pattern->newlineCharacterClass, input.reread(matchEnd))); matchEnd++) { }
1073
1074 if (((matchBegin && term.anchors.m_bol)
1075 || ((matchEnd != input.end()) && term.anchors.m_eol))
1076 && !pattern->m_multiline)
1077 return false;
1078
1079 context->matchBegin = matchBegin;
1080 context->matchEnd = matchEnd;
1081 return true;
1082 }
1083
1084 #define MATCH_NEXT() { ++context->term; goto matchAgain; }
1085 #define BACKTRACK() { --context->term; goto backtrack; }
1086 #define currentTerm() (disjunction->terms[context->term])
1087 JSRegExpResult matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
1088 {
1089 if (!--remainingMatchCount)
1090 return JSRegExpErrorHitLimit;
1091
1092 if (btrack)
1093 BACKTRACK();
1094
1095 context->matchBegin = input.getPos();
1096 context->term = 0;
1097
1098 matchAgain:
1099 ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
1100
1101 switch (currentTerm().type) {
1102 case ByteTerm::TypeSubpatternBegin:
1103 MATCH_NEXT();
1104 case ByteTerm::TypeSubpatternEnd:
1105 context->matchEnd = input.getPos();
1106 return JSRegExpMatch;
1107
1108 case ByteTerm::TypeBodyAlternativeBegin:
1109 MATCH_NEXT();
1110 case ByteTerm::TypeBodyAlternativeDisjunction:
1111 case ByteTerm::TypeBodyAlternativeEnd:
1112 context->matchEnd = input.getPos();
1113 return JSRegExpMatch;
1114
1115 case ByteTerm::TypeAlternativeBegin:
1116 MATCH_NEXT();
1117 case ByteTerm::TypeAlternativeDisjunction:
1118 case ByteTerm::TypeAlternativeEnd: {
1119 int offset = currentTerm().alternative.end;
1120 BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
1121 backTrack->offset = offset;
1122 context->term += offset;
1123 MATCH_NEXT();
1124 }
1125
1126 case ByteTerm::TypeAssertionBOL:
1127 if (matchAssertionBOL(currentTerm()))
1128 MATCH_NEXT();
1129 BACKTRACK();
1130 case ByteTerm::TypeAssertionEOL:
1131 if (matchAssertionEOL(currentTerm()))
1132 MATCH_NEXT();
1133 BACKTRACK();
1134 case ByteTerm::TypeAssertionWordBoundary:
1135 if (matchAssertionWordBoundary(currentTerm()))
1136 MATCH_NEXT();
1137 BACKTRACK();
1138
1139 case ByteTerm::TypePatternCharacterOnce:
1140 case ByteTerm::TypePatternCharacterFixed: {
1141 for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
1142 if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition + matchAmount))
1143 BACKTRACK();
1144 }
1145 MATCH_NEXT();
1146 }
1147 case ByteTerm::TypePatternCharacterGreedy: {
1148 BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1149 unsigned matchAmount = 0;
1150 while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {
1151 if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition - 1)) {
1152 input.uncheckInput(1);
1153 break;
1154 }
1155 ++matchAmount;
1156 }
1157 backTrack->matchAmount = matchAmount;
1158
1159 MATCH_NEXT();
1160 }
1161 case ByteTerm::TypePatternCharacterNonGreedy: {
1162 BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1163 backTrack->matchAmount = 0;
1164 MATCH_NEXT();
1165 }
1166
1167 case ByteTerm::TypePatternCasedCharacterOnce:
1168 case ByteTerm::TypePatternCasedCharacterFixed: {
1169 for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
1170 if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition + matchAmount))
1171 BACKTRACK();
1172 }
1173 MATCH_NEXT();
1174 }
1175 case ByteTerm::TypePatternCasedCharacterGreedy: {
1176 BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1177 unsigned matchAmount = 0;
1178 while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {
1179 if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - 1)) {
1180 input.uncheckInput(1);
1181 break;
1182 }
1183 ++matchAmount;
1184 }
1185 backTrack->matchAmount = matchAmount;
1186
1187 MATCH_NEXT();
1188 }
1189 case ByteTerm::TypePatternCasedCharacterNonGreedy: {
1190 BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1191 backTrack->matchAmount = 0;
1192 MATCH_NEXT();
1193 }
1194
1195 case ByteTerm::TypeCharacterClass:
1196 if (matchCharacterClass(currentTerm(), context))
1197 MATCH_NEXT();
1198 BACKTRACK();
1199 case ByteTerm::TypeBackReference:
1200 if (matchBackReference(currentTerm(), context))
1201 MATCH_NEXT();
1202 BACKTRACK();
1203 case ByteTerm::TypeParenthesesSubpattern: {
1204 JSRegExpResult result = matchParentheses(currentTerm(), context);
1205
1206 if (result == JSRegExpMatch) {
1207 MATCH_NEXT();
1208 } else if (result != JSRegExpNoMatch)
1209 return result;
1210
1211 BACKTRACK();
1212 }
1213 case ByteTerm::TypeParenthesesSubpatternOnceBegin:
1214 if (matchParenthesesOnceBegin(currentTerm(), context))
1215 MATCH_NEXT();
1216 BACKTRACK();
1217 case ByteTerm::TypeParenthesesSubpatternOnceEnd:
1218 if (matchParenthesesOnceEnd(currentTerm(), context))
1219 MATCH_NEXT();
1220 BACKTRACK();
1221 case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
1222 if (matchParenthesesTerminalBegin(currentTerm(), context))
1223 MATCH_NEXT();
1224 BACKTRACK();
1225 case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
1226 if (matchParenthesesTerminalEnd(currentTerm(), context))
1227 MATCH_NEXT();
1228 BACKTRACK();
1229 case ByteTerm::TypeParentheticalAssertionBegin:
1230 if (matchParentheticalAssertionBegin(currentTerm(), context))
1231 MATCH_NEXT();
1232 BACKTRACK();
1233 case ByteTerm::TypeParentheticalAssertionEnd:
1234 if (matchParentheticalAssertionEnd(currentTerm(), context))
1235 MATCH_NEXT();
1236 BACKTRACK();
1237
1238 case ByteTerm::TypeCheckInput:
1239 if (input.checkInput(currentTerm().checkInputCount))
1240 MATCH_NEXT();
1241 BACKTRACK();
1242
1243 case ByteTerm::TypeUncheckInput:
1244 input.uncheckInput(currentTerm().checkInputCount);
1245 MATCH_NEXT();
1246
1247 case ByteTerm::TypeDotStarEnclosure:
1248 if (matchDotStarEnclosure(currentTerm(), context))
1249 return JSRegExpMatch;
1250 BACKTRACK();
1251 }
1252
1253 // We should never fall-through to here.
1254 ASSERT_NOT_REACHED();
1255
1256 backtrack:
1257 ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
1258
1259 switch (currentTerm().type) {
1260 case ByteTerm::TypeSubpatternBegin:
1261 return JSRegExpNoMatch;
1262 case ByteTerm::TypeSubpatternEnd:
1263 ASSERT_NOT_REACHED();
1264
1265 case ByteTerm::TypeBodyAlternativeBegin:
1266 case ByteTerm::TypeBodyAlternativeDisjunction: {
1267 int offset = currentTerm().alternative.next;
1268 context->term += offset;
1269 if (offset > 0)
1270 MATCH_NEXT();
1271
1272 if (input.atEnd())
1273 return JSRegExpNoMatch;
1274
1275 input.next();
1276
1277 context->matchBegin = input.getPos();
1278
1279 if (currentTerm().alternative.onceThrough)
1280 context->term += currentTerm().alternative.next;
1281
1282 MATCH_NEXT();
1283 }
1284 case ByteTerm::TypeBodyAlternativeEnd:
1285 ASSERT_NOT_REACHED();
1286
1287 case ByteTerm::TypeAlternativeBegin:
1288 case ByteTerm::TypeAlternativeDisjunction: {
1289 int offset = currentTerm().alternative.next;
1290 context->term += offset;
1291 if (offset > 0)
1292 MATCH_NEXT();
1293 BACKTRACK();
1294 }
1295 case ByteTerm::TypeAlternativeEnd: {
1296 // We should never backtrack back into an alternative of the main body of the regex.
1297 BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
1298 unsigned offset = backTrack->offset;
1299 context->term -= offset;
1300 BACKTRACK();
1301 }
1302
1303 case ByteTerm::TypeAssertionBOL:
1304 case ByteTerm::TypeAssertionEOL:
1305 case ByteTerm::TypeAssertionWordBoundary:
1306 BACKTRACK();
1307
1308 case ByteTerm::TypePatternCharacterOnce:
1309 case ByteTerm::TypePatternCharacterFixed:
1310 case ByteTerm::TypePatternCharacterGreedy:
1311 case ByteTerm::TypePatternCharacterNonGreedy:
1312 if (backtrackPatternCharacter(currentTerm(), context))
1313 MATCH_NEXT();
1314 BACKTRACK();
1315 case ByteTerm::TypePatternCasedCharacterOnce:
1316 case ByteTerm::TypePatternCasedCharacterFixed:
1317 case ByteTerm::TypePatternCasedCharacterGreedy:
1318 case ByteTerm::TypePatternCasedCharacterNonGreedy:
1319 if (backtrackPatternCasedCharacter(currentTerm(), context))
1320 MATCH_NEXT();
1321 BACKTRACK();
1322 case ByteTerm::TypeCharacterClass:
1323 if (backtrackCharacterClass(currentTerm(), context))
1324 MATCH_NEXT();
1325 BACKTRACK();
1326 case ByteTerm::TypeBackReference:
1327 if (backtrackBackReference(currentTerm(), context))
1328 MATCH_NEXT();
1329 BACKTRACK();
1330 case ByteTerm::TypeParenthesesSubpattern: {
1331 JSRegExpResult result = backtrackParentheses(currentTerm(), context);
1332
1333 if (result == JSRegExpMatch) {
1334 MATCH_NEXT();
1335 } else if (result != JSRegExpNoMatch)
1336 return result;
1337
1338 BACKTRACK();
1339 }
1340 case ByteTerm::TypeParenthesesSubpatternOnceBegin:
1341 if (backtrackParenthesesOnceBegin(currentTerm(), context))
1342 MATCH_NEXT();
1343 BACKTRACK();
1344 case ByteTerm::TypeParenthesesSubpatternOnceEnd:
1345 if (backtrackParenthesesOnceEnd(currentTerm(), context))
1346 MATCH_NEXT();
1347 BACKTRACK();
1348 case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
1349 if (backtrackParenthesesTerminalBegin(currentTerm(), context))
1350 MATCH_NEXT();
1351 BACKTRACK();
1352 case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
1353 if (backtrackParenthesesTerminalEnd(currentTerm(), context))
1354 MATCH_NEXT();
1355 BACKTRACK();
1356 case ByteTerm::TypeParentheticalAssertionBegin:
1357 if (backtrackParentheticalAssertionBegin(currentTerm(), context))
1358 MATCH_NEXT();
1359 BACKTRACK();
1360 case ByteTerm::TypeParentheticalAssertionEnd:
1361 if (backtrackParentheticalAssertionEnd(currentTerm(), context))
1362 MATCH_NEXT();
1363 BACKTRACK();
1364
1365 case ByteTerm::TypeCheckInput:
1366 input.uncheckInput(currentTerm().checkInputCount);
1367 BACKTRACK();
1368
1369 case ByteTerm::TypeUncheckInput:
1370 input.checkInput(currentTerm().checkInputCount);
1371 BACKTRACK();
1372
1373 case ByteTerm::TypeDotStarEnclosure:
1374 ASSERT_NOT_REACHED();
1375 }
1376
1377 ASSERT_NOT_REACHED();
1378 return JSRegExpErrorNoMatch;
1379 }
1380
1381 JSRegExpResult matchNonZeroDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
1382 {
1383 JSRegExpResult result = matchDisjunction(disjunction, context, btrack);
1384
1385 if (result == JSRegExpMatch) {
1386 while (context->matchBegin == context->matchEnd) {
1387 result = matchDisjunction(disjunction, context, true);
1388 if (result != JSRegExpMatch)
1389 return result;
1390 }
1391 return JSRegExpMatch;
1392 }
1393
1394 return result;
1395 }
1396
1397 int interpret()
1398 {
1399 allocatorPool = pattern->m_allocator->startAllocator();
1400 if (!allocatorPool)
1401 CRASH();
1402
1403 for (unsigned i = 0; i < ((pattern->m_body->m_numSubpatterns + 1) << 1); ++i)
1404 output[i] = -1;
1405
1406 DisjunctionContext* context = allocDisjunctionContext(pattern->m_body.get());
1407
1408 JSRegExpResult result = matchDisjunction(pattern->m_body.get(), context, false);
1409 if (result == JSRegExpMatch) {
1410 output[0] = context->matchBegin;
1411 output[1] = context->matchEnd;
1412 }
1413
1414 freeDisjunctionContext(context);
1415
1416 pattern->m_allocator->stopAllocator();
1417
1418 // RegExp.cpp currently expects all error to be converted to -1.
1419 ASSERT((result == JSRegExpMatch) == (output[0] != -1));
1420 return output[0];
1421 }
1422
1423 Interpreter(BytecodePattern* pattern, int* output, const UChar* inputChar, unsigned start, unsigned length)
1424 : pattern(pattern)
1425 , output(output)
1426 , input(inputChar, start, length)
1427 , allocatorPool(0)
1428 , remainingMatchCount(matchLimit)
1429 {
1430 }
1431
1432 private:
1433 BytecodePattern* pattern;
1434 int* output;
1435 InputStream input;
1436 BumpPointerPool* allocatorPool;
1437 unsigned remainingMatchCount;
1438 };
1439
1440
1441
1442 class ByteCompiler {
1443 struct ParenthesesStackEntry {
1444 unsigned beginTerm;
1445 unsigned savedAlternativeIndex;
1446 ParenthesesStackEntry(unsigned beginTerm, unsigned savedAlternativeIndex/*, unsigned subpatternId, bool capture = false*/)
1447 : beginTerm(beginTerm)
1448 , savedAlternativeIndex(savedAlternativeIndex)
1449 {
1450 }
1451 };
1452
1453 public:
1454 ByteCompiler(YarrPattern& pattern)
1455 : m_pattern(pattern)
1456 {
1457 m_currentAlternativeIndex = 0;
1458 }
1459
1460 PassOwnPtr<BytecodePattern> compile(BumpPointerAllocator* allocator)
1461 {
1462 regexBegin(m_pattern.m_numSubpatterns, m_pattern.m_body->m_callFrameSize, m_pattern.m_body->m_alternatives[0]->onceThrough());
1463 emitDisjunction(m_pattern.m_body);
1464 regexEnd();
1465
1466 return adoptPtr(new BytecodePattern(m_bodyDisjunction.release(), m_allParenthesesInfo, m_pattern, allocator));
1467 }
1468
1469 void checkInput(unsigned count)
1470 {
1471 m_bodyDisjunction->terms.append(ByteTerm::CheckInput(count));
1472 }
1473
1474 void uncheckInput(unsigned count)
1475 {
1476 m_bodyDisjunction->terms.append(ByteTerm::UncheckInput(count));
1477 }
1478
1479 void assertionBOL(int inputPosition)
1480 {
1481 m_bodyDisjunction->terms.append(ByteTerm::BOL(inputPosition));
1482 }
1483
1484 void assertionEOL(int inputPosition)
1485 {
1486 m_bodyDisjunction->terms.append(ByteTerm::EOL(inputPosition));
1487 }
1488
1489 void assertionWordBoundary(bool invert, int inputPosition)
1490 {
1491 m_bodyDisjunction->terms.append(ByteTerm::WordBoundary(invert, inputPosition));
1492 }
1493
1494 void atomPatternCharacter(UChar ch, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
1495 {
1496 if (m_pattern.m_ignoreCase) {
1497 UChar lo = Unicode::toLower(ch);
1498 UChar hi = Unicode::toUpper(ch);
1499
1500 if (lo != hi) {
1501 m_bodyDisjunction->terms.append(ByteTerm(lo, hi, inputPosition, frameLocation, quantityCount, quantityType));
1502 return;
1503 }
1504 }
1505
1506 m_bodyDisjunction->terms.append(ByteTerm(ch, inputPosition, frameLocation, quantityCount, quantityType));
1507 }
1508
1509 void atomCharacterClass(CharacterClass* characterClass, bool invert, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
1510 {
1511 m_bodyDisjunction->terms.append(ByteTerm(characterClass, invert, inputPosition));
1512
1513 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount;
1514 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
1515 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1516 }
1517
1518 void atomBackReference(unsigned subpatternId, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
1519 {
1520 ASSERT(subpatternId);
1521
1522 m_bodyDisjunction->terms.append(ByteTerm::BackReference(subpatternId, inputPosition));
1523
1524 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount;
1525 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
1526 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1527 }
1528
1529 void atomParenthesesOnceBegin(unsigned subpatternId, bool capture, int inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
1530 {
1531 int beginTerm = m_bodyDisjunction->terms.size();
1532
1533 m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
1534 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1535 m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1536 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
1537
1538 m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1539 m_currentAlternativeIndex = beginTerm + 1;
1540 }
1541
1542 void atomParenthesesTerminalBegin(unsigned subpatternId, bool capture, int inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
1543 {
1544 int beginTerm = m_bodyDisjunction->terms.size();
1545
1546 m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalBegin, subpatternId, capture, false, inputPosition));
1547 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1548 m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1549 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
1550
1551 m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1552 m_currentAlternativeIndex = beginTerm + 1;
1553 }
1554
1555 void atomParenthesesSubpatternBegin(unsigned subpatternId, bool capture, int inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
1556 {
1557 // Errrk! - this is a little crazy, we initially generate as a TypeParenthesesSubpatternOnceBegin,
1558 // then fix this up at the end! - simplifying this should make it much clearer.
1559 // https://bugs.webkit.org/show_bug.cgi?id=50136
1560
1561 int beginTerm = m_bodyDisjunction->terms.size();
1562
1563 m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
1564 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1565 m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1566 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
1567
1568 m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1569 m_currentAlternativeIndex = beginTerm + 1;
1570 }
1571
1572 void atomParentheticalAssertionBegin(unsigned subpatternId, bool invert, unsigned frameLocation, unsigned alternativeFrameLocation)
1573 {
1574 int beginTerm = m_bodyDisjunction->terms.size();
1575
1576 m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionBegin, subpatternId, false, invert, 0));
1577 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1578 m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1579 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
1580
1581 m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1582 m_currentAlternativeIndex = beginTerm + 1;
1583 }
1584
1585 void atomParentheticalAssertionEnd(int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
1586 {
1587 unsigned beginTerm = popParenthesesStack();
1588 closeAlternative(beginTerm + 1);
1589 unsigned endTerm = m_bodyDisjunction->terms.size();
1590
1591 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParentheticalAssertionBegin);
1592
1593 bool invert = m_bodyDisjunction->terms[beginTerm].invert();
1594 unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
1595
1596 m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionEnd, subpatternId, false, invert, inputPosition));
1597 m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1598 m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1599 m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
1600
1601 m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount;
1602 m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1603 m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount;
1604 m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
1605 }
1606
1607 void assertionDotStarEnclosure(bool bolAnchored, bool eolAnchored)
1608 {
1609 m_bodyDisjunction->terms.append(ByteTerm::DotStarEnclosure(bolAnchored, eolAnchored));
1610 }
1611
1612 unsigned popParenthesesStack()
1613 {
1614 ASSERT(m_parenthesesStack.size());
1615 int stackEnd = m_parenthesesStack.size() - 1;
1616 unsigned beginTerm = m_parenthesesStack[stackEnd].beginTerm;
1617 m_currentAlternativeIndex = m_parenthesesStack[stackEnd].savedAlternativeIndex;
1618 m_parenthesesStack.shrink(stackEnd);
1619
1620 ASSERT(beginTerm < m_bodyDisjunction->terms.size());
1621 ASSERT(m_currentAlternativeIndex < m_bodyDisjunction->terms.size());
1622
1623 return beginTerm;
1624 }
1625
1626 #ifndef NDEBUG
1627 void dumpDisjunction(ByteDisjunction* disjunction)
1628 {
1629 printf("ByteDisjunction(%p):\n\t", disjunction);
1630 for (unsigned i = 0; i < disjunction->terms.size(); ++i)
1631 printf("{ %d } ", disjunction->terms[i].type);
1632 printf("\n");
1633 }
1634 #endif
1635
1636 void closeAlternative(int beginTerm)
1637 {
1638 int origBeginTerm = beginTerm;
1639 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeBegin);
1640 int endIndex = m_bodyDisjunction->terms.size();
1641
1642 unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation;
1643
1644 if (!m_bodyDisjunction->terms[beginTerm].alternative.next)
1645 m_bodyDisjunction->terms.remove(beginTerm);
1646 else {
1647 while (m_bodyDisjunction->terms[beginTerm].alternative.next) {
1648 beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next;
1649 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeDisjunction);
1650 m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;
1651 m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
1652 }
1653
1654 m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;
1655
1656 m_bodyDisjunction->terms.append(ByteTerm::AlternativeEnd());
1657 m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
1658 }
1659 }
1660
1661 void closeBodyAlternative()
1662 {
1663 int beginTerm = 0;
1664 int origBeginTerm = 0;
1665 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeBegin);
1666 int endIndex = m_bodyDisjunction->terms.size();
1667
1668 unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation;
1669
1670 while (m_bodyDisjunction->terms[beginTerm].alternative.next) {
1671 beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next;
1672 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeDisjunction);
1673 m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;
1674 m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
1675 }
1676
1677 m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;
1678
1679 m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeEnd());
1680 m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
1681 }
1682
1683 void atomParenthesesSubpatternEnd(unsigned lastSubpatternId, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType, unsigned callFrameSize = 0)
1684 {
1685 unsigned beginTerm = popParenthesesStack();
1686 closeAlternative(beginTerm + 1);
1687 unsigned endTerm = m_bodyDisjunction->terms.size();
1688
1689 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
1690
1691 ByteTerm& parenthesesBegin = m_bodyDisjunction->terms[beginTerm];
1692
1693 bool capture = parenthesesBegin.capture();
1694 unsigned subpatternId = parenthesesBegin.atom.subpatternId;
1695
1696 unsigned numSubpatterns = lastSubpatternId - subpatternId + 1;
1697 ByteDisjunction* parenthesesDisjunction = new ByteDisjunction(numSubpatterns, callFrameSize);
1698
1699 parenthesesDisjunction->terms.append(ByteTerm::SubpatternBegin());
1700 for (unsigned termInParentheses = beginTerm + 1; termInParentheses < endTerm; ++termInParentheses)
1701 parenthesesDisjunction->terms.append(m_bodyDisjunction->terms[termInParentheses]);
1702 parenthesesDisjunction->terms.append(ByteTerm::SubpatternEnd());
1703
1704 m_bodyDisjunction->terms.shrink(beginTerm);
1705
1706 m_allParenthesesInfo.append(parenthesesDisjunction);
1707 m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction, capture, inputPosition));
1708
1709 m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount;
1710 m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1711 m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
1712 }
1713
1714 void atomParenthesesOnceEnd(int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
1715 {
1716 unsigned beginTerm = popParenthesesStack();
1717 closeAlternative(beginTerm + 1);
1718 unsigned endTerm = m_bodyDisjunction->terms.size();
1719
1720 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
1721
1722 bool capture = m_bodyDisjunction->terms[beginTerm].capture();
1723 unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
1724
1725 m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceEnd, subpatternId, capture, false, inputPosition));
1726 m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1727 m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1728 m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
1729
1730 m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount;
1731 m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1732 m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount;
1733 m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
1734 }
1735
1736 void atomParenthesesTerminalEnd(int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
1737 {
1738 unsigned beginTerm = popParenthesesStack();
1739 closeAlternative(beginTerm + 1);
1740 unsigned endTerm = m_bodyDisjunction->terms.size();
1741
1742 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
1743
1744 bool capture = m_bodyDisjunction->terms[beginTerm].capture();
1745 unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
1746
1747 m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalEnd, subpatternId, capture, false, inputPosition));
1748 m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1749 m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1750 m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
1751
1752 m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount;
1753 m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1754 m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount;
1755 m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
1756 }
1757
1758 void regexBegin(unsigned numSubpatterns, unsigned callFrameSize, bool onceThrough)
1759 {
1760 m_bodyDisjunction = adoptPtr(new ByteDisjunction(numSubpatterns, callFrameSize));
1761 m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeBegin(onceThrough));
1762 m_bodyDisjunction->terms[0].frameLocation = 0;
1763 m_currentAlternativeIndex = 0;
1764 }
1765
1766 void regexEnd()
1767 {
1768 closeBodyAlternative();
1769 }
1770
1771 void alternativeBodyDisjunction(bool onceThrough)
1772 {
1773 int newAlternativeIndex = m_bodyDisjunction->terms.size();
1774 m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
1775 m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeDisjunction(onceThrough));
1776
1777 m_currentAlternativeIndex = newAlternativeIndex;
1778 }
1779
1780 void alternativeDisjunction()
1781 {
1782 int newAlternativeIndex = m_bodyDisjunction->terms.size();
1783 m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
1784 m_bodyDisjunction->terms.append(ByteTerm::AlternativeDisjunction());
1785
1786 m_currentAlternativeIndex = newAlternativeIndex;
1787 }
1788
1789 void emitDisjunction(PatternDisjunction* disjunction, unsigned inputCountAlreadyChecked = 0, unsigned parenthesesInputCountAlreadyChecked = 0)
1790 {
1791 for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
1792 unsigned currentCountAlreadyChecked = inputCountAlreadyChecked;
1793
1794 PatternAlternative* alternative = disjunction->m_alternatives[alt];
1795
1796 if (alt) {
1797 if (disjunction == m_pattern.m_body)
1798 alternativeBodyDisjunction(alternative->onceThrough());
1799 else
1800 alternativeDisjunction();
1801 }
1802
1803 unsigned minimumSize = alternative->m_minimumSize;
1804 ASSERT(minimumSize >= parenthesesInputCountAlreadyChecked);
1805 unsigned countToCheck = minimumSize - parenthesesInputCountAlreadyChecked;
1806
1807 if (countToCheck) {
1808 checkInput(countToCheck);
1809 currentCountAlreadyChecked += countToCheck;
1810 }
1811
1812 for (unsigned i = 0; i < alternative->m_terms.size(); ++i) {
1813 PatternTerm& term = alternative->m_terms[i];
1814
1815 switch (term.type) {
1816 case PatternTerm::TypeAssertionBOL:
1817 assertionBOL(term.inputPosition - currentCountAlreadyChecked);
1818 break;
1819
1820 case PatternTerm::TypeAssertionEOL:
1821 assertionEOL(term.inputPosition - currentCountAlreadyChecked);
1822 break;
1823
1824 case PatternTerm::TypeAssertionWordBoundary:
1825 assertionWordBoundary(term.invert(), term.inputPosition - currentCountAlreadyChecked);
1826 break;
1827
1828 case PatternTerm::TypePatternCharacter:
1829 atomPatternCharacter(term.patternCharacter, term.inputPosition - currentCountAlreadyChecked, term.frameLocation, term.quantityCount, term.quantityType);
1830 break;
1831
1832 case PatternTerm::TypeCharacterClass:
1833 atomCharacterClass(term.characterClass, term.invert(), term.inputPosition - currentCountAlreadyChecked, term.frameLocation, term.quantityCount, term.quantityType);
1834 break;
1835
1836 case PatternTerm::TypeBackReference:
1837 atomBackReference(term.backReferenceSubpatternId, term.inputPosition - currentCountAlreadyChecked, term.frameLocation, term.quantityCount, term.quantityType);
1838 break;
1839
1840 case PatternTerm::TypeForwardReference:
1841 break;
1842
1843 case PatternTerm::TypeParenthesesSubpattern: {
1844 unsigned disjunctionAlreadyCheckedCount = 0;
1845 if (term.quantityCount == 1 && !term.parentheses.isCopy) {
1846 unsigned alternativeFrameLocation = term.frameLocation;
1847 // For QuantifierFixedCount we pre-check the minimum size; for greedy/non-greedy we reserve a slot in the frame.
1848 if (term.quantityType == QuantifierFixedCount)
1849 disjunctionAlreadyCheckedCount = term.parentheses.disjunction->m_minimumSize;
1850 else
1851 alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
1852 unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
1853 atomParenthesesOnceBegin(term.parentheses.subpatternId, term.capture(), delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, alternativeFrameLocation);
1854 emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount);
1855 atomParenthesesOnceEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType);
1856 } else if (term.parentheses.isTerminal) {
1857 unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
1858 atomParenthesesTerminalBegin(term.parentheses.subpatternId, term.capture(), delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, term.frameLocation + YarrStackSpaceForBackTrackInfoParenthesesOnce);
1859 emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount);
1860 atomParenthesesTerminalEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType);
1861 } else {
1862 unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
1863 atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.capture(), delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, 0);
1864 emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0);
1865 atomParenthesesSubpatternEnd(term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
1866 }
1867 break;
1868 }
1869
1870 case PatternTerm::TypeParentheticalAssertion: {
1871 unsigned alternativeFrameLocation = term.frameLocation + YarrStackSpaceForBackTrackInfoParentheticalAssertion;
1872
1873 ASSERT(currentCountAlreadyChecked >= static_cast<unsigned>(term.inputPosition));
1874 unsigned positiveInputOffset = currentCountAlreadyChecked - static_cast<unsigned>(term.inputPosition);
1875 unsigned uncheckAmount = 0;
1876 if (positiveInputOffset > term.parentheses.disjunction->m_minimumSize) {
1877 uncheckAmount = positiveInputOffset - term.parentheses.disjunction->m_minimumSize;
1878 uncheckInput(uncheckAmount);
1879 currentCountAlreadyChecked -= uncheckAmount;
1880 }
1881
1882 atomParentheticalAssertionBegin(term.parentheses.subpatternId, term.invert(), term.frameLocation, alternativeFrameLocation);
1883 emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, positiveInputOffset - uncheckAmount);
1884 atomParentheticalAssertionEnd(0, term.frameLocation, term.quantityCount, term.quantityType);
1885 if (uncheckAmount) {
1886 checkInput(uncheckAmount);
1887 currentCountAlreadyChecked += uncheckAmount;
1888 }
1889 break;
1890 }
1891
1892 case PatternTerm::TypeDotStarEnclosure:
1893 assertionDotStarEnclosure(term.anchors.bolAnchor, term.anchors.eolAnchor);
1894 break;
1895 }
1896 }
1897 }
1898 }
1899
1900 private:
1901 YarrPattern& m_pattern;
1902 OwnPtr<ByteDisjunction> m_bodyDisjunction;
1903 unsigned m_currentAlternativeIndex;
1904 Vector<ParenthesesStackEntry> m_parenthesesStack;
1905 Vector<ByteDisjunction*> m_allParenthesesInfo;
1906 };
1907
1908 PassOwnPtr<BytecodePattern> byteCompile(YarrPattern& pattern, BumpPointerAllocator* allocator)
1909 {
1910 return ByteCompiler(pattern).compile(allocator);
1911 }
1912
1913 int interpret(BytecodePattern* bytecode, const UChar* input, unsigned start, unsigned length, int* output)
1914 {
1915 return Interpreter(bytecode, output, input, start, length).interpret();
1916 }
1917
1918 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoPatternCharacter) == (YarrStackSpaceForBackTrackInfoPatternCharacter * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoPatternCharacter);
1919 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoCharacterClass) == (YarrStackSpaceForBackTrackInfoCharacterClass * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoCharacterClass);
1920 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoBackReference) == (YarrStackSpaceForBackTrackInfoBackReference * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoBackReference);
1921 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoAlternative) == (YarrStackSpaceForBackTrackInfoAlternative * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoAlternative);
1922 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParentheticalAssertion) == (YarrStackSpaceForBackTrackInfoParentheticalAssertion * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheticalAssertion);
1923 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParenthesesOnce) == (YarrStackSpaceForBackTrackInfoParenthesesOnce * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParenthesesOnce);
1924 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParentheses) == (YarrStackSpaceForBackTrackInfoParentheses * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheses);
1925
1926
1927 } }