2 * Copyright (C) 2009 Apple Inc. All rights reserved.
3 * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 #include "YarrInterpreter.h"
31 #include <wtf/BumpPointerAllocator.h>
39 namespace JSC
{ namespace Yarr
{
43 struct ParenthesesDisjunctionContext
;
45 struct BackTrackInfoPatternCharacter
{
46 uintptr_t matchAmount
;
48 struct BackTrackInfoCharacterClass
{
49 uintptr_t matchAmount
;
51 struct BackTrackInfoBackReference
{
52 uintptr_t begin
; // Not really needed for greedy quantifiers.
53 uintptr_t matchAmount
; // Not really needed for fixed quantifiers.
55 struct BackTrackInfoAlternative
{
58 struct BackTrackInfoParentheticalAssertion
{
61 struct BackTrackInfoParenthesesOnce
{
64 struct BackTrackInfoParenthesesTerminal
{
67 struct BackTrackInfoParentheses
{
68 uintptr_t matchAmount
;
69 ParenthesesDisjunctionContext
* lastContext
;
72 static inline void appendParenthesesDisjunctionContext(BackTrackInfoParentheses
* backTrack
, ParenthesesDisjunctionContext
* context
)
74 context
->next
= backTrack
->lastContext
;
75 backTrack
->lastContext
= context
;
76 ++backTrack
->matchAmount
;
79 static inline void popParenthesesDisjunctionContext(BackTrackInfoParentheses
* backTrack
)
81 ASSERT(backTrack
->matchAmount
);
82 ASSERT(backTrack
->lastContext
);
83 backTrack
->lastContext
= backTrack
->lastContext
->next
;
84 --backTrack
->matchAmount
;
87 struct DisjunctionContext
94 void* operator new(size_t, void* where
)
105 DisjunctionContext
* allocDisjunctionContext(ByteDisjunction
* disjunction
)
107 size_t size
= sizeof(DisjunctionContext
) - sizeof(uintptr_t) + disjunction
->m_frameSize
* sizeof(uintptr_t);
108 allocatorPool
= allocatorPool
->ensureCapacity(size
);
111 return new(allocatorPool
->alloc(size
)) DisjunctionContext();
114 void freeDisjunctionContext(DisjunctionContext
* context
)
116 allocatorPool
= allocatorPool
->dealloc(context
);
119 struct ParenthesesDisjunctionContext
121 ParenthesesDisjunctionContext(int* output
, ByteTerm
& term
)
124 unsigned firstSubpatternId
= term
.atom
.subpatternId
;
125 unsigned numNestedSubpatterns
= term
.atom
.parenthesesDisjunction
->m_numSubpatterns
;
127 for (unsigned i
= 0; i
< (numNestedSubpatterns
<< 1); ++i
) {
128 subpatternBackup
[i
] = output
[(firstSubpatternId
<< 1) + i
];
129 output
[(firstSubpatternId
<< 1) + i
] = -1;
132 new(getDisjunctionContext(term
)) DisjunctionContext();
135 void* operator new(size_t, void* where
)
140 void restoreOutput(int* output
, unsigned firstSubpatternId
, unsigned numNestedSubpatterns
)
142 for (unsigned i
= 0; i
< (numNestedSubpatterns
<< 1); ++i
)
143 output
[(firstSubpatternId
<< 1) + i
] = subpatternBackup
[i
];
146 DisjunctionContext
* getDisjunctionContext(ByteTerm
& term
)
148 return reinterpret_cast<DisjunctionContext
*>(&(subpatternBackup
[term
.atom
.parenthesesDisjunction
->m_numSubpatterns
<< 1]));
151 ParenthesesDisjunctionContext
* next
;
152 int subpatternBackup
[1];
155 ParenthesesDisjunctionContext
* allocParenthesesDisjunctionContext(ByteDisjunction
* disjunction
, int* output
, ByteTerm
& term
)
157 size_t size
= sizeof(ParenthesesDisjunctionContext
) - sizeof(int) + (term
.atom
.parenthesesDisjunction
->m_numSubpatterns
<< 1) * sizeof(int) + sizeof(DisjunctionContext
) - sizeof(uintptr_t) + disjunction
->m_frameSize
* sizeof(uintptr_t);
158 allocatorPool
= allocatorPool
->ensureCapacity(size
);
161 return new(allocatorPool
->alloc(size
)) ParenthesesDisjunctionContext(output
, term
);
164 void freeParenthesesDisjunctionContext(ParenthesesDisjunctionContext
* context
)
166 allocatorPool
= allocatorPool
->dealloc(context
);
171 InputStream(const UChar
* input
, unsigned start
, unsigned length
)
183 void rewind(unsigned amount
)
185 ASSERT(pos
>= amount
);
191 ASSERT(pos
< length
);
199 ASSERT(pos
+ 1 < length
);
200 return input
[pos
] | input
[pos
+ 1] << 16;
203 int readChecked(int position
)
205 ASSERT(position
< 0);
206 ASSERT(static_cast<unsigned>(-position
) <= pos
);
207 unsigned p
= pos
+ position
;
212 int reread(unsigned from
)
214 ASSERT(from
< length
);
220 ASSERT(!(pos
> length
));
222 return input
[pos
- 1];
231 void setPos(unsigned p
)
243 return pos
== length
;
251 bool checkInput(int count
)
253 if ((pos
+ count
) <= length
) {
260 void uncheckInput(int count
)
265 bool atStart(int position
)
267 return (pos
+ position
) == 0;
270 bool atEnd(int position
)
272 return (pos
+ position
) == length
;
275 bool isNotAvailableInput(int position
)
277 return (pos
+ position
) > length
;
286 bool testCharacterClass(CharacterClass
* characterClass
, int ch
)
289 for (unsigned i
= 0; i
< characterClass
->m_matchesUnicode
.size(); ++i
)
290 if (ch
== characterClass
->m_matchesUnicode
[i
])
292 for (unsigned i
= 0; i
< characterClass
->m_rangesUnicode
.size(); ++i
)
293 if ((ch
>= characterClass
->m_rangesUnicode
[i
].begin
) && (ch
<= characterClass
->m_rangesUnicode
[i
].end
))
296 for (unsigned i
= 0; i
< characterClass
->m_matches
.size(); ++i
)
297 if (ch
== characterClass
->m_matches
[i
])
299 for (unsigned i
= 0; i
< characterClass
->m_ranges
.size(); ++i
)
300 if ((ch
>= characterClass
->m_ranges
[i
].begin
) && (ch
<= characterClass
->m_ranges
[i
].end
))
307 bool checkCharacter(int testChar
, int inputPosition
)
309 return testChar
== input
.readChecked(inputPosition
);
312 bool checkCasedCharacter(int loChar
, int hiChar
, int inputPosition
)
314 int ch
= input
.readChecked(inputPosition
);
315 return (loChar
== ch
) || (hiChar
== ch
);
318 bool checkCharacterClass(CharacterClass
* characterClass
, bool invert
, int inputPosition
)
320 bool match
= testCharacterClass(characterClass
, input
.readChecked(inputPosition
));
321 return invert
? !match
: match
;
324 bool tryConsumeBackReference(int matchBegin
, int matchEnd
, int inputOffset
)
326 int matchSize
= matchEnd
- matchBegin
;
328 if (!input
.checkInput(matchSize
))
331 if (pattern
->m_ignoreCase
) {
332 for (int i
= 0; i
< matchSize
; ++i
) {
333 int ch
= input
.reread(matchBegin
+ i
);
335 int lo
= Unicode::toLower(ch
);
336 int hi
= Unicode::toUpper(ch
);
338 if ((lo
!= hi
) ? (!checkCasedCharacter(lo
, hi
, inputOffset
- matchSize
+ i
)) : (!checkCharacter(ch
, inputOffset
- matchSize
+ i
))) {
339 input
.uncheckInput(matchSize
);
344 for (int i
= 0; i
< matchSize
; ++i
) {
345 if (!checkCharacter(input
.reread(matchBegin
+ i
), inputOffset
- matchSize
+ i
)) {
346 input
.uncheckInput(matchSize
);
355 bool matchAssertionBOL(ByteTerm
& term
)
357 return (input
.atStart(term
.inputPosition
)) || (pattern
->m_multiline
&& testCharacterClass(pattern
->newlineCharacterClass
, input
.readChecked(term
.inputPosition
- 1)));
360 bool matchAssertionEOL(ByteTerm
& term
)
362 if (term
.inputPosition
)
363 return (input
.atEnd(term
.inputPosition
)) || (pattern
->m_multiline
&& testCharacterClass(pattern
->newlineCharacterClass
, input
.readChecked(term
.inputPosition
)));
365 return (input
.atEnd()) || (pattern
->m_multiline
&& testCharacterClass(pattern
->newlineCharacterClass
, input
.read()));
368 bool matchAssertionWordBoundary(ByteTerm
& term
)
370 bool prevIsWordchar
= !input
.atStart(term
.inputPosition
) && testCharacterClass(pattern
->wordcharCharacterClass
, input
.readChecked(term
.inputPosition
- 1));
372 if (term
.inputPosition
)
373 readIsWordchar
= !input
.atEnd(term
.inputPosition
) && testCharacterClass(pattern
->wordcharCharacterClass
, input
.readChecked(term
.inputPosition
));
375 readIsWordchar
= !input
.atEnd() && testCharacterClass(pattern
->wordcharCharacterClass
, input
.read());
377 bool wordBoundary
= prevIsWordchar
!= readIsWordchar
;
378 return term
.invert() ? !wordBoundary
: wordBoundary
;
381 bool backtrackPatternCharacter(ByteTerm
& term
, DisjunctionContext
* context
)
383 BackTrackInfoPatternCharacter
* backTrack
= reinterpret_cast<BackTrackInfoPatternCharacter
*>(context
->frame
+ term
.frameLocation
);
385 switch (term
.atom
.quantityType
) {
386 case QuantifierFixedCount
:
389 case QuantifierGreedy
:
390 if (backTrack
->matchAmount
) {
391 --backTrack
->matchAmount
;
392 input
.uncheckInput(1);
397 case QuantifierNonGreedy
:
398 if ((backTrack
->matchAmount
< term
.atom
.quantityCount
) && input
.checkInput(1)) {
399 ++backTrack
->matchAmount
;
400 if (checkCharacter(term
.atom
.patternCharacter
, term
.inputPosition
- 1))
403 input
.uncheckInput(backTrack
->matchAmount
);
410 bool backtrackPatternCasedCharacter(ByteTerm
& term
, DisjunctionContext
* context
)
412 BackTrackInfoPatternCharacter
* backTrack
= reinterpret_cast<BackTrackInfoPatternCharacter
*>(context
->frame
+ term
.frameLocation
);
414 switch (term
.atom
.quantityType
) {
415 case QuantifierFixedCount
:
418 case QuantifierGreedy
:
419 if (backTrack
->matchAmount
) {
420 --backTrack
->matchAmount
;
421 input
.uncheckInput(1);
426 case QuantifierNonGreedy
:
427 if ((backTrack
->matchAmount
< term
.atom
.quantityCount
) && input
.checkInput(1)) {
428 ++backTrack
->matchAmount
;
429 if (checkCasedCharacter(term
.atom
.casedCharacter
.lo
, term
.atom
.casedCharacter
.hi
, term
.inputPosition
- 1))
432 input
.uncheckInput(backTrack
->matchAmount
);
439 bool matchCharacterClass(ByteTerm
& term
, DisjunctionContext
* context
)
441 ASSERT(term
.type
== ByteTerm::TypeCharacterClass
);
442 BackTrackInfoPatternCharacter
* backTrack
= reinterpret_cast<BackTrackInfoPatternCharacter
*>(context
->frame
+ term
.frameLocation
);
444 switch (term
.atom
.quantityType
) {
445 case QuantifierFixedCount
: {
446 for (unsigned matchAmount
= 0; matchAmount
< term
.atom
.quantityCount
; ++matchAmount
) {
447 if (!checkCharacterClass(term
.atom
.characterClass
, term
.invert(), term
.inputPosition
+ matchAmount
))
453 case QuantifierGreedy
: {
454 unsigned matchAmount
= 0;
455 while ((matchAmount
< term
.atom
.quantityCount
) && input
.checkInput(1)) {
456 if (!checkCharacterClass(term
.atom
.characterClass
, term
.invert(), term
.inputPosition
- 1)) {
457 input
.uncheckInput(1);
462 backTrack
->matchAmount
= matchAmount
;
467 case QuantifierNonGreedy
:
468 backTrack
->matchAmount
= 0;
472 ASSERT_NOT_REACHED();
476 bool backtrackCharacterClass(ByteTerm
& term
, DisjunctionContext
* context
)
478 ASSERT(term
.type
== ByteTerm::TypeCharacterClass
);
479 BackTrackInfoPatternCharacter
* backTrack
= reinterpret_cast<BackTrackInfoPatternCharacter
*>(context
->frame
+ term
.frameLocation
);
481 switch (term
.atom
.quantityType
) {
482 case QuantifierFixedCount
:
485 case QuantifierGreedy
:
486 if (backTrack
->matchAmount
) {
487 --backTrack
->matchAmount
;
488 input
.uncheckInput(1);
493 case QuantifierNonGreedy
:
494 if ((backTrack
->matchAmount
< term
.atom
.quantityCount
) && input
.checkInput(1)) {
495 ++backTrack
->matchAmount
;
496 if (checkCharacterClass(term
.atom
.characterClass
, term
.invert(), term
.inputPosition
- 1))
499 input
.uncheckInput(backTrack
->matchAmount
);
506 bool matchBackReference(ByteTerm
& term
, DisjunctionContext
* context
)
508 ASSERT(term
.type
== ByteTerm::TypeBackReference
);
509 BackTrackInfoBackReference
* backTrack
= reinterpret_cast<BackTrackInfoBackReference
*>(context
->frame
+ term
.frameLocation
);
511 int matchBegin
= output
[(term
.atom
.subpatternId
<< 1)];
512 int matchEnd
= output
[(term
.atom
.subpatternId
<< 1) + 1];
514 // If the end position of the referenced match hasn't set yet then the backreference in the same parentheses where it references to that.
515 // In this case the result of match is empty string like when it references to a parentheses with zero-width match.
520 ASSERT((matchBegin
== -1) || (matchBegin
<= matchEnd
));
522 if (matchBegin
== matchEnd
)
525 switch (term
.atom
.quantityType
) {
526 case QuantifierFixedCount
: {
527 backTrack
->begin
= input
.getPos();
528 for (unsigned matchAmount
= 0; matchAmount
< term
.atom
.quantityCount
; ++matchAmount
) {
529 if (!tryConsumeBackReference(matchBegin
, matchEnd
, term
.inputPosition
)) {
530 input
.setPos(backTrack
->begin
);
537 case QuantifierGreedy
: {
538 unsigned matchAmount
= 0;
539 while ((matchAmount
< term
.atom
.quantityCount
) && tryConsumeBackReference(matchBegin
, matchEnd
, term
.inputPosition
))
541 backTrack
->matchAmount
= matchAmount
;
545 case QuantifierNonGreedy
:
546 backTrack
->begin
= input
.getPos();
547 backTrack
->matchAmount
= 0;
551 ASSERT_NOT_REACHED();
555 bool backtrackBackReference(ByteTerm
& term
, DisjunctionContext
* context
)
557 ASSERT(term
.type
== ByteTerm::TypeBackReference
);
558 BackTrackInfoBackReference
* backTrack
= reinterpret_cast<BackTrackInfoBackReference
*>(context
->frame
+ term
.frameLocation
);
560 int matchBegin
= output
[(term
.atom
.subpatternId
<< 1)];
561 int matchEnd
= output
[(term
.atom
.subpatternId
<< 1) + 1];
562 ASSERT((matchBegin
== -1) || (matchBegin
<= matchEnd
));
564 if (matchBegin
== matchEnd
)
567 switch (term
.atom
.quantityType
) {
568 case QuantifierFixedCount
:
569 // for quantityCount == 1, could rewind.
570 input
.setPos(backTrack
->begin
);
573 case QuantifierGreedy
:
574 if (backTrack
->matchAmount
) {
575 --backTrack
->matchAmount
;
576 input
.rewind(matchEnd
- matchBegin
);
581 case QuantifierNonGreedy
:
582 if ((backTrack
->matchAmount
< term
.atom
.quantityCount
) && tryConsumeBackReference(matchBegin
, matchEnd
, term
.inputPosition
)) {
583 ++backTrack
->matchAmount
;
586 input
.setPos(backTrack
->begin
);
593 void recordParenthesesMatch(ByteTerm
& term
, ParenthesesDisjunctionContext
* context
)
595 if (term
.capture()) {
596 unsigned subpatternId
= term
.atom
.subpatternId
;
597 output
[(subpatternId
<< 1)] = context
->getDisjunctionContext(term
)->matchBegin
+ term
.inputPosition
;
598 output
[(subpatternId
<< 1) + 1] = context
->getDisjunctionContext(term
)->matchEnd
+ term
.inputPosition
;
601 void resetMatches(ByteTerm
& term
, ParenthesesDisjunctionContext
* context
)
603 unsigned firstSubpatternId
= term
.atom
.subpatternId
;
604 unsigned count
= term
.atom
.parenthesesDisjunction
->m_numSubpatterns
;
605 context
->restoreOutput(output
, firstSubpatternId
, count
);
607 JSRegExpResult
parenthesesDoBacktrack(ByteTerm
& term
, BackTrackInfoParentheses
* backTrack
)
609 while (backTrack
->matchAmount
) {
610 ParenthesesDisjunctionContext
* context
= backTrack
->lastContext
;
612 JSRegExpResult result
= matchDisjunction(term
.atom
.parenthesesDisjunction
, context
->getDisjunctionContext(term
), true);
613 if (result
== JSRegExpMatch
)
614 return JSRegExpMatch
;
616 resetMatches(term
, context
);
617 popParenthesesDisjunctionContext(backTrack
);
618 freeParenthesesDisjunctionContext(context
);
620 if (result
!= JSRegExpNoMatch
)
624 return JSRegExpNoMatch
;
627 bool matchParenthesesOnceBegin(ByteTerm
& term
, DisjunctionContext
* context
)
629 ASSERT(term
.type
== ByteTerm::TypeParenthesesSubpatternOnceBegin
);
630 ASSERT(term
.atom
.quantityCount
== 1);
632 BackTrackInfoParenthesesOnce
* backTrack
= reinterpret_cast<BackTrackInfoParenthesesOnce
*>(context
->frame
+ term
.frameLocation
);
634 switch (term
.atom
.quantityType
) {
635 case QuantifierGreedy
: {
636 // set this speculatively; if we get to the parens end this will be true.
637 backTrack
->begin
= input
.getPos();
640 case QuantifierNonGreedy
: {
641 backTrack
->begin
= notFound
;
642 context
->term
+= term
.atom
.parenthesesWidth
;
645 case QuantifierFixedCount
:
649 if (term
.capture()) {
650 unsigned subpatternId
= term
.atom
.subpatternId
;
651 output
[(subpatternId
<< 1)] = input
.getPos() + term
.inputPosition
;
657 bool matchParenthesesOnceEnd(ByteTerm
& term
, DisjunctionContext
* context
)
659 ASSERT(term
.type
== ByteTerm::TypeParenthesesSubpatternOnceEnd
);
660 ASSERT(term
.atom
.quantityCount
== 1);
662 if (term
.capture()) {
663 unsigned subpatternId
= term
.atom
.subpatternId
;
664 output
[(subpatternId
<< 1) + 1] = input
.getPos() + term
.inputPosition
;
667 if (term
.atom
.quantityType
== QuantifierFixedCount
)
670 BackTrackInfoParenthesesOnce
* backTrack
= reinterpret_cast<BackTrackInfoParenthesesOnce
*>(context
->frame
+ term
.frameLocation
);
671 return backTrack
->begin
!= input
.getPos();
674 bool backtrackParenthesesOnceBegin(ByteTerm
& term
, DisjunctionContext
* context
)
676 ASSERT(term
.type
== ByteTerm::TypeParenthesesSubpatternOnceBegin
);
677 ASSERT(term
.atom
.quantityCount
== 1);
679 BackTrackInfoParenthesesOnce
* backTrack
= reinterpret_cast<BackTrackInfoParenthesesOnce
*>(context
->frame
+ term
.frameLocation
);
681 if (term
.capture()) {
682 unsigned subpatternId
= term
.atom
.subpatternId
;
683 output
[(subpatternId
<< 1)] = -1;
684 output
[(subpatternId
<< 1) + 1] = -1;
687 switch (term
.atom
.quantityType
) {
688 case QuantifierGreedy
:
689 // if we backtrack to this point, there is another chance - try matching nothing.
690 ASSERT(backTrack
->begin
!= notFound
);
691 backTrack
->begin
= notFound
;
692 context
->term
+= term
.atom
.parenthesesWidth
;
694 case QuantifierNonGreedy
:
695 ASSERT(backTrack
->begin
!= notFound
);
696 case QuantifierFixedCount
:
703 bool backtrackParenthesesOnceEnd(ByteTerm
& term
, DisjunctionContext
* context
)
705 ASSERT(term
.type
== ByteTerm::TypeParenthesesSubpatternOnceEnd
);
706 ASSERT(term
.atom
.quantityCount
== 1);
708 BackTrackInfoParenthesesOnce
* backTrack
= reinterpret_cast<BackTrackInfoParenthesesOnce
*>(context
->frame
+ term
.frameLocation
);
710 switch (term
.atom
.quantityType
) {
711 case QuantifierGreedy
:
712 if (backTrack
->begin
== notFound
) {
713 context
->term
-= term
.atom
.parenthesesWidth
;
716 case QuantifierNonGreedy
:
717 if (backTrack
->begin
== notFound
) {
718 backTrack
->begin
= input
.getPos();
719 if (term
.capture()) {
720 // Technically this access to inputPosition should be accessing the begin term's
721 // inputPosition, but for repeats other than fixed these values should be
722 // the same anyway! (We don't pre-check for greedy or non-greedy matches.)
723 ASSERT((&term
- term
.atom
.parenthesesWidth
)->type
== ByteTerm::TypeParenthesesSubpatternOnceBegin
);
724 ASSERT((&term
- term
.atom
.parenthesesWidth
)->inputPosition
== term
.inputPosition
);
725 unsigned subpatternId
= term
.atom
.subpatternId
;
726 output
[subpatternId
<< 1] = input
.getPos() + term
.inputPosition
;
728 context
->term
-= term
.atom
.parenthesesWidth
;
731 case QuantifierFixedCount
:
738 bool matchParenthesesTerminalBegin(ByteTerm
& term
, DisjunctionContext
* context
)
740 ASSERT(term
.type
== ByteTerm::TypeParenthesesSubpatternTerminalBegin
);
741 ASSERT(term
.atom
.quantityType
== QuantifierGreedy
);
742 ASSERT(term
.atom
.quantityCount
== quantifyInfinite
);
743 ASSERT(!term
.capture());
745 BackTrackInfoParenthesesTerminal
* backTrack
= reinterpret_cast<BackTrackInfoParenthesesTerminal
*>(context
->frame
+ term
.frameLocation
);
746 backTrack
->begin
= input
.getPos();
750 bool matchParenthesesTerminalEnd(ByteTerm
& term
, DisjunctionContext
* context
)
752 ASSERT(term
.type
== ByteTerm::TypeParenthesesSubpatternTerminalEnd
);
754 BackTrackInfoParenthesesTerminal
* backTrack
= reinterpret_cast<BackTrackInfoParenthesesTerminal
*>(context
->frame
+ term
.frameLocation
);
755 // Empty match is a failed match.
756 if (backTrack
->begin
== input
.getPos())
759 // Successful match! Okay, what's next? - loop around and try to match moar!
760 context
->term
-= (term
.atom
.parenthesesWidth
+ 1);
764 bool backtrackParenthesesTerminalBegin(ByteTerm
& term
, DisjunctionContext
* context
)
766 ASSERT(term
.type
== ByteTerm::TypeParenthesesSubpatternTerminalBegin
);
767 ASSERT(term
.atom
.quantityType
== QuantifierGreedy
);
768 ASSERT(term
.atom
.quantityCount
== quantifyInfinite
);
769 ASSERT(!term
.capture());
771 // If we backtrack to this point, we have failed to match this iteration of the parens.
772 // Since this is greedy / zero minimum a failed is also accepted as a match!
773 context
->term
+= term
.atom
.parenthesesWidth
;
777 bool backtrackParenthesesTerminalEnd(ByteTerm
&, DisjunctionContext
*)
779 // 'Terminal' parentheses are at the end of the regex, and as such a match past end
780 // should always be returned as a successful match - we should never backtrack to here.
781 ASSERT_NOT_REACHED();
785 bool matchParentheticalAssertionBegin(ByteTerm
& term
, DisjunctionContext
* context
)
787 ASSERT(term
.type
== ByteTerm::TypeParentheticalAssertionBegin
);
788 ASSERT(term
.atom
.quantityCount
== 1);
790 BackTrackInfoParentheticalAssertion
* backTrack
= reinterpret_cast<BackTrackInfoParentheticalAssertion
*>(context
->frame
+ term
.frameLocation
);
792 backTrack
->begin
= input
.getPos();
796 bool matchParentheticalAssertionEnd(ByteTerm
& term
, DisjunctionContext
* context
)
798 ASSERT(term
.type
== ByteTerm::TypeParentheticalAssertionEnd
);
799 ASSERT(term
.atom
.quantityCount
== 1);
801 BackTrackInfoParentheticalAssertion
* backTrack
= reinterpret_cast<BackTrackInfoParentheticalAssertion
*>(context
->frame
+ term
.frameLocation
);
803 input
.setPos(backTrack
->begin
);
805 // We've reached the end of the parens; if they are inverted, this is failure.
807 context
->term
-= term
.atom
.parenthesesWidth
;
814 bool backtrackParentheticalAssertionBegin(ByteTerm
& term
, DisjunctionContext
* context
)
816 ASSERT(term
.type
== ByteTerm::TypeParentheticalAssertionBegin
);
817 ASSERT(term
.atom
.quantityCount
== 1);
819 // We've failed to match parens; if they are inverted, this is win!
821 context
->term
+= term
.atom
.parenthesesWidth
;
828 bool backtrackParentheticalAssertionEnd(ByteTerm
& term
, DisjunctionContext
* context
)
830 ASSERT(term
.type
== ByteTerm::TypeParentheticalAssertionEnd
);
831 ASSERT(term
.atom
.quantityCount
== 1);
833 BackTrackInfoParentheticalAssertion
* backTrack
= reinterpret_cast<BackTrackInfoParentheticalAssertion
*>(context
->frame
+ term
.frameLocation
);
835 input
.setPos(backTrack
->begin
);
837 context
->term
-= term
.atom
.parenthesesWidth
;
841 JSRegExpResult
matchParentheses(ByteTerm
& term
, DisjunctionContext
* context
)
843 ASSERT(term
.type
== ByteTerm::TypeParenthesesSubpattern
);
845 BackTrackInfoParentheses
* backTrack
= reinterpret_cast<BackTrackInfoParentheses
*>(context
->frame
+ term
.frameLocation
);
846 ByteDisjunction
* disjunctionBody
= term
.atom
.parenthesesDisjunction
;
848 backTrack
->matchAmount
= 0;
849 backTrack
->lastContext
= 0;
851 switch (term
.atom
.quantityType
) {
852 case QuantifierFixedCount
: {
853 // While we haven't yet reached our fixed limit,
854 while (backTrack
->matchAmount
< term
.atom
.quantityCount
) {
855 // Try to do a match, and it it succeeds, add it to the list.
856 ParenthesesDisjunctionContext
* context
= allocParenthesesDisjunctionContext(disjunctionBody
, output
, term
);
857 JSRegExpResult result
= matchDisjunction(disjunctionBody
, context
->getDisjunctionContext(term
));
858 if (result
== JSRegExpMatch
)
859 appendParenthesesDisjunctionContext(backTrack
, context
);
861 // The match failed; try to find an alternate point to carry on from.
862 resetMatches(term
, context
);
863 freeParenthesesDisjunctionContext(context
);
865 if (result
== JSRegExpNoMatch
) {
866 JSRegExpResult backtrackResult
= parenthesesDoBacktrack(term
, backTrack
);
867 if (backtrackResult
!= JSRegExpMatch
)
868 return backtrackResult
;
874 ASSERT(backTrack
->matchAmount
== term
.atom
.quantityCount
);
875 ParenthesesDisjunctionContext
* context
= backTrack
->lastContext
;
876 recordParenthesesMatch(term
, context
);
877 return JSRegExpMatch
;
880 case QuantifierGreedy
: {
881 while (backTrack
->matchAmount
< term
.atom
.quantityCount
) {
882 ParenthesesDisjunctionContext
* context
= allocParenthesesDisjunctionContext(disjunctionBody
, output
, term
);
883 JSRegExpResult result
= matchNonZeroDisjunction(disjunctionBody
, context
->getDisjunctionContext(term
));
884 if (result
== JSRegExpMatch
)
885 appendParenthesesDisjunctionContext(backTrack
, context
);
887 resetMatches(term
, context
);
888 freeParenthesesDisjunctionContext(context
);
890 if (result
!= JSRegExpNoMatch
)
897 if (backTrack
->matchAmount
) {
898 ParenthesesDisjunctionContext
* context
= backTrack
->lastContext
;
899 recordParenthesesMatch(term
, context
);
901 return JSRegExpMatch
;
904 case QuantifierNonGreedy
:
905 return JSRegExpMatch
;
908 ASSERT_NOT_REACHED();
909 return JSRegExpErrorNoMatch
;
912 // Rules for backtracking differ depending on whether this is greedy or non-greedy.
914 // Greedy matches never should try just adding more - you should already have done
915 // the 'more' cases. Always backtrack, at least a leetle bit. However cases where
916 // you backtrack an item off the list needs checking, since we'll never have matched
917 // the one less case. Tracking forwards, still add as much as possible.
919 // Non-greedy, we've already done the one less case, so don't match on popping.
920 // We haven't done the one more case, so always try to add that.
922 JSRegExpResult
backtrackParentheses(ByteTerm
& term
, DisjunctionContext
* context
)
924 ASSERT(term
.type
== ByteTerm::TypeParenthesesSubpattern
);
926 BackTrackInfoParentheses
* backTrack
= reinterpret_cast<BackTrackInfoParentheses
*>(context
->frame
+ term
.frameLocation
);
927 ByteDisjunction
* disjunctionBody
= term
.atom
.parenthesesDisjunction
;
929 switch (term
.atom
.quantityType
) {
930 case QuantifierFixedCount
: {
931 ASSERT(backTrack
->matchAmount
== term
.atom
.quantityCount
);
933 ParenthesesDisjunctionContext
* context
= 0;
934 JSRegExpResult result
= parenthesesDoBacktrack(term
, backTrack
);
936 if (result
!= JSRegExpMatch
)
939 // While we haven't yet reached our fixed limit,
940 while (backTrack
->matchAmount
< term
.atom
.quantityCount
) {
941 // Try to do a match, and it it succeeds, add it to the list.
942 context
= allocParenthesesDisjunctionContext(disjunctionBody
, output
, term
);
943 result
= matchDisjunction(disjunctionBody
, context
->getDisjunctionContext(term
));
945 if (result
== JSRegExpMatch
)
946 appendParenthesesDisjunctionContext(backTrack
, context
);
948 // The match failed; try to find an alternate point to carry on from.
949 resetMatches(term
, context
);
950 freeParenthesesDisjunctionContext(context
);
952 if (result
== JSRegExpNoMatch
) {
953 JSRegExpResult backtrackResult
= parenthesesDoBacktrack(term
, backTrack
);
954 if (backtrackResult
!= JSRegExpMatch
)
955 return backtrackResult
;
961 ASSERT(backTrack
->matchAmount
== term
.atom
.quantityCount
);
962 context
= backTrack
->lastContext
;
963 recordParenthesesMatch(term
, context
);
964 return JSRegExpMatch
;
967 case QuantifierGreedy
: {
968 if (!backTrack
->matchAmount
)
969 return JSRegExpNoMatch
;
971 ParenthesesDisjunctionContext
* context
= backTrack
->lastContext
;
972 JSRegExpResult result
= matchNonZeroDisjunction(disjunctionBody
, context
->getDisjunctionContext(term
), true);
973 if (result
== JSRegExpMatch
) {
974 while (backTrack
->matchAmount
< term
.atom
.quantityCount
) {
975 ParenthesesDisjunctionContext
* context
= allocParenthesesDisjunctionContext(disjunctionBody
, output
, term
);
976 JSRegExpResult parenthesesResult
= matchNonZeroDisjunction(disjunctionBody
, context
->getDisjunctionContext(term
));
977 if (parenthesesResult
== JSRegExpMatch
)
978 appendParenthesesDisjunctionContext(backTrack
, context
);
980 resetMatches(term
, context
);
981 freeParenthesesDisjunctionContext(context
);
983 if (parenthesesResult
!= JSRegExpNoMatch
)
984 return parenthesesResult
;
990 resetMatches(term
, context
);
991 popParenthesesDisjunctionContext(backTrack
);
992 freeParenthesesDisjunctionContext(context
);
994 if (result
!= JSRegExpNoMatch
)
998 if (backTrack
->matchAmount
) {
999 ParenthesesDisjunctionContext
* context
= backTrack
->lastContext
;
1000 recordParenthesesMatch(term
, context
);
1002 return JSRegExpMatch
;
1005 case QuantifierNonGreedy
: {
1006 // If we've not reached the limit, try to add one more match.
1007 if (backTrack
->matchAmount
< term
.atom
.quantityCount
) {
1008 ParenthesesDisjunctionContext
* context
= allocParenthesesDisjunctionContext(disjunctionBody
, output
, term
);
1009 JSRegExpResult result
= matchNonZeroDisjunction(disjunctionBody
, context
->getDisjunctionContext(term
));
1010 if (result
== JSRegExpMatch
) {
1011 appendParenthesesDisjunctionContext(backTrack
, context
);
1012 recordParenthesesMatch(term
, context
);
1013 return JSRegExpMatch
;
1016 resetMatches(term
, context
);
1017 freeParenthesesDisjunctionContext(context
);
1019 if (result
!= JSRegExpNoMatch
)
1023 // Nope - okay backtrack looking for an alternative.
1024 while (backTrack
->matchAmount
) {
1025 ParenthesesDisjunctionContext
* context
= backTrack
->lastContext
;
1026 JSRegExpResult result
= matchNonZeroDisjunction(disjunctionBody
, context
->getDisjunctionContext(term
), true);
1027 if (result
== JSRegExpMatch
) {
1028 // successful backtrack! we're back in the game!
1029 if (backTrack
->matchAmount
) {
1030 context
= backTrack
->lastContext
;
1031 recordParenthesesMatch(term
, context
);
1033 return JSRegExpMatch
;
1036 // pop a match off the stack
1037 resetMatches(term
, context
);
1038 popParenthesesDisjunctionContext(backTrack
);
1039 freeParenthesesDisjunctionContext(context
);
1044 return JSRegExpNoMatch
;
1048 ASSERT_NOT_REACHED();
1049 return JSRegExpErrorNoMatch
;
1052 bool matchDotStarEnclosure(ByteTerm
& term
, DisjunctionContext
* context
)
1055 unsigned matchBegin
= context
->matchBegin
;
1058 for (matchBegin
--; true; matchBegin
--) {
1059 if (testCharacterClass(pattern
->newlineCharacterClass
, input
.reread(matchBegin
))) {
1069 unsigned matchEnd
= input
.getPos();
1071 for (; (matchEnd
!= input
.end())
1072 && (!testCharacterClass(pattern
->newlineCharacterClass
, input
.reread(matchEnd
))); matchEnd
++) { }
1074 if (((matchBegin
&& term
.anchors
.m_bol
)
1075 || ((matchEnd
!= input
.end()) && term
.anchors
.m_eol
))
1076 && !pattern
->m_multiline
)
1079 context
->matchBegin
= matchBegin
;
1080 context
->matchEnd
= matchEnd
;
1084 #define MATCH_NEXT() { ++context->term; goto matchAgain; }
1085 #define BACKTRACK() { --context->term; goto backtrack; }
1086 #define currentTerm() (disjunction->terms[context->term])
1087 JSRegExpResult
matchDisjunction(ByteDisjunction
* disjunction
, DisjunctionContext
* context
, bool btrack
= false)
1089 if (!--remainingMatchCount
)
1090 return JSRegExpErrorHitLimit
;
1095 context
->matchBegin
= input
.getPos();
1099 ASSERT(context
->term
< static_cast<int>(disjunction
->terms
.size()));
1101 switch (currentTerm().type
) {
1102 case ByteTerm::TypeSubpatternBegin
:
1104 case ByteTerm::TypeSubpatternEnd
:
1105 context
->matchEnd
= input
.getPos();
1106 return JSRegExpMatch
;
1108 case ByteTerm::TypeBodyAlternativeBegin
:
1110 case ByteTerm::TypeBodyAlternativeDisjunction
:
1111 case ByteTerm::TypeBodyAlternativeEnd
:
1112 context
->matchEnd
= input
.getPos();
1113 return JSRegExpMatch
;
1115 case ByteTerm::TypeAlternativeBegin
:
1117 case ByteTerm::TypeAlternativeDisjunction
:
1118 case ByteTerm::TypeAlternativeEnd
: {
1119 int offset
= currentTerm().alternative
.end
;
1120 BackTrackInfoAlternative
* backTrack
= reinterpret_cast<BackTrackInfoAlternative
*>(context
->frame
+ currentTerm().frameLocation
);
1121 backTrack
->offset
= offset
;
1122 context
->term
+= offset
;
1126 case ByteTerm::TypeAssertionBOL
:
1127 if (matchAssertionBOL(currentTerm()))
1130 case ByteTerm::TypeAssertionEOL
:
1131 if (matchAssertionEOL(currentTerm()))
1134 case ByteTerm::TypeAssertionWordBoundary
:
1135 if (matchAssertionWordBoundary(currentTerm()))
1139 case ByteTerm::TypePatternCharacterOnce
:
1140 case ByteTerm::TypePatternCharacterFixed
: {
1141 for (unsigned matchAmount
= 0; matchAmount
< currentTerm().atom
.quantityCount
; ++matchAmount
) {
1142 if (!checkCharacter(currentTerm().atom
.patternCharacter
, currentTerm().inputPosition
+ matchAmount
))
1147 case ByteTerm::TypePatternCharacterGreedy
: {
1148 BackTrackInfoPatternCharacter
* backTrack
= reinterpret_cast<BackTrackInfoPatternCharacter
*>(context
->frame
+ currentTerm().frameLocation
);
1149 unsigned matchAmount
= 0;
1150 while ((matchAmount
< currentTerm().atom
.quantityCount
) && input
.checkInput(1)) {
1151 if (!checkCharacter(currentTerm().atom
.patternCharacter
, currentTerm().inputPosition
- 1)) {
1152 input
.uncheckInput(1);
1157 backTrack
->matchAmount
= matchAmount
;
1161 case ByteTerm::TypePatternCharacterNonGreedy
: {
1162 BackTrackInfoPatternCharacter
* backTrack
= reinterpret_cast<BackTrackInfoPatternCharacter
*>(context
->frame
+ currentTerm().frameLocation
);
1163 backTrack
->matchAmount
= 0;
1167 case ByteTerm::TypePatternCasedCharacterOnce
:
1168 case ByteTerm::TypePatternCasedCharacterFixed
: {
1169 for (unsigned matchAmount
= 0; matchAmount
< currentTerm().atom
.quantityCount
; ++matchAmount
) {
1170 if (!checkCasedCharacter(currentTerm().atom
.casedCharacter
.lo
, currentTerm().atom
.casedCharacter
.hi
, currentTerm().inputPosition
+ matchAmount
))
1175 case ByteTerm::TypePatternCasedCharacterGreedy
: {
1176 BackTrackInfoPatternCharacter
* backTrack
= reinterpret_cast<BackTrackInfoPatternCharacter
*>(context
->frame
+ currentTerm().frameLocation
);
1177 unsigned matchAmount
= 0;
1178 while ((matchAmount
< currentTerm().atom
.quantityCount
) && input
.checkInput(1)) {
1179 if (!checkCasedCharacter(currentTerm().atom
.casedCharacter
.lo
, currentTerm().atom
.casedCharacter
.hi
, currentTerm().inputPosition
- 1)) {
1180 input
.uncheckInput(1);
1185 backTrack
->matchAmount
= matchAmount
;
1189 case ByteTerm::TypePatternCasedCharacterNonGreedy
: {
1190 BackTrackInfoPatternCharacter
* backTrack
= reinterpret_cast<BackTrackInfoPatternCharacter
*>(context
->frame
+ currentTerm().frameLocation
);
1191 backTrack
->matchAmount
= 0;
1195 case ByteTerm::TypeCharacterClass
:
1196 if (matchCharacterClass(currentTerm(), context
))
1199 case ByteTerm::TypeBackReference
:
1200 if (matchBackReference(currentTerm(), context
))
1203 case ByteTerm::TypeParenthesesSubpattern
: {
1204 JSRegExpResult result
= matchParentheses(currentTerm(), context
);
1206 if (result
== JSRegExpMatch
) {
1208 } else if (result
!= JSRegExpNoMatch
)
1213 case ByteTerm::TypeParenthesesSubpatternOnceBegin
:
1214 if (matchParenthesesOnceBegin(currentTerm(), context
))
1217 case ByteTerm::TypeParenthesesSubpatternOnceEnd
:
1218 if (matchParenthesesOnceEnd(currentTerm(), context
))
1221 case ByteTerm::TypeParenthesesSubpatternTerminalBegin
:
1222 if (matchParenthesesTerminalBegin(currentTerm(), context
))
1225 case ByteTerm::TypeParenthesesSubpatternTerminalEnd
:
1226 if (matchParenthesesTerminalEnd(currentTerm(), context
))
1229 case ByteTerm::TypeParentheticalAssertionBegin
:
1230 if (matchParentheticalAssertionBegin(currentTerm(), context
))
1233 case ByteTerm::TypeParentheticalAssertionEnd
:
1234 if (matchParentheticalAssertionEnd(currentTerm(), context
))
1238 case ByteTerm::TypeCheckInput
:
1239 if (input
.checkInput(currentTerm().checkInputCount
))
1243 case ByteTerm::TypeUncheckInput
:
1244 input
.uncheckInput(currentTerm().checkInputCount
);
1247 case ByteTerm::TypeDotStarEnclosure
:
1248 if (matchDotStarEnclosure(currentTerm(), context
))
1249 return JSRegExpMatch
;
1253 // We should never fall-through to here.
1254 ASSERT_NOT_REACHED();
1257 ASSERT(context
->term
< static_cast<int>(disjunction
->terms
.size()));
1259 switch (currentTerm().type
) {
1260 case ByteTerm::TypeSubpatternBegin
:
1261 return JSRegExpNoMatch
;
1262 case ByteTerm::TypeSubpatternEnd
:
1263 ASSERT_NOT_REACHED();
1265 case ByteTerm::TypeBodyAlternativeBegin
:
1266 case ByteTerm::TypeBodyAlternativeDisjunction
: {
1267 int offset
= currentTerm().alternative
.next
;
1268 context
->term
+= offset
;
1273 return JSRegExpNoMatch
;
1277 context
->matchBegin
= input
.getPos();
1279 if (currentTerm().alternative
.onceThrough
)
1280 context
->term
+= currentTerm().alternative
.next
;
1284 case ByteTerm::TypeBodyAlternativeEnd
:
1285 ASSERT_NOT_REACHED();
1287 case ByteTerm::TypeAlternativeBegin
:
1288 case ByteTerm::TypeAlternativeDisjunction
: {
1289 int offset
= currentTerm().alternative
.next
;
1290 context
->term
+= offset
;
1295 case ByteTerm::TypeAlternativeEnd
: {
1296 // We should never backtrack back into an alternative of the main body of the regex.
1297 BackTrackInfoAlternative
* backTrack
= reinterpret_cast<BackTrackInfoAlternative
*>(context
->frame
+ currentTerm().frameLocation
);
1298 unsigned offset
= backTrack
->offset
;
1299 context
->term
-= offset
;
1303 case ByteTerm::TypeAssertionBOL
:
1304 case ByteTerm::TypeAssertionEOL
:
1305 case ByteTerm::TypeAssertionWordBoundary
:
1308 case ByteTerm::TypePatternCharacterOnce
:
1309 case ByteTerm::TypePatternCharacterFixed
:
1310 case ByteTerm::TypePatternCharacterGreedy
:
1311 case ByteTerm::TypePatternCharacterNonGreedy
:
1312 if (backtrackPatternCharacter(currentTerm(), context
))
1315 case ByteTerm::TypePatternCasedCharacterOnce
:
1316 case ByteTerm::TypePatternCasedCharacterFixed
:
1317 case ByteTerm::TypePatternCasedCharacterGreedy
:
1318 case ByteTerm::TypePatternCasedCharacterNonGreedy
:
1319 if (backtrackPatternCasedCharacter(currentTerm(), context
))
1322 case ByteTerm::TypeCharacterClass
:
1323 if (backtrackCharacterClass(currentTerm(), context
))
1326 case ByteTerm::TypeBackReference
:
1327 if (backtrackBackReference(currentTerm(), context
))
1330 case ByteTerm::TypeParenthesesSubpattern
: {
1331 JSRegExpResult result
= backtrackParentheses(currentTerm(), context
);
1333 if (result
== JSRegExpMatch
) {
1335 } else if (result
!= JSRegExpNoMatch
)
1340 case ByteTerm::TypeParenthesesSubpatternOnceBegin
:
1341 if (backtrackParenthesesOnceBegin(currentTerm(), context
))
1344 case ByteTerm::TypeParenthesesSubpatternOnceEnd
:
1345 if (backtrackParenthesesOnceEnd(currentTerm(), context
))
1348 case ByteTerm::TypeParenthesesSubpatternTerminalBegin
:
1349 if (backtrackParenthesesTerminalBegin(currentTerm(), context
))
1352 case ByteTerm::TypeParenthesesSubpatternTerminalEnd
:
1353 if (backtrackParenthesesTerminalEnd(currentTerm(), context
))
1356 case ByteTerm::TypeParentheticalAssertionBegin
:
1357 if (backtrackParentheticalAssertionBegin(currentTerm(), context
))
1360 case ByteTerm::TypeParentheticalAssertionEnd
:
1361 if (backtrackParentheticalAssertionEnd(currentTerm(), context
))
1365 case ByteTerm::TypeCheckInput
:
1366 input
.uncheckInput(currentTerm().checkInputCount
);
1369 case ByteTerm::TypeUncheckInput
:
1370 input
.checkInput(currentTerm().checkInputCount
);
1373 case ByteTerm::TypeDotStarEnclosure
:
1374 ASSERT_NOT_REACHED();
1377 ASSERT_NOT_REACHED();
1378 return JSRegExpErrorNoMatch
;
1381 JSRegExpResult
matchNonZeroDisjunction(ByteDisjunction
* disjunction
, DisjunctionContext
* context
, bool btrack
= false)
1383 JSRegExpResult result
= matchDisjunction(disjunction
, context
, btrack
);
1385 if (result
== JSRegExpMatch
) {
1386 while (context
->matchBegin
== context
->matchEnd
) {
1387 result
= matchDisjunction(disjunction
, context
, true);
1388 if (result
!= JSRegExpMatch
)
1391 return JSRegExpMatch
;
1399 allocatorPool
= pattern
->m_allocator
->startAllocator();
1403 for (unsigned i
= 0; i
< ((pattern
->m_body
->m_numSubpatterns
+ 1) << 1); ++i
)
1406 DisjunctionContext
* context
= allocDisjunctionContext(pattern
->m_body
.get());
1408 JSRegExpResult result
= matchDisjunction(pattern
->m_body
.get(), context
, false);
1409 if (result
== JSRegExpMatch
) {
1410 output
[0] = context
->matchBegin
;
1411 output
[1] = context
->matchEnd
;
1414 freeDisjunctionContext(context
);
1416 pattern
->m_allocator
->stopAllocator();
1418 // RegExp.cpp currently expects all error to be converted to -1.
1419 ASSERT((result
== JSRegExpMatch
) == (output
[0] != -1));
1423 Interpreter(BytecodePattern
* pattern
, int* output
, const UChar
* inputChar
, unsigned start
, unsigned length
)
1426 , input(inputChar
, start
, length
)
1428 , remainingMatchCount(matchLimit
)
1433 BytecodePattern
* pattern
;
1436 BumpPointerPool
* allocatorPool
;
1437 unsigned remainingMatchCount
;
1442 class ByteCompiler
{
1443 struct ParenthesesStackEntry
{
1445 unsigned savedAlternativeIndex
;
1446 ParenthesesStackEntry(unsigned beginTerm
, unsigned savedAlternativeIndex
/*, unsigned subpatternId, bool capture = false*/)
1447 : beginTerm(beginTerm
)
1448 , savedAlternativeIndex(savedAlternativeIndex
)
1454 ByteCompiler(YarrPattern
& pattern
)
1455 : m_pattern(pattern
)
1457 m_currentAlternativeIndex
= 0;
1460 PassOwnPtr
<BytecodePattern
> compile(BumpPointerAllocator
* allocator
)
1462 regexBegin(m_pattern
.m_numSubpatterns
, m_pattern
.m_body
->m_callFrameSize
, m_pattern
.m_body
->m_alternatives
[0]->onceThrough());
1463 emitDisjunction(m_pattern
.m_body
);
1466 return adoptPtr(new BytecodePattern(m_bodyDisjunction
.release(), m_allParenthesesInfo
, m_pattern
, allocator
));
1469 void checkInput(unsigned count
)
1471 m_bodyDisjunction
->terms
.append(ByteTerm::CheckInput(count
));
1474 void uncheckInput(unsigned count
)
1476 m_bodyDisjunction
->terms
.append(ByteTerm::UncheckInput(count
));
1479 void assertionBOL(int inputPosition
)
1481 m_bodyDisjunction
->terms
.append(ByteTerm::BOL(inputPosition
));
1484 void assertionEOL(int inputPosition
)
1486 m_bodyDisjunction
->terms
.append(ByteTerm::EOL(inputPosition
));
1489 void assertionWordBoundary(bool invert
, int inputPosition
)
1491 m_bodyDisjunction
->terms
.append(ByteTerm::WordBoundary(invert
, inputPosition
));
1494 void atomPatternCharacter(UChar ch
, int inputPosition
, unsigned frameLocation
, unsigned quantityCount
, QuantifierType quantityType
)
1496 if (m_pattern
.m_ignoreCase
) {
1497 UChar lo
= Unicode::toLower(ch
);
1498 UChar hi
= Unicode::toUpper(ch
);
1501 m_bodyDisjunction
->terms
.append(ByteTerm(lo
, hi
, inputPosition
, frameLocation
, quantityCount
, quantityType
));
1506 m_bodyDisjunction
->terms
.append(ByteTerm(ch
, inputPosition
, frameLocation
, quantityCount
, quantityType
));
1509 void atomCharacterClass(CharacterClass
* characterClass
, bool invert
, int inputPosition
, unsigned frameLocation
, unsigned quantityCount
, QuantifierType quantityType
)
1511 m_bodyDisjunction
->terms
.append(ByteTerm(characterClass
, invert
, inputPosition
));
1513 m_bodyDisjunction
->terms
[m_bodyDisjunction
->terms
.size() - 1].atom
.quantityCount
= quantityCount
;
1514 m_bodyDisjunction
->terms
[m_bodyDisjunction
->terms
.size() - 1].atom
.quantityType
= quantityType
;
1515 m_bodyDisjunction
->terms
[m_bodyDisjunction
->terms
.size() - 1].frameLocation
= frameLocation
;
1518 void atomBackReference(unsigned subpatternId
, int inputPosition
, unsigned frameLocation
, unsigned quantityCount
, QuantifierType quantityType
)
1520 ASSERT(subpatternId
);
1522 m_bodyDisjunction
->terms
.append(ByteTerm::BackReference(subpatternId
, inputPosition
));
1524 m_bodyDisjunction
->terms
[m_bodyDisjunction
->terms
.size() - 1].atom
.quantityCount
= quantityCount
;
1525 m_bodyDisjunction
->terms
[m_bodyDisjunction
->terms
.size() - 1].atom
.quantityType
= quantityType
;
1526 m_bodyDisjunction
->terms
[m_bodyDisjunction
->terms
.size() - 1].frameLocation
= frameLocation
;
1529 void atomParenthesesOnceBegin(unsigned subpatternId
, bool capture
, int inputPosition
, unsigned frameLocation
, unsigned alternativeFrameLocation
)
1531 int beginTerm
= m_bodyDisjunction
->terms
.size();
1533 m_bodyDisjunction
->terms
.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin
, subpatternId
, capture
, false, inputPosition
));
1534 m_bodyDisjunction
->terms
[m_bodyDisjunction
->terms
.size() - 1].frameLocation
= frameLocation
;
1535 m_bodyDisjunction
->terms
.append(ByteTerm::AlternativeBegin());
1536 m_bodyDisjunction
->terms
[m_bodyDisjunction
->terms
.size() - 1].frameLocation
= alternativeFrameLocation
;
1538 m_parenthesesStack
.append(ParenthesesStackEntry(beginTerm
, m_currentAlternativeIndex
));
1539 m_currentAlternativeIndex
= beginTerm
+ 1;
1542 void atomParenthesesTerminalBegin(unsigned subpatternId
, bool capture
, int inputPosition
, unsigned frameLocation
, unsigned alternativeFrameLocation
)
1544 int beginTerm
= m_bodyDisjunction
->terms
.size();
1546 m_bodyDisjunction
->terms
.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalBegin
, subpatternId
, capture
, false, inputPosition
));
1547 m_bodyDisjunction
->terms
[m_bodyDisjunction
->terms
.size() - 1].frameLocation
= frameLocation
;
1548 m_bodyDisjunction
->terms
.append(ByteTerm::AlternativeBegin());
1549 m_bodyDisjunction
->terms
[m_bodyDisjunction
->terms
.size() - 1].frameLocation
= alternativeFrameLocation
;
1551 m_parenthesesStack
.append(ParenthesesStackEntry(beginTerm
, m_currentAlternativeIndex
));
1552 m_currentAlternativeIndex
= beginTerm
+ 1;
1555 void atomParenthesesSubpatternBegin(unsigned subpatternId
, bool capture
, int inputPosition
, unsigned frameLocation
, unsigned alternativeFrameLocation
)
1557 // Errrk! - this is a little crazy, we initially generate as a TypeParenthesesSubpatternOnceBegin,
1558 // then fix this up at the end! - simplifying this should make it much clearer.
1559 // https://bugs.webkit.org/show_bug.cgi?id=50136
1561 int beginTerm
= m_bodyDisjunction
->terms
.size();
1563 m_bodyDisjunction
->terms
.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin
, subpatternId
, capture
, false, inputPosition
));
1564 m_bodyDisjunction
->terms
[m_bodyDisjunction
->terms
.size() - 1].frameLocation
= frameLocation
;
1565 m_bodyDisjunction
->terms
.append(ByteTerm::AlternativeBegin());
1566 m_bodyDisjunction
->terms
[m_bodyDisjunction
->terms
.size() - 1].frameLocation
= alternativeFrameLocation
;
1568 m_parenthesesStack
.append(ParenthesesStackEntry(beginTerm
, m_currentAlternativeIndex
));
1569 m_currentAlternativeIndex
= beginTerm
+ 1;
1572 void atomParentheticalAssertionBegin(unsigned subpatternId
, bool invert
, unsigned frameLocation
, unsigned alternativeFrameLocation
)
1574 int beginTerm
= m_bodyDisjunction
->terms
.size();
1576 m_bodyDisjunction
->terms
.append(ByteTerm(ByteTerm::TypeParentheticalAssertionBegin
, subpatternId
, false, invert
, 0));
1577 m_bodyDisjunction
->terms
[m_bodyDisjunction
->terms
.size() - 1].frameLocation
= frameLocation
;
1578 m_bodyDisjunction
->terms
.append(ByteTerm::AlternativeBegin());
1579 m_bodyDisjunction
->terms
[m_bodyDisjunction
->terms
.size() - 1].frameLocation
= alternativeFrameLocation
;
1581 m_parenthesesStack
.append(ParenthesesStackEntry(beginTerm
, m_currentAlternativeIndex
));
1582 m_currentAlternativeIndex
= beginTerm
+ 1;
1585 void atomParentheticalAssertionEnd(int inputPosition
, unsigned frameLocation
, unsigned quantityCount
, QuantifierType quantityType
)
1587 unsigned beginTerm
= popParenthesesStack();
1588 closeAlternative(beginTerm
+ 1);
1589 unsigned endTerm
= m_bodyDisjunction
->terms
.size();
1591 ASSERT(m_bodyDisjunction
->terms
[beginTerm
].type
== ByteTerm::TypeParentheticalAssertionBegin
);
1593 bool invert
= m_bodyDisjunction
->terms
[beginTerm
].invert();
1594 unsigned subpatternId
= m_bodyDisjunction
->terms
[beginTerm
].atom
.subpatternId
;
1596 m_bodyDisjunction
->terms
.append(ByteTerm(ByteTerm::TypeParentheticalAssertionEnd
, subpatternId
, false, invert
, inputPosition
));
1597 m_bodyDisjunction
->terms
[beginTerm
].atom
.parenthesesWidth
= endTerm
- beginTerm
;
1598 m_bodyDisjunction
->terms
[endTerm
].atom
.parenthesesWidth
= endTerm
- beginTerm
;
1599 m_bodyDisjunction
->terms
[endTerm
].frameLocation
= frameLocation
;
1601 m_bodyDisjunction
->terms
[beginTerm
].atom
.quantityCount
= quantityCount
;
1602 m_bodyDisjunction
->terms
[beginTerm
].atom
.quantityType
= quantityType
;
1603 m_bodyDisjunction
->terms
[endTerm
].atom
.quantityCount
= quantityCount
;
1604 m_bodyDisjunction
->terms
[endTerm
].atom
.quantityType
= quantityType
;
1607 void assertionDotStarEnclosure(bool bolAnchored
, bool eolAnchored
)
1609 m_bodyDisjunction
->terms
.append(ByteTerm::DotStarEnclosure(bolAnchored
, eolAnchored
));
1612 unsigned popParenthesesStack()
1614 ASSERT(m_parenthesesStack
.size());
1615 int stackEnd
= m_parenthesesStack
.size() - 1;
1616 unsigned beginTerm
= m_parenthesesStack
[stackEnd
].beginTerm
;
1617 m_currentAlternativeIndex
= m_parenthesesStack
[stackEnd
].savedAlternativeIndex
;
1618 m_parenthesesStack
.shrink(stackEnd
);
1620 ASSERT(beginTerm
< m_bodyDisjunction
->terms
.size());
1621 ASSERT(m_currentAlternativeIndex
< m_bodyDisjunction
->terms
.size());
1627 void dumpDisjunction(ByteDisjunction
* disjunction
)
1629 printf("ByteDisjunction(%p):\n\t", disjunction
);
1630 for (unsigned i
= 0; i
< disjunction
->terms
.size(); ++i
)
1631 printf("{ %d } ", disjunction
->terms
[i
].type
);
1636 void closeAlternative(int beginTerm
)
1638 int origBeginTerm
= beginTerm
;
1639 ASSERT(m_bodyDisjunction
->terms
[beginTerm
].type
== ByteTerm::TypeAlternativeBegin
);
1640 int endIndex
= m_bodyDisjunction
->terms
.size();
1642 unsigned frameLocation
= m_bodyDisjunction
->terms
[beginTerm
].frameLocation
;
1644 if (!m_bodyDisjunction
->terms
[beginTerm
].alternative
.next
)
1645 m_bodyDisjunction
->terms
.remove(beginTerm
);
1647 while (m_bodyDisjunction
->terms
[beginTerm
].alternative
.next
) {
1648 beginTerm
+= m_bodyDisjunction
->terms
[beginTerm
].alternative
.next
;
1649 ASSERT(m_bodyDisjunction
->terms
[beginTerm
].type
== ByteTerm::TypeAlternativeDisjunction
);
1650 m_bodyDisjunction
->terms
[beginTerm
].alternative
.end
= endIndex
- beginTerm
;
1651 m_bodyDisjunction
->terms
[beginTerm
].frameLocation
= frameLocation
;
1654 m_bodyDisjunction
->terms
[beginTerm
].alternative
.next
= origBeginTerm
- beginTerm
;
1656 m_bodyDisjunction
->terms
.append(ByteTerm::AlternativeEnd());
1657 m_bodyDisjunction
->terms
[endIndex
].frameLocation
= frameLocation
;
1661 void closeBodyAlternative()
1664 int origBeginTerm
= 0;
1665 ASSERT(m_bodyDisjunction
->terms
[beginTerm
].type
== ByteTerm::TypeBodyAlternativeBegin
);
1666 int endIndex
= m_bodyDisjunction
->terms
.size();
1668 unsigned frameLocation
= m_bodyDisjunction
->terms
[beginTerm
].frameLocation
;
1670 while (m_bodyDisjunction
->terms
[beginTerm
].alternative
.next
) {
1671 beginTerm
+= m_bodyDisjunction
->terms
[beginTerm
].alternative
.next
;
1672 ASSERT(m_bodyDisjunction
->terms
[beginTerm
].type
== ByteTerm::TypeBodyAlternativeDisjunction
);
1673 m_bodyDisjunction
->terms
[beginTerm
].alternative
.end
= endIndex
- beginTerm
;
1674 m_bodyDisjunction
->terms
[beginTerm
].frameLocation
= frameLocation
;
1677 m_bodyDisjunction
->terms
[beginTerm
].alternative
.next
= origBeginTerm
- beginTerm
;
1679 m_bodyDisjunction
->terms
.append(ByteTerm::BodyAlternativeEnd());
1680 m_bodyDisjunction
->terms
[endIndex
].frameLocation
= frameLocation
;
1683 void atomParenthesesSubpatternEnd(unsigned lastSubpatternId
, int inputPosition
, unsigned frameLocation
, unsigned quantityCount
, QuantifierType quantityType
, unsigned callFrameSize
= 0)
1685 unsigned beginTerm
= popParenthesesStack();
1686 closeAlternative(beginTerm
+ 1);
1687 unsigned endTerm
= m_bodyDisjunction
->terms
.size();
1689 ASSERT(m_bodyDisjunction
->terms
[beginTerm
].type
== ByteTerm::TypeParenthesesSubpatternOnceBegin
);
1691 ByteTerm
& parenthesesBegin
= m_bodyDisjunction
->terms
[beginTerm
];
1693 bool capture
= parenthesesBegin
.capture();
1694 unsigned subpatternId
= parenthesesBegin
.atom
.subpatternId
;
1696 unsigned numSubpatterns
= lastSubpatternId
- subpatternId
+ 1;
1697 ByteDisjunction
* parenthesesDisjunction
= new ByteDisjunction(numSubpatterns
, callFrameSize
);
1699 parenthesesDisjunction
->terms
.append(ByteTerm::SubpatternBegin());
1700 for (unsigned termInParentheses
= beginTerm
+ 1; termInParentheses
< endTerm
; ++termInParentheses
)
1701 parenthesesDisjunction
->terms
.append(m_bodyDisjunction
->terms
[termInParentheses
]);
1702 parenthesesDisjunction
->terms
.append(ByteTerm::SubpatternEnd());
1704 m_bodyDisjunction
->terms
.shrink(beginTerm
);
1706 m_allParenthesesInfo
.append(parenthesesDisjunction
);
1707 m_bodyDisjunction
->terms
.append(ByteTerm(ByteTerm::TypeParenthesesSubpattern
, subpatternId
, parenthesesDisjunction
, capture
, inputPosition
));
1709 m_bodyDisjunction
->terms
[beginTerm
].atom
.quantityCount
= quantityCount
;
1710 m_bodyDisjunction
->terms
[beginTerm
].atom
.quantityType
= quantityType
;
1711 m_bodyDisjunction
->terms
[beginTerm
].frameLocation
= frameLocation
;
1714 void atomParenthesesOnceEnd(int inputPosition
, unsigned frameLocation
, unsigned quantityCount
, QuantifierType quantityType
)
1716 unsigned beginTerm
= popParenthesesStack();
1717 closeAlternative(beginTerm
+ 1);
1718 unsigned endTerm
= m_bodyDisjunction
->terms
.size();
1720 ASSERT(m_bodyDisjunction
->terms
[beginTerm
].type
== ByteTerm::TypeParenthesesSubpatternOnceBegin
);
1722 bool capture
= m_bodyDisjunction
->terms
[beginTerm
].capture();
1723 unsigned subpatternId
= m_bodyDisjunction
->terms
[beginTerm
].atom
.subpatternId
;
1725 m_bodyDisjunction
->terms
.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceEnd
, subpatternId
, capture
, false, inputPosition
));
1726 m_bodyDisjunction
->terms
[beginTerm
].atom
.parenthesesWidth
= endTerm
- beginTerm
;
1727 m_bodyDisjunction
->terms
[endTerm
].atom
.parenthesesWidth
= endTerm
- beginTerm
;
1728 m_bodyDisjunction
->terms
[endTerm
].frameLocation
= frameLocation
;
1730 m_bodyDisjunction
->terms
[beginTerm
].atom
.quantityCount
= quantityCount
;
1731 m_bodyDisjunction
->terms
[beginTerm
].atom
.quantityType
= quantityType
;
1732 m_bodyDisjunction
->terms
[endTerm
].atom
.quantityCount
= quantityCount
;
1733 m_bodyDisjunction
->terms
[endTerm
].atom
.quantityType
= quantityType
;
1736 void atomParenthesesTerminalEnd(int inputPosition
, unsigned frameLocation
, unsigned quantityCount
, QuantifierType quantityType
)
1738 unsigned beginTerm
= popParenthesesStack();
1739 closeAlternative(beginTerm
+ 1);
1740 unsigned endTerm
= m_bodyDisjunction
->terms
.size();
1742 ASSERT(m_bodyDisjunction
->terms
[beginTerm
].type
== ByteTerm::TypeParenthesesSubpatternTerminalBegin
);
1744 bool capture
= m_bodyDisjunction
->terms
[beginTerm
].capture();
1745 unsigned subpatternId
= m_bodyDisjunction
->terms
[beginTerm
].atom
.subpatternId
;
1747 m_bodyDisjunction
->terms
.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalEnd
, subpatternId
, capture
, false, inputPosition
));
1748 m_bodyDisjunction
->terms
[beginTerm
].atom
.parenthesesWidth
= endTerm
- beginTerm
;
1749 m_bodyDisjunction
->terms
[endTerm
].atom
.parenthesesWidth
= endTerm
- beginTerm
;
1750 m_bodyDisjunction
->terms
[endTerm
].frameLocation
= frameLocation
;
1752 m_bodyDisjunction
->terms
[beginTerm
].atom
.quantityCount
= quantityCount
;
1753 m_bodyDisjunction
->terms
[beginTerm
].atom
.quantityType
= quantityType
;
1754 m_bodyDisjunction
->terms
[endTerm
].atom
.quantityCount
= quantityCount
;
1755 m_bodyDisjunction
->terms
[endTerm
].atom
.quantityType
= quantityType
;
1758 void regexBegin(unsigned numSubpatterns
, unsigned callFrameSize
, bool onceThrough
)
1760 m_bodyDisjunction
= adoptPtr(new ByteDisjunction(numSubpatterns
, callFrameSize
));
1761 m_bodyDisjunction
->terms
.append(ByteTerm::BodyAlternativeBegin(onceThrough
));
1762 m_bodyDisjunction
->terms
[0].frameLocation
= 0;
1763 m_currentAlternativeIndex
= 0;
1768 closeBodyAlternative();
1771 void alternativeBodyDisjunction(bool onceThrough
)
1773 int newAlternativeIndex
= m_bodyDisjunction
->terms
.size();
1774 m_bodyDisjunction
->terms
[m_currentAlternativeIndex
].alternative
.next
= newAlternativeIndex
- m_currentAlternativeIndex
;
1775 m_bodyDisjunction
->terms
.append(ByteTerm::BodyAlternativeDisjunction(onceThrough
));
1777 m_currentAlternativeIndex
= newAlternativeIndex
;
1780 void alternativeDisjunction()
1782 int newAlternativeIndex
= m_bodyDisjunction
->terms
.size();
1783 m_bodyDisjunction
->terms
[m_currentAlternativeIndex
].alternative
.next
= newAlternativeIndex
- m_currentAlternativeIndex
;
1784 m_bodyDisjunction
->terms
.append(ByteTerm::AlternativeDisjunction());
1786 m_currentAlternativeIndex
= newAlternativeIndex
;
1789 void emitDisjunction(PatternDisjunction
* disjunction
, unsigned inputCountAlreadyChecked
= 0, unsigned parenthesesInputCountAlreadyChecked
= 0)
1791 for (unsigned alt
= 0; alt
< disjunction
->m_alternatives
.size(); ++alt
) {
1792 unsigned currentCountAlreadyChecked
= inputCountAlreadyChecked
;
1794 PatternAlternative
* alternative
= disjunction
->m_alternatives
[alt
];
1797 if (disjunction
== m_pattern
.m_body
)
1798 alternativeBodyDisjunction(alternative
->onceThrough());
1800 alternativeDisjunction();
1803 unsigned minimumSize
= alternative
->m_minimumSize
;
1804 ASSERT(minimumSize
>= parenthesesInputCountAlreadyChecked
);
1805 unsigned countToCheck
= minimumSize
- parenthesesInputCountAlreadyChecked
;
1808 checkInput(countToCheck
);
1809 currentCountAlreadyChecked
+= countToCheck
;
1812 for (unsigned i
= 0; i
< alternative
->m_terms
.size(); ++i
) {
1813 PatternTerm
& term
= alternative
->m_terms
[i
];
1815 switch (term
.type
) {
1816 case PatternTerm::TypeAssertionBOL
:
1817 assertionBOL(term
.inputPosition
- currentCountAlreadyChecked
);
1820 case PatternTerm::TypeAssertionEOL
:
1821 assertionEOL(term
.inputPosition
- currentCountAlreadyChecked
);
1824 case PatternTerm::TypeAssertionWordBoundary
:
1825 assertionWordBoundary(term
.invert(), term
.inputPosition
- currentCountAlreadyChecked
);
1828 case PatternTerm::TypePatternCharacter
:
1829 atomPatternCharacter(term
.patternCharacter
, term
.inputPosition
- currentCountAlreadyChecked
, term
.frameLocation
, term
.quantityCount
, term
.quantityType
);
1832 case PatternTerm::TypeCharacterClass
:
1833 atomCharacterClass(term
.characterClass
, term
.invert(), term
.inputPosition
- currentCountAlreadyChecked
, term
.frameLocation
, term
.quantityCount
, term
.quantityType
);
1836 case PatternTerm::TypeBackReference
:
1837 atomBackReference(term
.backReferenceSubpatternId
, term
.inputPosition
- currentCountAlreadyChecked
, term
.frameLocation
, term
.quantityCount
, term
.quantityType
);
1840 case PatternTerm::TypeForwardReference
:
1843 case PatternTerm::TypeParenthesesSubpattern
: {
1844 unsigned disjunctionAlreadyCheckedCount
= 0;
1845 if (term
.quantityCount
== 1 && !term
.parentheses
.isCopy
) {
1846 unsigned alternativeFrameLocation
= term
.frameLocation
;
1847 // For QuantifierFixedCount we pre-check the minimum size; for greedy/non-greedy we reserve a slot in the frame.
1848 if (term
.quantityType
== QuantifierFixedCount
)
1849 disjunctionAlreadyCheckedCount
= term
.parentheses
.disjunction
->m_minimumSize
;
1851 alternativeFrameLocation
+= YarrStackSpaceForBackTrackInfoParenthesesOnce
;
1852 unsigned delegateEndInputOffset
= term
.inputPosition
- currentCountAlreadyChecked
;
1853 atomParenthesesOnceBegin(term
.parentheses
.subpatternId
, term
.capture(), delegateEndInputOffset
- disjunctionAlreadyCheckedCount
, term
.frameLocation
, alternativeFrameLocation
);
1854 emitDisjunction(term
.parentheses
.disjunction
, currentCountAlreadyChecked
, disjunctionAlreadyCheckedCount
);
1855 atomParenthesesOnceEnd(delegateEndInputOffset
, term
.frameLocation
, term
.quantityCount
, term
.quantityType
);
1856 } else if (term
.parentheses
.isTerminal
) {
1857 unsigned delegateEndInputOffset
= term
.inputPosition
- currentCountAlreadyChecked
;
1858 atomParenthesesTerminalBegin(term
.parentheses
.subpatternId
, term
.capture(), delegateEndInputOffset
- disjunctionAlreadyCheckedCount
, term
.frameLocation
, term
.frameLocation
+ YarrStackSpaceForBackTrackInfoParenthesesOnce
);
1859 emitDisjunction(term
.parentheses
.disjunction
, currentCountAlreadyChecked
, disjunctionAlreadyCheckedCount
);
1860 atomParenthesesTerminalEnd(delegateEndInputOffset
, term
.frameLocation
, term
.quantityCount
, term
.quantityType
);
1862 unsigned delegateEndInputOffset
= term
.inputPosition
- currentCountAlreadyChecked
;
1863 atomParenthesesSubpatternBegin(term
.parentheses
.subpatternId
, term
.capture(), delegateEndInputOffset
- disjunctionAlreadyCheckedCount
, term
.frameLocation
, 0);
1864 emitDisjunction(term
.parentheses
.disjunction
, currentCountAlreadyChecked
, 0);
1865 atomParenthesesSubpatternEnd(term
.parentheses
.lastSubpatternId
, delegateEndInputOffset
, term
.frameLocation
, term
.quantityCount
, term
.quantityType
, term
.parentheses
.disjunction
->m_callFrameSize
);
1870 case PatternTerm::TypeParentheticalAssertion
: {
1871 unsigned alternativeFrameLocation
= term
.frameLocation
+ YarrStackSpaceForBackTrackInfoParentheticalAssertion
;
1873 ASSERT(currentCountAlreadyChecked
>= static_cast<unsigned>(term
.inputPosition
));
1874 unsigned positiveInputOffset
= currentCountAlreadyChecked
- static_cast<unsigned>(term
.inputPosition
);
1875 unsigned uncheckAmount
= 0;
1876 if (positiveInputOffset
> term
.parentheses
.disjunction
->m_minimumSize
) {
1877 uncheckAmount
= positiveInputOffset
- term
.parentheses
.disjunction
->m_minimumSize
;
1878 uncheckInput(uncheckAmount
);
1879 currentCountAlreadyChecked
-= uncheckAmount
;
1882 atomParentheticalAssertionBegin(term
.parentheses
.subpatternId
, term
.invert(), term
.frameLocation
, alternativeFrameLocation
);
1883 emitDisjunction(term
.parentheses
.disjunction
, currentCountAlreadyChecked
, positiveInputOffset
- uncheckAmount
);
1884 atomParentheticalAssertionEnd(0, term
.frameLocation
, term
.quantityCount
, term
.quantityType
);
1885 if (uncheckAmount
) {
1886 checkInput(uncheckAmount
);
1887 currentCountAlreadyChecked
+= uncheckAmount
;
1892 case PatternTerm::TypeDotStarEnclosure
:
1893 assertionDotStarEnclosure(term
.anchors
.bolAnchor
, term
.anchors
.eolAnchor
);
1901 YarrPattern
& m_pattern
;
1902 OwnPtr
<ByteDisjunction
> m_bodyDisjunction
;
1903 unsigned m_currentAlternativeIndex
;
1904 Vector
<ParenthesesStackEntry
> m_parenthesesStack
;
1905 Vector
<ByteDisjunction
*> m_allParenthesesInfo
;
1908 PassOwnPtr
<BytecodePattern
> byteCompile(YarrPattern
& pattern
, BumpPointerAllocator
* allocator
)
1910 return ByteCompiler(pattern
).compile(allocator
);
1913 int interpret(BytecodePattern
* bytecode
, const UChar
* input
, unsigned start
, unsigned length
, int* output
)
1915 return Interpreter(bytecode
, output
, input
, start
, length
).interpret();
1918 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoPatternCharacter
) == (YarrStackSpaceForBackTrackInfoPatternCharacter
* sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoPatternCharacter
);
1919 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoCharacterClass
) == (YarrStackSpaceForBackTrackInfoCharacterClass
* sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoCharacterClass
);
1920 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoBackReference
) == (YarrStackSpaceForBackTrackInfoBackReference
* sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoBackReference
);
1921 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoAlternative
) == (YarrStackSpaceForBackTrackInfoAlternative
* sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoAlternative
);
1922 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParentheticalAssertion
) == (YarrStackSpaceForBackTrackInfoParentheticalAssertion
* sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheticalAssertion
);
1923 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParenthesesOnce
) == (YarrStackSpaceForBackTrackInfoParenthesesOnce
* sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParenthesesOnce
);
1924 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParentheses
) == (YarrStackSpaceForBackTrackInfoParentheses
* sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheses
);