2 * Copyright (C) 2009 Apple Inc. All rights reserved.
3 * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 #include "YarrInterpreter.h"
32 #include "YarrCanonicalizeUCS2.h"
33 #include <wtf/BumpPointerAllocator.h>
34 #include <wtf/DataLog.h>
35 #include <wtf/text/CString.h>
43 namespace JSC
{ namespace Yarr
{
45 template<typename CharType
>
48 struct ParenthesesDisjunctionContext
;
50 struct BackTrackInfoPatternCharacter
{
51 uintptr_t matchAmount
;
53 struct BackTrackInfoCharacterClass
{
54 uintptr_t matchAmount
;
56 struct BackTrackInfoBackReference
{
57 uintptr_t begin
; // Not really needed for greedy quantifiers.
58 uintptr_t matchAmount
; // Not really needed for fixed quantifiers.
60 struct BackTrackInfoAlternative
{
63 struct BackTrackInfoParentheticalAssertion
{
66 struct BackTrackInfoParenthesesOnce
{
69 struct BackTrackInfoParenthesesTerminal
{
72 struct BackTrackInfoParentheses
{
73 uintptr_t matchAmount
;
74 ParenthesesDisjunctionContext
* lastContext
;
77 static inline void appendParenthesesDisjunctionContext(BackTrackInfoParentheses
* backTrack
, ParenthesesDisjunctionContext
* context
)
79 context
->next
= backTrack
->lastContext
;
80 backTrack
->lastContext
= context
;
81 ++backTrack
->matchAmount
;
84 static inline void popParenthesesDisjunctionContext(BackTrackInfoParentheses
* backTrack
)
86 ASSERT(backTrack
->matchAmount
);
87 ASSERT(backTrack
->lastContext
);
88 backTrack
->lastContext
= backTrack
->lastContext
->next
;
89 --backTrack
->matchAmount
;
92 struct DisjunctionContext
99 void* operator new(size_t, void* where
)
110 DisjunctionContext
* allocDisjunctionContext(ByteDisjunction
* disjunction
)
112 size_t size
= sizeof(DisjunctionContext
) - sizeof(uintptr_t) + disjunction
->m_frameSize
* sizeof(uintptr_t);
113 allocatorPool
= allocatorPool
->ensureCapacity(size
);
116 return new (allocatorPool
->alloc(size
)) DisjunctionContext();
119 void freeDisjunctionContext(DisjunctionContext
* context
)
121 allocatorPool
= allocatorPool
->dealloc(context
);
124 struct ParenthesesDisjunctionContext
126 ParenthesesDisjunctionContext(unsigned* output
, ByteTerm
& term
)
129 unsigned firstSubpatternId
= term
.atom
.subpatternId
;
130 unsigned numNestedSubpatterns
= term
.atom
.parenthesesDisjunction
->m_numSubpatterns
;
132 for (unsigned i
= 0; i
< (numNestedSubpatterns
<< 1); ++i
) {
133 subpatternBackup
[i
] = output
[(firstSubpatternId
<< 1) + i
];
134 output
[(firstSubpatternId
<< 1) + i
] = offsetNoMatch
;
137 new (getDisjunctionContext(term
)) DisjunctionContext();
140 void* operator new(size_t, void* where
)
145 void restoreOutput(unsigned* output
, unsigned firstSubpatternId
, unsigned numNestedSubpatterns
)
147 for (unsigned i
= 0; i
< (numNestedSubpatterns
<< 1); ++i
)
148 output
[(firstSubpatternId
<< 1) + i
] = subpatternBackup
[i
];
151 DisjunctionContext
* getDisjunctionContext(ByteTerm
& term
)
153 return reinterpret_cast<DisjunctionContext
*>(&(subpatternBackup
[term
.atom
.parenthesesDisjunction
->m_numSubpatterns
<< 1]));
156 ParenthesesDisjunctionContext
* next
;
157 unsigned subpatternBackup
[1];
160 ParenthesesDisjunctionContext
* allocParenthesesDisjunctionContext(ByteDisjunction
* disjunction
, unsigned* output
, ByteTerm
& term
)
162 size_t size
= sizeof(ParenthesesDisjunctionContext
) - sizeof(unsigned) + (term
.atom
.parenthesesDisjunction
->m_numSubpatterns
<< 1) * sizeof(unsigned) + sizeof(DisjunctionContext
) - sizeof(uintptr_t) + disjunction
->m_frameSize
* sizeof(uintptr_t);
163 allocatorPool
= allocatorPool
->ensureCapacity(size
);
166 return new (allocatorPool
->alloc(size
)) ParenthesesDisjunctionContext(output
, term
);
169 void freeParenthesesDisjunctionContext(ParenthesesDisjunctionContext
* context
)
171 allocatorPool
= allocatorPool
->dealloc(context
);
176 InputStream(const CharType
* input
, unsigned start
, unsigned length
)
188 void rewind(unsigned amount
)
190 ASSERT(pos
>= amount
);
196 ASSERT(pos
< length
);
204 ASSERT(pos
+ 1 < length
);
205 return input
[pos
] | input
[pos
+ 1] << 16;
208 int readChecked(unsigned negativePositionOffest
)
210 if (pos
< negativePositionOffest
)
212 unsigned p
= pos
- negativePositionOffest
;
217 int reread(unsigned from
)
219 ASSERT(from
< length
);
225 ASSERT(!(pos
> length
));
227 return input
[pos
- 1];
236 void setPos(unsigned p
)
248 return pos
== length
;
256 bool checkInput(unsigned count
)
258 if (((pos
+ count
) <= length
) && ((pos
+ count
) >= pos
)) {
265 void uncheckInput(unsigned count
)
272 bool atStart(unsigned negativePositionOffest
)
274 return pos
== negativePositionOffest
;
277 bool atEnd(unsigned negativePositionOffest
)
279 if (pos
< negativePositionOffest
)
281 return (pos
- negativePositionOffest
) == length
;
284 bool isAvailableInput(unsigned offset
)
286 return (((pos
+ offset
) <= length
) && ((pos
+ offset
) >= pos
));
290 const CharType
* input
;
295 bool testCharacterClass(CharacterClass
* characterClass
, int ch
)
298 for (unsigned i
= 0; i
< characterClass
->m_matchesUnicode
.size(); ++i
)
299 if (ch
== characterClass
->m_matchesUnicode
[i
])
301 for (unsigned i
= 0; i
< characterClass
->m_rangesUnicode
.size(); ++i
)
302 if ((ch
>= characterClass
->m_rangesUnicode
[i
].begin
) && (ch
<= characterClass
->m_rangesUnicode
[i
].end
))
305 for (unsigned i
= 0; i
< characterClass
->m_matches
.size(); ++i
)
306 if (ch
== characterClass
->m_matches
[i
])
308 for (unsigned i
= 0; i
< characterClass
->m_ranges
.size(); ++i
)
309 if ((ch
>= characterClass
->m_ranges
[i
].begin
) && (ch
<= characterClass
->m_ranges
[i
].end
))
316 bool checkCharacter(int testChar
, unsigned negativeInputOffset
)
318 return testChar
== input
.readChecked(negativeInputOffset
);
321 bool checkCasedCharacter(int loChar
, int hiChar
, unsigned negativeInputOffset
)
323 int ch
= input
.readChecked(negativeInputOffset
);
324 return (loChar
== ch
) || (hiChar
== ch
);
327 bool checkCharacterClass(CharacterClass
* characterClass
, bool invert
, unsigned negativeInputOffset
)
329 bool match
= testCharacterClass(characterClass
, input
.readChecked(negativeInputOffset
));
330 return invert
? !match
: match
;
333 bool tryConsumeBackReference(int matchBegin
, int matchEnd
, unsigned negativeInputOffset
)
335 unsigned matchSize
= (unsigned)(matchEnd
- matchBegin
);
337 if (!input
.checkInput(matchSize
))
340 if (pattern
->m_ignoreCase
) {
341 for (unsigned i
= 0; i
< matchSize
; ++i
) {
342 int oldCh
= input
.reread(matchBegin
+ i
);
343 int ch
= input
.readChecked(negativeInputOffset
+ matchSize
- i
);
348 // The definition for canonicalize (see ES 5.1, 15.10.2.8) means that
349 // unicode values are never allowed to match against ascii ones.
350 if (isASCII(oldCh
) || isASCII(ch
)) {
351 if (toASCIIUpper(oldCh
) == toASCIIUpper(ch
))
353 } else if (areCanonicallyEquivalent(oldCh
, ch
))
356 input
.uncheckInput(matchSize
);
360 for (unsigned i
= 0; i
< matchSize
; ++i
) {
361 if (!checkCharacter(input
.reread(matchBegin
+ i
), negativeInputOffset
+ matchSize
- i
)) {
362 input
.uncheckInput(matchSize
);
371 bool matchAssertionBOL(ByteTerm
& term
)
373 return (input
.atStart(term
.inputPosition
)) || (pattern
->m_multiline
&& testCharacterClass(pattern
->newlineCharacterClass
, input
.readChecked(term
.inputPosition
+ 1)));
376 bool matchAssertionEOL(ByteTerm
& term
)
378 if (term
.inputPosition
)
379 return (input
.atEnd(term
.inputPosition
)) || (pattern
->m_multiline
&& testCharacterClass(pattern
->newlineCharacterClass
, input
.readChecked(term
.inputPosition
)));
381 return (input
.atEnd()) || (pattern
->m_multiline
&& testCharacterClass(pattern
->newlineCharacterClass
, input
.read()));
384 bool matchAssertionWordBoundary(ByteTerm
& term
)
386 bool prevIsWordchar
= !input
.atStart(term
.inputPosition
) && testCharacterClass(pattern
->wordcharCharacterClass
, input
.readChecked(term
.inputPosition
+ 1));
388 if (term
.inputPosition
)
389 readIsWordchar
= !input
.atEnd(term
.inputPosition
) && testCharacterClass(pattern
->wordcharCharacterClass
, input
.readChecked(term
.inputPosition
));
391 readIsWordchar
= !input
.atEnd() && testCharacterClass(pattern
->wordcharCharacterClass
, input
.read());
393 bool wordBoundary
= prevIsWordchar
!= readIsWordchar
;
394 return term
.invert() ? !wordBoundary
: wordBoundary
;
397 bool backtrackPatternCharacter(ByteTerm
& term
, DisjunctionContext
* context
)
399 BackTrackInfoPatternCharacter
* backTrack
= reinterpret_cast<BackTrackInfoPatternCharacter
*>(context
->frame
+ term
.frameLocation
);
401 switch (term
.atom
.quantityType
) {
402 case QuantifierFixedCount
:
405 case QuantifierGreedy
:
406 if (backTrack
->matchAmount
) {
407 --backTrack
->matchAmount
;
408 input
.uncheckInput(1);
413 case QuantifierNonGreedy
:
414 if ((backTrack
->matchAmount
< term
.atom
.quantityCount
) && input
.checkInput(1)) {
415 ++backTrack
->matchAmount
;
416 if (checkCharacter(term
.atom
.patternCharacter
, term
.inputPosition
+ 1))
419 input
.uncheckInput(backTrack
->matchAmount
);
426 bool backtrackPatternCasedCharacter(ByteTerm
& term
, DisjunctionContext
* context
)
428 BackTrackInfoPatternCharacter
* backTrack
= reinterpret_cast<BackTrackInfoPatternCharacter
*>(context
->frame
+ term
.frameLocation
);
430 switch (term
.atom
.quantityType
) {
431 case QuantifierFixedCount
:
434 case QuantifierGreedy
:
435 if (backTrack
->matchAmount
) {
436 --backTrack
->matchAmount
;
437 input
.uncheckInput(1);
442 case QuantifierNonGreedy
:
443 if ((backTrack
->matchAmount
< term
.atom
.quantityCount
) && input
.checkInput(1)) {
444 ++backTrack
->matchAmount
;
445 if (checkCasedCharacter(term
.atom
.casedCharacter
.lo
, term
.atom
.casedCharacter
.hi
, term
.inputPosition
+ 1))
448 input
.uncheckInput(backTrack
->matchAmount
);
455 bool matchCharacterClass(ByteTerm
& term
, DisjunctionContext
* context
)
457 ASSERT(term
.type
== ByteTerm::TypeCharacterClass
);
458 BackTrackInfoPatternCharacter
* backTrack
= reinterpret_cast<BackTrackInfoPatternCharacter
*>(context
->frame
+ term
.frameLocation
);
460 switch (term
.atom
.quantityType
) {
461 case QuantifierFixedCount
: {
462 for (unsigned matchAmount
= 0; matchAmount
< term
.atom
.quantityCount
; ++matchAmount
) {
463 if (!checkCharacterClass(term
.atom
.characterClass
, term
.invert(), term
.inputPosition
- matchAmount
))
469 case QuantifierGreedy
: {
470 unsigned matchAmount
= 0;
471 while ((matchAmount
< term
.atom
.quantityCount
) && input
.checkInput(1)) {
472 if (!checkCharacterClass(term
.atom
.characterClass
, term
.invert(), term
.inputPosition
+ 1)) {
473 input
.uncheckInput(1);
478 backTrack
->matchAmount
= matchAmount
;
483 case QuantifierNonGreedy
:
484 backTrack
->matchAmount
= 0;
488 ASSERT_NOT_REACHED();
492 bool backtrackCharacterClass(ByteTerm
& term
, DisjunctionContext
* context
)
494 ASSERT(term
.type
== ByteTerm::TypeCharacterClass
);
495 BackTrackInfoPatternCharacter
* backTrack
= reinterpret_cast<BackTrackInfoPatternCharacter
*>(context
->frame
+ term
.frameLocation
);
497 switch (term
.atom
.quantityType
) {
498 case QuantifierFixedCount
:
501 case QuantifierGreedy
:
502 if (backTrack
->matchAmount
) {
503 --backTrack
->matchAmount
;
504 input
.uncheckInput(1);
509 case QuantifierNonGreedy
:
510 if ((backTrack
->matchAmount
< term
.atom
.quantityCount
) && input
.checkInput(1)) {
511 ++backTrack
->matchAmount
;
512 if (checkCharacterClass(term
.atom
.characterClass
, term
.invert(), term
.inputPosition
+ 1))
515 input
.uncheckInput(backTrack
->matchAmount
);
522 bool matchBackReference(ByteTerm
& term
, DisjunctionContext
* context
)
524 ASSERT(term
.type
== ByteTerm::TypeBackReference
);
525 BackTrackInfoBackReference
* backTrack
= reinterpret_cast<BackTrackInfoBackReference
*>(context
->frame
+ term
.frameLocation
);
527 unsigned matchBegin
= output
[(term
.atom
.subpatternId
<< 1)];
528 unsigned matchEnd
= output
[(term
.atom
.subpatternId
<< 1) + 1];
530 // If the end position of the referenced match hasn't set yet then the backreference in the same parentheses where it references to that.
531 // In this case the result of match is empty string like when it references to a parentheses with zero-width match.
533 if (matchEnd
== offsetNoMatch
)
536 if (matchBegin
== offsetNoMatch
)
539 ASSERT(matchBegin
<= matchEnd
);
541 if (matchBegin
== matchEnd
)
544 switch (term
.atom
.quantityType
) {
545 case QuantifierFixedCount
: {
546 backTrack
->begin
= input
.getPos();
547 for (unsigned matchAmount
= 0; matchAmount
< term
.atom
.quantityCount
; ++matchAmount
) {
548 if (!tryConsumeBackReference(matchBegin
, matchEnd
, term
.inputPosition
)) {
549 input
.setPos(backTrack
->begin
);
556 case QuantifierGreedy
: {
557 unsigned matchAmount
= 0;
558 while ((matchAmount
< term
.atom
.quantityCount
) && tryConsumeBackReference(matchBegin
, matchEnd
, term
.inputPosition
))
560 backTrack
->matchAmount
= matchAmount
;
564 case QuantifierNonGreedy
:
565 backTrack
->begin
= input
.getPos();
566 backTrack
->matchAmount
= 0;
570 ASSERT_NOT_REACHED();
574 bool backtrackBackReference(ByteTerm
& term
, DisjunctionContext
* context
)
576 ASSERT(term
.type
== ByteTerm::TypeBackReference
);
577 BackTrackInfoBackReference
* backTrack
= reinterpret_cast<BackTrackInfoBackReference
*>(context
->frame
+ term
.frameLocation
);
579 unsigned matchBegin
= output
[(term
.atom
.subpatternId
<< 1)];
580 unsigned matchEnd
= output
[(term
.atom
.subpatternId
<< 1) + 1];
582 if (matchBegin
== offsetNoMatch
)
585 ASSERT(matchBegin
<= matchEnd
);
587 if (matchBegin
== matchEnd
)
590 switch (term
.atom
.quantityType
) {
591 case QuantifierFixedCount
:
592 // for quantityCount == 1, could rewind.
593 input
.setPos(backTrack
->begin
);
596 case QuantifierGreedy
:
597 if (backTrack
->matchAmount
) {
598 --backTrack
->matchAmount
;
599 input
.rewind(matchEnd
- matchBegin
);
604 case QuantifierNonGreedy
:
605 if ((backTrack
->matchAmount
< term
.atom
.quantityCount
) && tryConsumeBackReference(matchBegin
, matchEnd
, term
.inputPosition
)) {
606 ++backTrack
->matchAmount
;
609 input
.setPos(backTrack
->begin
);
616 void recordParenthesesMatch(ByteTerm
& term
, ParenthesesDisjunctionContext
* context
)
618 if (term
.capture()) {
619 unsigned subpatternId
= term
.atom
.subpatternId
;
620 output
[(subpatternId
<< 1)] = context
->getDisjunctionContext(term
)->matchBegin
+ term
.inputPosition
;
621 output
[(subpatternId
<< 1) + 1] = context
->getDisjunctionContext(term
)->matchEnd
+ term
.inputPosition
;
624 void resetMatches(ByteTerm
& term
, ParenthesesDisjunctionContext
* context
)
626 unsigned firstSubpatternId
= term
.atom
.subpatternId
;
627 unsigned count
= term
.atom
.parenthesesDisjunction
->m_numSubpatterns
;
628 context
->restoreOutput(output
, firstSubpatternId
, count
);
630 JSRegExpResult
parenthesesDoBacktrack(ByteTerm
& term
, BackTrackInfoParentheses
* backTrack
)
632 while (backTrack
->matchAmount
) {
633 ParenthesesDisjunctionContext
* context
= backTrack
->lastContext
;
635 JSRegExpResult result
= matchDisjunction(term
.atom
.parenthesesDisjunction
, context
->getDisjunctionContext(term
), true);
636 if (result
== JSRegExpMatch
)
637 return JSRegExpMatch
;
639 resetMatches(term
, context
);
640 popParenthesesDisjunctionContext(backTrack
);
641 freeParenthesesDisjunctionContext(context
);
643 if (result
!= JSRegExpNoMatch
)
647 return JSRegExpNoMatch
;
650 bool matchParenthesesOnceBegin(ByteTerm
& term
, DisjunctionContext
* context
)
652 ASSERT(term
.type
== ByteTerm::TypeParenthesesSubpatternOnceBegin
);
653 ASSERT(term
.atom
.quantityCount
== 1);
655 BackTrackInfoParenthesesOnce
* backTrack
= reinterpret_cast<BackTrackInfoParenthesesOnce
*>(context
->frame
+ term
.frameLocation
);
657 switch (term
.atom
.quantityType
) {
658 case QuantifierGreedy
: {
659 // set this speculatively; if we get to the parens end this will be true.
660 backTrack
->begin
= input
.getPos();
663 case QuantifierNonGreedy
: {
664 backTrack
->begin
= notFound
;
665 context
->term
+= term
.atom
.parenthesesWidth
;
668 case QuantifierFixedCount
:
672 if (term
.capture()) {
673 unsigned subpatternId
= term
.atom
.subpatternId
;
674 output
[(subpatternId
<< 1)] = input
.getPos() - term
.inputPosition
;
680 bool matchParenthesesOnceEnd(ByteTerm
& term
, DisjunctionContext
* context
)
682 ASSERT(term
.type
== ByteTerm::TypeParenthesesSubpatternOnceEnd
);
683 ASSERT(term
.atom
.quantityCount
== 1);
685 if (term
.capture()) {
686 unsigned subpatternId
= term
.atom
.subpatternId
;
687 output
[(subpatternId
<< 1) + 1] = input
.getPos() + term
.inputPosition
;
690 if (term
.atom
.quantityType
== QuantifierFixedCount
)
693 BackTrackInfoParenthesesOnce
* backTrack
= reinterpret_cast<BackTrackInfoParenthesesOnce
*>(context
->frame
+ term
.frameLocation
);
694 return backTrack
->begin
!= input
.getPos();
697 bool backtrackParenthesesOnceBegin(ByteTerm
& term
, DisjunctionContext
* context
)
699 ASSERT(term
.type
== ByteTerm::TypeParenthesesSubpatternOnceBegin
);
700 ASSERT(term
.atom
.quantityCount
== 1);
702 BackTrackInfoParenthesesOnce
* backTrack
= reinterpret_cast<BackTrackInfoParenthesesOnce
*>(context
->frame
+ term
.frameLocation
);
704 if (term
.capture()) {
705 unsigned subpatternId
= term
.atom
.subpatternId
;
706 output
[(subpatternId
<< 1)] = offsetNoMatch
;
707 output
[(subpatternId
<< 1) + 1] = offsetNoMatch
;
710 switch (term
.atom
.quantityType
) {
711 case QuantifierGreedy
:
712 // if we backtrack to this point, there is another chance - try matching nothing.
713 ASSERT(backTrack
->begin
!= notFound
);
714 backTrack
->begin
= notFound
;
715 context
->term
+= term
.atom
.parenthesesWidth
;
717 case QuantifierNonGreedy
:
718 ASSERT(backTrack
->begin
!= notFound
);
719 case QuantifierFixedCount
:
726 bool backtrackParenthesesOnceEnd(ByteTerm
& term
, DisjunctionContext
* context
)
728 ASSERT(term
.type
== ByteTerm::TypeParenthesesSubpatternOnceEnd
);
729 ASSERT(term
.atom
.quantityCount
== 1);
731 BackTrackInfoParenthesesOnce
* backTrack
= reinterpret_cast<BackTrackInfoParenthesesOnce
*>(context
->frame
+ term
.frameLocation
);
733 switch (term
.atom
.quantityType
) {
734 case QuantifierGreedy
:
735 if (backTrack
->begin
== notFound
) {
736 context
->term
-= term
.atom
.parenthesesWidth
;
739 case QuantifierNonGreedy
:
740 if (backTrack
->begin
== notFound
) {
741 backTrack
->begin
= input
.getPos();
742 if (term
.capture()) {
743 // Technically this access to inputPosition should be accessing the begin term's
744 // inputPosition, but for repeats other than fixed these values should be
745 // the same anyway! (We don't pre-check for greedy or non-greedy matches.)
746 ASSERT((&term
- term
.atom
.parenthesesWidth
)->type
== ByteTerm::TypeParenthesesSubpatternOnceBegin
);
747 ASSERT((&term
- term
.atom
.parenthesesWidth
)->inputPosition
== term
.inputPosition
);
748 unsigned subpatternId
= term
.atom
.subpatternId
;
749 output
[subpatternId
<< 1] = input
.getPos() + term
.inputPosition
;
751 context
->term
-= term
.atom
.parenthesesWidth
;
754 case QuantifierFixedCount
:
761 bool matchParenthesesTerminalBegin(ByteTerm
& term
, DisjunctionContext
* context
)
763 ASSERT(term
.type
== ByteTerm::TypeParenthesesSubpatternTerminalBegin
);
764 ASSERT(term
.atom
.quantityType
== QuantifierGreedy
);
765 ASSERT(term
.atom
.quantityCount
== quantifyInfinite
);
766 ASSERT(!term
.capture());
768 BackTrackInfoParenthesesTerminal
* backTrack
= reinterpret_cast<BackTrackInfoParenthesesTerminal
*>(context
->frame
+ term
.frameLocation
);
769 backTrack
->begin
= input
.getPos();
773 bool matchParenthesesTerminalEnd(ByteTerm
& term
, DisjunctionContext
* context
)
775 ASSERT(term
.type
== ByteTerm::TypeParenthesesSubpatternTerminalEnd
);
777 BackTrackInfoParenthesesTerminal
* backTrack
= reinterpret_cast<BackTrackInfoParenthesesTerminal
*>(context
->frame
+ term
.frameLocation
);
778 // Empty match is a failed match.
779 if (backTrack
->begin
== input
.getPos())
782 // Successful match! Okay, what's next? - loop around and try to match moar!
783 context
->term
-= (term
.atom
.parenthesesWidth
+ 1);
787 bool backtrackParenthesesTerminalBegin(ByteTerm
& term
, DisjunctionContext
* context
)
789 ASSERT(term
.type
== ByteTerm::TypeParenthesesSubpatternTerminalBegin
);
790 ASSERT(term
.atom
.quantityType
== QuantifierGreedy
);
791 ASSERT(term
.atom
.quantityCount
== quantifyInfinite
);
792 ASSERT(!term
.capture());
794 // If we backtrack to this point, we have failed to match this iteration of the parens.
795 // Since this is greedy / zero minimum a failed is also accepted as a match!
796 context
->term
+= term
.atom
.parenthesesWidth
;
800 bool backtrackParenthesesTerminalEnd(ByteTerm
&, DisjunctionContext
*)
802 // 'Terminal' parentheses are at the end of the regex, and as such a match past end
803 // should always be returned as a successful match - we should never backtrack to here.
804 ASSERT_NOT_REACHED();
808 bool matchParentheticalAssertionBegin(ByteTerm
& term
, DisjunctionContext
* context
)
810 ASSERT(term
.type
== ByteTerm::TypeParentheticalAssertionBegin
);
811 ASSERT(term
.atom
.quantityCount
== 1);
813 BackTrackInfoParentheticalAssertion
* backTrack
= reinterpret_cast<BackTrackInfoParentheticalAssertion
*>(context
->frame
+ term
.frameLocation
);
815 backTrack
->begin
= input
.getPos();
819 bool matchParentheticalAssertionEnd(ByteTerm
& term
, DisjunctionContext
* context
)
821 ASSERT(term
.type
== ByteTerm::TypeParentheticalAssertionEnd
);
822 ASSERT(term
.atom
.quantityCount
== 1);
824 BackTrackInfoParentheticalAssertion
* backTrack
= reinterpret_cast<BackTrackInfoParentheticalAssertion
*>(context
->frame
+ term
.frameLocation
);
826 input
.setPos(backTrack
->begin
);
828 // We've reached the end of the parens; if they are inverted, this is failure.
830 context
->term
-= term
.atom
.parenthesesWidth
;
837 bool backtrackParentheticalAssertionBegin(ByteTerm
& term
, DisjunctionContext
* context
)
839 ASSERT(term
.type
== ByteTerm::TypeParentheticalAssertionBegin
);
840 ASSERT(term
.atom
.quantityCount
== 1);
842 // We've failed to match parens; if they are inverted, this is win!
844 context
->term
+= term
.atom
.parenthesesWidth
;
851 bool backtrackParentheticalAssertionEnd(ByteTerm
& term
, DisjunctionContext
* context
)
853 ASSERT(term
.type
== ByteTerm::TypeParentheticalAssertionEnd
);
854 ASSERT(term
.atom
.quantityCount
== 1);
856 BackTrackInfoParentheticalAssertion
* backTrack
= reinterpret_cast<BackTrackInfoParentheticalAssertion
*>(context
->frame
+ term
.frameLocation
);
858 input
.setPos(backTrack
->begin
);
860 context
->term
-= term
.atom
.parenthesesWidth
;
864 JSRegExpResult
matchParentheses(ByteTerm
& term
, DisjunctionContext
* context
)
866 ASSERT(term
.type
== ByteTerm::TypeParenthesesSubpattern
);
868 BackTrackInfoParentheses
* backTrack
= reinterpret_cast<BackTrackInfoParentheses
*>(context
->frame
+ term
.frameLocation
);
869 ByteDisjunction
* disjunctionBody
= term
.atom
.parenthesesDisjunction
;
871 backTrack
->matchAmount
= 0;
872 backTrack
->lastContext
= 0;
874 switch (term
.atom
.quantityType
) {
875 case QuantifierFixedCount
: {
876 // While we haven't yet reached our fixed limit,
877 while (backTrack
->matchAmount
< term
.atom
.quantityCount
) {
878 // Try to do a match, and it it succeeds, add it to the list.
879 ParenthesesDisjunctionContext
* context
= allocParenthesesDisjunctionContext(disjunctionBody
, output
, term
);
880 JSRegExpResult result
= matchDisjunction(disjunctionBody
, context
->getDisjunctionContext(term
));
881 if (result
== JSRegExpMatch
)
882 appendParenthesesDisjunctionContext(backTrack
, context
);
884 // The match failed; try to find an alternate point to carry on from.
885 resetMatches(term
, context
);
886 freeParenthesesDisjunctionContext(context
);
888 if (result
!= JSRegExpNoMatch
)
890 JSRegExpResult backtrackResult
= parenthesesDoBacktrack(term
, backTrack
);
891 if (backtrackResult
!= JSRegExpMatch
)
892 return backtrackResult
;
896 ASSERT(backTrack
->matchAmount
== term
.atom
.quantityCount
);
897 ParenthesesDisjunctionContext
* context
= backTrack
->lastContext
;
898 recordParenthesesMatch(term
, context
);
899 return JSRegExpMatch
;
902 case QuantifierGreedy
: {
903 while (backTrack
->matchAmount
< term
.atom
.quantityCount
) {
904 ParenthesesDisjunctionContext
* context
= allocParenthesesDisjunctionContext(disjunctionBody
, output
, term
);
905 JSRegExpResult result
= matchNonZeroDisjunction(disjunctionBody
, context
->getDisjunctionContext(term
));
906 if (result
== JSRegExpMatch
)
907 appendParenthesesDisjunctionContext(backTrack
, context
);
909 resetMatches(term
, context
);
910 freeParenthesesDisjunctionContext(context
);
912 if (result
!= JSRegExpNoMatch
)
919 if (backTrack
->matchAmount
) {
920 ParenthesesDisjunctionContext
* context
= backTrack
->lastContext
;
921 recordParenthesesMatch(term
, context
);
923 return JSRegExpMatch
;
926 case QuantifierNonGreedy
:
927 return JSRegExpMatch
;
930 ASSERT_NOT_REACHED();
931 return JSRegExpErrorNoMatch
;
934 // Rules for backtracking differ depending on whether this is greedy or non-greedy.
936 // Greedy matches never should try just adding more - you should already have done
937 // the 'more' cases. Always backtrack, at least a leetle bit. However cases where
938 // you backtrack an item off the list needs checking, since we'll never have matched
939 // the one less case. Tracking forwards, still add as much as possible.
941 // Non-greedy, we've already done the one less case, so don't match on popping.
942 // We haven't done the one more case, so always try to add that.
944 JSRegExpResult
backtrackParentheses(ByteTerm
& term
, DisjunctionContext
* context
)
946 ASSERT(term
.type
== ByteTerm::TypeParenthesesSubpattern
);
948 BackTrackInfoParentheses
* backTrack
= reinterpret_cast<BackTrackInfoParentheses
*>(context
->frame
+ term
.frameLocation
);
949 ByteDisjunction
* disjunctionBody
= term
.atom
.parenthesesDisjunction
;
951 switch (term
.atom
.quantityType
) {
952 case QuantifierFixedCount
: {
953 ASSERT(backTrack
->matchAmount
== term
.atom
.quantityCount
);
955 ParenthesesDisjunctionContext
* context
= 0;
956 JSRegExpResult result
= parenthesesDoBacktrack(term
, backTrack
);
958 if (result
!= JSRegExpMatch
)
961 // While we haven't yet reached our fixed limit,
962 while (backTrack
->matchAmount
< term
.atom
.quantityCount
) {
963 // Try to do a match, and it it succeeds, add it to the list.
964 context
= allocParenthesesDisjunctionContext(disjunctionBody
, output
, term
);
965 result
= matchDisjunction(disjunctionBody
, context
->getDisjunctionContext(term
));
967 if (result
== JSRegExpMatch
)
968 appendParenthesesDisjunctionContext(backTrack
, context
);
970 // The match failed; try to find an alternate point to carry on from.
971 resetMatches(term
, context
);
972 freeParenthesesDisjunctionContext(context
);
974 if (result
!= JSRegExpNoMatch
)
976 JSRegExpResult backtrackResult
= parenthesesDoBacktrack(term
, backTrack
);
977 if (backtrackResult
!= JSRegExpMatch
)
978 return backtrackResult
;
982 ASSERT(backTrack
->matchAmount
== term
.atom
.quantityCount
);
983 context
= backTrack
->lastContext
;
984 recordParenthesesMatch(term
, context
);
985 return JSRegExpMatch
;
988 case QuantifierGreedy
: {
989 if (!backTrack
->matchAmount
)
990 return JSRegExpNoMatch
;
992 ParenthesesDisjunctionContext
* context
= backTrack
->lastContext
;
993 JSRegExpResult result
= matchNonZeroDisjunction(disjunctionBody
, context
->getDisjunctionContext(term
), true);
994 if (result
== JSRegExpMatch
) {
995 while (backTrack
->matchAmount
< term
.atom
.quantityCount
) {
996 ParenthesesDisjunctionContext
* context
= allocParenthesesDisjunctionContext(disjunctionBody
, output
, term
);
997 JSRegExpResult parenthesesResult
= matchNonZeroDisjunction(disjunctionBody
, context
->getDisjunctionContext(term
));
998 if (parenthesesResult
== JSRegExpMatch
)
999 appendParenthesesDisjunctionContext(backTrack
, context
);
1001 resetMatches(term
, context
);
1002 freeParenthesesDisjunctionContext(context
);
1004 if (parenthesesResult
!= JSRegExpNoMatch
)
1005 return parenthesesResult
;
1011 resetMatches(term
, context
);
1012 popParenthesesDisjunctionContext(backTrack
);
1013 freeParenthesesDisjunctionContext(context
);
1015 if (result
!= JSRegExpNoMatch
)
1019 if (backTrack
->matchAmount
) {
1020 ParenthesesDisjunctionContext
* context
= backTrack
->lastContext
;
1021 recordParenthesesMatch(term
, context
);
1023 return JSRegExpMatch
;
1026 case QuantifierNonGreedy
: {
1027 // If we've not reached the limit, try to add one more match.
1028 if (backTrack
->matchAmount
< term
.atom
.quantityCount
) {
1029 ParenthesesDisjunctionContext
* context
= allocParenthesesDisjunctionContext(disjunctionBody
, output
, term
);
1030 JSRegExpResult result
= matchNonZeroDisjunction(disjunctionBody
, context
->getDisjunctionContext(term
));
1031 if (result
== JSRegExpMatch
) {
1032 appendParenthesesDisjunctionContext(backTrack
, context
);
1033 recordParenthesesMatch(term
, context
);
1034 return JSRegExpMatch
;
1037 resetMatches(term
, context
);
1038 freeParenthesesDisjunctionContext(context
);
1040 if (result
!= JSRegExpNoMatch
)
1044 // Nope - okay backtrack looking for an alternative.
1045 while (backTrack
->matchAmount
) {
1046 ParenthesesDisjunctionContext
* context
= backTrack
->lastContext
;
1047 JSRegExpResult result
= matchNonZeroDisjunction(disjunctionBody
, context
->getDisjunctionContext(term
), true);
1048 if (result
== JSRegExpMatch
) {
1049 // successful backtrack! we're back in the game!
1050 if (backTrack
->matchAmount
) {
1051 context
= backTrack
->lastContext
;
1052 recordParenthesesMatch(term
, context
);
1054 return JSRegExpMatch
;
1057 // pop a match off the stack
1058 resetMatches(term
, context
);
1059 popParenthesesDisjunctionContext(backTrack
);
1060 freeParenthesesDisjunctionContext(context
);
1062 if (result
!= JSRegExpNoMatch
)
1066 return JSRegExpNoMatch
;
1070 ASSERT_NOT_REACHED();
1071 return JSRegExpErrorNoMatch
;
1074 bool matchDotStarEnclosure(ByteTerm
& term
, DisjunctionContext
* context
)
1077 unsigned matchBegin
= context
->matchBegin
;
1080 for (matchBegin
--; true; matchBegin
--) {
1081 if (testCharacterClass(pattern
->newlineCharacterClass
, input
.reread(matchBegin
))) {
1091 unsigned matchEnd
= input
.getPos();
1093 for (; (matchEnd
!= input
.end())
1094 && (!testCharacterClass(pattern
->newlineCharacterClass
, input
.reread(matchEnd
))); matchEnd
++) { }
1096 if (((matchBegin
&& term
.anchors
.m_bol
)
1097 || ((matchEnd
!= input
.end()) && term
.anchors
.m_eol
))
1098 && !pattern
->m_multiline
)
1101 context
->matchBegin
= matchBegin
;
1102 context
->matchEnd
= matchEnd
;
1106 #define MATCH_NEXT() { ++context->term; goto matchAgain; }
1107 #define BACKTRACK() { --context->term; goto backtrack; }
1108 #define currentTerm() (disjunction->terms[context->term])
1109 JSRegExpResult
matchDisjunction(ByteDisjunction
* disjunction
, DisjunctionContext
* context
, bool btrack
= false)
1111 if (!--remainingMatchCount
)
1112 return JSRegExpErrorHitLimit
;
1117 context
->matchBegin
= input
.getPos();
1121 ASSERT(context
->term
< static_cast<int>(disjunction
->terms
.size()));
1123 switch (currentTerm().type
) {
1124 case ByteTerm::TypeSubpatternBegin
:
1126 case ByteTerm::TypeSubpatternEnd
:
1127 context
->matchEnd
= input
.getPos();
1128 return JSRegExpMatch
;
1130 case ByteTerm::TypeBodyAlternativeBegin
:
1132 case ByteTerm::TypeBodyAlternativeDisjunction
:
1133 case ByteTerm::TypeBodyAlternativeEnd
:
1134 context
->matchEnd
= input
.getPos();
1135 return JSRegExpMatch
;
1137 case ByteTerm::TypeAlternativeBegin
:
1139 case ByteTerm::TypeAlternativeDisjunction
:
1140 case ByteTerm::TypeAlternativeEnd
: {
1141 int offset
= currentTerm().alternative
.end
;
1142 BackTrackInfoAlternative
* backTrack
= reinterpret_cast<BackTrackInfoAlternative
*>(context
->frame
+ currentTerm().frameLocation
);
1143 backTrack
->offset
= offset
;
1144 context
->term
+= offset
;
1148 case ByteTerm::TypeAssertionBOL
:
1149 if (matchAssertionBOL(currentTerm()))
1152 case ByteTerm::TypeAssertionEOL
:
1153 if (matchAssertionEOL(currentTerm()))
1156 case ByteTerm::TypeAssertionWordBoundary
:
1157 if (matchAssertionWordBoundary(currentTerm()))
1161 case ByteTerm::TypePatternCharacterOnce
:
1162 case ByteTerm::TypePatternCharacterFixed
: {
1163 for (unsigned matchAmount
= 0; matchAmount
< currentTerm().atom
.quantityCount
; ++matchAmount
) {
1164 if (!checkCharacter(currentTerm().atom
.patternCharacter
, currentTerm().inputPosition
- matchAmount
))
1169 case ByteTerm::TypePatternCharacterGreedy
: {
1170 BackTrackInfoPatternCharacter
* backTrack
= reinterpret_cast<BackTrackInfoPatternCharacter
*>(context
->frame
+ currentTerm().frameLocation
);
1171 unsigned matchAmount
= 0;
1172 while ((matchAmount
< currentTerm().atom
.quantityCount
) && input
.checkInput(1)) {
1173 if (!checkCharacter(currentTerm().atom
.patternCharacter
, currentTerm().inputPosition
+ 1)) {
1174 input
.uncheckInput(1);
1179 backTrack
->matchAmount
= matchAmount
;
1183 case ByteTerm::TypePatternCharacterNonGreedy
: {
1184 BackTrackInfoPatternCharacter
* backTrack
= reinterpret_cast<BackTrackInfoPatternCharacter
*>(context
->frame
+ currentTerm().frameLocation
);
1185 backTrack
->matchAmount
= 0;
1189 case ByteTerm::TypePatternCasedCharacterOnce
:
1190 case ByteTerm::TypePatternCasedCharacterFixed
: {
1191 for (unsigned matchAmount
= 0; matchAmount
< currentTerm().atom
.quantityCount
; ++matchAmount
) {
1192 if (!checkCasedCharacter(currentTerm().atom
.casedCharacter
.lo
, currentTerm().atom
.casedCharacter
.hi
, currentTerm().inputPosition
- matchAmount
))
1197 case ByteTerm::TypePatternCasedCharacterGreedy
: {
1198 BackTrackInfoPatternCharacter
* backTrack
= reinterpret_cast<BackTrackInfoPatternCharacter
*>(context
->frame
+ currentTerm().frameLocation
);
1199 unsigned matchAmount
= 0;
1200 while ((matchAmount
< currentTerm().atom
.quantityCount
) && input
.checkInput(1)) {
1201 if (!checkCasedCharacter(currentTerm().atom
.casedCharacter
.lo
, currentTerm().atom
.casedCharacter
.hi
, currentTerm().inputPosition
+ 1)) {
1202 input
.uncheckInput(1);
1207 backTrack
->matchAmount
= matchAmount
;
1211 case ByteTerm::TypePatternCasedCharacterNonGreedy
: {
1212 BackTrackInfoPatternCharacter
* backTrack
= reinterpret_cast<BackTrackInfoPatternCharacter
*>(context
->frame
+ currentTerm().frameLocation
);
1213 backTrack
->matchAmount
= 0;
1217 case ByteTerm::TypeCharacterClass
:
1218 if (matchCharacterClass(currentTerm(), context
))
1221 case ByteTerm::TypeBackReference
:
1222 if (matchBackReference(currentTerm(), context
))
1225 case ByteTerm::TypeParenthesesSubpattern
: {
1226 JSRegExpResult result
= matchParentheses(currentTerm(), context
);
1228 if (result
== JSRegExpMatch
) {
1230 } else if (result
!= JSRegExpNoMatch
)
1235 case ByteTerm::TypeParenthesesSubpatternOnceBegin
:
1236 if (matchParenthesesOnceBegin(currentTerm(), context
))
1239 case ByteTerm::TypeParenthesesSubpatternOnceEnd
:
1240 if (matchParenthesesOnceEnd(currentTerm(), context
))
1243 case ByteTerm::TypeParenthesesSubpatternTerminalBegin
:
1244 if (matchParenthesesTerminalBegin(currentTerm(), context
))
1247 case ByteTerm::TypeParenthesesSubpatternTerminalEnd
:
1248 if (matchParenthesesTerminalEnd(currentTerm(), context
))
1251 case ByteTerm::TypeParentheticalAssertionBegin
:
1252 if (matchParentheticalAssertionBegin(currentTerm(), context
))
1255 case ByteTerm::TypeParentheticalAssertionEnd
:
1256 if (matchParentheticalAssertionEnd(currentTerm(), context
))
1260 case ByteTerm::TypeCheckInput
:
1261 if (input
.checkInput(currentTerm().checkInputCount
))
1265 case ByteTerm::TypeUncheckInput
:
1266 input
.uncheckInput(currentTerm().checkInputCount
);
1269 case ByteTerm::TypeDotStarEnclosure
:
1270 if (matchDotStarEnclosure(currentTerm(), context
))
1271 return JSRegExpMatch
;
1275 // We should never fall-through to here.
1276 ASSERT_NOT_REACHED();
1279 ASSERT(context
->term
< static_cast<int>(disjunction
->terms
.size()));
1281 switch (currentTerm().type
) {
1282 case ByteTerm::TypeSubpatternBegin
:
1283 return JSRegExpNoMatch
;
1284 case ByteTerm::TypeSubpatternEnd
:
1285 ASSERT_NOT_REACHED();
1287 case ByteTerm::TypeBodyAlternativeBegin
:
1288 case ByteTerm::TypeBodyAlternativeDisjunction
: {
1289 int offset
= currentTerm().alternative
.next
;
1290 context
->term
+= offset
;
1295 return JSRegExpNoMatch
;
1299 context
->matchBegin
= input
.getPos();
1301 if (currentTerm().alternative
.onceThrough
)
1302 context
->term
+= currentTerm().alternative
.next
;
1306 case ByteTerm::TypeBodyAlternativeEnd
:
1307 ASSERT_NOT_REACHED();
1309 case ByteTerm::TypeAlternativeBegin
:
1310 case ByteTerm::TypeAlternativeDisjunction
: {
1311 int offset
= currentTerm().alternative
.next
;
1312 context
->term
+= offset
;
1317 case ByteTerm::TypeAlternativeEnd
: {
1318 // We should never backtrack back into an alternative of the main body of the regex.
1319 BackTrackInfoAlternative
* backTrack
= reinterpret_cast<BackTrackInfoAlternative
*>(context
->frame
+ currentTerm().frameLocation
);
1320 unsigned offset
= backTrack
->offset
;
1321 context
->term
-= offset
;
1325 case ByteTerm::TypeAssertionBOL
:
1326 case ByteTerm::TypeAssertionEOL
:
1327 case ByteTerm::TypeAssertionWordBoundary
:
1330 case ByteTerm::TypePatternCharacterOnce
:
1331 case ByteTerm::TypePatternCharacterFixed
:
1332 case ByteTerm::TypePatternCharacterGreedy
:
1333 case ByteTerm::TypePatternCharacterNonGreedy
:
1334 if (backtrackPatternCharacter(currentTerm(), context
))
1337 case ByteTerm::TypePatternCasedCharacterOnce
:
1338 case ByteTerm::TypePatternCasedCharacterFixed
:
1339 case ByteTerm::TypePatternCasedCharacterGreedy
:
1340 case ByteTerm::TypePatternCasedCharacterNonGreedy
:
1341 if (backtrackPatternCasedCharacter(currentTerm(), context
))
1344 case ByteTerm::TypeCharacterClass
:
1345 if (backtrackCharacterClass(currentTerm(), context
))
1348 case ByteTerm::TypeBackReference
:
1349 if (backtrackBackReference(currentTerm(), context
))
1352 case ByteTerm::TypeParenthesesSubpattern
: {
1353 JSRegExpResult result
= backtrackParentheses(currentTerm(), context
);
1355 if (result
== JSRegExpMatch
) {
1357 } else if (result
!= JSRegExpNoMatch
)
1362 case ByteTerm::TypeParenthesesSubpatternOnceBegin
:
1363 if (backtrackParenthesesOnceBegin(currentTerm(), context
))
1366 case ByteTerm::TypeParenthesesSubpatternOnceEnd
:
1367 if (backtrackParenthesesOnceEnd(currentTerm(), context
))
1370 case ByteTerm::TypeParenthesesSubpatternTerminalBegin
:
1371 if (backtrackParenthesesTerminalBegin(currentTerm(), context
))
1374 case ByteTerm::TypeParenthesesSubpatternTerminalEnd
:
1375 if (backtrackParenthesesTerminalEnd(currentTerm(), context
))
1378 case ByteTerm::TypeParentheticalAssertionBegin
:
1379 if (backtrackParentheticalAssertionBegin(currentTerm(), context
))
1382 case ByteTerm::TypeParentheticalAssertionEnd
:
1383 if (backtrackParentheticalAssertionEnd(currentTerm(), context
))
1387 case ByteTerm::TypeCheckInput
:
1388 input
.uncheckInput(currentTerm().checkInputCount
);
1391 case ByteTerm::TypeUncheckInput
:
1392 input
.checkInput(currentTerm().checkInputCount
);
1395 case ByteTerm::TypeDotStarEnclosure
:
1396 ASSERT_NOT_REACHED();
1399 ASSERT_NOT_REACHED();
1400 return JSRegExpErrorNoMatch
;
1403 JSRegExpResult
matchNonZeroDisjunction(ByteDisjunction
* disjunction
, DisjunctionContext
* context
, bool btrack
= false)
1405 JSRegExpResult result
= matchDisjunction(disjunction
, context
, btrack
);
1407 if (result
== JSRegExpMatch
) {
1408 while (context
->matchBegin
== context
->matchEnd
) {
1409 result
= matchDisjunction(disjunction
, context
, true);
1410 if (result
!= JSRegExpMatch
)
1413 return JSRegExpMatch
;
1419 unsigned interpret()
1421 if (!input
.isAvailableInput(0))
1422 return offsetNoMatch
;
1424 for (unsigned i
= 0; i
< pattern
->m_body
->m_numSubpatterns
+ 1; ++i
)
1425 output
[i
<< 1] = offsetNoMatch
;
1427 allocatorPool
= pattern
->m_allocator
->startAllocator();
1431 DisjunctionContext
* context
= allocDisjunctionContext(pattern
->m_body
.get());
1433 JSRegExpResult result
= matchDisjunction(pattern
->m_body
.get(), context
, false);
1434 if (result
== JSRegExpMatch
) {
1435 output
[0] = context
->matchBegin
;
1436 output
[1] = context
->matchEnd
;
1439 freeDisjunctionContext(context
);
1441 pattern
->m_allocator
->stopAllocator();
1443 ASSERT((result
== JSRegExpMatch
) == (output
[0] != offsetNoMatch
));
1447 Interpreter(BytecodePattern
* pattern
, unsigned* output
, const CharType
* input
, unsigned length
, unsigned start
)
1450 , input(input
, start
, length
)
1452 , remainingMatchCount(matchLimit
)
1457 BytecodePattern
* pattern
;
1460 BumpPointerPool
* allocatorPool
;
1461 unsigned remainingMatchCount
;
1466 class ByteCompiler
{
1467 struct ParenthesesStackEntry
{
1469 unsigned savedAlternativeIndex
;
1470 ParenthesesStackEntry(unsigned beginTerm
, unsigned savedAlternativeIndex
/*, unsigned subpatternId, bool capture = false*/)
1471 : beginTerm(beginTerm
)
1472 , savedAlternativeIndex(savedAlternativeIndex
)
1478 ByteCompiler(YarrPattern
& pattern
)
1479 : m_pattern(pattern
)
1481 m_currentAlternativeIndex
= 0;
1484 PassOwnPtr
<BytecodePattern
> compile(BumpPointerAllocator
* allocator
)
1486 regexBegin(m_pattern
.m_numSubpatterns
, m_pattern
.m_body
->m_callFrameSize
, m_pattern
.m_body
->m_alternatives
[0]->onceThrough());
1487 emitDisjunction(m_pattern
.m_body
);
1490 return adoptPtr(new BytecodePattern(m_bodyDisjunction
.release(), m_allParenthesesInfo
, m_pattern
, allocator
));
1493 void checkInput(unsigned count
)
1495 m_bodyDisjunction
->terms
.append(ByteTerm::CheckInput(count
));
1498 void uncheckInput(unsigned count
)
1500 m_bodyDisjunction
->terms
.append(ByteTerm::UncheckInput(count
));
1503 void assertionBOL(unsigned inputPosition
)
1505 m_bodyDisjunction
->terms
.append(ByteTerm::BOL(inputPosition
));
1508 void assertionEOL(unsigned inputPosition
)
1510 m_bodyDisjunction
->terms
.append(ByteTerm::EOL(inputPosition
));
1513 void assertionWordBoundary(bool invert
, unsigned inputPosition
)
1515 m_bodyDisjunction
->terms
.append(ByteTerm::WordBoundary(invert
, inputPosition
));
1518 void atomPatternCharacter(UChar ch
, unsigned inputPosition
, unsigned frameLocation
, Checked
<unsigned> quantityCount
, QuantifierType quantityType
)
1520 if (m_pattern
.m_ignoreCase
) {
1521 UChar lo
= Unicode::toLower(ch
);
1522 UChar hi
= Unicode::toUpper(ch
);
1525 m_bodyDisjunction
->terms
.append(ByteTerm(lo
, hi
, inputPosition
, frameLocation
, quantityCount
, quantityType
));
1530 m_bodyDisjunction
->terms
.append(ByteTerm(ch
, inputPosition
, frameLocation
, quantityCount
, quantityType
));
1533 void atomCharacterClass(CharacterClass
* characterClass
, bool invert
, unsigned inputPosition
, unsigned frameLocation
, Checked
<unsigned> quantityCount
, QuantifierType quantityType
)
1535 m_bodyDisjunction
->terms
.append(ByteTerm(characterClass
, invert
, inputPosition
));
1537 m_bodyDisjunction
->terms
[m_bodyDisjunction
->terms
.size() - 1].atom
.quantityCount
= quantityCount
.unsafeGet();
1538 m_bodyDisjunction
->terms
[m_bodyDisjunction
->terms
.size() - 1].atom
.quantityType
= quantityType
;
1539 m_bodyDisjunction
->terms
[m_bodyDisjunction
->terms
.size() - 1].frameLocation
= frameLocation
;
1542 void atomBackReference(unsigned subpatternId
, unsigned inputPosition
, unsigned frameLocation
, Checked
<unsigned> quantityCount
, QuantifierType quantityType
)
1544 ASSERT(subpatternId
);
1546 m_bodyDisjunction
->terms
.append(ByteTerm::BackReference(subpatternId
, inputPosition
));
1548 m_bodyDisjunction
->terms
[m_bodyDisjunction
->terms
.size() - 1].atom
.quantityCount
= quantityCount
.unsafeGet();
1549 m_bodyDisjunction
->terms
[m_bodyDisjunction
->terms
.size() - 1].atom
.quantityType
= quantityType
;
1550 m_bodyDisjunction
->terms
[m_bodyDisjunction
->terms
.size() - 1].frameLocation
= frameLocation
;
1553 void atomParenthesesOnceBegin(unsigned subpatternId
, bool capture
, unsigned inputPosition
, unsigned frameLocation
, unsigned alternativeFrameLocation
)
1555 int beginTerm
= m_bodyDisjunction
->terms
.size();
1557 m_bodyDisjunction
->terms
.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin
, subpatternId
, capture
, false, inputPosition
));
1558 m_bodyDisjunction
->terms
[m_bodyDisjunction
->terms
.size() - 1].frameLocation
= frameLocation
;
1559 m_bodyDisjunction
->terms
.append(ByteTerm::AlternativeBegin());
1560 m_bodyDisjunction
->terms
[m_bodyDisjunction
->terms
.size() - 1].frameLocation
= alternativeFrameLocation
;
1562 m_parenthesesStack
.append(ParenthesesStackEntry(beginTerm
, m_currentAlternativeIndex
));
1563 m_currentAlternativeIndex
= beginTerm
+ 1;
1566 void atomParenthesesTerminalBegin(unsigned subpatternId
, bool capture
, unsigned inputPosition
, unsigned frameLocation
, unsigned alternativeFrameLocation
)
1568 int beginTerm
= m_bodyDisjunction
->terms
.size();
1570 m_bodyDisjunction
->terms
.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalBegin
, subpatternId
, capture
, false, inputPosition
));
1571 m_bodyDisjunction
->terms
[m_bodyDisjunction
->terms
.size() - 1].frameLocation
= frameLocation
;
1572 m_bodyDisjunction
->terms
.append(ByteTerm::AlternativeBegin());
1573 m_bodyDisjunction
->terms
[m_bodyDisjunction
->terms
.size() - 1].frameLocation
= alternativeFrameLocation
;
1575 m_parenthesesStack
.append(ParenthesesStackEntry(beginTerm
, m_currentAlternativeIndex
));
1576 m_currentAlternativeIndex
= beginTerm
+ 1;
1579 void atomParenthesesSubpatternBegin(unsigned subpatternId
, bool capture
, unsigned inputPosition
, unsigned frameLocation
, unsigned alternativeFrameLocation
)
1581 // Errrk! - this is a little crazy, we initially generate as a TypeParenthesesSubpatternOnceBegin,
1582 // then fix this up at the end! - simplifying this should make it much clearer.
1583 // https://bugs.webkit.org/show_bug.cgi?id=50136
1585 int beginTerm
= m_bodyDisjunction
->terms
.size();
1587 m_bodyDisjunction
->terms
.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin
, subpatternId
, capture
, false, inputPosition
));
1588 m_bodyDisjunction
->terms
[m_bodyDisjunction
->terms
.size() - 1].frameLocation
= frameLocation
;
1589 m_bodyDisjunction
->terms
.append(ByteTerm::AlternativeBegin());
1590 m_bodyDisjunction
->terms
[m_bodyDisjunction
->terms
.size() - 1].frameLocation
= alternativeFrameLocation
;
1592 m_parenthesesStack
.append(ParenthesesStackEntry(beginTerm
, m_currentAlternativeIndex
));
1593 m_currentAlternativeIndex
= beginTerm
+ 1;
1596 void atomParentheticalAssertionBegin(unsigned subpatternId
, bool invert
, unsigned frameLocation
, unsigned alternativeFrameLocation
)
1598 int beginTerm
= m_bodyDisjunction
->terms
.size();
1600 m_bodyDisjunction
->terms
.append(ByteTerm(ByteTerm::TypeParentheticalAssertionBegin
, subpatternId
, false, invert
, 0));
1601 m_bodyDisjunction
->terms
[m_bodyDisjunction
->terms
.size() - 1].frameLocation
= frameLocation
;
1602 m_bodyDisjunction
->terms
.append(ByteTerm::AlternativeBegin());
1603 m_bodyDisjunction
->terms
[m_bodyDisjunction
->terms
.size() - 1].frameLocation
= alternativeFrameLocation
;
1605 m_parenthesesStack
.append(ParenthesesStackEntry(beginTerm
, m_currentAlternativeIndex
));
1606 m_currentAlternativeIndex
= beginTerm
+ 1;
1609 void atomParentheticalAssertionEnd(unsigned inputPosition
, unsigned frameLocation
, Checked
<unsigned> quantityCount
, QuantifierType quantityType
)
1611 unsigned beginTerm
= popParenthesesStack();
1612 closeAlternative(beginTerm
+ 1);
1613 unsigned endTerm
= m_bodyDisjunction
->terms
.size();
1615 ASSERT(m_bodyDisjunction
->terms
[beginTerm
].type
== ByteTerm::TypeParentheticalAssertionBegin
);
1617 bool invert
= m_bodyDisjunction
->terms
[beginTerm
].invert();
1618 unsigned subpatternId
= m_bodyDisjunction
->terms
[beginTerm
].atom
.subpatternId
;
1620 m_bodyDisjunction
->terms
.append(ByteTerm(ByteTerm::TypeParentheticalAssertionEnd
, subpatternId
, false, invert
, inputPosition
));
1621 m_bodyDisjunction
->terms
[beginTerm
].atom
.parenthesesWidth
= endTerm
- beginTerm
;
1622 m_bodyDisjunction
->terms
[endTerm
].atom
.parenthesesWidth
= endTerm
- beginTerm
;
1623 m_bodyDisjunction
->terms
[endTerm
].frameLocation
= frameLocation
;
1625 m_bodyDisjunction
->terms
[beginTerm
].atom
.quantityCount
= quantityCount
.unsafeGet();
1626 m_bodyDisjunction
->terms
[beginTerm
].atom
.quantityType
= quantityType
;
1627 m_bodyDisjunction
->terms
[endTerm
].atom
.quantityCount
= quantityCount
.unsafeGet();
1628 m_bodyDisjunction
->terms
[endTerm
].atom
.quantityType
= quantityType
;
1631 void assertionDotStarEnclosure(bool bolAnchored
, bool eolAnchored
)
1633 m_bodyDisjunction
->terms
.append(ByteTerm::DotStarEnclosure(bolAnchored
, eolAnchored
));
1636 unsigned popParenthesesStack()
1638 ASSERT(m_parenthesesStack
.size());
1639 int stackEnd
= m_parenthesesStack
.size() - 1;
1640 unsigned beginTerm
= m_parenthesesStack
[stackEnd
].beginTerm
;
1641 m_currentAlternativeIndex
= m_parenthesesStack
[stackEnd
].savedAlternativeIndex
;
1642 m_parenthesesStack
.shrink(stackEnd
);
1644 ASSERT(beginTerm
< m_bodyDisjunction
->terms
.size());
1645 ASSERT(m_currentAlternativeIndex
< m_bodyDisjunction
->terms
.size());
1651 void dumpDisjunction(ByteDisjunction
* disjunction
)
1653 dataLog("ByteDisjunction(%p):\n\t", disjunction
);
1654 for (unsigned i
= 0; i
< disjunction
->terms
.size(); ++i
)
1655 dataLog("{ %d } ", disjunction
->terms
[i
].type
);
1660 void closeAlternative(int beginTerm
)
1662 int origBeginTerm
= beginTerm
;
1663 ASSERT(m_bodyDisjunction
->terms
[beginTerm
].type
== ByteTerm::TypeAlternativeBegin
);
1664 int endIndex
= m_bodyDisjunction
->terms
.size();
1666 unsigned frameLocation
= m_bodyDisjunction
->terms
[beginTerm
].frameLocation
;
1668 if (!m_bodyDisjunction
->terms
[beginTerm
].alternative
.next
)
1669 m_bodyDisjunction
->terms
.remove(beginTerm
);
1671 while (m_bodyDisjunction
->terms
[beginTerm
].alternative
.next
) {
1672 beginTerm
+= m_bodyDisjunction
->terms
[beginTerm
].alternative
.next
;
1673 ASSERT(m_bodyDisjunction
->terms
[beginTerm
].type
== ByteTerm::TypeAlternativeDisjunction
);
1674 m_bodyDisjunction
->terms
[beginTerm
].alternative
.end
= endIndex
- beginTerm
;
1675 m_bodyDisjunction
->terms
[beginTerm
].frameLocation
= frameLocation
;
1678 m_bodyDisjunction
->terms
[beginTerm
].alternative
.next
= origBeginTerm
- beginTerm
;
1680 m_bodyDisjunction
->terms
.append(ByteTerm::AlternativeEnd());
1681 m_bodyDisjunction
->terms
[endIndex
].frameLocation
= frameLocation
;
1685 void closeBodyAlternative()
1688 int origBeginTerm
= 0;
1689 ASSERT(m_bodyDisjunction
->terms
[beginTerm
].type
== ByteTerm::TypeBodyAlternativeBegin
);
1690 int endIndex
= m_bodyDisjunction
->terms
.size();
1692 unsigned frameLocation
= m_bodyDisjunction
->terms
[beginTerm
].frameLocation
;
1694 while (m_bodyDisjunction
->terms
[beginTerm
].alternative
.next
) {
1695 beginTerm
+= m_bodyDisjunction
->terms
[beginTerm
].alternative
.next
;
1696 ASSERT(m_bodyDisjunction
->terms
[beginTerm
].type
== ByteTerm::TypeBodyAlternativeDisjunction
);
1697 m_bodyDisjunction
->terms
[beginTerm
].alternative
.end
= endIndex
- beginTerm
;
1698 m_bodyDisjunction
->terms
[beginTerm
].frameLocation
= frameLocation
;
1701 m_bodyDisjunction
->terms
[beginTerm
].alternative
.next
= origBeginTerm
- beginTerm
;
1703 m_bodyDisjunction
->terms
.append(ByteTerm::BodyAlternativeEnd());
1704 m_bodyDisjunction
->terms
[endIndex
].frameLocation
= frameLocation
;
1707 void atomParenthesesSubpatternEnd(unsigned lastSubpatternId
, int inputPosition
, unsigned frameLocation
, Checked
<unsigned> quantityCount
, QuantifierType quantityType
, unsigned callFrameSize
= 0)
1709 unsigned beginTerm
= popParenthesesStack();
1710 closeAlternative(beginTerm
+ 1);
1711 unsigned endTerm
= m_bodyDisjunction
->terms
.size();
1713 ASSERT(m_bodyDisjunction
->terms
[beginTerm
].type
== ByteTerm::TypeParenthesesSubpatternOnceBegin
);
1715 ByteTerm
& parenthesesBegin
= m_bodyDisjunction
->terms
[beginTerm
];
1717 bool capture
= parenthesesBegin
.capture();
1718 unsigned subpatternId
= parenthesesBegin
.atom
.subpatternId
;
1720 unsigned numSubpatterns
= lastSubpatternId
- subpatternId
+ 1;
1721 ByteDisjunction
* parenthesesDisjunction
= new ByteDisjunction(numSubpatterns
, callFrameSize
);
1723 parenthesesDisjunction
->terms
.append(ByteTerm::SubpatternBegin());
1724 for (unsigned termInParentheses
= beginTerm
+ 1; termInParentheses
< endTerm
; ++termInParentheses
)
1725 parenthesesDisjunction
->terms
.append(m_bodyDisjunction
->terms
[termInParentheses
]);
1726 parenthesesDisjunction
->terms
.append(ByteTerm::SubpatternEnd());
1728 m_bodyDisjunction
->terms
.shrink(beginTerm
);
1730 m_allParenthesesInfo
.append(parenthesesDisjunction
);
1731 m_bodyDisjunction
->terms
.append(ByteTerm(ByteTerm::TypeParenthesesSubpattern
, subpatternId
, parenthesesDisjunction
, capture
, inputPosition
));
1733 m_bodyDisjunction
->terms
[beginTerm
].atom
.quantityCount
= quantityCount
.unsafeGet();
1734 m_bodyDisjunction
->terms
[beginTerm
].atom
.quantityType
= quantityType
;
1735 m_bodyDisjunction
->terms
[beginTerm
].frameLocation
= frameLocation
;
1738 void atomParenthesesOnceEnd(int inputPosition
, unsigned frameLocation
, Checked
<unsigned> quantityCount
, QuantifierType quantityType
)
1740 unsigned beginTerm
= popParenthesesStack();
1741 closeAlternative(beginTerm
+ 1);
1742 unsigned endTerm
= m_bodyDisjunction
->terms
.size();
1744 ASSERT(m_bodyDisjunction
->terms
[beginTerm
].type
== ByteTerm::TypeParenthesesSubpatternOnceBegin
);
1746 bool capture
= m_bodyDisjunction
->terms
[beginTerm
].capture();
1747 unsigned subpatternId
= m_bodyDisjunction
->terms
[beginTerm
].atom
.subpatternId
;
1749 m_bodyDisjunction
->terms
.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceEnd
, subpatternId
, capture
, false, inputPosition
));
1750 m_bodyDisjunction
->terms
[beginTerm
].atom
.parenthesesWidth
= endTerm
- beginTerm
;
1751 m_bodyDisjunction
->terms
[endTerm
].atom
.parenthesesWidth
= endTerm
- beginTerm
;
1752 m_bodyDisjunction
->terms
[endTerm
].frameLocation
= frameLocation
;
1754 m_bodyDisjunction
->terms
[beginTerm
].atom
.quantityCount
= quantityCount
.unsafeGet();
1755 m_bodyDisjunction
->terms
[beginTerm
].atom
.quantityType
= quantityType
;
1756 m_bodyDisjunction
->terms
[endTerm
].atom
.quantityCount
= quantityCount
.unsafeGet();
1757 m_bodyDisjunction
->terms
[endTerm
].atom
.quantityType
= quantityType
;
1760 void atomParenthesesTerminalEnd(int inputPosition
, unsigned frameLocation
, Checked
<unsigned> quantityCount
, QuantifierType quantityType
)
1762 unsigned beginTerm
= popParenthesesStack();
1763 closeAlternative(beginTerm
+ 1);
1764 unsigned endTerm
= m_bodyDisjunction
->terms
.size();
1766 ASSERT(m_bodyDisjunction
->terms
[beginTerm
].type
== ByteTerm::TypeParenthesesSubpatternTerminalBegin
);
1768 bool capture
= m_bodyDisjunction
->terms
[beginTerm
].capture();
1769 unsigned subpatternId
= m_bodyDisjunction
->terms
[beginTerm
].atom
.subpatternId
;
1771 m_bodyDisjunction
->terms
.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalEnd
, subpatternId
, capture
, false, inputPosition
));
1772 m_bodyDisjunction
->terms
[beginTerm
].atom
.parenthesesWidth
= endTerm
- beginTerm
;
1773 m_bodyDisjunction
->terms
[endTerm
].atom
.parenthesesWidth
= endTerm
- beginTerm
;
1774 m_bodyDisjunction
->terms
[endTerm
].frameLocation
= frameLocation
;
1776 m_bodyDisjunction
->terms
[beginTerm
].atom
.quantityCount
= quantityCount
.unsafeGet();
1777 m_bodyDisjunction
->terms
[beginTerm
].atom
.quantityType
= quantityType
;
1778 m_bodyDisjunction
->terms
[endTerm
].atom
.quantityCount
= quantityCount
.unsafeGet();
1779 m_bodyDisjunction
->terms
[endTerm
].atom
.quantityType
= quantityType
;
1782 void regexBegin(unsigned numSubpatterns
, unsigned callFrameSize
, bool onceThrough
)
1784 m_bodyDisjunction
= adoptPtr(new ByteDisjunction(numSubpatterns
, callFrameSize
));
1785 m_bodyDisjunction
->terms
.append(ByteTerm::BodyAlternativeBegin(onceThrough
));
1786 m_bodyDisjunction
->terms
[0].frameLocation
= 0;
1787 m_currentAlternativeIndex
= 0;
1792 closeBodyAlternative();
1795 void alternativeBodyDisjunction(bool onceThrough
)
1797 int newAlternativeIndex
= m_bodyDisjunction
->terms
.size();
1798 m_bodyDisjunction
->terms
[m_currentAlternativeIndex
].alternative
.next
= newAlternativeIndex
- m_currentAlternativeIndex
;
1799 m_bodyDisjunction
->terms
.append(ByteTerm::BodyAlternativeDisjunction(onceThrough
));
1801 m_currentAlternativeIndex
= newAlternativeIndex
;
1804 void alternativeDisjunction()
1806 int newAlternativeIndex
= m_bodyDisjunction
->terms
.size();
1807 m_bodyDisjunction
->terms
[m_currentAlternativeIndex
].alternative
.next
= newAlternativeIndex
- m_currentAlternativeIndex
;
1808 m_bodyDisjunction
->terms
.append(ByteTerm::AlternativeDisjunction());
1810 m_currentAlternativeIndex
= newAlternativeIndex
;
1813 void emitDisjunction(PatternDisjunction
* disjunction
, unsigned inputCountAlreadyChecked
= 0, unsigned parenthesesInputCountAlreadyChecked
= 0)
1815 for (unsigned alt
= 0; alt
< disjunction
->m_alternatives
.size(); ++alt
) {
1816 unsigned currentCountAlreadyChecked
= inputCountAlreadyChecked
;
1818 PatternAlternative
* alternative
= disjunction
->m_alternatives
[alt
];
1821 if (disjunction
== m_pattern
.m_body
)
1822 alternativeBodyDisjunction(alternative
->onceThrough());
1824 alternativeDisjunction();
1827 unsigned minimumSize
= alternative
->m_minimumSize
;
1828 ASSERT(minimumSize
>= parenthesesInputCountAlreadyChecked
);
1829 unsigned countToCheck
= minimumSize
- parenthesesInputCountAlreadyChecked
;
1832 checkInput(countToCheck
);
1833 currentCountAlreadyChecked
+= countToCheck
;
1836 for (unsigned i
= 0; i
< alternative
->m_terms
.size(); ++i
) {
1837 PatternTerm
& term
= alternative
->m_terms
[i
];
1839 switch (term
.type
) {
1840 case PatternTerm::TypeAssertionBOL
:
1841 assertionBOL(currentCountAlreadyChecked
- term
.inputPosition
);
1844 case PatternTerm::TypeAssertionEOL
:
1845 assertionEOL(currentCountAlreadyChecked
- term
.inputPosition
);
1848 case PatternTerm::TypeAssertionWordBoundary
:
1849 assertionWordBoundary(term
.invert(), currentCountAlreadyChecked
- term
.inputPosition
);
1852 case PatternTerm::TypePatternCharacter
:
1853 atomPatternCharacter(term
.patternCharacter
, currentCountAlreadyChecked
- term
.inputPosition
, term
.frameLocation
, term
.quantityCount
, term
.quantityType
);
1856 case PatternTerm::TypeCharacterClass
:
1857 atomCharacterClass(term
.characterClass
, term
.invert(), currentCountAlreadyChecked
- term
.inputPosition
, term
.frameLocation
, term
.quantityCount
, term
.quantityType
);
1860 case PatternTerm::TypeBackReference
:
1861 atomBackReference(term
.backReferenceSubpatternId
, currentCountAlreadyChecked
- term
.inputPosition
, term
.frameLocation
, term
.quantityCount
, term
.quantityType
);
1864 case PatternTerm::TypeForwardReference
:
1867 case PatternTerm::TypeParenthesesSubpattern
: {
1868 unsigned disjunctionAlreadyCheckedCount
= 0;
1869 if (term
.quantityCount
== 1 && !term
.parentheses
.isCopy
) {
1870 unsigned alternativeFrameLocation
= term
.frameLocation
;
1871 // For QuantifierFixedCount we pre-check the minimum size; for greedy/non-greedy we reserve a slot in the frame.
1872 if (term
.quantityType
== QuantifierFixedCount
)
1873 disjunctionAlreadyCheckedCount
= term
.parentheses
.disjunction
->m_minimumSize
;
1875 alternativeFrameLocation
+= YarrStackSpaceForBackTrackInfoParenthesesOnce
;
1876 unsigned delegateEndInputOffset
= term
.inputPosition
- currentCountAlreadyChecked
;
1877 atomParenthesesOnceBegin(term
.parentheses
.subpatternId
, term
.capture(), disjunctionAlreadyCheckedCount
- delegateEndInputOffset
, term
.frameLocation
, alternativeFrameLocation
);
1878 emitDisjunction(term
.parentheses
.disjunction
, currentCountAlreadyChecked
, disjunctionAlreadyCheckedCount
);
1879 atomParenthesesOnceEnd(delegateEndInputOffset
, term
.frameLocation
, term
.quantityCount
, term
.quantityType
);
1880 } else if (term
.parentheses
.isTerminal
) {
1881 unsigned delegateEndInputOffset
= term
.inputPosition
- currentCountAlreadyChecked
;
1882 atomParenthesesTerminalBegin(term
.parentheses
.subpatternId
, term
.capture(), disjunctionAlreadyCheckedCount
- delegateEndInputOffset
, term
.frameLocation
, term
.frameLocation
+ YarrStackSpaceForBackTrackInfoParenthesesOnce
);
1883 emitDisjunction(term
.parentheses
.disjunction
, currentCountAlreadyChecked
, disjunctionAlreadyCheckedCount
);
1884 atomParenthesesTerminalEnd(delegateEndInputOffset
, term
.frameLocation
, term
.quantityCount
, term
.quantityType
);
1886 unsigned delegateEndInputOffset
= term
.inputPosition
- currentCountAlreadyChecked
;
1887 atomParenthesesSubpatternBegin(term
.parentheses
.subpatternId
, term
.capture(), disjunctionAlreadyCheckedCount
- delegateEndInputOffset
, term
.frameLocation
, 0);
1888 emitDisjunction(term
.parentheses
.disjunction
, currentCountAlreadyChecked
, 0);
1889 atomParenthesesSubpatternEnd(term
.parentheses
.lastSubpatternId
, delegateEndInputOffset
, term
.frameLocation
, term
.quantityCount
, term
.quantityType
, term
.parentheses
.disjunction
->m_callFrameSize
);
1894 case PatternTerm::TypeParentheticalAssertion
: {
1895 unsigned alternativeFrameLocation
= term
.frameLocation
+ YarrStackSpaceForBackTrackInfoParentheticalAssertion
;
1897 ASSERT(currentCountAlreadyChecked
>= static_cast<unsigned>(term
.inputPosition
));
1898 unsigned positiveInputOffset
= currentCountAlreadyChecked
- static_cast<unsigned>(term
.inputPosition
);
1899 unsigned uncheckAmount
= 0;
1900 if (positiveInputOffset
> term
.parentheses
.disjunction
->m_minimumSize
) {
1901 uncheckAmount
= positiveInputOffset
- term
.parentheses
.disjunction
->m_minimumSize
;
1902 uncheckInput(uncheckAmount
);
1903 currentCountAlreadyChecked
-= uncheckAmount
;
1906 atomParentheticalAssertionBegin(term
.parentheses
.subpatternId
, term
.invert(), term
.frameLocation
, alternativeFrameLocation
);
1907 emitDisjunction(term
.parentheses
.disjunction
, currentCountAlreadyChecked
, positiveInputOffset
- uncheckAmount
);
1908 atomParentheticalAssertionEnd(0, term
.frameLocation
, term
.quantityCount
, term
.quantityType
);
1909 if (uncheckAmount
) {
1910 checkInput(uncheckAmount
);
1911 currentCountAlreadyChecked
+= uncheckAmount
;
1916 case PatternTerm::TypeDotStarEnclosure
:
1917 assertionDotStarEnclosure(term
.anchors
.bolAnchor
, term
.anchors
.eolAnchor
);
1925 YarrPattern
& m_pattern
;
1926 OwnPtr
<ByteDisjunction
> m_bodyDisjunction
;
1927 unsigned m_currentAlternativeIndex
;
1928 Vector
<ParenthesesStackEntry
> m_parenthesesStack
;
1929 Vector
<ByteDisjunction
*> m_allParenthesesInfo
;
1932 PassOwnPtr
<BytecodePattern
> byteCompile(YarrPattern
& pattern
, BumpPointerAllocator
* allocator
)
1934 return ByteCompiler(pattern
).compile(allocator
);
1937 unsigned interpret(BytecodePattern
* bytecode
, const UString
& input
, unsigned start
, unsigned* output
)
1940 return Interpreter
<LChar
>(bytecode
, output
, input
.characters8(), input
.length(), start
).interpret();
1941 return Interpreter
<UChar
>(bytecode
, output
, input
.characters16(), input
.length(), start
).interpret();
1944 unsigned interpret(BytecodePattern
* bytecode
, const LChar
* input
, unsigned length
, unsigned start
, unsigned* output
)
1946 return Interpreter
<LChar
>(bytecode
, output
, input
, length
, start
).interpret();
1949 unsigned interpret(BytecodePattern
* bytecode
, const UChar
* input
, unsigned length
, unsigned start
, unsigned* output
)
1951 return Interpreter
<UChar
>(bytecode
, output
, input
, length
, start
).interpret();
1954 // These should be the same for both UChar & LChar.
1955 COMPILE_ASSERT(sizeof(Interpreter
<UChar
>::BackTrackInfoPatternCharacter
) == (YarrStackSpaceForBackTrackInfoPatternCharacter
* sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoPatternCharacter
);
1956 COMPILE_ASSERT(sizeof(Interpreter
<UChar
>::BackTrackInfoCharacterClass
) == (YarrStackSpaceForBackTrackInfoCharacterClass
* sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoCharacterClass
);
1957 COMPILE_ASSERT(sizeof(Interpreter
<UChar
>::BackTrackInfoBackReference
) == (YarrStackSpaceForBackTrackInfoBackReference
* sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoBackReference
);
1958 COMPILE_ASSERT(sizeof(Interpreter
<UChar
>::BackTrackInfoAlternative
) == (YarrStackSpaceForBackTrackInfoAlternative
* sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoAlternative
);
1959 COMPILE_ASSERT(sizeof(Interpreter
<UChar
>::BackTrackInfoParentheticalAssertion
) == (YarrStackSpaceForBackTrackInfoParentheticalAssertion
* sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheticalAssertion
);
1960 COMPILE_ASSERT(sizeof(Interpreter
<UChar
>::BackTrackInfoParenthesesOnce
) == (YarrStackSpaceForBackTrackInfoParenthesesOnce
* sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParenthesesOnce
);
1961 COMPILE_ASSERT(sizeof(Interpreter
<UChar
>::BackTrackInfoParentheses
) == (YarrStackSpaceForBackTrackInfoParentheses
* sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheses
);