2 * Copyright (C) 2009 Apple Inc. All rights reserved.
3 * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 #include "YarrInterpreter.h"
31 #include "YarrCanonicalizeUCS2.h"
32 #include <wtf/BumpPointerAllocator.h>
33 #include <wtf/DataLog.h>
34 #include <wtf/text/CString.h>
35 #include <wtf/text/WTFString.h>
39 namespace JSC
{ namespace Yarr
{
41 template<typename CharType
>
44 struct ParenthesesDisjunctionContext
;
46 struct BackTrackInfoPatternCharacter
{
47 uintptr_t matchAmount
;
49 struct BackTrackInfoCharacterClass
{
50 uintptr_t matchAmount
;
52 struct BackTrackInfoBackReference
{
53 uintptr_t begin
; // Not really needed for greedy quantifiers.
54 uintptr_t matchAmount
; // Not really needed for fixed quantifiers.
56 struct BackTrackInfoAlternative
{
59 struct BackTrackInfoParentheticalAssertion
{
62 struct BackTrackInfoParenthesesOnce
{
65 struct BackTrackInfoParenthesesTerminal
{
68 struct BackTrackInfoParentheses
{
69 uintptr_t matchAmount
;
70 ParenthesesDisjunctionContext
* lastContext
;
73 static inline void appendParenthesesDisjunctionContext(BackTrackInfoParentheses
* backTrack
, ParenthesesDisjunctionContext
* context
)
75 context
->next
= backTrack
->lastContext
;
76 backTrack
->lastContext
= context
;
77 ++backTrack
->matchAmount
;
80 static inline void popParenthesesDisjunctionContext(BackTrackInfoParentheses
* backTrack
)
82 RELEASE_ASSERT(backTrack
->matchAmount
);
83 RELEASE_ASSERT(backTrack
->lastContext
);
84 backTrack
->lastContext
= backTrack
->lastContext
->next
;
85 --backTrack
->matchAmount
;
88 struct DisjunctionContext
95 void* operator new(size_t, void* where
)
106 DisjunctionContext
* allocDisjunctionContext(ByteDisjunction
* disjunction
)
108 size_t size
= sizeof(DisjunctionContext
) - sizeof(uintptr_t) + disjunction
->m_frameSize
* sizeof(uintptr_t);
109 allocatorPool
= allocatorPool
->ensureCapacity(size
);
110 RELEASE_ASSERT(allocatorPool
);
111 return new (allocatorPool
->alloc(size
)) DisjunctionContext();
114 void freeDisjunctionContext(DisjunctionContext
* context
)
116 allocatorPool
= allocatorPool
->dealloc(context
);
119 struct ParenthesesDisjunctionContext
121 ParenthesesDisjunctionContext(unsigned* output
, ByteTerm
& term
)
124 unsigned firstSubpatternId
= term
.atom
.subpatternId
;
125 unsigned numNestedSubpatterns
= term
.atom
.parenthesesDisjunction
->m_numSubpatterns
;
127 for (unsigned i
= 0; i
< (numNestedSubpatterns
<< 1); ++i
) {
128 subpatternBackup
[i
] = output
[(firstSubpatternId
<< 1) + i
];
129 output
[(firstSubpatternId
<< 1) + i
] = offsetNoMatch
;
132 new (getDisjunctionContext(term
)) DisjunctionContext();
135 void* operator new(size_t, void* where
)
140 void restoreOutput(unsigned* output
, unsigned firstSubpatternId
, unsigned numNestedSubpatterns
)
142 for (unsigned i
= 0; i
< (numNestedSubpatterns
<< 1); ++i
)
143 output
[(firstSubpatternId
<< 1) + i
] = subpatternBackup
[i
];
146 DisjunctionContext
* getDisjunctionContext(ByteTerm
& term
)
148 return reinterpret_cast<DisjunctionContext
*>(&(subpatternBackup
[term
.atom
.parenthesesDisjunction
->m_numSubpatterns
<< 1]));
151 ParenthesesDisjunctionContext
* next
;
152 unsigned subpatternBackup
[1];
155 ParenthesesDisjunctionContext
* allocParenthesesDisjunctionContext(ByteDisjunction
* disjunction
, unsigned* output
, ByteTerm
& term
)
157 size_t size
= sizeof(ParenthesesDisjunctionContext
) - sizeof(unsigned) + (term
.atom
.parenthesesDisjunction
->m_numSubpatterns
<< 1) * sizeof(unsigned) + sizeof(DisjunctionContext
) - sizeof(uintptr_t) + static_cast<size_t>(disjunction
->m_frameSize
) * sizeof(uintptr_t);
158 allocatorPool
= allocatorPool
->ensureCapacity(size
);
159 RELEASE_ASSERT(allocatorPool
);
160 return new (allocatorPool
->alloc(size
)) ParenthesesDisjunctionContext(output
, term
);
163 void freeParenthesesDisjunctionContext(ParenthesesDisjunctionContext
* context
)
165 allocatorPool
= allocatorPool
->dealloc(context
);
170 InputStream(const CharType
* input
, unsigned start
, unsigned length
)
182 void rewind(unsigned amount
)
184 ASSERT(pos
>= amount
);
190 ASSERT(pos
< length
);
198 ASSERT(pos
+ 1 < length
);
199 return input
[pos
] | input
[pos
+ 1] << 16;
202 int readChecked(unsigned negativePositionOffest
)
204 RELEASE_ASSERT(pos
>= negativePositionOffest
);
205 unsigned p
= pos
- negativePositionOffest
;
210 int reread(unsigned from
)
212 ASSERT(from
< length
);
218 ASSERT(!(pos
> length
));
220 return input
[pos
- 1];
229 void setPos(unsigned p
)
241 return pos
== length
;
249 bool checkInput(unsigned count
)
251 if (((pos
+ count
) <= length
) && ((pos
+ count
) >= pos
)) {
258 void uncheckInput(unsigned count
)
260 RELEASE_ASSERT(pos
>= count
);
264 bool atStart(unsigned negativePositionOffest
)
266 return pos
== negativePositionOffest
;
269 bool atEnd(unsigned negativePositionOffest
)
271 RELEASE_ASSERT(pos
>= negativePositionOffest
);
272 return (pos
- negativePositionOffest
) == length
;
275 bool isAvailableInput(unsigned offset
)
277 return (((pos
+ offset
) <= length
) && ((pos
+ offset
) >= pos
));
281 const CharType
* input
;
286 bool testCharacterClass(CharacterClass
* characterClass
, int ch
)
289 for (unsigned i
= 0; i
< characterClass
->m_matchesUnicode
.size(); ++i
)
290 if (ch
== characterClass
->m_matchesUnicode
[i
])
292 for (unsigned i
= 0; i
< characterClass
->m_rangesUnicode
.size(); ++i
)
293 if ((ch
>= characterClass
->m_rangesUnicode
[i
].begin
) && (ch
<= characterClass
->m_rangesUnicode
[i
].end
))
296 for (unsigned i
= 0; i
< characterClass
->m_matches
.size(); ++i
)
297 if (ch
== characterClass
->m_matches
[i
])
299 for (unsigned i
= 0; i
< characterClass
->m_ranges
.size(); ++i
)
300 if ((ch
>= characterClass
->m_ranges
[i
].begin
) && (ch
<= characterClass
->m_ranges
[i
].end
))
307 bool checkCharacter(int testChar
, unsigned negativeInputOffset
)
309 return testChar
== input
.readChecked(negativeInputOffset
);
312 bool checkCasedCharacter(int loChar
, int hiChar
, unsigned negativeInputOffset
)
314 int ch
= input
.readChecked(negativeInputOffset
);
315 return (loChar
== ch
) || (hiChar
== ch
);
318 bool checkCharacterClass(CharacterClass
* characterClass
, bool invert
, unsigned negativeInputOffset
)
320 bool match
= testCharacterClass(characterClass
, input
.readChecked(negativeInputOffset
));
321 return invert
? !match
: match
;
324 bool tryConsumeBackReference(int matchBegin
, int matchEnd
, unsigned negativeInputOffset
)
326 unsigned matchSize
= (unsigned)(matchEnd
- matchBegin
);
328 if (!input
.checkInput(matchSize
))
331 if (pattern
->m_ignoreCase
) {
332 for (unsigned i
= 0; i
< matchSize
; ++i
) {
333 int oldCh
= input
.reread(matchBegin
+ i
);
334 int ch
= input
.readChecked(negativeInputOffset
+ matchSize
- i
);
339 // The definition for canonicalize (see ES 5.1, 15.10.2.8) means that
340 // unicode values are never allowed to match against ascii ones.
341 if (isASCII(oldCh
) || isASCII(ch
)) {
342 if (toASCIIUpper(oldCh
) == toASCIIUpper(ch
))
344 } else if (areCanonicallyEquivalent(oldCh
, ch
))
347 input
.uncheckInput(matchSize
);
351 for (unsigned i
= 0; i
< matchSize
; ++i
) {
352 if (!checkCharacter(input
.reread(matchBegin
+ i
), negativeInputOffset
+ matchSize
- i
)) {
353 input
.uncheckInput(matchSize
);
362 bool matchAssertionBOL(ByteTerm
& term
)
364 return (input
.atStart(term
.inputPosition
)) || (pattern
->m_multiline
&& testCharacterClass(pattern
->newlineCharacterClass
, input
.readChecked(term
.inputPosition
+ 1)));
367 bool matchAssertionEOL(ByteTerm
& term
)
369 if (term
.inputPosition
)
370 return (input
.atEnd(term
.inputPosition
)) || (pattern
->m_multiline
&& testCharacterClass(pattern
->newlineCharacterClass
, input
.readChecked(term
.inputPosition
)));
372 return (input
.atEnd()) || (pattern
->m_multiline
&& testCharacterClass(pattern
->newlineCharacterClass
, input
.read()));
375 bool matchAssertionWordBoundary(ByteTerm
& term
)
377 bool prevIsWordchar
= !input
.atStart(term
.inputPosition
) && testCharacterClass(pattern
->wordcharCharacterClass
, input
.readChecked(term
.inputPosition
+ 1));
379 if (term
.inputPosition
)
380 readIsWordchar
= !input
.atEnd(term
.inputPosition
) && testCharacterClass(pattern
->wordcharCharacterClass
, input
.readChecked(term
.inputPosition
));
382 readIsWordchar
= !input
.atEnd() && testCharacterClass(pattern
->wordcharCharacterClass
, input
.read());
384 bool wordBoundary
= prevIsWordchar
!= readIsWordchar
;
385 return term
.invert() ? !wordBoundary
: wordBoundary
;
388 bool backtrackPatternCharacter(ByteTerm
& term
, DisjunctionContext
* context
)
390 BackTrackInfoPatternCharacter
* backTrack
= reinterpret_cast<BackTrackInfoPatternCharacter
*>(context
->frame
+ term
.frameLocation
);
392 switch (term
.atom
.quantityType
) {
393 case QuantifierFixedCount
:
396 case QuantifierGreedy
:
397 if (backTrack
->matchAmount
) {
398 --backTrack
->matchAmount
;
399 input
.uncheckInput(1);
404 case QuantifierNonGreedy
:
405 if ((backTrack
->matchAmount
< term
.atom
.quantityCount
) && input
.checkInput(1)) {
406 ++backTrack
->matchAmount
;
407 if (checkCharacter(term
.atom
.patternCharacter
, term
.inputPosition
+ 1))
410 input
.uncheckInput(backTrack
->matchAmount
);
417 bool backtrackPatternCasedCharacter(ByteTerm
& term
, DisjunctionContext
* context
)
419 BackTrackInfoPatternCharacter
* backTrack
= reinterpret_cast<BackTrackInfoPatternCharacter
*>(context
->frame
+ term
.frameLocation
);
421 switch (term
.atom
.quantityType
) {
422 case QuantifierFixedCount
:
425 case QuantifierGreedy
:
426 if (backTrack
->matchAmount
) {
427 --backTrack
->matchAmount
;
428 input
.uncheckInput(1);
433 case QuantifierNonGreedy
:
434 if ((backTrack
->matchAmount
< term
.atom
.quantityCount
) && input
.checkInput(1)) {
435 ++backTrack
->matchAmount
;
436 if (checkCasedCharacter(term
.atom
.casedCharacter
.lo
, term
.atom
.casedCharacter
.hi
, term
.inputPosition
+ 1))
439 input
.uncheckInput(backTrack
->matchAmount
);
446 bool matchCharacterClass(ByteTerm
& term
, DisjunctionContext
* context
)
448 ASSERT(term
.type
== ByteTerm::TypeCharacterClass
);
449 BackTrackInfoPatternCharacter
* backTrack
= reinterpret_cast<BackTrackInfoPatternCharacter
*>(context
->frame
+ term
.frameLocation
);
451 switch (term
.atom
.quantityType
) {
452 case QuantifierFixedCount
: {
453 for (unsigned matchAmount
= 0; matchAmount
< term
.atom
.quantityCount
; ++matchAmount
) {
454 if (!checkCharacterClass(term
.atom
.characterClass
, term
.invert(), term
.inputPosition
- matchAmount
))
460 case QuantifierGreedy
: {
461 unsigned matchAmount
= 0;
462 while ((matchAmount
< term
.atom
.quantityCount
) && input
.checkInput(1)) {
463 if (!checkCharacterClass(term
.atom
.characterClass
, term
.invert(), term
.inputPosition
+ 1)) {
464 input
.uncheckInput(1);
469 backTrack
->matchAmount
= matchAmount
;
474 case QuantifierNonGreedy
:
475 backTrack
->matchAmount
= 0;
479 RELEASE_ASSERT_NOT_REACHED();
483 bool backtrackCharacterClass(ByteTerm
& term
, DisjunctionContext
* context
)
485 ASSERT(term
.type
== ByteTerm::TypeCharacterClass
);
486 BackTrackInfoPatternCharacter
* backTrack
= reinterpret_cast<BackTrackInfoPatternCharacter
*>(context
->frame
+ term
.frameLocation
);
488 switch (term
.atom
.quantityType
) {
489 case QuantifierFixedCount
:
492 case QuantifierGreedy
:
493 if (backTrack
->matchAmount
) {
494 --backTrack
->matchAmount
;
495 input
.uncheckInput(1);
500 case QuantifierNonGreedy
:
501 if ((backTrack
->matchAmount
< term
.atom
.quantityCount
) && input
.checkInput(1)) {
502 ++backTrack
->matchAmount
;
503 if (checkCharacterClass(term
.atom
.characterClass
, term
.invert(), term
.inputPosition
+ 1))
506 input
.uncheckInput(backTrack
->matchAmount
);
513 bool matchBackReference(ByteTerm
& term
, DisjunctionContext
* context
)
515 ASSERT(term
.type
== ByteTerm::TypeBackReference
);
516 BackTrackInfoBackReference
* backTrack
= reinterpret_cast<BackTrackInfoBackReference
*>(context
->frame
+ term
.frameLocation
);
518 unsigned matchBegin
= output
[(term
.atom
.subpatternId
<< 1)];
519 unsigned matchEnd
= output
[(term
.atom
.subpatternId
<< 1) + 1];
521 // If the end position of the referenced match hasn't set yet then the backreference in the same parentheses where it references to that.
522 // In this case the result of match is empty string like when it references to a parentheses with zero-width match.
524 if (matchEnd
== offsetNoMatch
)
527 if (matchBegin
== offsetNoMatch
)
530 ASSERT(matchBegin
<= matchEnd
);
532 if (matchBegin
== matchEnd
)
535 switch (term
.atom
.quantityType
) {
536 case QuantifierFixedCount
: {
537 backTrack
->begin
= input
.getPos();
538 for (unsigned matchAmount
= 0; matchAmount
< term
.atom
.quantityCount
; ++matchAmount
) {
539 if (!tryConsumeBackReference(matchBegin
, matchEnd
, term
.inputPosition
)) {
540 input
.setPos(backTrack
->begin
);
547 case QuantifierGreedy
: {
548 unsigned matchAmount
= 0;
549 while ((matchAmount
< term
.atom
.quantityCount
) && tryConsumeBackReference(matchBegin
, matchEnd
, term
.inputPosition
))
551 backTrack
->matchAmount
= matchAmount
;
555 case QuantifierNonGreedy
:
556 backTrack
->begin
= input
.getPos();
557 backTrack
->matchAmount
= 0;
561 RELEASE_ASSERT_NOT_REACHED();
565 bool backtrackBackReference(ByteTerm
& term
, DisjunctionContext
* context
)
567 ASSERT(term
.type
== ByteTerm::TypeBackReference
);
568 BackTrackInfoBackReference
* backTrack
= reinterpret_cast<BackTrackInfoBackReference
*>(context
->frame
+ term
.frameLocation
);
570 unsigned matchBegin
= output
[(term
.atom
.subpatternId
<< 1)];
571 unsigned matchEnd
= output
[(term
.atom
.subpatternId
<< 1) + 1];
573 if (matchBegin
== offsetNoMatch
)
576 ASSERT(matchBegin
<= matchEnd
);
578 if (matchBegin
== matchEnd
)
581 switch (term
.atom
.quantityType
) {
582 case QuantifierFixedCount
:
583 // for quantityCount == 1, could rewind.
584 input
.setPos(backTrack
->begin
);
587 case QuantifierGreedy
:
588 if (backTrack
->matchAmount
) {
589 --backTrack
->matchAmount
;
590 input
.rewind(matchEnd
- matchBegin
);
595 case QuantifierNonGreedy
:
596 if ((backTrack
->matchAmount
< term
.atom
.quantityCount
) && tryConsumeBackReference(matchBegin
, matchEnd
, term
.inputPosition
)) {
597 ++backTrack
->matchAmount
;
600 input
.setPos(backTrack
->begin
);
607 void recordParenthesesMatch(ByteTerm
& term
, ParenthesesDisjunctionContext
* context
)
609 if (term
.capture()) {
610 unsigned subpatternId
= term
.atom
.subpatternId
;
611 output
[(subpatternId
<< 1)] = context
->getDisjunctionContext(term
)->matchBegin
+ term
.inputPosition
;
612 output
[(subpatternId
<< 1) + 1] = context
->getDisjunctionContext(term
)->matchEnd
+ term
.inputPosition
;
615 void resetMatches(ByteTerm
& term
, ParenthesesDisjunctionContext
* context
)
617 unsigned firstSubpatternId
= term
.atom
.subpatternId
;
618 unsigned count
= term
.atom
.parenthesesDisjunction
->m_numSubpatterns
;
619 context
->restoreOutput(output
, firstSubpatternId
, count
);
621 JSRegExpResult
parenthesesDoBacktrack(ByteTerm
& term
, BackTrackInfoParentheses
* backTrack
)
623 while (backTrack
->matchAmount
) {
624 ParenthesesDisjunctionContext
* context
= backTrack
->lastContext
;
626 JSRegExpResult result
= matchDisjunction(term
.atom
.parenthesesDisjunction
, context
->getDisjunctionContext(term
), true);
627 if (result
== JSRegExpMatch
)
628 return JSRegExpMatch
;
630 resetMatches(term
, context
);
631 popParenthesesDisjunctionContext(backTrack
);
632 freeParenthesesDisjunctionContext(context
);
634 if (result
!= JSRegExpNoMatch
)
638 return JSRegExpNoMatch
;
641 bool matchParenthesesOnceBegin(ByteTerm
& term
, DisjunctionContext
* context
)
643 ASSERT(term
.type
== ByteTerm::TypeParenthesesSubpatternOnceBegin
);
644 ASSERT(term
.atom
.quantityCount
== 1);
646 BackTrackInfoParenthesesOnce
* backTrack
= reinterpret_cast<BackTrackInfoParenthesesOnce
*>(context
->frame
+ term
.frameLocation
);
648 switch (term
.atom
.quantityType
) {
649 case QuantifierGreedy
: {
650 // set this speculatively; if we get to the parens end this will be true.
651 backTrack
->begin
= input
.getPos();
654 case QuantifierNonGreedy
: {
655 backTrack
->begin
= notFound
;
656 context
->term
+= term
.atom
.parenthesesWidth
;
659 case QuantifierFixedCount
:
663 if (term
.capture()) {
664 unsigned subpatternId
= term
.atom
.subpatternId
;
665 output
[(subpatternId
<< 1)] = input
.getPos() - term
.inputPosition
;
671 bool matchParenthesesOnceEnd(ByteTerm
& term
, DisjunctionContext
* context
)
673 ASSERT(term
.type
== ByteTerm::TypeParenthesesSubpatternOnceEnd
);
674 ASSERT(term
.atom
.quantityCount
== 1);
676 if (term
.capture()) {
677 unsigned subpatternId
= term
.atom
.subpatternId
;
678 output
[(subpatternId
<< 1) + 1] = input
.getPos() + term
.inputPosition
;
681 if (term
.atom
.quantityType
== QuantifierFixedCount
)
684 BackTrackInfoParenthesesOnce
* backTrack
= reinterpret_cast<BackTrackInfoParenthesesOnce
*>(context
->frame
+ term
.frameLocation
);
685 return backTrack
->begin
!= input
.getPos();
688 bool backtrackParenthesesOnceBegin(ByteTerm
& term
, DisjunctionContext
* context
)
690 ASSERT(term
.type
== ByteTerm::TypeParenthesesSubpatternOnceBegin
);
691 ASSERT(term
.atom
.quantityCount
== 1);
693 BackTrackInfoParenthesesOnce
* backTrack
= reinterpret_cast<BackTrackInfoParenthesesOnce
*>(context
->frame
+ term
.frameLocation
);
695 if (term
.capture()) {
696 unsigned subpatternId
= term
.atom
.subpatternId
;
697 output
[(subpatternId
<< 1)] = offsetNoMatch
;
698 output
[(subpatternId
<< 1) + 1] = offsetNoMatch
;
701 switch (term
.atom
.quantityType
) {
702 case QuantifierGreedy
:
703 // if we backtrack to this point, there is another chance - try matching nothing.
704 ASSERT(backTrack
->begin
!= notFound
);
705 backTrack
->begin
= notFound
;
706 context
->term
+= term
.atom
.parenthesesWidth
;
708 case QuantifierNonGreedy
:
709 ASSERT(backTrack
->begin
!= notFound
);
711 case QuantifierFixedCount
:
718 bool backtrackParenthesesOnceEnd(ByteTerm
& term
, DisjunctionContext
* context
)
720 ASSERT(term
.type
== ByteTerm::TypeParenthesesSubpatternOnceEnd
);
721 ASSERT(term
.atom
.quantityCount
== 1);
723 BackTrackInfoParenthesesOnce
* backTrack
= reinterpret_cast<BackTrackInfoParenthesesOnce
*>(context
->frame
+ term
.frameLocation
);
725 switch (term
.atom
.quantityType
) {
726 case QuantifierGreedy
:
727 if (backTrack
->begin
== notFound
) {
728 context
->term
-= term
.atom
.parenthesesWidth
;
732 case QuantifierNonGreedy
:
733 if (backTrack
->begin
== notFound
) {
734 backTrack
->begin
= input
.getPos();
735 if (term
.capture()) {
736 // Technically this access to inputPosition should be accessing the begin term's
737 // inputPosition, but for repeats other than fixed these values should be
738 // the same anyway! (We don't pre-check for greedy or non-greedy matches.)
739 ASSERT((&term
- term
.atom
.parenthesesWidth
)->type
== ByteTerm::TypeParenthesesSubpatternOnceBegin
);
740 ASSERT((&term
- term
.atom
.parenthesesWidth
)->inputPosition
== term
.inputPosition
);
741 unsigned subpatternId
= term
.atom
.subpatternId
;
742 output
[subpatternId
<< 1] = input
.getPos() + term
.inputPosition
;
744 context
->term
-= term
.atom
.parenthesesWidth
;
748 case QuantifierFixedCount
:
755 bool matchParenthesesTerminalBegin(ByteTerm
& term
, DisjunctionContext
* context
)
757 ASSERT(term
.type
== ByteTerm::TypeParenthesesSubpatternTerminalBegin
);
758 ASSERT(term
.atom
.quantityType
== QuantifierGreedy
);
759 ASSERT(term
.atom
.quantityCount
== quantifyInfinite
);
760 ASSERT(!term
.capture());
762 BackTrackInfoParenthesesTerminal
* backTrack
= reinterpret_cast<BackTrackInfoParenthesesTerminal
*>(context
->frame
+ term
.frameLocation
);
763 backTrack
->begin
= input
.getPos();
767 bool matchParenthesesTerminalEnd(ByteTerm
& term
, DisjunctionContext
* context
)
769 ASSERT(term
.type
== ByteTerm::TypeParenthesesSubpatternTerminalEnd
);
771 BackTrackInfoParenthesesTerminal
* backTrack
= reinterpret_cast<BackTrackInfoParenthesesTerminal
*>(context
->frame
+ term
.frameLocation
);
772 // Empty match is a failed match.
773 if (backTrack
->begin
== input
.getPos())
776 // Successful match! Okay, what's next? - loop around and try to match moar!
777 context
->term
-= (term
.atom
.parenthesesWidth
+ 1);
781 bool backtrackParenthesesTerminalBegin(ByteTerm
& term
, DisjunctionContext
* context
)
783 ASSERT(term
.type
== ByteTerm::TypeParenthesesSubpatternTerminalBegin
);
784 ASSERT(term
.atom
.quantityType
== QuantifierGreedy
);
785 ASSERT(term
.atom
.quantityCount
== quantifyInfinite
);
786 ASSERT(!term
.capture());
788 // If we backtrack to this point, we have failed to match this iteration of the parens.
789 // Since this is greedy / zero minimum a failed is also accepted as a match!
790 context
->term
+= term
.atom
.parenthesesWidth
;
794 bool backtrackParenthesesTerminalEnd(ByteTerm
&, DisjunctionContext
*)
796 // 'Terminal' parentheses are at the end of the regex, and as such a match past end
797 // should always be returned as a successful match - we should never backtrack to here.
798 RELEASE_ASSERT_NOT_REACHED();
802 bool matchParentheticalAssertionBegin(ByteTerm
& term
, DisjunctionContext
* context
)
804 ASSERT(term
.type
== ByteTerm::TypeParentheticalAssertionBegin
);
805 ASSERT(term
.atom
.quantityCount
== 1);
807 BackTrackInfoParentheticalAssertion
* backTrack
= reinterpret_cast<BackTrackInfoParentheticalAssertion
*>(context
->frame
+ term
.frameLocation
);
809 backTrack
->begin
= input
.getPos();
813 bool matchParentheticalAssertionEnd(ByteTerm
& term
, DisjunctionContext
* context
)
815 ASSERT(term
.type
== ByteTerm::TypeParentheticalAssertionEnd
);
816 ASSERT(term
.atom
.quantityCount
== 1);
818 BackTrackInfoParentheticalAssertion
* backTrack
= reinterpret_cast<BackTrackInfoParentheticalAssertion
*>(context
->frame
+ term
.frameLocation
);
820 input
.setPos(backTrack
->begin
);
822 // We've reached the end of the parens; if they are inverted, this is failure.
824 context
->term
-= term
.atom
.parenthesesWidth
;
831 bool backtrackParentheticalAssertionBegin(ByteTerm
& term
, DisjunctionContext
* context
)
833 ASSERT(term
.type
== ByteTerm::TypeParentheticalAssertionBegin
);
834 ASSERT(term
.atom
.quantityCount
== 1);
836 // We've failed to match parens; if they are inverted, this is win!
838 context
->term
+= term
.atom
.parenthesesWidth
;
845 bool backtrackParentheticalAssertionEnd(ByteTerm
& term
, DisjunctionContext
* context
)
847 ASSERT(term
.type
== ByteTerm::TypeParentheticalAssertionEnd
);
848 ASSERT(term
.atom
.quantityCount
== 1);
850 BackTrackInfoParentheticalAssertion
* backTrack
= reinterpret_cast<BackTrackInfoParentheticalAssertion
*>(context
->frame
+ term
.frameLocation
);
852 input
.setPos(backTrack
->begin
);
854 context
->term
-= term
.atom
.parenthesesWidth
;
858 JSRegExpResult
matchParentheses(ByteTerm
& term
, DisjunctionContext
* context
)
860 ASSERT(term
.type
== ByteTerm::TypeParenthesesSubpattern
);
862 BackTrackInfoParentheses
* backTrack
= reinterpret_cast<BackTrackInfoParentheses
*>(context
->frame
+ term
.frameLocation
);
863 ByteDisjunction
* disjunctionBody
= term
.atom
.parenthesesDisjunction
;
865 backTrack
->matchAmount
= 0;
866 backTrack
->lastContext
= 0;
868 switch (term
.atom
.quantityType
) {
869 case QuantifierFixedCount
: {
870 // While we haven't yet reached our fixed limit,
871 while (backTrack
->matchAmount
< term
.atom
.quantityCount
) {
872 // Try to do a match, and it it succeeds, add it to the list.
873 ParenthesesDisjunctionContext
* context
= allocParenthesesDisjunctionContext(disjunctionBody
, output
, term
);
874 JSRegExpResult result
= matchDisjunction(disjunctionBody
, context
->getDisjunctionContext(term
));
875 if (result
== JSRegExpMatch
)
876 appendParenthesesDisjunctionContext(backTrack
, context
);
878 // The match failed; try to find an alternate point to carry on from.
879 resetMatches(term
, context
);
880 freeParenthesesDisjunctionContext(context
);
882 if (result
!= JSRegExpNoMatch
)
884 JSRegExpResult backtrackResult
= parenthesesDoBacktrack(term
, backTrack
);
885 if (backtrackResult
!= JSRegExpMatch
)
886 return backtrackResult
;
890 ASSERT(backTrack
->matchAmount
== term
.atom
.quantityCount
);
891 ParenthesesDisjunctionContext
* context
= backTrack
->lastContext
;
892 recordParenthesesMatch(term
, context
);
893 return JSRegExpMatch
;
896 case QuantifierGreedy
: {
897 while (backTrack
->matchAmount
< term
.atom
.quantityCount
) {
898 ParenthesesDisjunctionContext
* context
= allocParenthesesDisjunctionContext(disjunctionBody
, output
, term
);
899 JSRegExpResult result
= matchNonZeroDisjunction(disjunctionBody
, context
->getDisjunctionContext(term
));
900 if (result
== JSRegExpMatch
)
901 appendParenthesesDisjunctionContext(backTrack
, context
);
903 resetMatches(term
, context
);
904 freeParenthesesDisjunctionContext(context
);
906 if (result
!= JSRegExpNoMatch
)
913 if (backTrack
->matchAmount
) {
914 ParenthesesDisjunctionContext
* context
= backTrack
->lastContext
;
915 recordParenthesesMatch(term
, context
);
917 return JSRegExpMatch
;
920 case QuantifierNonGreedy
:
921 return JSRegExpMatch
;
924 RELEASE_ASSERT_NOT_REACHED();
925 return JSRegExpErrorNoMatch
;
928 // Rules for backtracking differ depending on whether this is greedy or non-greedy.
930 // Greedy matches never should try just adding more - you should already have done
931 // the 'more' cases. Always backtrack, at least a leetle bit. However cases where
932 // you backtrack an item off the list needs checking, since we'll never have matched
933 // the one less case. Tracking forwards, still add as much as possible.
935 // Non-greedy, we've already done the one less case, so don't match on popping.
936 // We haven't done the one more case, so always try to add that.
938 JSRegExpResult
backtrackParentheses(ByteTerm
& term
, DisjunctionContext
* context
)
940 ASSERT(term
.type
== ByteTerm::TypeParenthesesSubpattern
);
942 BackTrackInfoParentheses
* backTrack
= reinterpret_cast<BackTrackInfoParentheses
*>(context
->frame
+ term
.frameLocation
);
943 ByteDisjunction
* disjunctionBody
= term
.atom
.parenthesesDisjunction
;
945 switch (term
.atom
.quantityType
) {
946 case QuantifierFixedCount
: {
947 ASSERT(backTrack
->matchAmount
== term
.atom
.quantityCount
);
949 ParenthesesDisjunctionContext
* context
= 0;
950 JSRegExpResult result
= parenthesesDoBacktrack(term
, backTrack
);
952 if (result
!= JSRegExpMatch
)
955 // While we haven't yet reached our fixed limit,
956 while (backTrack
->matchAmount
< term
.atom
.quantityCount
) {
957 // Try to do a match, and it it succeeds, add it to the list.
958 context
= allocParenthesesDisjunctionContext(disjunctionBody
, output
, term
);
959 result
= matchDisjunction(disjunctionBody
, context
->getDisjunctionContext(term
));
961 if (result
== JSRegExpMatch
)
962 appendParenthesesDisjunctionContext(backTrack
, context
);
964 // The match failed; try to find an alternate point to carry on from.
965 resetMatches(term
, context
);
966 freeParenthesesDisjunctionContext(context
);
968 if (result
!= JSRegExpNoMatch
)
970 JSRegExpResult backtrackResult
= parenthesesDoBacktrack(term
, backTrack
);
971 if (backtrackResult
!= JSRegExpMatch
)
972 return backtrackResult
;
976 ASSERT(backTrack
->matchAmount
== term
.atom
.quantityCount
);
977 context
= backTrack
->lastContext
;
978 recordParenthesesMatch(term
, context
);
979 return JSRegExpMatch
;
982 case QuantifierGreedy
: {
983 if (!backTrack
->matchAmount
)
984 return JSRegExpNoMatch
;
986 ParenthesesDisjunctionContext
* context
= backTrack
->lastContext
;
987 JSRegExpResult result
= matchNonZeroDisjunction(disjunctionBody
, context
->getDisjunctionContext(term
), true);
988 if (result
== JSRegExpMatch
) {
989 while (backTrack
->matchAmount
< term
.atom
.quantityCount
) {
990 ParenthesesDisjunctionContext
* context
= allocParenthesesDisjunctionContext(disjunctionBody
, output
, term
);
991 JSRegExpResult parenthesesResult
= matchNonZeroDisjunction(disjunctionBody
, context
->getDisjunctionContext(term
));
992 if (parenthesesResult
== JSRegExpMatch
)
993 appendParenthesesDisjunctionContext(backTrack
, context
);
995 resetMatches(term
, context
);
996 freeParenthesesDisjunctionContext(context
);
998 if (parenthesesResult
!= JSRegExpNoMatch
)
999 return parenthesesResult
;
1005 resetMatches(term
, context
);
1006 popParenthesesDisjunctionContext(backTrack
);
1007 freeParenthesesDisjunctionContext(context
);
1009 if (result
!= JSRegExpNoMatch
)
1013 if (backTrack
->matchAmount
) {
1014 ParenthesesDisjunctionContext
* context
= backTrack
->lastContext
;
1015 recordParenthesesMatch(term
, context
);
1017 return JSRegExpMatch
;
1020 case QuantifierNonGreedy
: {
1021 // If we've not reached the limit, try to add one more match.
1022 if (backTrack
->matchAmount
< term
.atom
.quantityCount
) {
1023 ParenthesesDisjunctionContext
* context
= allocParenthesesDisjunctionContext(disjunctionBody
, output
, term
);
1024 JSRegExpResult result
= matchNonZeroDisjunction(disjunctionBody
, context
->getDisjunctionContext(term
));
1025 if (result
== JSRegExpMatch
) {
1026 appendParenthesesDisjunctionContext(backTrack
, context
);
1027 recordParenthesesMatch(term
, context
);
1028 return JSRegExpMatch
;
1031 resetMatches(term
, context
);
1032 freeParenthesesDisjunctionContext(context
);
1034 if (result
!= JSRegExpNoMatch
)
1038 // Nope - okay backtrack looking for an alternative.
1039 while (backTrack
->matchAmount
) {
1040 ParenthesesDisjunctionContext
* context
= backTrack
->lastContext
;
1041 JSRegExpResult result
= matchNonZeroDisjunction(disjunctionBody
, context
->getDisjunctionContext(term
), true);
1042 if (result
== JSRegExpMatch
) {
1043 // successful backtrack! we're back in the game!
1044 if (backTrack
->matchAmount
) {
1045 context
= backTrack
->lastContext
;
1046 recordParenthesesMatch(term
, context
);
1048 return JSRegExpMatch
;
1051 // pop a match off the stack
1052 resetMatches(term
, context
);
1053 popParenthesesDisjunctionContext(backTrack
);
1054 freeParenthesesDisjunctionContext(context
);
1056 if (result
!= JSRegExpNoMatch
)
1060 return JSRegExpNoMatch
;
1064 RELEASE_ASSERT_NOT_REACHED();
1065 return JSRegExpErrorNoMatch
;
1068 bool matchDotStarEnclosure(ByteTerm
& term
, DisjunctionContext
* context
)
1071 unsigned matchBegin
= context
->matchBegin
;
1074 for (matchBegin
--; true; matchBegin
--) {
1075 if (testCharacterClass(pattern
->newlineCharacterClass
, input
.reread(matchBegin
))) {
1085 unsigned matchEnd
= input
.getPos();
1087 for (; (matchEnd
!= input
.end())
1088 && (!testCharacterClass(pattern
->newlineCharacterClass
, input
.reread(matchEnd
))); matchEnd
++) { }
1090 if (((matchBegin
&& term
.anchors
.m_bol
)
1091 || ((matchEnd
!= input
.end()) && term
.anchors
.m_eol
))
1092 && !pattern
->m_multiline
)
1095 context
->matchBegin
= matchBegin
;
1096 context
->matchEnd
= matchEnd
;
1100 #define MATCH_NEXT() { ++context->term; goto matchAgain; }
1101 #define BACKTRACK() { --context->term; goto backtrack; }
1102 #define currentTerm() (disjunction->terms[context->term])
1103 JSRegExpResult
matchDisjunction(ByteDisjunction
* disjunction
, DisjunctionContext
* context
, bool btrack
= false)
1105 if (!--remainingMatchCount
)
1106 return JSRegExpErrorHitLimit
;
1111 context
->matchBegin
= input
.getPos();
1115 ASSERT(context
->term
< static_cast<int>(disjunction
->terms
.size()));
1117 switch (currentTerm().type
) {
1118 case ByteTerm::TypeSubpatternBegin
:
1120 case ByteTerm::TypeSubpatternEnd
:
1121 context
->matchEnd
= input
.getPos();
1122 return JSRegExpMatch
;
1124 case ByteTerm::TypeBodyAlternativeBegin
:
1126 case ByteTerm::TypeBodyAlternativeDisjunction
:
1127 case ByteTerm::TypeBodyAlternativeEnd
:
1128 context
->matchEnd
= input
.getPos();
1129 return JSRegExpMatch
;
1131 case ByteTerm::TypeAlternativeBegin
:
1133 case ByteTerm::TypeAlternativeDisjunction
:
1134 case ByteTerm::TypeAlternativeEnd
: {
1135 int offset
= currentTerm().alternative
.end
;
1136 BackTrackInfoAlternative
* backTrack
= reinterpret_cast<BackTrackInfoAlternative
*>(context
->frame
+ currentTerm().frameLocation
);
1137 backTrack
->offset
= offset
;
1138 context
->term
+= offset
;
1142 case ByteTerm::TypeAssertionBOL
:
1143 if (matchAssertionBOL(currentTerm()))
1146 case ByteTerm::TypeAssertionEOL
:
1147 if (matchAssertionEOL(currentTerm()))
1150 case ByteTerm::TypeAssertionWordBoundary
:
1151 if (matchAssertionWordBoundary(currentTerm()))
1155 case ByteTerm::TypePatternCharacterOnce
:
1156 case ByteTerm::TypePatternCharacterFixed
: {
1157 for (unsigned matchAmount
= 0; matchAmount
< currentTerm().atom
.quantityCount
; ++matchAmount
) {
1158 if (!checkCharacter(currentTerm().atom
.patternCharacter
, currentTerm().inputPosition
- matchAmount
))
1163 case ByteTerm::TypePatternCharacterGreedy
: {
1164 BackTrackInfoPatternCharacter
* backTrack
= reinterpret_cast<BackTrackInfoPatternCharacter
*>(context
->frame
+ currentTerm().frameLocation
);
1165 unsigned matchAmount
= 0;
1166 while ((matchAmount
< currentTerm().atom
.quantityCount
) && input
.checkInput(1)) {
1167 if (!checkCharacter(currentTerm().atom
.patternCharacter
, currentTerm().inputPosition
+ 1)) {
1168 input
.uncheckInput(1);
1173 backTrack
->matchAmount
= matchAmount
;
1177 case ByteTerm::TypePatternCharacterNonGreedy
: {
1178 BackTrackInfoPatternCharacter
* backTrack
= reinterpret_cast<BackTrackInfoPatternCharacter
*>(context
->frame
+ currentTerm().frameLocation
);
1179 backTrack
->matchAmount
= 0;
1183 case ByteTerm::TypePatternCasedCharacterOnce
:
1184 case ByteTerm::TypePatternCasedCharacterFixed
: {
1185 for (unsigned matchAmount
= 0; matchAmount
< currentTerm().atom
.quantityCount
; ++matchAmount
) {
1186 if (!checkCasedCharacter(currentTerm().atom
.casedCharacter
.lo
, currentTerm().atom
.casedCharacter
.hi
, currentTerm().inputPosition
- matchAmount
))
1191 case ByteTerm::TypePatternCasedCharacterGreedy
: {
1192 BackTrackInfoPatternCharacter
* backTrack
= reinterpret_cast<BackTrackInfoPatternCharacter
*>(context
->frame
+ currentTerm().frameLocation
);
1193 unsigned matchAmount
= 0;
1194 while ((matchAmount
< currentTerm().atom
.quantityCount
) && input
.checkInput(1)) {
1195 if (!checkCasedCharacter(currentTerm().atom
.casedCharacter
.lo
, currentTerm().atom
.casedCharacter
.hi
, currentTerm().inputPosition
+ 1)) {
1196 input
.uncheckInput(1);
1201 backTrack
->matchAmount
= matchAmount
;
1205 case ByteTerm::TypePatternCasedCharacterNonGreedy
: {
1206 BackTrackInfoPatternCharacter
* backTrack
= reinterpret_cast<BackTrackInfoPatternCharacter
*>(context
->frame
+ currentTerm().frameLocation
);
1207 backTrack
->matchAmount
= 0;
1211 case ByteTerm::TypeCharacterClass
:
1212 if (matchCharacterClass(currentTerm(), context
))
1215 case ByteTerm::TypeBackReference
:
1216 if (matchBackReference(currentTerm(), context
))
1219 case ByteTerm::TypeParenthesesSubpattern
: {
1220 JSRegExpResult result
= matchParentheses(currentTerm(), context
);
1222 if (result
== JSRegExpMatch
) {
1224 } else if (result
!= JSRegExpNoMatch
)
1229 case ByteTerm::TypeParenthesesSubpatternOnceBegin
:
1230 if (matchParenthesesOnceBegin(currentTerm(), context
))
1233 case ByteTerm::TypeParenthesesSubpatternOnceEnd
:
1234 if (matchParenthesesOnceEnd(currentTerm(), context
))
1237 case ByteTerm::TypeParenthesesSubpatternTerminalBegin
:
1238 if (matchParenthesesTerminalBegin(currentTerm(), context
))
1241 case ByteTerm::TypeParenthesesSubpatternTerminalEnd
:
1242 if (matchParenthesesTerminalEnd(currentTerm(), context
))
1245 case ByteTerm::TypeParentheticalAssertionBegin
:
1246 if (matchParentheticalAssertionBegin(currentTerm(), context
))
1249 case ByteTerm::TypeParentheticalAssertionEnd
:
1250 if (matchParentheticalAssertionEnd(currentTerm(), context
))
1254 case ByteTerm::TypeCheckInput
:
1255 if (input
.checkInput(currentTerm().checkInputCount
))
1259 case ByteTerm::TypeUncheckInput
:
1260 input
.uncheckInput(currentTerm().checkInputCount
);
1263 case ByteTerm::TypeDotStarEnclosure
:
1264 if (matchDotStarEnclosure(currentTerm(), context
))
1265 return JSRegExpMatch
;
1269 // We should never fall-through to here.
1270 RELEASE_ASSERT_NOT_REACHED();
1273 ASSERT(context
->term
< static_cast<int>(disjunction
->terms
.size()));
1275 switch (currentTerm().type
) {
1276 case ByteTerm::TypeSubpatternBegin
:
1277 return JSRegExpNoMatch
;
1278 case ByteTerm::TypeSubpatternEnd
:
1279 RELEASE_ASSERT_NOT_REACHED();
1281 case ByteTerm::TypeBodyAlternativeBegin
:
1282 case ByteTerm::TypeBodyAlternativeDisjunction
: {
1283 int offset
= currentTerm().alternative
.next
;
1284 context
->term
+= offset
;
1289 return JSRegExpNoMatch
;
1293 context
->matchBegin
= input
.getPos();
1295 if (currentTerm().alternative
.onceThrough
)
1296 context
->term
+= currentTerm().alternative
.next
;
1300 case ByteTerm::TypeBodyAlternativeEnd
:
1301 RELEASE_ASSERT_NOT_REACHED();
1303 case ByteTerm::TypeAlternativeBegin
:
1304 case ByteTerm::TypeAlternativeDisjunction
: {
1305 int offset
= currentTerm().alternative
.next
;
1306 context
->term
+= offset
;
1311 case ByteTerm::TypeAlternativeEnd
: {
1312 // We should never backtrack back into an alternative of the main body of the regex.
1313 BackTrackInfoAlternative
* backTrack
= reinterpret_cast<BackTrackInfoAlternative
*>(context
->frame
+ currentTerm().frameLocation
);
1314 unsigned offset
= backTrack
->offset
;
1315 context
->term
-= offset
;
1319 case ByteTerm::TypeAssertionBOL
:
1320 case ByteTerm::TypeAssertionEOL
:
1321 case ByteTerm::TypeAssertionWordBoundary
:
1324 case ByteTerm::TypePatternCharacterOnce
:
1325 case ByteTerm::TypePatternCharacterFixed
:
1326 case ByteTerm::TypePatternCharacterGreedy
:
1327 case ByteTerm::TypePatternCharacterNonGreedy
:
1328 if (backtrackPatternCharacter(currentTerm(), context
))
1331 case ByteTerm::TypePatternCasedCharacterOnce
:
1332 case ByteTerm::TypePatternCasedCharacterFixed
:
1333 case ByteTerm::TypePatternCasedCharacterGreedy
:
1334 case ByteTerm::TypePatternCasedCharacterNonGreedy
:
1335 if (backtrackPatternCasedCharacter(currentTerm(), context
))
1338 case ByteTerm::TypeCharacterClass
:
1339 if (backtrackCharacterClass(currentTerm(), context
))
1342 case ByteTerm::TypeBackReference
:
1343 if (backtrackBackReference(currentTerm(), context
))
1346 case ByteTerm::TypeParenthesesSubpattern
: {
1347 JSRegExpResult result
= backtrackParentheses(currentTerm(), context
);
1349 if (result
== JSRegExpMatch
) {
1351 } else if (result
!= JSRegExpNoMatch
)
1356 case ByteTerm::TypeParenthesesSubpatternOnceBegin
:
1357 if (backtrackParenthesesOnceBegin(currentTerm(), context
))
1360 case ByteTerm::TypeParenthesesSubpatternOnceEnd
:
1361 if (backtrackParenthesesOnceEnd(currentTerm(), context
))
1364 case ByteTerm::TypeParenthesesSubpatternTerminalBegin
:
1365 if (backtrackParenthesesTerminalBegin(currentTerm(), context
))
1368 case ByteTerm::TypeParenthesesSubpatternTerminalEnd
:
1369 if (backtrackParenthesesTerminalEnd(currentTerm(), context
))
1372 case ByteTerm::TypeParentheticalAssertionBegin
:
1373 if (backtrackParentheticalAssertionBegin(currentTerm(), context
))
1376 case ByteTerm::TypeParentheticalAssertionEnd
:
1377 if (backtrackParentheticalAssertionEnd(currentTerm(), context
))
1381 case ByteTerm::TypeCheckInput
:
1382 input
.uncheckInput(currentTerm().checkInputCount
);
1385 case ByteTerm::TypeUncheckInput
:
1386 input
.checkInput(currentTerm().checkInputCount
);
1389 case ByteTerm::TypeDotStarEnclosure
:
1390 RELEASE_ASSERT_NOT_REACHED();
1393 RELEASE_ASSERT_NOT_REACHED();
1394 return JSRegExpErrorNoMatch
;
1397 JSRegExpResult
matchNonZeroDisjunction(ByteDisjunction
* disjunction
, DisjunctionContext
* context
, bool btrack
= false)
1399 JSRegExpResult result
= matchDisjunction(disjunction
, context
, btrack
);
1401 if (result
== JSRegExpMatch
) {
1402 while (context
->matchBegin
== context
->matchEnd
) {
1403 result
= matchDisjunction(disjunction
, context
, true);
1404 if (result
!= JSRegExpMatch
)
1407 return JSRegExpMatch
;
1413 unsigned interpret()
1415 if (!input
.isAvailableInput(0))
1416 return offsetNoMatch
;
1418 for (unsigned i
= 0; i
< pattern
->m_body
->m_numSubpatterns
+ 1; ++i
)
1419 output
[i
<< 1] = offsetNoMatch
;
1421 allocatorPool
= pattern
->m_allocator
->startAllocator();
1422 RELEASE_ASSERT(allocatorPool
);
1424 DisjunctionContext
* context
= allocDisjunctionContext(pattern
->m_body
.get());
1426 JSRegExpResult result
= matchDisjunction(pattern
->m_body
.get(), context
, false);
1427 if (result
== JSRegExpMatch
) {
1428 output
[0] = context
->matchBegin
;
1429 output
[1] = context
->matchEnd
;
1432 freeDisjunctionContext(context
);
1434 pattern
->m_allocator
->stopAllocator();
1436 ASSERT((result
== JSRegExpMatch
) == (output
[0] != offsetNoMatch
));
1440 Interpreter(BytecodePattern
* pattern
, unsigned* output
, const CharType
* input
, unsigned length
, unsigned start
)
1443 , input(input
, start
, length
)
1445 , remainingMatchCount(matchLimit
)
1450 BytecodePattern
* pattern
;
1453 BumpPointerPool
* allocatorPool
;
1454 unsigned remainingMatchCount
;
1457 class ByteCompiler
{
1458 struct ParenthesesStackEntry
{
1460 unsigned savedAlternativeIndex
;
1461 ParenthesesStackEntry(unsigned beginTerm
, unsigned savedAlternativeIndex
/*, unsigned subpatternId, bool capture = false*/)
1462 : beginTerm(beginTerm
)
1463 , savedAlternativeIndex(savedAlternativeIndex
)
1469 ByteCompiler(YarrPattern
& pattern
)
1470 : m_pattern(pattern
)
1472 m_currentAlternativeIndex
= 0;
1475 std::unique_ptr
<BytecodePattern
> compile(BumpPointerAllocator
* allocator
)
1477 regexBegin(m_pattern
.m_numSubpatterns
, m_pattern
.m_body
->m_callFrameSize
, m_pattern
.m_body
->m_alternatives
[0]->onceThrough());
1478 emitDisjunction(m_pattern
.m_body
);
1481 return std::make_unique
<BytecodePattern
>(WTF::move(m_bodyDisjunction
), m_allParenthesesInfo
, m_pattern
, allocator
);
1484 void checkInput(unsigned count
)
1486 m_bodyDisjunction
->terms
.append(ByteTerm::CheckInput(count
));
1489 void uncheckInput(unsigned count
)
1491 m_bodyDisjunction
->terms
.append(ByteTerm::UncheckInput(count
));
1494 void assertionBOL(unsigned inputPosition
)
1496 m_bodyDisjunction
->terms
.append(ByteTerm::BOL(inputPosition
));
1499 void assertionEOL(unsigned inputPosition
)
1501 m_bodyDisjunction
->terms
.append(ByteTerm::EOL(inputPosition
));
1504 void assertionWordBoundary(bool invert
, unsigned inputPosition
)
1506 m_bodyDisjunction
->terms
.append(ByteTerm::WordBoundary(invert
, inputPosition
));
1509 void atomPatternCharacter(UChar ch
, unsigned inputPosition
, unsigned frameLocation
, Checked
<unsigned> quantityCount
, QuantifierType quantityType
)
1511 if (m_pattern
.m_ignoreCase
) {
1512 ASSERT(u_tolower(ch
) <= 0xFFFF);
1513 ASSERT(u_toupper(ch
) <= 0xFFFF);
1515 UChar lo
= u_tolower(ch
);
1516 UChar hi
= u_toupper(ch
);
1519 m_bodyDisjunction
->terms
.append(ByteTerm(lo
, hi
, inputPosition
, frameLocation
, quantityCount
, quantityType
));
1524 m_bodyDisjunction
->terms
.append(ByteTerm(ch
, inputPosition
, frameLocation
, quantityCount
, quantityType
));
1527 void atomCharacterClass(CharacterClass
* characterClass
, bool invert
, unsigned inputPosition
, unsigned frameLocation
, Checked
<unsigned> quantityCount
, QuantifierType quantityType
)
1529 m_bodyDisjunction
->terms
.append(ByteTerm(characterClass
, invert
, inputPosition
));
1531 m_bodyDisjunction
->terms
[m_bodyDisjunction
->terms
.size() - 1].atom
.quantityCount
= quantityCount
.unsafeGet();
1532 m_bodyDisjunction
->terms
[m_bodyDisjunction
->terms
.size() - 1].atom
.quantityType
= quantityType
;
1533 m_bodyDisjunction
->terms
[m_bodyDisjunction
->terms
.size() - 1].frameLocation
= frameLocation
;
1536 void atomBackReference(unsigned subpatternId
, unsigned inputPosition
, unsigned frameLocation
, Checked
<unsigned> quantityCount
, QuantifierType quantityType
)
1538 ASSERT(subpatternId
);
1540 m_bodyDisjunction
->terms
.append(ByteTerm::BackReference(subpatternId
, inputPosition
));
1542 m_bodyDisjunction
->terms
[m_bodyDisjunction
->terms
.size() - 1].atom
.quantityCount
= quantityCount
.unsafeGet();
1543 m_bodyDisjunction
->terms
[m_bodyDisjunction
->terms
.size() - 1].atom
.quantityType
= quantityType
;
1544 m_bodyDisjunction
->terms
[m_bodyDisjunction
->terms
.size() - 1].frameLocation
= frameLocation
;
1547 void atomParenthesesOnceBegin(unsigned subpatternId
, bool capture
, unsigned inputPosition
, unsigned frameLocation
, unsigned alternativeFrameLocation
)
1549 int beginTerm
= m_bodyDisjunction
->terms
.size();
1551 m_bodyDisjunction
->terms
.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin
, subpatternId
, capture
, false, inputPosition
));
1552 m_bodyDisjunction
->terms
[m_bodyDisjunction
->terms
.size() - 1].frameLocation
= frameLocation
;
1553 m_bodyDisjunction
->terms
.append(ByteTerm::AlternativeBegin());
1554 m_bodyDisjunction
->terms
[m_bodyDisjunction
->terms
.size() - 1].frameLocation
= alternativeFrameLocation
;
1556 m_parenthesesStack
.append(ParenthesesStackEntry(beginTerm
, m_currentAlternativeIndex
));
1557 m_currentAlternativeIndex
= beginTerm
+ 1;
1560 void atomParenthesesTerminalBegin(unsigned subpatternId
, bool capture
, unsigned inputPosition
, unsigned frameLocation
, unsigned alternativeFrameLocation
)
1562 int beginTerm
= m_bodyDisjunction
->terms
.size();
1564 m_bodyDisjunction
->terms
.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalBegin
, subpatternId
, capture
, false, inputPosition
));
1565 m_bodyDisjunction
->terms
[m_bodyDisjunction
->terms
.size() - 1].frameLocation
= frameLocation
;
1566 m_bodyDisjunction
->terms
.append(ByteTerm::AlternativeBegin());
1567 m_bodyDisjunction
->terms
[m_bodyDisjunction
->terms
.size() - 1].frameLocation
= alternativeFrameLocation
;
1569 m_parenthesesStack
.append(ParenthesesStackEntry(beginTerm
, m_currentAlternativeIndex
));
1570 m_currentAlternativeIndex
= beginTerm
+ 1;
1573 void atomParenthesesSubpatternBegin(unsigned subpatternId
, bool capture
, unsigned inputPosition
, unsigned frameLocation
, unsigned alternativeFrameLocation
)
1575 // Errrk! - this is a little crazy, we initially generate as a TypeParenthesesSubpatternOnceBegin,
1576 // then fix this up at the end! - simplifying this should make it much clearer.
1577 // https://bugs.webkit.org/show_bug.cgi?id=50136
1579 int beginTerm
= m_bodyDisjunction
->terms
.size();
1581 m_bodyDisjunction
->terms
.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin
, subpatternId
, capture
, false, inputPosition
));
1582 m_bodyDisjunction
->terms
[m_bodyDisjunction
->terms
.size() - 1].frameLocation
= frameLocation
;
1583 m_bodyDisjunction
->terms
.append(ByteTerm::AlternativeBegin());
1584 m_bodyDisjunction
->terms
[m_bodyDisjunction
->terms
.size() - 1].frameLocation
= alternativeFrameLocation
;
1586 m_parenthesesStack
.append(ParenthesesStackEntry(beginTerm
, m_currentAlternativeIndex
));
1587 m_currentAlternativeIndex
= beginTerm
+ 1;
1590 void atomParentheticalAssertionBegin(unsigned subpatternId
, bool invert
, unsigned frameLocation
, unsigned alternativeFrameLocation
)
1592 int beginTerm
= m_bodyDisjunction
->terms
.size();
1594 m_bodyDisjunction
->terms
.append(ByteTerm(ByteTerm::TypeParentheticalAssertionBegin
, subpatternId
, false, invert
, 0));
1595 m_bodyDisjunction
->terms
[m_bodyDisjunction
->terms
.size() - 1].frameLocation
= frameLocation
;
1596 m_bodyDisjunction
->terms
.append(ByteTerm::AlternativeBegin());
1597 m_bodyDisjunction
->terms
[m_bodyDisjunction
->terms
.size() - 1].frameLocation
= alternativeFrameLocation
;
1599 m_parenthesesStack
.append(ParenthesesStackEntry(beginTerm
, m_currentAlternativeIndex
));
1600 m_currentAlternativeIndex
= beginTerm
+ 1;
1603 void atomParentheticalAssertionEnd(unsigned inputPosition
, unsigned frameLocation
, Checked
<unsigned> quantityCount
, QuantifierType quantityType
)
1605 unsigned beginTerm
= popParenthesesStack();
1606 closeAlternative(beginTerm
+ 1);
1607 unsigned endTerm
= m_bodyDisjunction
->terms
.size();
1609 ASSERT(m_bodyDisjunction
->terms
[beginTerm
].type
== ByteTerm::TypeParentheticalAssertionBegin
);
1611 bool invert
= m_bodyDisjunction
->terms
[beginTerm
].invert();
1612 unsigned subpatternId
= m_bodyDisjunction
->terms
[beginTerm
].atom
.subpatternId
;
1614 m_bodyDisjunction
->terms
.append(ByteTerm(ByteTerm::TypeParentheticalAssertionEnd
, subpatternId
, false, invert
, inputPosition
));
1615 m_bodyDisjunction
->terms
[beginTerm
].atom
.parenthesesWidth
= endTerm
- beginTerm
;
1616 m_bodyDisjunction
->terms
[endTerm
].atom
.parenthesesWidth
= endTerm
- beginTerm
;
1617 m_bodyDisjunction
->terms
[endTerm
].frameLocation
= frameLocation
;
1619 m_bodyDisjunction
->terms
[beginTerm
].atom
.quantityCount
= quantityCount
.unsafeGet();
1620 m_bodyDisjunction
->terms
[beginTerm
].atom
.quantityType
= quantityType
;
1621 m_bodyDisjunction
->terms
[endTerm
].atom
.quantityCount
= quantityCount
.unsafeGet();
1622 m_bodyDisjunction
->terms
[endTerm
].atom
.quantityType
= quantityType
;
1625 void assertionDotStarEnclosure(bool bolAnchored
, bool eolAnchored
)
1627 m_bodyDisjunction
->terms
.append(ByteTerm::DotStarEnclosure(bolAnchored
, eolAnchored
));
1630 unsigned popParenthesesStack()
1632 ASSERT(m_parenthesesStack
.size());
1633 int stackEnd
= m_parenthesesStack
.size() - 1;
1634 unsigned beginTerm
= m_parenthesesStack
[stackEnd
].beginTerm
;
1635 m_currentAlternativeIndex
= m_parenthesesStack
[stackEnd
].savedAlternativeIndex
;
1636 m_parenthesesStack
.shrink(stackEnd
);
1638 ASSERT(beginTerm
< m_bodyDisjunction
->terms
.size());
1639 ASSERT(m_currentAlternativeIndex
< m_bodyDisjunction
->terms
.size());
1645 void dumpDisjunction(ByteDisjunction
* disjunction
)
1647 dataLogF("ByteDisjunction(%p):\n\t", disjunction
);
1648 for (unsigned i
= 0; i
< disjunction
->terms
.size(); ++i
)
1649 dataLogF("{ %d } ", disjunction
->terms
[i
].type
);
1654 void closeAlternative(int beginTerm
)
1656 int origBeginTerm
= beginTerm
;
1657 ASSERT(m_bodyDisjunction
->terms
[beginTerm
].type
== ByteTerm::TypeAlternativeBegin
);
1658 int endIndex
= m_bodyDisjunction
->terms
.size();
1660 unsigned frameLocation
= m_bodyDisjunction
->terms
[beginTerm
].frameLocation
;
1662 if (!m_bodyDisjunction
->terms
[beginTerm
].alternative
.next
)
1663 m_bodyDisjunction
->terms
.remove(beginTerm
);
1665 while (m_bodyDisjunction
->terms
[beginTerm
].alternative
.next
) {
1666 beginTerm
+= m_bodyDisjunction
->terms
[beginTerm
].alternative
.next
;
1667 ASSERT(m_bodyDisjunction
->terms
[beginTerm
].type
== ByteTerm::TypeAlternativeDisjunction
);
1668 m_bodyDisjunction
->terms
[beginTerm
].alternative
.end
= endIndex
- beginTerm
;
1669 m_bodyDisjunction
->terms
[beginTerm
].frameLocation
= frameLocation
;
1672 m_bodyDisjunction
->terms
[beginTerm
].alternative
.next
= origBeginTerm
- beginTerm
;
1674 m_bodyDisjunction
->terms
.append(ByteTerm::AlternativeEnd());
1675 m_bodyDisjunction
->terms
[endIndex
].frameLocation
= frameLocation
;
1679 void closeBodyAlternative()
1682 int origBeginTerm
= 0;
1683 ASSERT(m_bodyDisjunction
->terms
[beginTerm
].type
== ByteTerm::TypeBodyAlternativeBegin
);
1684 int endIndex
= m_bodyDisjunction
->terms
.size();
1686 unsigned frameLocation
= m_bodyDisjunction
->terms
[beginTerm
].frameLocation
;
1688 while (m_bodyDisjunction
->terms
[beginTerm
].alternative
.next
) {
1689 beginTerm
+= m_bodyDisjunction
->terms
[beginTerm
].alternative
.next
;
1690 ASSERT(m_bodyDisjunction
->terms
[beginTerm
].type
== ByteTerm::TypeBodyAlternativeDisjunction
);
1691 m_bodyDisjunction
->terms
[beginTerm
].alternative
.end
= endIndex
- beginTerm
;
1692 m_bodyDisjunction
->terms
[beginTerm
].frameLocation
= frameLocation
;
1695 m_bodyDisjunction
->terms
[beginTerm
].alternative
.next
= origBeginTerm
- beginTerm
;
1697 m_bodyDisjunction
->terms
.append(ByteTerm::BodyAlternativeEnd());
1698 m_bodyDisjunction
->terms
[endIndex
].frameLocation
= frameLocation
;
1701 void atomParenthesesSubpatternEnd(unsigned lastSubpatternId
, int inputPosition
, unsigned frameLocation
, Checked
<unsigned> quantityCount
, QuantifierType quantityType
, unsigned callFrameSize
= 0)
1703 unsigned beginTerm
= popParenthesesStack();
1704 closeAlternative(beginTerm
+ 1);
1705 unsigned endTerm
= m_bodyDisjunction
->terms
.size();
1707 ASSERT(m_bodyDisjunction
->terms
[beginTerm
].type
== ByteTerm::TypeParenthesesSubpatternOnceBegin
);
1709 ByteTerm
& parenthesesBegin
= m_bodyDisjunction
->terms
[beginTerm
];
1711 bool capture
= parenthesesBegin
.capture();
1712 unsigned subpatternId
= parenthesesBegin
.atom
.subpatternId
;
1714 unsigned numSubpatterns
= lastSubpatternId
- subpatternId
+ 1;
1715 auto parenthesesDisjunction
= std::make_unique
<ByteDisjunction
>(numSubpatterns
, callFrameSize
);
1717 unsigned firstTermInParentheses
= beginTerm
+ 1;
1718 parenthesesDisjunction
->terms
.reserveInitialCapacity(endTerm
- firstTermInParentheses
+ 2);
1720 parenthesesDisjunction
->terms
.append(ByteTerm::SubpatternBegin());
1721 for (unsigned termInParentheses
= firstTermInParentheses
; termInParentheses
< endTerm
; ++termInParentheses
)
1722 parenthesesDisjunction
->terms
.append(m_bodyDisjunction
->terms
[termInParentheses
]);
1723 parenthesesDisjunction
->terms
.append(ByteTerm::SubpatternEnd());
1725 m_bodyDisjunction
->terms
.shrink(beginTerm
);
1727 m_bodyDisjunction
->terms
.append(ByteTerm(ByteTerm::TypeParenthesesSubpattern
, subpatternId
, parenthesesDisjunction
.get(), capture
, inputPosition
));
1728 m_allParenthesesInfo
.append(WTF::move(parenthesesDisjunction
));
1730 m_bodyDisjunction
->terms
[beginTerm
].atom
.quantityCount
= quantityCount
.unsafeGet();
1731 m_bodyDisjunction
->terms
[beginTerm
].atom
.quantityType
= quantityType
;
1732 m_bodyDisjunction
->terms
[beginTerm
].frameLocation
= frameLocation
;
1735 void atomParenthesesOnceEnd(int inputPosition
, unsigned frameLocation
, Checked
<unsigned> quantityCount
, QuantifierType quantityType
)
1737 unsigned beginTerm
= popParenthesesStack();
1738 closeAlternative(beginTerm
+ 1);
1739 unsigned endTerm
= m_bodyDisjunction
->terms
.size();
1741 ASSERT(m_bodyDisjunction
->terms
[beginTerm
].type
== ByteTerm::TypeParenthesesSubpatternOnceBegin
);
1743 bool capture
= m_bodyDisjunction
->terms
[beginTerm
].capture();
1744 unsigned subpatternId
= m_bodyDisjunction
->terms
[beginTerm
].atom
.subpatternId
;
1746 m_bodyDisjunction
->terms
.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceEnd
, subpatternId
, capture
, false, inputPosition
));
1747 m_bodyDisjunction
->terms
[beginTerm
].atom
.parenthesesWidth
= endTerm
- beginTerm
;
1748 m_bodyDisjunction
->terms
[endTerm
].atom
.parenthesesWidth
= endTerm
- beginTerm
;
1749 m_bodyDisjunction
->terms
[endTerm
].frameLocation
= frameLocation
;
1751 m_bodyDisjunction
->terms
[beginTerm
].atom
.quantityCount
= quantityCount
.unsafeGet();
1752 m_bodyDisjunction
->terms
[beginTerm
].atom
.quantityType
= quantityType
;
1753 m_bodyDisjunction
->terms
[endTerm
].atom
.quantityCount
= quantityCount
.unsafeGet();
1754 m_bodyDisjunction
->terms
[endTerm
].atom
.quantityType
= quantityType
;
1757 void atomParenthesesTerminalEnd(int inputPosition
, unsigned frameLocation
, Checked
<unsigned> quantityCount
, QuantifierType quantityType
)
1759 unsigned beginTerm
= popParenthesesStack();
1760 closeAlternative(beginTerm
+ 1);
1761 unsigned endTerm
= m_bodyDisjunction
->terms
.size();
1763 ASSERT(m_bodyDisjunction
->terms
[beginTerm
].type
== ByteTerm::TypeParenthesesSubpatternTerminalBegin
);
1765 bool capture
= m_bodyDisjunction
->terms
[beginTerm
].capture();
1766 unsigned subpatternId
= m_bodyDisjunction
->terms
[beginTerm
].atom
.subpatternId
;
1768 m_bodyDisjunction
->terms
.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalEnd
, subpatternId
, capture
, false, inputPosition
));
1769 m_bodyDisjunction
->terms
[beginTerm
].atom
.parenthesesWidth
= endTerm
- beginTerm
;
1770 m_bodyDisjunction
->terms
[endTerm
].atom
.parenthesesWidth
= endTerm
- beginTerm
;
1771 m_bodyDisjunction
->terms
[endTerm
].frameLocation
= frameLocation
;
1773 m_bodyDisjunction
->terms
[beginTerm
].atom
.quantityCount
= quantityCount
.unsafeGet();
1774 m_bodyDisjunction
->terms
[beginTerm
].atom
.quantityType
= quantityType
;
1775 m_bodyDisjunction
->terms
[endTerm
].atom
.quantityCount
= quantityCount
.unsafeGet();
1776 m_bodyDisjunction
->terms
[endTerm
].atom
.quantityType
= quantityType
;
1779 void regexBegin(unsigned numSubpatterns
, unsigned callFrameSize
, bool onceThrough
)
1781 m_bodyDisjunction
= std::make_unique
<ByteDisjunction
>(numSubpatterns
, callFrameSize
);
1782 m_bodyDisjunction
->terms
.append(ByteTerm::BodyAlternativeBegin(onceThrough
));
1783 m_bodyDisjunction
->terms
[0].frameLocation
= 0;
1784 m_currentAlternativeIndex
= 0;
1789 closeBodyAlternative();
1792 void alternativeBodyDisjunction(bool onceThrough
)
1794 int newAlternativeIndex
= m_bodyDisjunction
->terms
.size();
1795 m_bodyDisjunction
->terms
[m_currentAlternativeIndex
].alternative
.next
= newAlternativeIndex
- m_currentAlternativeIndex
;
1796 m_bodyDisjunction
->terms
.append(ByteTerm::BodyAlternativeDisjunction(onceThrough
));
1798 m_currentAlternativeIndex
= newAlternativeIndex
;
1801 void alternativeDisjunction()
1803 int newAlternativeIndex
= m_bodyDisjunction
->terms
.size();
1804 m_bodyDisjunction
->terms
[m_currentAlternativeIndex
].alternative
.next
= newAlternativeIndex
- m_currentAlternativeIndex
;
1805 m_bodyDisjunction
->terms
.append(ByteTerm::AlternativeDisjunction());
1807 m_currentAlternativeIndex
= newAlternativeIndex
;
1810 void emitDisjunction(PatternDisjunction
* disjunction
, unsigned inputCountAlreadyChecked
= 0, unsigned parenthesesInputCountAlreadyChecked
= 0)
1812 for (unsigned alt
= 0; alt
< disjunction
->m_alternatives
.size(); ++alt
) {
1813 unsigned currentCountAlreadyChecked
= inputCountAlreadyChecked
;
1815 PatternAlternative
* alternative
= disjunction
->m_alternatives
[alt
].get();
1818 if (disjunction
== m_pattern
.m_body
)
1819 alternativeBodyDisjunction(alternative
->onceThrough());
1821 alternativeDisjunction();
1824 unsigned minimumSize
= alternative
->m_minimumSize
;
1825 ASSERT(minimumSize
>= parenthesesInputCountAlreadyChecked
);
1826 unsigned countToCheck
= minimumSize
- parenthesesInputCountAlreadyChecked
;
1829 checkInput(countToCheck
);
1830 currentCountAlreadyChecked
+= countToCheck
;
1833 for (unsigned i
= 0; i
< alternative
->m_terms
.size(); ++i
) {
1834 PatternTerm
& term
= alternative
->m_terms
[i
];
1836 switch (term
.type
) {
1837 case PatternTerm::TypeAssertionBOL
:
1838 assertionBOL(currentCountAlreadyChecked
- term
.inputPosition
);
1841 case PatternTerm::TypeAssertionEOL
:
1842 assertionEOL(currentCountAlreadyChecked
- term
.inputPosition
);
1845 case PatternTerm::TypeAssertionWordBoundary
:
1846 assertionWordBoundary(term
.invert(), currentCountAlreadyChecked
- term
.inputPosition
);
1849 case PatternTerm::TypePatternCharacter
:
1850 atomPatternCharacter(term
.patternCharacter
, currentCountAlreadyChecked
- term
.inputPosition
, term
.frameLocation
, term
.quantityCount
, term
.quantityType
);
1853 case PatternTerm::TypeCharacterClass
:
1854 atomCharacterClass(term
.characterClass
, term
.invert(), currentCountAlreadyChecked
- term
.inputPosition
, term
.frameLocation
, term
.quantityCount
, term
.quantityType
);
1857 case PatternTerm::TypeBackReference
:
1858 atomBackReference(term
.backReferenceSubpatternId
, currentCountAlreadyChecked
- term
.inputPosition
, term
.frameLocation
, term
.quantityCount
, term
.quantityType
);
1861 case PatternTerm::TypeForwardReference
:
1864 case PatternTerm::TypeParenthesesSubpattern
: {
1865 unsigned disjunctionAlreadyCheckedCount
= 0;
1866 if (term
.quantityCount
== 1 && !term
.parentheses
.isCopy
) {
1867 unsigned alternativeFrameLocation
= term
.frameLocation
;
1868 // For QuantifierFixedCount we pre-check the minimum size; for greedy/non-greedy we reserve a slot in the frame.
1869 if (term
.quantityType
== QuantifierFixedCount
)
1870 disjunctionAlreadyCheckedCount
= term
.parentheses
.disjunction
->m_minimumSize
;
1872 alternativeFrameLocation
+= YarrStackSpaceForBackTrackInfoParenthesesOnce
;
1873 unsigned delegateEndInputOffset
= term
.inputPosition
- currentCountAlreadyChecked
;
1874 atomParenthesesOnceBegin(term
.parentheses
.subpatternId
, term
.capture(), disjunctionAlreadyCheckedCount
- delegateEndInputOffset
, term
.frameLocation
, alternativeFrameLocation
);
1875 emitDisjunction(term
.parentheses
.disjunction
, currentCountAlreadyChecked
, disjunctionAlreadyCheckedCount
);
1876 atomParenthesesOnceEnd(delegateEndInputOffset
, term
.frameLocation
, term
.quantityCount
, term
.quantityType
);
1877 } else if (term
.parentheses
.isTerminal
) {
1878 unsigned delegateEndInputOffset
= term
.inputPosition
- currentCountAlreadyChecked
;
1879 atomParenthesesTerminalBegin(term
.parentheses
.subpatternId
, term
.capture(), disjunctionAlreadyCheckedCount
- delegateEndInputOffset
, term
.frameLocation
, term
.frameLocation
+ YarrStackSpaceForBackTrackInfoParenthesesOnce
);
1880 emitDisjunction(term
.parentheses
.disjunction
, currentCountAlreadyChecked
, disjunctionAlreadyCheckedCount
);
1881 atomParenthesesTerminalEnd(delegateEndInputOffset
, term
.frameLocation
, term
.quantityCount
, term
.quantityType
);
1883 unsigned delegateEndInputOffset
= term
.inputPosition
- currentCountAlreadyChecked
;
1884 atomParenthesesSubpatternBegin(term
.parentheses
.subpatternId
, term
.capture(), disjunctionAlreadyCheckedCount
- delegateEndInputOffset
, term
.frameLocation
, 0);
1885 emitDisjunction(term
.parentheses
.disjunction
, currentCountAlreadyChecked
, 0);
1886 atomParenthesesSubpatternEnd(term
.parentheses
.lastSubpatternId
, delegateEndInputOffset
, term
.frameLocation
, term
.quantityCount
, term
.quantityType
, term
.parentheses
.disjunction
->m_callFrameSize
);
1891 case PatternTerm::TypeParentheticalAssertion
: {
1892 unsigned alternativeFrameLocation
= term
.frameLocation
+ YarrStackSpaceForBackTrackInfoParentheticalAssertion
;
1894 ASSERT(currentCountAlreadyChecked
>= static_cast<unsigned>(term
.inputPosition
));
1895 unsigned positiveInputOffset
= currentCountAlreadyChecked
- static_cast<unsigned>(term
.inputPosition
);
1896 unsigned uncheckAmount
= 0;
1897 if (positiveInputOffset
> term
.parentheses
.disjunction
->m_minimumSize
) {
1898 uncheckAmount
= positiveInputOffset
- term
.parentheses
.disjunction
->m_minimumSize
;
1899 uncheckInput(uncheckAmount
);
1900 currentCountAlreadyChecked
-= uncheckAmount
;
1903 atomParentheticalAssertionBegin(term
.parentheses
.subpatternId
, term
.invert(), term
.frameLocation
, alternativeFrameLocation
);
1904 emitDisjunction(term
.parentheses
.disjunction
, currentCountAlreadyChecked
, positiveInputOffset
- uncheckAmount
);
1905 atomParentheticalAssertionEnd(0, term
.frameLocation
, term
.quantityCount
, term
.quantityType
);
1906 if (uncheckAmount
) {
1907 checkInput(uncheckAmount
);
1908 currentCountAlreadyChecked
+= uncheckAmount
;
1913 case PatternTerm::TypeDotStarEnclosure
:
1914 assertionDotStarEnclosure(term
.anchors
.bolAnchor
, term
.anchors
.eolAnchor
);
1922 YarrPattern
& m_pattern
;
1923 std::unique_ptr
<ByteDisjunction
> m_bodyDisjunction
;
1924 unsigned m_currentAlternativeIndex
;
1925 Vector
<ParenthesesStackEntry
> m_parenthesesStack
;
1926 Vector
<std::unique_ptr
<ByteDisjunction
>> m_allParenthesesInfo
;
1929 std::unique_ptr
<BytecodePattern
> byteCompile(YarrPattern
& pattern
, BumpPointerAllocator
* allocator
)
1931 return ByteCompiler(pattern
).compile(allocator
);
1934 unsigned interpret(BytecodePattern
* bytecode
, const String
& input
, unsigned start
, unsigned* output
)
1937 return Interpreter
<LChar
>(bytecode
, output
, input
.characters8(), input
.length(), start
).interpret();
1938 return Interpreter
<UChar
>(bytecode
, output
, input
.characters16(), input
.length(), start
).interpret();
1941 unsigned interpret(BytecodePattern
* bytecode
, const LChar
* input
, unsigned length
, unsigned start
, unsigned* output
)
1943 return Interpreter
<LChar
>(bytecode
, output
, input
, length
, start
).interpret();
1946 unsigned interpret(BytecodePattern
* bytecode
, const UChar
* input
, unsigned length
, unsigned start
, unsigned* output
)
1948 return Interpreter
<UChar
>(bytecode
, output
, input
, length
, start
).interpret();
1951 // These should be the same for both UChar & LChar.
1952 COMPILE_ASSERT(sizeof(Interpreter
<UChar
>::BackTrackInfoPatternCharacter
) == (YarrStackSpaceForBackTrackInfoPatternCharacter
* sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoPatternCharacter
);
1953 COMPILE_ASSERT(sizeof(Interpreter
<UChar
>::BackTrackInfoCharacterClass
) == (YarrStackSpaceForBackTrackInfoCharacterClass
* sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoCharacterClass
);
1954 COMPILE_ASSERT(sizeof(Interpreter
<UChar
>::BackTrackInfoBackReference
) == (YarrStackSpaceForBackTrackInfoBackReference
* sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoBackReference
);
1955 COMPILE_ASSERT(sizeof(Interpreter
<UChar
>::BackTrackInfoAlternative
) == (YarrStackSpaceForBackTrackInfoAlternative
* sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoAlternative
);
1956 COMPILE_ASSERT(sizeof(Interpreter
<UChar
>::BackTrackInfoParentheticalAssertion
) == (YarrStackSpaceForBackTrackInfoParentheticalAssertion
* sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheticalAssertion
);
1957 COMPILE_ASSERT(sizeof(Interpreter
<UChar
>::BackTrackInfoParenthesesOnce
) == (YarrStackSpaceForBackTrackInfoParenthesesOnce
* sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParenthesesOnce
);
1958 COMPILE_ASSERT(sizeof(Interpreter
<UChar
>::BackTrackInfoParentheses
) == (YarrStackSpaceForBackTrackInfoParentheses
* sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheses
);