2 * Copyright (C) 2009 Apple Inc. All rights reserved.
3 * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 #include "YarrInterpreter.h"
31 #include "YarrCanonicalizeUCS2.h"
32 #include <wtf/BumpPointerAllocator.h>
33 #include <wtf/DataLog.h>
34 #include <wtf/text/CString.h>
35 #include <wtf/text/WTFString.h>
43 namespace JSC
{ namespace Yarr
{
45 template<typename CharType
>
48 struct ParenthesesDisjunctionContext
;
50 struct BackTrackInfoPatternCharacter
{
51 uintptr_t matchAmount
;
53 struct BackTrackInfoCharacterClass
{
54 uintptr_t matchAmount
;
56 struct BackTrackInfoBackReference
{
57 uintptr_t begin
; // Not really needed for greedy quantifiers.
58 uintptr_t matchAmount
; // Not really needed for fixed quantifiers.
60 struct BackTrackInfoAlternative
{
63 struct BackTrackInfoParentheticalAssertion
{
66 struct BackTrackInfoParenthesesOnce
{
69 struct BackTrackInfoParenthesesTerminal
{
72 struct BackTrackInfoParentheses
{
73 uintptr_t matchAmount
;
74 ParenthesesDisjunctionContext
* lastContext
;
77 static inline void appendParenthesesDisjunctionContext(BackTrackInfoParentheses
* backTrack
, ParenthesesDisjunctionContext
* context
)
79 context
->next
= backTrack
->lastContext
;
80 backTrack
->lastContext
= context
;
81 ++backTrack
->matchAmount
;
84 static inline void popParenthesesDisjunctionContext(BackTrackInfoParentheses
* backTrack
)
86 RELEASE_ASSERT(backTrack
->matchAmount
);
87 RELEASE_ASSERT(backTrack
->lastContext
);
88 backTrack
->lastContext
= backTrack
->lastContext
->next
;
89 --backTrack
->matchAmount
;
92 struct DisjunctionContext
99 void* operator new(size_t, void* where
)
110 DisjunctionContext
* allocDisjunctionContext(ByteDisjunction
* disjunction
)
112 size_t size
= sizeof(DisjunctionContext
) - sizeof(uintptr_t) + disjunction
->m_frameSize
* sizeof(uintptr_t);
113 allocatorPool
= allocatorPool
->ensureCapacity(size
);
114 RELEASE_ASSERT(allocatorPool
);
115 return new (allocatorPool
->alloc(size
)) DisjunctionContext();
118 void freeDisjunctionContext(DisjunctionContext
* context
)
120 allocatorPool
= allocatorPool
->dealloc(context
);
123 struct ParenthesesDisjunctionContext
125 ParenthesesDisjunctionContext(unsigned* output
, ByteTerm
& term
)
128 unsigned firstSubpatternId
= term
.atom
.subpatternId
;
129 unsigned numNestedSubpatterns
= term
.atom
.parenthesesDisjunction
->m_numSubpatterns
;
131 for (unsigned i
= 0; i
< (numNestedSubpatterns
<< 1); ++i
) {
132 subpatternBackup
[i
] = output
[(firstSubpatternId
<< 1) + i
];
133 output
[(firstSubpatternId
<< 1) + i
] = offsetNoMatch
;
136 new (getDisjunctionContext(term
)) DisjunctionContext();
139 void* operator new(size_t, void* where
)
144 void restoreOutput(unsigned* output
, unsigned firstSubpatternId
, unsigned numNestedSubpatterns
)
146 for (unsigned i
= 0; i
< (numNestedSubpatterns
<< 1); ++i
)
147 output
[(firstSubpatternId
<< 1) + i
] = subpatternBackup
[i
];
150 DisjunctionContext
* getDisjunctionContext(ByteTerm
& term
)
152 return reinterpret_cast<DisjunctionContext
*>(&(subpatternBackup
[term
.atom
.parenthesesDisjunction
->m_numSubpatterns
<< 1]));
155 ParenthesesDisjunctionContext
* next
;
156 unsigned subpatternBackup
[1];
159 ParenthesesDisjunctionContext
* allocParenthesesDisjunctionContext(ByteDisjunction
* disjunction
, unsigned* output
, ByteTerm
& term
)
161 size_t size
= sizeof(ParenthesesDisjunctionContext
) - sizeof(unsigned) + (term
.atom
.parenthesesDisjunction
->m_numSubpatterns
<< 1) * sizeof(unsigned) + sizeof(DisjunctionContext
) - sizeof(uintptr_t) + disjunction
->m_frameSize
* sizeof(uintptr_t);
162 allocatorPool
= allocatorPool
->ensureCapacity(size
);
163 RELEASE_ASSERT(allocatorPool
);
164 return new (allocatorPool
->alloc(size
)) ParenthesesDisjunctionContext(output
, term
);
167 void freeParenthesesDisjunctionContext(ParenthesesDisjunctionContext
* context
)
169 allocatorPool
= allocatorPool
->dealloc(context
);
174 InputStream(const CharType
* input
, unsigned start
, unsigned length
)
186 void rewind(unsigned amount
)
188 ASSERT(pos
>= amount
);
194 ASSERT(pos
< length
);
202 ASSERT(pos
+ 1 < length
);
203 return input
[pos
] | input
[pos
+ 1] << 16;
206 int readChecked(unsigned negativePositionOffest
)
208 RELEASE_ASSERT(pos
>= negativePositionOffest
);
209 unsigned p
= pos
- negativePositionOffest
;
214 int reread(unsigned from
)
216 ASSERT(from
< length
);
222 ASSERT(!(pos
> length
));
224 return input
[pos
- 1];
233 void setPos(unsigned p
)
245 return pos
== length
;
253 bool checkInput(unsigned count
)
255 if (((pos
+ count
) <= length
) && ((pos
+ count
) >= pos
)) {
262 void uncheckInput(unsigned count
)
264 RELEASE_ASSERT(pos
>= count
);
268 bool atStart(unsigned negativePositionOffest
)
270 return pos
== negativePositionOffest
;
273 bool atEnd(unsigned negativePositionOffest
)
275 RELEASE_ASSERT(pos
>= negativePositionOffest
);
276 return (pos
- negativePositionOffest
) == length
;
279 bool isAvailableInput(unsigned offset
)
281 return (((pos
+ offset
) <= length
) && ((pos
+ offset
) >= pos
));
285 const CharType
* input
;
290 bool testCharacterClass(CharacterClass
* characterClass
, int ch
)
293 for (unsigned i
= 0; i
< characterClass
->m_matchesUnicode
.size(); ++i
)
294 if (ch
== characterClass
->m_matchesUnicode
[i
])
296 for (unsigned i
= 0; i
< characterClass
->m_rangesUnicode
.size(); ++i
)
297 if ((ch
>= characterClass
->m_rangesUnicode
[i
].begin
) && (ch
<= characterClass
->m_rangesUnicode
[i
].end
))
300 for (unsigned i
= 0; i
< characterClass
->m_matches
.size(); ++i
)
301 if (ch
== characterClass
->m_matches
[i
])
303 for (unsigned i
= 0; i
< characterClass
->m_ranges
.size(); ++i
)
304 if ((ch
>= characterClass
->m_ranges
[i
].begin
) && (ch
<= characterClass
->m_ranges
[i
].end
))
311 bool checkCharacter(int testChar
, unsigned negativeInputOffset
)
313 return testChar
== input
.readChecked(negativeInputOffset
);
316 bool checkCasedCharacter(int loChar
, int hiChar
, unsigned negativeInputOffset
)
318 int ch
= input
.readChecked(negativeInputOffset
);
319 return (loChar
== ch
) || (hiChar
== ch
);
322 bool checkCharacterClass(CharacterClass
* characterClass
, bool invert
, unsigned negativeInputOffset
)
324 bool match
= testCharacterClass(characterClass
, input
.readChecked(negativeInputOffset
));
325 return invert
? !match
: match
;
328 bool tryConsumeBackReference(int matchBegin
, int matchEnd
, unsigned negativeInputOffset
)
330 unsigned matchSize
= (unsigned)(matchEnd
- matchBegin
);
332 if (!input
.checkInput(matchSize
))
335 if (pattern
->m_ignoreCase
) {
336 for (unsigned i
= 0; i
< matchSize
; ++i
) {
337 int oldCh
= input
.reread(matchBegin
+ i
);
338 int ch
= input
.readChecked(negativeInputOffset
+ matchSize
- i
);
343 // The definition for canonicalize (see ES 5.1, 15.10.2.8) means that
344 // unicode values are never allowed to match against ascii ones.
345 if (isASCII(oldCh
) || isASCII(ch
)) {
346 if (toASCIIUpper(oldCh
) == toASCIIUpper(ch
))
348 } else if (areCanonicallyEquivalent(oldCh
, ch
))
351 input
.uncheckInput(matchSize
);
355 for (unsigned i
= 0; i
< matchSize
; ++i
) {
356 if (!checkCharacter(input
.reread(matchBegin
+ i
), negativeInputOffset
+ matchSize
- i
)) {
357 input
.uncheckInput(matchSize
);
366 bool matchAssertionBOL(ByteTerm
& term
)
368 return (input
.atStart(term
.inputPosition
)) || (pattern
->m_multiline
&& testCharacterClass(pattern
->newlineCharacterClass
, input
.readChecked(term
.inputPosition
+ 1)));
371 bool matchAssertionEOL(ByteTerm
& term
)
373 if (term
.inputPosition
)
374 return (input
.atEnd(term
.inputPosition
)) || (pattern
->m_multiline
&& testCharacterClass(pattern
->newlineCharacterClass
, input
.readChecked(term
.inputPosition
)));
376 return (input
.atEnd()) || (pattern
->m_multiline
&& testCharacterClass(pattern
->newlineCharacterClass
, input
.read()));
379 bool matchAssertionWordBoundary(ByteTerm
& term
)
381 bool prevIsWordchar
= !input
.atStart(term
.inputPosition
) && testCharacterClass(pattern
->wordcharCharacterClass
, input
.readChecked(term
.inputPosition
+ 1));
383 if (term
.inputPosition
)
384 readIsWordchar
= !input
.atEnd(term
.inputPosition
) && testCharacterClass(pattern
->wordcharCharacterClass
, input
.readChecked(term
.inputPosition
));
386 readIsWordchar
= !input
.atEnd() && testCharacterClass(pattern
->wordcharCharacterClass
, input
.read());
388 bool wordBoundary
= prevIsWordchar
!= readIsWordchar
;
389 return term
.invert() ? !wordBoundary
: wordBoundary
;
392 bool backtrackPatternCharacter(ByteTerm
& term
, DisjunctionContext
* context
)
394 BackTrackInfoPatternCharacter
* backTrack
= reinterpret_cast<BackTrackInfoPatternCharacter
*>(context
->frame
+ term
.frameLocation
);
396 switch (term
.atom
.quantityType
) {
397 case QuantifierFixedCount
:
400 case QuantifierGreedy
:
401 if (backTrack
->matchAmount
) {
402 --backTrack
->matchAmount
;
403 input
.uncheckInput(1);
408 case QuantifierNonGreedy
:
409 if ((backTrack
->matchAmount
< term
.atom
.quantityCount
) && input
.checkInput(1)) {
410 ++backTrack
->matchAmount
;
411 if (checkCharacter(term
.atom
.patternCharacter
, term
.inputPosition
+ 1))
414 input
.uncheckInput(backTrack
->matchAmount
);
421 bool backtrackPatternCasedCharacter(ByteTerm
& term
, DisjunctionContext
* context
)
423 BackTrackInfoPatternCharacter
* backTrack
= reinterpret_cast<BackTrackInfoPatternCharacter
*>(context
->frame
+ term
.frameLocation
);
425 switch (term
.atom
.quantityType
) {
426 case QuantifierFixedCount
:
429 case QuantifierGreedy
:
430 if (backTrack
->matchAmount
) {
431 --backTrack
->matchAmount
;
432 input
.uncheckInput(1);
437 case QuantifierNonGreedy
:
438 if ((backTrack
->matchAmount
< term
.atom
.quantityCount
) && input
.checkInput(1)) {
439 ++backTrack
->matchAmount
;
440 if (checkCasedCharacter(term
.atom
.casedCharacter
.lo
, term
.atom
.casedCharacter
.hi
, term
.inputPosition
+ 1))
443 input
.uncheckInput(backTrack
->matchAmount
);
450 bool matchCharacterClass(ByteTerm
& term
, DisjunctionContext
* context
)
452 ASSERT(term
.type
== ByteTerm::TypeCharacterClass
);
453 BackTrackInfoPatternCharacter
* backTrack
= reinterpret_cast<BackTrackInfoPatternCharacter
*>(context
->frame
+ term
.frameLocation
);
455 switch (term
.atom
.quantityType
) {
456 case QuantifierFixedCount
: {
457 for (unsigned matchAmount
= 0; matchAmount
< term
.atom
.quantityCount
; ++matchAmount
) {
458 if (!checkCharacterClass(term
.atom
.characterClass
, term
.invert(), term
.inputPosition
- matchAmount
))
464 case QuantifierGreedy
: {
465 unsigned matchAmount
= 0;
466 while ((matchAmount
< term
.atom
.quantityCount
) && input
.checkInput(1)) {
467 if (!checkCharacterClass(term
.atom
.characterClass
, term
.invert(), term
.inputPosition
+ 1)) {
468 input
.uncheckInput(1);
473 backTrack
->matchAmount
= matchAmount
;
478 case QuantifierNonGreedy
:
479 backTrack
->matchAmount
= 0;
483 RELEASE_ASSERT_NOT_REACHED();
487 bool backtrackCharacterClass(ByteTerm
& term
, DisjunctionContext
* context
)
489 ASSERT(term
.type
== ByteTerm::TypeCharacterClass
);
490 BackTrackInfoPatternCharacter
* backTrack
= reinterpret_cast<BackTrackInfoPatternCharacter
*>(context
->frame
+ term
.frameLocation
);
492 switch (term
.atom
.quantityType
) {
493 case QuantifierFixedCount
:
496 case QuantifierGreedy
:
497 if (backTrack
->matchAmount
) {
498 --backTrack
->matchAmount
;
499 input
.uncheckInput(1);
504 case QuantifierNonGreedy
:
505 if ((backTrack
->matchAmount
< term
.atom
.quantityCount
) && input
.checkInput(1)) {
506 ++backTrack
->matchAmount
;
507 if (checkCharacterClass(term
.atom
.characterClass
, term
.invert(), term
.inputPosition
+ 1))
510 input
.uncheckInput(backTrack
->matchAmount
);
517 bool matchBackReference(ByteTerm
& term
, DisjunctionContext
* context
)
519 ASSERT(term
.type
== ByteTerm::TypeBackReference
);
520 BackTrackInfoBackReference
* backTrack
= reinterpret_cast<BackTrackInfoBackReference
*>(context
->frame
+ term
.frameLocation
);
522 unsigned matchBegin
= output
[(term
.atom
.subpatternId
<< 1)];
523 unsigned matchEnd
= output
[(term
.atom
.subpatternId
<< 1) + 1];
525 // If the end position of the referenced match hasn't set yet then the backreference in the same parentheses where it references to that.
526 // In this case the result of match is empty string like when it references to a parentheses with zero-width match.
528 if (matchEnd
== offsetNoMatch
)
531 if (matchBegin
== offsetNoMatch
)
534 ASSERT(matchBegin
<= matchEnd
);
536 if (matchBegin
== matchEnd
)
539 switch (term
.atom
.quantityType
) {
540 case QuantifierFixedCount
: {
541 backTrack
->begin
= input
.getPos();
542 for (unsigned matchAmount
= 0; matchAmount
< term
.atom
.quantityCount
; ++matchAmount
) {
543 if (!tryConsumeBackReference(matchBegin
, matchEnd
, term
.inputPosition
)) {
544 input
.setPos(backTrack
->begin
);
551 case QuantifierGreedy
: {
552 unsigned matchAmount
= 0;
553 while ((matchAmount
< term
.atom
.quantityCount
) && tryConsumeBackReference(matchBegin
, matchEnd
, term
.inputPosition
))
555 backTrack
->matchAmount
= matchAmount
;
559 case QuantifierNonGreedy
:
560 backTrack
->begin
= input
.getPos();
561 backTrack
->matchAmount
= 0;
565 RELEASE_ASSERT_NOT_REACHED();
569 bool backtrackBackReference(ByteTerm
& term
, DisjunctionContext
* context
)
571 ASSERT(term
.type
== ByteTerm::TypeBackReference
);
572 BackTrackInfoBackReference
* backTrack
= reinterpret_cast<BackTrackInfoBackReference
*>(context
->frame
+ term
.frameLocation
);
574 unsigned matchBegin
= output
[(term
.atom
.subpatternId
<< 1)];
575 unsigned matchEnd
= output
[(term
.atom
.subpatternId
<< 1) + 1];
577 if (matchBegin
== offsetNoMatch
)
580 ASSERT(matchBegin
<= matchEnd
);
582 if (matchBegin
== matchEnd
)
585 switch (term
.atom
.quantityType
) {
586 case QuantifierFixedCount
:
587 // for quantityCount == 1, could rewind.
588 input
.setPos(backTrack
->begin
);
591 case QuantifierGreedy
:
592 if (backTrack
->matchAmount
) {
593 --backTrack
->matchAmount
;
594 input
.rewind(matchEnd
- matchBegin
);
599 case QuantifierNonGreedy
:
600 if ((backTrack
->matchAmount
< term
.atom
.quantityCount
) && tryConsumeBackReference(matchBegin
, matchEnd
, term
.inputPosition
)) {
601 ++backTrack
->matchAmount
;
604 input
.setPos(backTrack
->begin
);
611 void recordParenthesesMatch(ByteTerm
& term
, ParenthesesDisjunctionContext
* context
)
613 if (term
.capture()) {
614 unsigned subpatternId
= term
.atom
.subpatternId
;
615 output
[(subpatternId
<< 1)] = context
->getDisjunctionContext(term
)->matchBegin
+ term
.inputPosition
;
616 output
[(subpatternId
<< 1) + 1] = context
->getDisjunctionContext(term
)->matchEnd
+ term
.inputPosition
;
619 void resetMatches(ByteTerm
& term
, ParenthesesDisjunctionContext
* context
)
621 unsigned firstSubpatternId
= term
.atom
.subpatternId
;
622 unsigned count
= term
.atom
.parenthesesDisjunction
->m_numSubpatterns
;
623 context
->restoreOutput(output
, firstSubpatternId
, count
);
625 JSRegExpResult
parenthesesDoBacktrack(ByteTerm
& term
, BackTrackInfoParentheses
* backTrack
)
627 while (backTrack
->matchAmount
) {
628 ParenthesesDisjunctionContext
* context
= backTrack
->lastContext
;
630 JSRegExpResult result
= matchDisjunction(term
.atom
.parenthesesDisjunction
, context
->getDisjunctionContext(term
), true);
631 if (result
== JSRegExpMatch
)
632 return JSRegExpMatch
;
634 resetMatches(term
, context
);
635 popParenthesesDisjunctionContext(backTrack
);
636 freeParenthesesDisjunctionContext(context
);
638 if (result
!= JSRegExpNoMatch
)
642 return JSRegExpNoMatch
;
645 bool matchParenthesesOnceBegin(ByteTerm
& term
, DisjunctionContext
* context
)
647 ASSERT(term
.type
== ByteTerm::TypeParenthesesSubpatternOnceBegin
);
648 ASSERT(term
.atom
.quantityCount
== 1);
650 BackTrackInfoParenthesesOnce
* backTrack
= reinterpret_cast<BackTrackInfoParenthesesOnce
*>(context
->frame
+ term
.frameLocation
);
652 switch (term
.atom
.quantityType
) {
653 case QuantifierGreedy
: {
654 // set this speculatively; if we get to the parens end this will be true.
655 backTrack
->begin
= input
.getPos();
658 case QuantifierNonGreedy
: {
659 backTrack
->begin
= notFound
;
660 context
->term
+= term
.atom
.parenthesesWidth
;
663 case QuantifierFixedCount
:
667 if (term
.capture()) {
668 unsigned subpatternId
= term
.atom
.subpatternId
;
669 output
[(subpatternId
<< 1)] = input
.getPos() - term
.inputPosition
;
675 bool matchParenthesesOnceEnd(ByteTerm
& term
, DisjunctionContext
* context
)
677 ASSERT(term
.type
== ByteTerm::TypeParenthesesSubpatternOnceEnd
);
678 ASSERT(term
.atom
.quantityCount
== 1);
680 if (term
.capture()) {
681 unsigned subpatternId
= term
.atom
.subpatternId
;
682 output
[(subpatternId
<< 1) + 1] = input
.getPos() + term
.inputPosition
;
685 if (term
.atom
.quantityType
== QuantifierFixedCount
)
688 BackTrackInfoParenthesesOnce
* backTrack
= reinterpret_cast<BackTrackInfoParenthesesOnce
*>(context
->frame
+ term
.frameLocation
);
689 return backTrack
->begin
!= input
.getPos();
692 bool backtrackParenthesesOnceBegin(ByteTerm
& term
, DisjunctionContext
* context
)
694 ASSERT(term
.type
== ByteTerm::TypeParenthesesSubpatternOnceBegin
);
695 ASSERT(term
.atom
.quantityCount
== 1);
697 BackTrackInfoParenthesesOnce
* backTrack
= reinterpret_cast<BackTrackInfoParenthesesOnce
*>(context
->frame
+ term
.frameLocation
);
699 if (term
.capture()) {
700 unsigned subpatternId
= term
.atom
.subpatternId
;
701 output
[(subpatternId
<< 1)] = offsetNoMatch
;
702 output
[(subpatternId
<< 1) + 1] = offsetNoMatch
;
705 switch (term
.atom
.quantityType
) {
706 case QuantifierGreedy
:
707 // if we backtrack to this point, there is another chance - try matching nothing.
708 ASSERT(backTrack
->begin
!= notFound
);
709 backTrack
->begin
= notFound
;
710 context
->term
+= term
.atom
.parenthesesWidth
;
712 case QuantifierNonGreedy
:
713 ASSERT(backTrack
->begin
!= notFound
);
714 case QuantifierFixedCount
:
721 bool backtrackParenthesesOnceEnd(ByteTerm
& term
, DisjunctionContext
* context
)
723 ASSERT(term
.type
== ByteTerm::TypeParenthesesSubpatternOnceEnd
);
724 ASSERT(term
.atom
.quantityCount
== 1);
726 BackTrackInfoParenthesesOnce
* backTrack
= reinterpret_cast<BackTrackInfoParenthesesOnce
*>(context
->frame
+ term
.frameLocation
);
728 switch (term
.atom
.quantityType
) {
729 case QuantifierGreedy
:
730 if (backTrack
->begin
== notFound
) {
731 context
->term
-= term
.atom
.parenthesesWidth
;
734 case QuantifierNonGreedy
:
735 if (backTrack
->begin
== notFound
) {
736 backTrack
->begin
= input
.getPos();
737 if (term
.capture()) {
738 // Technically this access to inputPosition should be accessing the begin term's
739 // inputPosition, but for repeats other than fixed these values should be
740 // the same anyway! (We don't pre-check for greedy or non-greedy matches.)
741 ASSERT((&term
- term
.atom
.parenthesesWidth
)->type
== ByteTerm::TypeParenthesesSubpatternOnceBegin
);
742 ASSERT((&term
- term
.atom
.parenthesesWidth
)->inputPosition
== term
.inputPosition
);
743 unsigned subpatternId
= term
.atom
.subpatternId
;
744 output
[subpatternId
<< 1] = input
.getPos() + term
.inputPosition
;
746 context
->term
-= term
.atom
.parenthesesWidth
;
749 case QuantifierFixedCount
:
756 bool matchParenthesesTerminalBegin(ByteTerm
& term
, DisjunctionContext
* context
)
758 ASSERT(term
.type
== ByteTerm::TypeParenthesesSubpatternTerminalBegin
);
759 ASSERT(term
.atom
.quantityType
== QuantifierGreedy
);
760 ASSERT(term
.atom
.quantityCount
== quantifyInfinite
);
761 ASSERT(!term
.capture());
763 BackTrackInfoParenthesesTerminal
* backTrack
= reinterpret_cast<BackTrackInfoParenthesesTerminal
*>(context
->frame
+ term
.frameLocation
);
764 backTrack
->begin
= input
.getPos();
768 bool matchParenthesesTerminalEnd(ByteTerm
& term
, DisjunctionContext
* context
)
770 ASSERT(term
.type
== ByteTerm::TypeParenthesesSubpatternTerminalEnd
);
772 BackTrackInfoParenthesesTerminal
* backTrack
= reinterpret_cast<BackTrackInfoParenthesesTerminal
*>(context
->frame
+ term
.frameLocation
);
773 // Empty match is a failed match.
774 if (backTrack
->begin
== input
.getPos())
777 // Successful match! Okay, what's next? - loop around and try to match moar!
778 context
->term
-= (term
.atom
.parenthesesWidth
+ 1);
782 bool backtrackParenthesesTerminalBegin(ByteTerm
& term
, DisjunctionContext
* context
)
784 ASSERT(term
.type
== ByteTerm::TypeParenthesesSubpatternTerminalBegin
);
785 ASSERT(term
.atom
.quantityType
== QuantifierGreedy
);
786 ASSERT(term
.atom
.quantityCount
== quantifyInfinite
);
787 ASSERT(!term
.capture());
789 // If we backtrack to this point, we have failed to match this iteration of the parens.
790 // Since this is greedy / zero minimum a failed is also accepted as a match!
791 context
->term
+= term
.atom
.parenthesesWidth
;
795 bool backtrackParenthesesTerminalEnd(ByteTerm
&, DisjunctionContext
*)
797 // 'Terminal' parentheses are at the end of the regex, and as such a match past end
798 // should always be returned as a successful match - we should never backtrack to here.
799 RELEASE_ASSERT_NOT_REACHED();
803 bool matchParentheticalAssertionBegin(ByteTerm
& term
, DisjunctionContext
* context
)
805 ASSERT(term
.type
== ByteTerm::TypeParentheticalAssertionBegin
);
806 ASSERT(term
.atom
.quantityCount
== 1);
808 BackTrackInfoParentheticalAssertion
* backTrack
= reinterpret_cast<BackTrackInfoParentheticalAssertion
*>(context
->frame
+ term
.frameLocation
);
810 backTrack
->begin
= input
.getPos();
814 bool matchParentheticalAssertionEnd(ByteTerm
& term
, DisjunctionContext
* context
)
816 ASSERT(term
.type
== ByteTerm::TypeParentheticalAssertionEnd
);
817 ASSERT(term
.atom
.quantityCount
== 1);
819 BackTrackInfoParentheticalAssertion
* backTrack
= reinterpret_cast<BackTrackInfoParentheticalAssertion
*>(context
->frame
+ term
.frameLocation
);
821 input
.setPos(backTrack
->begin
);
823 // We've reached the end of the parens; if they are inverted, this is failure.
825 context
->term
-= term
.atom
.parenthesesWidth
;
832 bool backtrackParentheticalAssertionBegin(ByteTerm
& term
, DisjunctionContext
* context
)
834 ASSERT(term
.type
== ByteTerm::TypeParentheticalAssertionBegin
);
835 ASSERT(term
.atom
.quantityCount
== 1);
837 // We've failed to match parens; if they are inverted, this is win!
839 context
->term
+= term
.atom
.parenthesesWidth
;
846 bool backtrackParentheticalAssertionEnd(ByteTerm
& term
, DisjunctionContext
* context
)
848 ASSERT(term
.type
== ByteTerm::TypeParentheticalAssertionEnd
);
849 ASSERT(term
.atom
.quantityCount
== 1);
851 BackTrackInfoParentheticalAssertion
* backTrack
= reinterpret_cast<BackTrackInfoParentheticalAssertion
*>(context
->frame
+ term
.frameLocation
);
853 input
.setPos(backTrack
->begin
);
855 context
->term
-= term
.atom
.parenthesesWidth
;
859 JSRegExpResult
matchParentheses(ByteTerm
& term
, DisjunctionContext
* context
)
861 ASSERT(term
.type
== ByteTerm::TypeParenthesesSubpattern
);
863 BackTrackInfoParentheses
* backTrack
= reinterpret_cast<BackTrackInfoParentheses
*>(context
->frame
+ term
.frameLocation
);
864 ByteDisjunction
* disjunctionBody
= term
.atom
.parenthesesDisjunction
;
866 backTrack
->matchAmount
= 0;
867 backTrack
->lastContext
= 0;
869 switch (term
.atom
.quantityType
) {
870 case QuantifierFixedCount
: {
871 // While we haven't yet reached our fixed limit,
872 while (backTrack
->matchAmount
< term
.atom
.quantityCount
) {
873 // Try to do a match, and it it succeeds, add it to the list.
874 ParenthesesDisjunctionContext
* context
= allocParenthesesDisjunctionContext(disjunctionBody
, output
, term
);
875 JSRegExpResult result
= matchDisjunction(disjunctionBody
, context
->getDisjunctionContext(term
));
876 if (result
== JSRegExpMatch
)
877 appendParenthesesDisjunctionContext(backTrack
, context
);
879 // The match failed; try to find an alternate point to carry on from.
880 resetMatches(term
, context
);
881 freeParenthesesDisjunctionContext(context
);
883 if (result
!= JSRegExpNoMatch
)
885 JSRegExpResult backtrackResult
= parenthesesDoBacktrack(term
, backTrack
);
886 if (backtrackResult
!= JSRegExpMatch
)
887 return backtrackResult
;
891 ASSERT(backTrack
->matchAmount
== term
.atom
.quantityCount
);
892 ParenthesesDisjunctionContext
* context
= backTrack
->lastContext
;
893 recordParenthesesMatch(term
, context
);
894 return JSRegExpMatch
;
897 case QuantifierGreedy
: {
898 while (backTrack
->matchAmount
< term
.atom
.quantityCount
) {
899 ParenthesesDisjunctionContext
* context
= allocParenthesesDisjunctionContext(disjunctionBody
, output
, term
);
900 JSRegExpResult result
= matchNonZeroDisjunction(disjunctionBody
, context
->getDisjunctionContext(term
));
901 if (result
== JSRegExpMatch
)
902 appendParenthesesDisjunctionContext(backTrack
, context
);
904 resetMatches(term
, context
);
905 freeParenthesesDisjunctionContext(context
);
907 if (result
!= JSRegExpNoMatch
)
914 if (backTrack
->matchAmount
) {
915 ParenthesesDisjunctionContext
* context
= backTrack
->lastContext
;
916 recordParenthesesMatch(term
, context
);
918 return JSRegExpMatch
;
921 case QuantifierNonGreedy
:
922 return JSRegExpMatch
;
925 RELEASE_ASSERT_NOT_REACHED();
926 return JSRegExpErrorNoMatch
;
929 // Rules for backtracking differ depending on whether this is greedy or non-greedy.
931 // Greedy matches never should try just adding more - you should already have done
932 // the 'more' cases. Always backtrack, at least a leetle bit. However cases where
933 // you backtrack an item off the list needs checking, since we'll never have matched
934 // the one less case. Tracking forwards, still add as much as possible.
936 // Non-greedy, we've already done the one less case, so don't match on popping.
937 // We haven't done the one more case, so always try to add that.
939 JSRegExpResult
backtrackParentheses(ByteTerm
& term
, DisjunctionContext
* context
)
941 ASSERT(term
.type
== ByteTerm::TypeParenthesesSubpattern
);
943 BackTrackInfoParentheses
* backTrack
= reinterpret_cast<BackTrackInfoParentheses
*>(context
->frame
+ term
.frameLocation
);
944 ByteDisjunction
* disjunctionBody
= term
.atom
.parenthesesDisjunction
;
946 switch (term
.atom
.quantityType
) {
947 case QuantifierFixedCount
: {
948 ASSERT(backTrack
->matchAmount
== term
.atom
.quantityCount
);
950 ParenthesesDisjunctionContext
* context
= 0;
951 JSRegExpResult result
= parenthesesDoBacktrack(term
, backTrack
);
953 if (result
!= JSRegExpMatch
)
956 // While we haven't yet reached our fixed limit,
957 while (backTrack
->matchAmount
< term
.atom
.quantityCount
) {
958 // Try to do a match, and it it succeeds, add it to the list.
959 context
= allocParenthesesDisjunctionContext(disjunctionBody
, output
, term
);
960 result
= matchDisjunction(disjunctionBody
, context
->getDisjunctionContext(term
));
962 if (result
== JSRegExpMatch
)
963 appendParenthesesDisjunctionContext(backTrack
, context
);
965 // The match failed; try to find an alternate point to carry on from.
966 resetMatches(term
, context
);
967 freeParenthesesDisjunctionContext(context
);
969 if (result
!= JSRegExpNoMatch
)
971 JSRegExpResult backtrackResult
= parenthesesDoBacktrack(term
, backTrack
);
972 if (backtrackResult
!= JSRegExpMatch
)
973 return backtrackResult
;
977 ASSERT(backTrack
->matchAmount
== term
.atom
.quantityCount
);
978 context
= backTrack
->lastContext
;
979 recordParenthesesMatch(term
, context
);
980 return JSRegExpMatch
;
983 case QuantifierGreedy
: {
984 if (!backTrack
->matchAmount
)
985 return JSRegExpNoMatch
;
987 ParenthesesDisjunctionContext
* context
= backTrack
->lastContext
;
988 JSRegExpResult result
= matchNonZeroDisjunction(disjunctionBody
, context
->getDisjunctionContext(term
), true);
989 if (result
== JSRegExpMatch
) {
990 while (backTrack
->matchAmount
< term
.atom
.quantityCount
) {
991 ParenthesesDisjunctionContext
* context
= allocParenthesesDisjunctionContext(disjunctionBody
, output
, term
);
992 JSRegExpResult parenthesesResult
= matchNonZeroDisjunction(disjunctionBody
, context
->getDisjunctionContext(term
));
993 if (parenthesesResult
== JSRegExpMatch
)
994 appendParenthesesDisjunctionContext(backTrack
, context
);
996 resetMatches(term
, context
);
997 freeParenthesesDisjunctionContext(context
);
999 if (parenthesesResult
!= JSRegExpNoMatch
)
1000 return parenthesesResult
;
1006 resetMatches(term
, context
);
1007 popParenthesesDisjunctionContext(backTrack
);
1008 freeParenthesesDisjunctionContext(context
);
1010 if (result
!= JSRegExpNoMatch
)
1014 if (backTrack
->matchAmount
) {
1015 ParenthesesDisjunctionContext
* context
= backTrack
->lastContext
;
1016 recordParenthesesMatch(term
, context
);
1018 return JSRegExpMatch
;
1021 case QuantifierNonGreedy
: {
1022 // If we've not reached the limit, try to add one more match.
1023 if (backTrack
->matchAmount
< term
.atom
.quantityCount
) {
1024 ParenthesesDisjunctionContext
* context
= allocParenthesesDisjunctionContext(disjunctionBody
, output
, term
);
1025 JSRegExpResult result
= matchNonZeroDisjunction(disjunctionBody
, context
->getDisjunctionContext(term
));
1026 if (result
== JSRegExpMatch
) {
1027 appendParenthesesDisjunctionContext(backTrack
, context
);
1028 recordParenthesesMatch(term
, context
);
1029 return JSRegExpMatch
;
1032 resetMatches(term
, context
);
1033 freeParenthesesDisjunctionContext(context
);
1035 if (result
!= JSRegExpNoMatch
)
1039 // Nope - okay backtrack looking for an alternative.
1040 while (backTrack
->matchAmount
) {
1041 ParenthesesDisjunctionContext
* context
= backTrack
->lastContext
;
1042 JSRegExpResult result
= matchNonZeroDisjunction(disjunctionBody
, context
->getDisjunctionContext(term
), true);
1043 if (result
== JSRegExpMatch
) {
1044 // successful backtrack! we're back in the game!
1045 if (backTrack
->matchAmount
) {
1046 context
= backTrack
->lastContext
;
1047 recordParenthesesMatch(term
, context
);
1049 return JSRegExpMatch
;
1052 // pop a match off the stack
1053 resetMatches(term
, context
);
1054 popParenthesesDisjunctionContext(backTrack
);
1055 freeParenthesesDisjunctionContext(context
);
1057 if (result
!= JSRegExpNoMatch
)
1061 return JSRegExpNoMatch
;
1065 RELEASE_ASSERT_NOT_REACHED();
1066 return JSRegExpErrorNoMatch
;
1069 bool matchDotStarEnclosure(ByteTerm
& term
, DisjunctionContext
* context
)
1072 unsigned matchBegin
= context
->matchBegin
;
1075 for (matchBegin
--; true; matchBegin
--) {
1076 if (testCharacterClass(pattern
->newlineCharacterClass
, input
.reread(matchBegin
))) {
1086 unsigned matchEnd
= input
.getPos();
1088 for (; (matchEnd
!= input
.end())
1089 && (!testCharacterClass(pattern
->newlineCharacterClass
, input
.reread(matchEnd
))); matchEnd
++) { }
1091 if (((matchBegin
&& term
.anchors
.m_bol
)
1092 || ((matchEnd
!= input
.end()) && term
.anchors
.m_eol
))
1093 && !pattern
->m_multiline
)
1096 context
->matchBegin
= matchBegin
;
1097 context
->matchEnd
= matchEnd
;
1101 #define MATCH_NEXT() { ++context->term; goto matchAgain; }
1102 #define BACKTRACK() { --context->term; goto backtrack; }
1103 #define currentTerm() (disjunction->terms[context->term])
1104 JSRegExpResult
matchDisjunction(ByteDisjunction
* disjunction
, DisjunctionContext
* context
, bool btrack
= false)
1106 if (!--remainingMatchCount
)
1107 return JSRegExpErrorHitLimit
;
1112 context
->matchBegin
= input
.getPos();
1116 ASSERT(context
->term
< static_cast<int>(disjunction
->terms
.size()));
1118 switch (currentTerm().type
) {
1119 case ByteTerm::TypeSubpatternBegin
:
1121 case ByteTerm::TypeSubpatternEnd
:
1122 context
->matchEnd
= input
.getPos();
1123 return JSRegExpMatch
;
1125 case ByteTerm::TypeBodyAlternativeBegin
:
1127 case ByteTerm::TypeBodyAlternativeDisjunction
:
1128 case ByteTerm::TypeBodyAlternativeEnd
:
1129 context
->matchEnd
= input
.getPos();
1130 return JSRegExpMatch
;
1132 case ByteTerm::TypeAlternativeBegin
:
1134 case ByteTerm::TypeAlternativeDisjunction
:
1135 case ByteTerm::TypeAlternativeEnd
: {
1136 int offset
= currentTerm().alternative
.end
;
1137 BackTrackInfoAlternative
* backTrack
= reinterpret_cast<BackTrackInfoAlternative
*>(context
->frame
+ currentTerm().frameLocation
);
1138 backTrack
->offset
= offset
;
1139 context
->term
+= offset
;
1143 case ByteTerm::TypeAssertionBOL
:
1144 if (matchAssertionBOL(currentTerm()))
1147 case ByteTerm::TypeAssertionEOL
:
1148 if (matchAssertionEOL(currentTerm()))
1151 case ByteTerm::TypeAssertionWordBoundary
:
1152 if (matchAssertionWordBoundary(currentTerm()))
1156 case ByteTerm::TypePatternCharacterOnce
:
1157 case ByteTerm::TypePatternCharacterFixed
: {
1158 for (unsigned matchAmount
= 0; matchAmount
< currentTerm().atom
.quantityCount
; ++matchAmount
) {
1159 if (!checkCharacter(currentTerm().atom
.patternCharacter
, currentTerm().inputPosition
- matchAmount
))
1164 case ByteTerm::TypePatternCharacterGreedy
: {
1165 BackTrackInfoPatternCharacter
* backTrack
= reinterpret_cast<BackTrackInfoPatternCharacter
*>(context
->frame
+ currentTerm().frameLocation
);
1166 unsigned matchAmount
= 0;
1167 while ((matchAmount
< currentTerm().atom
.quantityCount
) && input
.checkInput(1)) {
1168 if (!checkCharacter(currentTerm().atom
.patternCharacter
, currentTerm().inputPosition
+ 1)) {
1169 input
.uncheckInput(1);
1174 backTrack
->matchAmount
= matchAmount
;
1178 case ByteTerm::TypePatternCharacterNonGreedy
: {
1179 BackTrackInfoPatternCharacter
* backTrack
= reinterpret_cast<BackTrackInfoPatternCharacter
*>(context
->frame
+ currentTerm().frameLocation
);
1180 backTrack
->matchAmount
= 0;
1184 case ByteTerm::TypePatternCasedCharacterOnce
:
1185 case ByteTerm::TypePatternCasedCharacterFixed
: {
1186 for (unsigned matchAmount
= 0; matchAmount
< currentTerm().atom
.quantityCount
; ++matchAmount
) {
1187 if (!checkCasedCharacter(currentTerm().atom
.casedCharacter
.lo
, currentTerm().atom
.casedCharacter
.hi
, currentTerm().inputPosition
- matchAmount
))
1192 case ByteTerm::TypePatternCasedCharacterGreedy
: {
1193 BackTrackInfoPatternCharacter
* backTrack
= reinterpret_cast<BackTrackInfoPatternCharacter
*>(context
->frame
+ currentTerm().frameLocation
);
1194 unsigned matchAmount
= 0;
1195 while ((matchAmount
< currentTerm().atom
.quantityCount
) && input
.checkInput(1)) {
1196 if (!checkCasedCharacter(currentTerm().atom
.casedCharacter
.lo
, currentTerm().atom
.casedCharacter
.hi
, currentTerm().inputPosition
+ 1)) {
1197 input
.uncheckInput(1);
1202 backTrack
->matchAmount
= matchAmount
;
1206 case ByteTerm::TypePatternCasedCharacterNonGreedy
: {
1207 BackTrackInfoPatternCharacter
* backTrack
= reinterpret_cast<BackTrackInfoPatternCharacter
*>(context
->frame
+ currentTerm().frameLocation
);
1208 backTrack
->matchAmount
= 0;
1212 case ByteTerm::TypeCharacterClass
:
1213 if (matchCharacterClass(currentTerm(), context
))
1216 case ByteTerm::TypeBackReference
:
1217 if (matchBackReference(currentTerm(), context
))
1220 case ByteTerm::TypeParenthesesSubpattern
: {
1221 JSRegExpResult result
= matchParentheses(currentTerm(), context
);
1223 if (result
== JSRegExpMatch
) {
1225 } else if (result
!= JSRegExpNoMatch
)
1230 case ByteTerm::TypeParenthesesSubpatternOnceBegin
:
1231 if (matchParenthesesOnceBegin(currentTerm(), context
))
1234 case ByteTerm::TypeParenthesesSubpatternOnceEnd
:
1235 if (matchParenthesesOnceEnd(currentTerm(), context
))
1238 case ByteTerm::TypeParenthesesSubpatternTerminalBegin
:
1239 if (matchParenthesesTerminalBegin(currentTerm(), context
))
1242 case ByteTerm::TypeParenthesesSubpatternTerminalEnd
:
1243 if (matchParenthesesTerminalEnd(currentTerm(), context
))
1246 case ByteTerm::TypeParentheticalAssertionBegin
:
1247 if (matchParentheticalAssertionBegin(currentTerm(), context
))
1250 case ByteTerm::TypeParentheticalAssertionEnd
:
1251 if (matchParentheticalAssertionEnd(currentTerm(), context
))
1255 case ByteTerm::TypeCheckInput
:
1256 if (input
.checkInput(currentTerm().checkInputCount
))
1260 case ByteTerm::TypeUncheckInput
:
1261 input
.uncheckInput(currentTerm().checkInputCount
);
1264 case ByteTerm::TypeDotStarEnclosure
:
1265 if (matchDotStarEnclosure(currentTerm(), context
))
1266 return JSRegExpMatch
;
1270 // We should never fall-through to here.
1271 RELEASE_ASSERT_NOT_REACHED();
1274 ASSERT(context
->term
< static_cast<int>(disjunction
->terms
.size()));
1276 switch (currentTerm().type
) {
1277 case ByteTerm::TypeSubpatternBegin
:
1278 return JSRegExpNoMatch
;
1279 case ByteTerm::TypeSubpatternEnd
:
1280 RELEASE_ASSERT_NOT_REACHED();
1282 case ByteTerm::TypeBodyAlternativeBegin
:
1283 case ByteTerm::TypeBodyAlternativeDisjunction
: {
1284 int offset
= currentTerm().alternative
.next
;
1285 context
->term
+= offset
;
1290 return JSRegExpNoMatch
;
1294 context
->matchBegin
= input
.getPos();
1296 if (currentTerm().alternative
.onceThrough
)
1297 context
->term
+= currentTerm().alternative
.next
;
1301 case ByteTerm::TypeBodyAlternativeEnd
:
1302 RELEASE_ASSERT_NOT_REACHED();
1304 case ByteTerm::TypeAlternativeBegin
:
1305 case ByteTerm::TypeAlternativeDisjunction
: {
1306 int offset
= currentTerm().alternative
.next
;
1307 context
->term
+= offset
;
1312 case ByteTerm::TypeAlternativeEnd
: {
1313 // We should never backtrack back into an alternative of the main body of the regex.
1314 BackTrackInfoAlternative
* backTrack
= reinterpret_cast<BackTrackInfoAlternative
*>(context
->frame
+ currentTerm().frameLocation
);
1315 unsigned offset
= backTrack
->offset
;
1316 context
->term
-= offset
;
1320 case ByteTerm::TypeAssertionBOL
:
1321 case ByteTerm::TypeAssertionEOL
:
1322 case ByteTerm::TypeAssertionWordBoundary
:
1325 case ByteTerm::TypePatternCharacterOnce
:
1326 case ByteTerm::TypePatternCharacterFixed
:
1327 case ByteTerm::TypePatternCharacterGreedy
:
1328 case ByteTerm::TypePatternCharacterNonGreedy
:
1329 if (backtrackPatternCharacter(currentTerm(), context
))
1332 case ByteTerm::TypePatternCasedCharacterOnce
:
1333 case ByteTerm::TypePatternCasedCharacterFixed
:
1334 case ByteTerm::TypePatternCasedCharacterGreedy
:
1335 case ByteTerm::TypePatternCasedCharacterNonGreedy
:
1336 if (backtrackPatternCasedCharacter(currentTerm(), context
))
1339 case ByteTerm::TypeCharacterClass
:
1340 if (backtrackCharacterClass(currentTerm(), context
))
1343 case ByteTerm::TypeBackReference
:
1344 if (backtrackBackReference(currentTerm(), context
))
1347 case ByteTerm::TypeParenthesesSubpattern
: {
1348 JSRegExpResult result
= backtrackParentheses(currentTerm(), context
);
1350 if (result
== JSRegExpMatch
) {
1352 } else if (result
!= JSRegExpNoMatch
)
1357 case ByteTerm::TypeParenthesesSubpatternOnceBegin
:
1358 if (backtrackParenthesesOnceBegin(currentTerm(), context
))
1361 case ByteTerm::TypeParenthesesSubpatternOnceEnd
:
1362 if (backtrackParenthesesOnceEnd(currentTerm(), context
))
1365 case ByteTerm::TypeParenthesesSubpatternTerminalBegin
:
1366 if (backtrackParenthesesTerminalBegin(currentTerm(), context
))
1369 case ByteTerm::TypeParenthesesSubpatternTerminalEnd
:
1370 if (backtrackParenthesesTerminalEnd(currentTerm(), context
))
1373 case ByteTerm::TypeParentheticalAssertionBegin
:
1374 if (backtrackParentheticalAssertionBegin(currentTerm(), context
))
1377 case ByteTerm::TypeParentheticalAssertionEnd
:
1378 if (backtrackParentheticalAssertionEnd(currentTerm(), context
))
1382 case ByteTerm::TypeCheckInput
:
1383 input
.uncheckInput(currentTerm().checkInputCount
);
1386 case ByteTerm::TypeUncheckInput
:
1387 input
.checkInput(currentTerm().checkInputCount
);
1390 case ByteTerm::TypeDotStarEnclosure
:
1391 RELEASE_ASSERT_NOT_REACHED();
1394 RELEASE_ASSERT_NOT_REACHED();
1395 return JSRegExpErrorNoMatch
;
1398 JSRegExpResult
matchNonZeroDisjunction(ByteDisjunction
* disjunction
, DisjunctionContext
* context
, bool btrack
= false)
1400 JSRegExpResult result
= matchDisjunction(disjunction
, context
, btrack
);
1402 if (result
== JSRegExpMatch
) {
1403 while (context
->matchBegin
== context
->matchEnd
) {
1404 result
= matchDisjunction(disjunction
, context
, true);
1405 if (result
!= JSRegExpMatch
)
1408 return JSRegExpMatch
;
1414 unsigned interpret()
1416 if (!input
.isAvailableInput(0))
1417 return offsetNoMatch
;
1419 for (unsigned i
= 0; i
< pattern
->m_body
->m_numSubpatterns
+ 1; ++i
)
1420 output
[i
<< 1] = offsetNoMatch
;
1422 allocatorPool
= pattern
->m_allocator
->startAllocator();
1423 RELEASE_ASSERT(allocatorPool
);
1425 DisjunctionContext
* context
= allocDisjunctionContext(pattern
->m_body
.get());
1427 JSRegExpResult result
= matchDisjunction(pattern
->m_body
.get(), context
, false);
1428 if (result
== JSRegExpMatch
) {
1429 output
[0] = context
->matchBegin
;
1430 output
[1] = context
->matchEnd
;
1433 freeDisjunctionContext(context
);
1435 pattern
->m_allocator
->stopAllocator();
1437 ASSERT((result
== JSRegExpMatch
) == (output
[0] != offsetNoMatch
));
1441 Interpreter(BytecodePattern
* pattern
, unsigned* output
, const CharType
* input
, unsigned length
, unsigned start
)
1444 , input(input
, start
, length
)
1446 , remainingMatchCount(matchLimit
)
1451 BytecodePattern
* pattern
;
1454 BumpPointerPool
* allocatorPool
;
1455 unsigned remainingMatchCount
;
1458 class ByteCompiler
{
1459 struct ParenthesesStackEntry
{
1461 unsigned savedAlternativeIndex
;
1462 ParenthesesStackEntry(unsigned beginTerm
, unsigned savedAlternativeIndex
/*, unsigned subpatternId, bool capture = false*/)
1463 : beginTerm(beginTerm
)
1464 , savedAlternativeIndex(savedAlternativeIndex
)
1470 ByteCompiler(YarrPattern
& pattern
)
1471 : m_pattern(pattern
)
1473 m_currentAlternativeIndex
= 0;
1476 PassOwnPtr
<BytecodePattern
> compile(BumpPointerAllocator
* allocator
)
1478 regexBegin(m_pattern
.m_numSubpatterns
, m_pattern
.m_body
->m_callFrameSize
, m_pattern
.m_body
->m_alternatives
[0]->onceThrough());
1479 emitDisjunction(m_pattern
.m_body
);
1482 return adoptPtr(new BytecodePattern(m_bodyDisjunction
.release(), m_allParenthesesInfo
, m_pattern
, allocator
));
1485 void checkInput(unsigned count
)
1487 m_bodyDisjunction
->terms
.append(ByteTerm::CheckInput(count
));
1490 void uncheckInput(unsigned count
)
1492 m_bodyDisjunction
->terms
.append(ByteTerm::UncheckInput(count
));
1495 void assertionBOL(unsigned inputPosition
)
1497 m_bodyDisjunction
->terms
.append(ByteTerm::BOL(inputPosition
));
1500 void assertionEOL(unsigned inputPosition
)
1502 m_bodyDisjunction
->terms
.append(ByteTerm::EOL(inputPosition
));
1505 void assertionWordBoundary(bool invert
, unsigned inputPosition
)
1507 m_bodyDisjunction
->terms
.append(ByteTerm::WordBoundary(invert
, inputPosition
));
1510 void atomPatternCharacter(UChar ch
, unsigned inputPosition
, unsigned frameLocation
, Checked
<unsigned> quantityCount
, QuantifierType quantityType
)
1512 if (m_pattern
.m_ignoreCase
) {
1513 UChar lo
= Unicode::toLower(ch
);
1514 UChar hi
= Unicode::toUpper(ch
);
1517 m_bodyDisjunction
->terms
.append(ByteTerm(lo
, hi
, inputPosition
, frameLocation
, quantityCount
, quantityType
));
1522 m_bodyDisjunction
->terms
.append(ByteTerm(ch
, inputPosition
, frameLocation
, quantityCount
, quantityType
));
1525 void atomCharacterClass(CharacterClass
* characterClass
, bool invert
, unsigned inputPosition
, unsigned frameLocation
, Checked
<unsigned> quantityCount
, QuantifierType quantityType
)
1527 m_bodyDisjunction
->terms
.append(ByteTerm(characterClass
, invert
, inputPosition
));
1529 m_bodyDisjunction
->terms
[m_bodyDisjunction
->terms
.size() - 1].atom
.quantityCount
= quantityCount
.unsafeGet();
1530 m_bodyDisjunction
->terms
[m_bodyDisjunction
->terms
.size() - 1].atom
.quantityType
= quantityType
;
1531 m_bodyDisjunction
->terms
[m_bodyDisjunction
->terms
.size() - 1].frameLocation
= frameLocation
;
1534 void atomBackReference(unsigned subpatternId
, unsigned inputPosition
, unsigned frameLocation
, Checked
<unsigned> quantityCount
, QuantifierType quantityType
)
1536 ASSERT(subpatternId
);
1538 m_bodyDisjunction
->terms
.append(ByteTerm::BackReference(subpatternId
, inputPosition
));
1540 m_bodyDisjunction
->terms
[m_bodyDisjunction
->terms
.size() - 1].atom
.quantityCount
= quantityCount
.unsafeGet();
1541 m_bodyDisjunction
->terms
[m_bodyDisjunction
->terms
.size() - 1].atom
.quantityType
= quantityType
;
1542 m_bodyDisjunction
->terms
[m_bodyDisjunction
->terms
.size() - 1].frameLocation
= frameLocation
;
1545 void atomParenthesesOnceBegin(unsigned subpatternId
, bool capture
, unsigned inputPosition
, unsigned frameLocation
, unsigned alternativeFrameLocation
)
1547 int beginTerm
= m_bodyDisjunction
->terms
.size();
1549 m_bodyDisjunction
->terms
.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin
, subpatternId
, capture
, false, inputPosition
));
1550 m_bodyDisjunction
->terms
[m_bodyDisjunction
->terms
.size() - 1].frameLocation
= frameLocation
;
1551 m_bodyDisjunction
->terms
.append(ByteTerm::AlternativeBegin());
1552 m_bodyDisjunction
->terms
[m_bodyDisjunction
->terms
.size() - 1].frameLocation
= alternativeFrameLocation
;
1554 m_parenthesesStack
.append(ParenthesesStackEntry(beginTerm
, m_currentAlternativeIndex
));
1555 m_currentAlternativeIndex
= beginTerm
+ 1;
1558 void atomParenthesesTerminalBegin(unsigned subpatternId
, bool capture
, unsigned inputPosition
, unsigned frameLocation
, unsigned alternativeFrameLocation
)
1560 int beginTerm
= m_bodyDisjunction
->terms
.size();
1562 m_bodyDisjunction
->terms
.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalBegin
, subpatternId
, capture
, false, inputPosition
));
1563 m_bodyDisjunction
->terms
[m_bodyDisjunction
->terms
.size() - 1].frameLocation
= frameLocation
;
1564 m_bodyDisjunction
->terms
.append(ByteTerm::AlternativeBegin());
1565 m_bodyDisjunction
->terms
[m_bodyDisjunction
->terms
.size() - 1].frameLocation
= alternativeFrameLocation
;
1567 m_parenthesesStack
.append(ParenthesesStackEntry(beginTerm
, m_currentAlternativeIndex
));
1568 m_currentAlternativeIndex
= beginTerm
+ 1;
1571 void atomParenthesesSubpatternBegin(unsigned subpatternId
, bool capture
, unsigned inputPosition
, unsigned frameLocation
, unsigned alternativeFrameLocation
)
1573 // Errrk! - this is a little crazy, we initially generate as a TypeParenthesesSubpatternOnceBegin,
1574 // then fix this up at the end! - simplifying this should make it much clearer.
1575 // https://bugs.webkit.org/show_bug.cgi?id=50136
1577 int beginTerm
= m_bodyDisjunction
->terms
.size();
1579 m_bodyDisjunction
->terms
.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin
, subpatternId
, capture
, false, inputPosition
));
1580 m_bodyDisjunction
->terms
[m_bodyDisjunction
->terms
.size() - 1].frameLocation
= frameLocation
;
1581 m_bodyDisjunction
->terms
.append(ByteTerm::AlternativeBegin());
1582 m_bodyDisjunction
->terms
[m_bodyDisjunction
->terms
.size() - 1].frameLocation
= alternativeFrameLocation
;
1584 m_parenthesesStack
.append(ParenthesesStackEntry(beginTerm
, m_currentAlternativeIndex
));
1585 m_currentAlternativeIndex
= beginTerm
+ 1;
1588 void atomParentheticalAssertionBegin(unsigned subpatternId
, bool invert
, unsigned frameLocation
, unsigned alternativeFrameLocation
)
1590 int beginTerm
= m_bodyDisjunction
->terms
.size();
1592 m_bodyDisjunction
->terms
.append(ByteTerm(ByteTerm::TypeParentheticalAssertionBegin
, subpatternId
, false, invert
, 0));
1593 m_bodyDisjunction
->terms
[m_bodyDisjunction
->terms
.size() - 1].frameLocation
= frameLocation
;
1594 m_bodyDisjunction
->terms
.append(ByteTerm::AlternativeBegin());
1595 m_bodyDisjunction
->terms
[m_bodyDisjunction
->terms
.size() - 1].frameLocation
= alternativeFrameLocation
;
1597 m_parenthesesStack
.append(ParenthesesStackEntry(beginTerm
, m_currentAlternativeIndex
));
1598 m_currentAlternativeIndex
= beginTerm
+ 1;
1601 void atomParentheticalAssertionEnd(unsigned inputPosition
, unsigned frameLocation
, Checked
<unsigned> quantityCount
, QuantifierType quantityType
)
1603 unsigned beginTerm
= popParenthesesStack();
1604 closeAlternative(beginTerm
+ 1);
1605 unsigned endTerm
= m_bodyDisjunction
->terms
.size();
1607 ASSERT(m_bodyDisjunction
->terms
[beginTerm
].type
== ByteTerm::TypeParentheticalAssertionBegin
);
1609 bool invert
= m_bodyDisjunction
->terms
[beginTerm
].invert();
1610 unsigned subpatternId
= m_bodyDisjunction
->terms
[beginTerm
].atom
.subpatternId
;
1612 m_bodyDisjunction
->terms
.append(ByteTerm(ByteTerm::TypeParentheticalAssertionEnd
, subpatternId
, false, invert
, inputPosition
));
1613 m_bodyDisjunction
->terms
[beginTerm
].atom
.parenthesesWidth
= endTerm
- beginTerm
;
1614 m_bodyDisjunction
->terms
[endTerm
].atom
.parenthesesWidth
= endTerm
- beginTerm
;
1615 m_bodyDisjunction
->terms
[endTerm
].frameLocation
= frameLocation
;
1617 m_bodyDisjunction
->terms
[beginTerm
].atom
.quantityCount
= quantityCount
.unsafeGet();
1618 m_bodyDisjunction
->terms
[beginTerm
].atom
.quantityType
= quantityType
;
1619 m_bodyDisjunction
->terms
[endTerm
].atom
.quantityCount
= quantityCount
.unsafeGet();
1620 m_bodyDisjunction
->terms
[endTerm
].atom
.quantityType
= quantityType
;
1623 void assertionDotStarEnclosure(bool bolAnchored
, bool eolAnchored
)
1625 m_bodyDisjunction
->terms
.append(ByteTerm::DotStarEnclosure(bolAnchored
, eolAnchored
));
1628 unsigned popParenthesesStack()
1630 ASSERT(m_parenthesesStack
.size());
1631 int stackEnd
= m_parenthesesStack
.size() - 1;
1632 unsigned beginTerm
= m_parenthesesStack
[stackEnd
].beginTerm
;
1633 m_currentAlternativeIndex
= m_parenthesesStack
[stackEnd
].savedAlternativeIndex
;
1634 m_parenthesesStack
.shrink(stackEnd
);
1636 ASSERT(beginTerm
< m_bodyDisjunction
->terms
.size());
1637 ASSERT(m_currentAlternativeIndex
< m_bodyDisjunction
->terms
.size());
1643 void dumpDisjunction(ByteDisjunction
* disjunction
)
1645 dataLogF("ByteDisjunction(%p):\n\t", disjunction
);
1646 for (unsigned i
= 0; i
< disjunction
->terms
.size(); ++i
)
1647 dataLogF("{ %d } ", disjunction
->terms
[i
].type
);
1652 void closeAlternative(int beginTerm
)
1654 int origBeginTerm
= beginTerm
;
1655 ASSERT(m_bodyDisjunction
->terms
[beginTerm
].type
== ByteTerm::TypeAlternativeBegin
);
1656 int endIndex
= m_bodyDisjunction
->terms
.size();
1658 unsigned frameLocation
= m_bodyDisjunction
->terms
[beginTerm
].frameLocation
;
1660 if (!m_bodyDisjunction
->terms
[beginTerm
].alternative
.next
)
1661 m_bodyDisjunction
->terms
.remove(beginTerm
);
1663 while (m_bodyDisjunction
->terms
[beginTerm
].alternative
.next
) {
1664 beginTerm
+= m_bodyDisjunction
->terms
[beginTerm
].alternative
.next
;
1665 ASSERT(m_bodyDisjunction
->terms
[beginTerm
].type
== ByteTerm::TypeAlternativeDisjunction
);
1666 m_bodyDisjunction
->terms
[beginTerm
].alternative
.end
= endIndex
- beginTerm
;
1667 m_bodyDisjunction
->terms
[beginTerm
].frameLocation
= frameLocation
;
1670 m_bodyDisjunction
->terms
[beginTerm
].alternative
.next
= origBeginTerm
- beginTerm
;
1672 m_bodyDisjunction
->terms
.append(ByteTerm::AlternativeEnd());
1673 m_bodyDisjunction
->terms
[endIndex
].frameLocation
= frameLocation
;
1677 void closeBodyAlternative()
1680 int origBeginTerm
= 0;
1681 ASSERT(m_bodyDisjunction
->terms
[beginTerm
].type
== ByteTerm::TypeBodyAlternativeBegin
);
1682 int endIndex
= m_bodyDisjunction
->terms
.size();
1684 unsigned frameLocation
= m_bodyDisjunction
->terms
[beginTerm
].frameLocation
;
1686 while (m_bodyDisjunction
->terms
[beginTerm
].alternative
.next
) {
1687 beginTerm
+= m_bodyDisjunction
->terms
[beginTerm
].alternative
.next
;
1688 ASSERT(m_bodyDisjunction
->terms
[beginTerm
].type
== ByteTerm::TypeBodyAlternativeDisjunction
);
1689 m_bodyDisjunction
->terms
[beginTerm
].alternative
.end
= endIndex
- beginTerm
;
1690 m_bodyDisjunction
->terms
[beginTerm
].frameLocation
= frameLocation
;
1693 m_bodyDisjunction
->terms
[beginTerm
].alternative
.next
= origBeginTerm
- beginTerm
;
1695 m_bodyDisjunction
->terms
.append(ByteTerm::BodyAlternativeEnd());
1696 m_bodyDisjunction
->terms
[endIndex
].frameLocation
= frameLocation
;
1699 void atomParenthesesSubpatternEnd(unsigned lastSubpatternId
, int inputPosition
, unsigned frameLocation
, Checked
<unsigned> quantityCount
, QuantifierType quantityType
, unsigned callFrameSize
= 0)
1701 unsigned beginTerm
= popParenthesesStack();
1702 closeAlternative(beginTerm
+ 1);
1703 unsigned endTerm
= m_bodyDisjunction
->terms
.size();
1705 ASSERT(m_bodyDisjunction
->terms
[beginTerm
].type
== ByteTerm::TypeParenthesesSubpatternOnceBegin
);
1707 ByteTerm
& parenthesesBegin
= m_bodyDisjunction
->terms
[beginTerm
];
1709 bool capture
= parenthesesBegin
.capture();
1710 unsigned subpatternId
= parenthesesBegin
.atom
.subpatternId
;
1712 unsigned numSubpatterns
= lastSubpatternId
- subpatternId
+ 1;
1713 OwnPtr
<ByteDisjunction
> parenthesesDisjunction
= adoptPtr(new ByteDisjunction(numSubpatterns
, callFrameSize
));
1715 unsigned firstTermInParentheses
= beginTerm
+ 1;
1716 parenthesesDisjunction
->terms
.reserveInitialCapacity(endTerm
- firstTermInParentheses
+ 2);
1718 parenthesesDisjunction
->terms
.append(ByteTerm::SubpatternBegin());
1719 for (unsigned termInParentheses
= firstTermInParentheses
; termInParentheses
< endTerm
; ++termInParentheses
)
1720 parenthesesDisjunction
->terms
.append(m_bodyDisjunction
->terms
[termInParentheses
]);
1721 parenthesesDisjunction
->terms
.append(ByteTerm::SubpatternEnd());
1723 m_bodyDisjunction
->terms
.shrink(beginTerm
);
1725 m_bodyDisjunction
->terms
.append(ByteTerm(ByteTerm::TypeParenthesesSubpattern
, subpatternId
, parenthesesDisjunction
.get(), capture
, inputPosition
));
1726 m_allParenthesesInfo
.append(parenthesesDisjunction
.release());
1728 m_bodyDisjunction
->terms
[beginTerm
].atom
.quantityCount
= quantityCount
.unsafeGet();
1729 m_bodyDisjunction
->terms
[beginTerm
].atom
.quantityType
= quantityType
;
1730 m_bodyDisjunction
->terms
[beginTerm
].frameLocation
= frameLocation
;
1733 void atomParenthesesOnceEnd(int inputPosition
, unsigned frameLocation
, Checked
<unsigned> quantityCount
, QuantifierType quantityType
)
1735 unsigned beginTerm
= popParenthesesStack();
1736 closeAlternative(beginTerm
+ 1);
1737 unsigned endTerm
= m_bodyDisjunction
->terms
.size();
1739 ASSERT(m_bodyDisjunction
->terms
[beginTerm
].type
== ByteTerm::TypeParenthesesSubpatternOnceBegin
);
1741 bool capture
= m_bodyDisjunction
->terms
[beginTerm
].capture();
1742 unsigned subpatternId
= m_bodyDisjunction
->terms
[beginTerm
].atom
.subpatternId
;
1744 m_bodyDisjunction
->terms
.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceEnd
, subpatternId
, capture
, false, inputPosition
));
1745 m_bodyDisjunction
->terms
[beginTerm
].atom
.parenthesesWidth
= endTerm
- beginTerm
;
1746 m_bodyDisjunction
->terms
[endTerm
].atom
.parenthesesWidth
= endTerm
- beginTerm
;
1747 m_bodyDisjunction
->terms
[endTerm
].frameLocation
= frameLocation
;
1749 m_bodyDisjunction
->terms
[beginTerm
].atom
.quantityCount
= quantityCount
.unsafeGet();
1750 m_bodyDisjunction
->terms
[beginTerm
].atom
.quantityType
= quantityType
;
1751 m_bodyDisjunction
->terms
[endTerm
].atom
.quantityCount
= quantityCount
.unsafeGet();
1752 m_bodyDisjunction
->terms
[endTerm
].atom
.quantityType
= quantityType
;
1755 void atomParenthesesTerminalEnd(int inputPosition
, unsigned frameLocation
, Checked
<unsigned> quantityCount
, QuantifierType quantityType
)
1757 unsigned beginTerm
= popParenthesesStack();
1758 closeAlternative(beginTerm
+ 1);
1759 unsigned endTerm
= m_bodyDisjunction
->terms
.size();
1761 ASSERT(m_bodyDisjunction
->terms
[beginTerm
].type
== ByteTerm::TypeParenthesesSubpatternTerminalBegin
);
1763 bool capture
= m_bodyDisjunction
->terms
[beginTerm
].capture();
1764 unsigned subpatternId
= m_bodyDisjunction
->terms
[beginTerm
].atom
.subpatternId
;
1766 m_bodyDisjunction
->terms
.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalEnd
, subpatternId
, capture
, false, inputPosition
));
1767 m_bodyDisjunction
->terms
[beginTerm
].atom
.parenthesesWidth
= endTerm
- beginTerm
;
1768 m_bodyDisjunction
->terms
[endTerm
].atom
.parenthesesWidth
= endTerm
- beginTerm
;
1769 m_bodyDisjunction
->terms
[endTerm
].frameLocation
= frameLocation
;
1771 m_bodyDisjunction
->terms
[beginTerm
].atom
.quantityCount
= quantityCount
.unsafeGet();
1772 m_bodyDisjunction
->terms
[beginTerm
].atom
.quantityType
= quantityType
;
1773 m_bodyDisjunction
->terms
[endTerm
].atom
.quantityCount
= quantityCount
.unsafeGet();
1774 m_bodyDisjunction
->terms
[endTerm
].atom
.quantityType
= quantityType
;
1777 void regexBegin(unsigned numSubpatterns
, unsigned callFrameSize
, bool onceThrough
)
1779 m_bodyDisjunction
= adoptPtr(new ByteDisjunction(numSubpatterns
, callFrameSize
));
1780 m_bodyDisjunction
->terms
.append(ByteTerm::BodyAlternativeBegin(onceThrough
));
1781 m_bodyDisjunction
->terms
[0].frameLocation
= 0;
1782 m_currentAlternativeIndex
= 0;
1787 closeBodyAlternative();
1790 void alternativeBodyDisjunction(bool onceThrough
)
1792 int newAlternativeIndex
= m_bodyDisjunction
->terms
.size();
1793 m_bodyDisjunction
->terms
[m_currentAlternativeIndex
].alternative
.next
= newAlternativeIndex
- m_currentAlternativeIndex
;
1794 m_bodyDisjunction
->terms
.append(ByteTerm::BodyAlternativeDisjunction(onceThrough
));
1796 m_currentAlternativeIndex
= newAlternativeIndex
;
1799 void alternativeDisjunction()
1801 int newAlternativeIndex
= m_bodyDisjunction
->terms
.size();
1802 m_bodyDisjunction
->terms
[m_currentAlternativeIndex
].alternative
.next
= newAlternativeIndex
- m_currentAlternativeIndex
;
1803 m_bodyDisjunction
->terms
.append(ByteTerm::AlternativeDisjunction());
1805 m_currentAlternativeIndex
= newAlternativeIndex
;
1808 void emitDisjunction(PatternDisjunction
* disjunction
, unsigned inputCountAlreadyChecked
= 0, unsigned parenthesesInputCountAlreadyChecked
= 0)
1810 for (unsigned alt
= 0; alt
< disjunction
->m_alternatives
.size(); ++alt
) {
1811 unsigned currentCountAlreadyChecked
= inputCountAlreadyChecked
;
1813 PatternAlternative
* alternative
= disjunction
->m_alternatives
[alt
].get();
1816 if (disjunction
== m_pattern
.m_body
)
1817 alternativeBodyDisjunction(alternative
->onceThrough());
1819 alternativeDisjunction();
1822 unsigned minimumSize
= alternative
->m_minimumSize
;
1823 ASSERT(minimumSize
>= parenthesesInputCountAlreadyChecked
);
1824 unsigned countToCheck
= minimumSize
- parenthesesInputCountAlreadyChecked
;
1827 checkInput(countToCheck
);
1828 currentCountAlreadyChecked
+= countToCheck
;
1831 for (unsigned i
= 0; i
< alternative
->m_terms
.size(); ++i
) {
1832 PatternTerm
& term
= alternative
->m_terms
[i
];
1834 switch (term
.type
) {
1835 case PatternTerm::TypeAssertionBOL
:
1836 assertionBOL(currentCountAlreadyChecked
- term
.inputPosition
);
1839 case PatternTerm::TypeAssertionEOL
:
1840 assertionEOL(currentCountAlreadyChecked
- term
.inputPosition
);
1843 case PatternTerm::TypeAssertionWordBoundary
:
1844 assertionWordBoundary(term
.invert(), currentCountAlreadyChecked
- term
.inputPosition
);
1847 case PatternTerm::TypePatternCharacter
:
1848 atomPatternCharacter(term
.patternCharacter
, currentCountAlreadyChecked
- term
.inputPosition
, term
.frameLocation
, term
.quantityCount
, term
.quantityType
);
1851 case PatternTerm::TypeCharacterClass
:
1852 atomCharacterClass(term
.characterClass
, term
.invert(), currentCountAlreadyChecked
- term
.inputPosition
, term
.frameLocation
, term
.quantityCount
, term
.quantityType
);
1855 case PatternTerm::TypeBackReference
:
1856 atomBackReference(term
.backReferenceSubpatternId
, currentCountAlreadyChecked
- term
.inputPosition
, term
.frameLocation
, term
.quantityCount
, term
.quantityType
);
1859 case PatternTerm::TypeForwardReference
:
1862 case PatternTerm::TypeParenthesesSubpattern
: {
1863 unsigned disjunctionAlreadyCheckedCount
= 0;
1864 if (term
.quantityCount
== 1 && !term
.parentheses
.isCopy
) {
1865 unsigned alternativeFrameLocation
= term
.frameLocation
;
1866 // For QuantifierFixedCount we pre-check the minimum size; for greedy/non-greedy we reserve a slot in the frame.
1867 if (term
.quantityType
== QuantifierFixedCount
)
1868 disjunctionAlreadyCheckedCount
= term
.parentheses
.disjunction
->m_minimumSize
;
1870 alternativeFrameLocation
+= YarrStackSpaceForBackTrackInfoParenthesesOnce
;
1871 unsigned delegateEndInputOffset
= term
.inputPosition
- currentCountAlreadyChecked
;
1872 atomParenthesesOnceBegin(term
.parentheses
.subpatternId
, term
.capture(), disjunctionAlreadyCheckedCount
- delegateEndInputOffset
, term
.frameLocation
, alternativeFrameLocation
);
1873 emitDisjunction(term
.parentheses
.disjunction
, currentCountAlreadyChecked
, disjunctionAlreadyCheckedCount
);
1874 atomParenthesesOnceEnd(delegateEndInputOffset
, term
.frameLocation
, term
.quantityCount
, term
.quantityType
);
1875 } else if (term
.parentheses
.isTerminal
) {
1876 unsigned delegateEndInputOffset
= term
.inputPosition
- currentCountAlreadyChecked
;
1877 atomParenthesesTerminalBegin(term
.parentheses
.subpatternId
, term
.capture(), disjunctionAlreadyCheckedCount
- delegateEndInputOffset
, term
.frameLocation
, term
.frameLocation
+ YarrStackSpaceForBackTrackInfoParenthesesOnce
);
1878 emitDisjunction(term
.parentheses
.disjunction
, currentCountAlreadyChecked
, disjunctionAlreadyCheckedCount
);
1879 atomParenthesesTerminalEnd(delegateEndInputOffset
, term
.frameLocation
, term
.quantityCount
, term
.quantityType
);
1881 unsigned delegateEndInputOffset
= term
.inputPosition
- currentCountAlreadyChecked
;
1882 atomParenthesesSubpatternBegin(term
.parentheses
.subpatternId
, term
.capture(), disjunctionAlreadyCheckedCount
- delegateEndInputOffset
, term
.frameLocation
, 0);
1883 emitDisjunction(term
.parentheses
.disjunction
, currentCountAlreadyChecked
, 0);
1884 atomParenthesesSubpatternEnd(term
.parentheses
.lastSubpatternId
, delegateEndInputOffset
, term
.frameLocation
, term
.quantityCount
, term
.quantityType
, term
.parentheses
.disjunction
->m_callFrameSize
);
1889 case PatternTerm::TypeParentheticalAssertion
: {
1890 unsigned alternativeFrameLocation
= term
.frameLocation
+ YarrStackSpaceForBackTrackInfoParentheticalAssertion
;
1892 ASSERT(currentCountAlreadyChecked
>= static_cast<unsigned>(term
.inputPosition
));
1893 unsigned positiveInputOffset
= currentCountAlreadyChecked
- static_cast<unsigned>(term
.inputPosition
);
1894 unsigned uncheckAmount
= 0;
1895 if (positiveInputOffset
> term
.parentheses
.disjunction
->m_minimumSize
) {
1896 uncheckAmount
= positiveInputOffset
- term
.parentheses
.disjunction
->m_minimumSize
;
1897 uncheckInput(uncheckAmount
);
1898 currentCountAlreadyChecked
-= uncheckAmount
;
1901 atomParentheticalAssertionBegin(term
.parentheses
.subpatternId
, term
.invert(), term
.frameLocation
, alternativeFrameLocation
);
1902 emitDisjunction(term
.parentheses
.disjunction
, currentCountAlreadyChecked
, positiveInputOffset
- uncheckAmount
);
1903 atomParentheticalAssertionEnd(0, term
.frameLocation
, term
.quantityCount
, term
.quantityType
);
1904 if (uncheckAmount
) {
1905 checkInput(uncheckAmount
);
1906 currentCountAlreadyChecked
+= uncheckAmount
;
1911 case PatternTerm::TypeDotStarEnclosure
:
1912 assertionDotStarEnclosure(term
.anchors
.bolAnchor
, term
.anchors
.eolAnchor
);
1920 YarrPattern
& m_pattern
;
1921 OwnPtr
<ByteDisjunction
> m_bodyDisjunction
;
1922 unsigned m_currentAlternativeIndex
;
1923 Vector
<ParenthesesStackEntry
> m_parenthesesStack
;
1924 Vector
<OwnPtr
<ByteDisjunction
> > m_allParenthesesInfo
;
1927 PassOwnPtr
<BytecodePattern
> byteCompile(YarrPattern
& pattern
, BumpPointerAllocator
* allocator
)
1929 return ByteCompiler(pattern
).compile(allocator
);
1932 unsigned interpret(BytecodePattern
* bytecode
, const String
& input
, unsigned start
, unsigned* output
)
1935 return Interpreter
<LChar
>(bytecode
, output
, input
.characters8(), input
.length(), start
).interpret();
1936 return Interpreter
<UChar
>(bytecode
, output
, input
.characters16(), input
.length(), start
).interpret();
1939 unsigned interpret(BytecodePattern
* bytecode
, const LChar
* input
, unsigned length
, unsigned start
, unsigned* output
)
1941 return Interpreter
<LChar
>(bytecode
, output
, input
, length
, start
).interpret();
1944 unsigned interpret(BytecodePattern
* bytecode
, const UChar
* input
, unsigned length
, unsigned start
, unsigned* output
)
1946 return Interpreter
<UChar
>(bytecode
, output
, input
, length
, start
).interpret();
1949 // These should be the same for both UChar & LChar.
1950 COMPILE_ASSERT(sizeof(Interpreter
<UChar
>::BackTrackInfoPatternCharacter
) == (YarrStackSpaceForBackTrackInfoPatternCharacter
* sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoPatternCharacter
);
1951 COMPILE_ASSERT(sizeof(Interpreter
<UChar
>::BackTrackInfoCharacterClass
) == (YarrStackSpaceForBackTrackInfoCharacterClass
* sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoCharacterClass
);
1952 COMPILE_ASSERT(sizeof(Interpreter
<UChar
>::BackTrackInfoBackReference
) == (YarrStackSpaceForBackTrackInfoBackReference
* sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoBackReference
);
1953 COMPILE_ASSERT(sizeof(Interpreter
<UChar
>::BackTrackInfoAlternative
) == (YarrStackSpaceForBackTrackInfoAlternative
* sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoAlternative
);
1954 COMPILE_ASSERT(sizeof(Interpreter
<UChar
>::BackTrackInfoParentheticalAssertion
) == (YarrStackSpaceForBackTrackInfoParentheticalAssertion
* sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheticalAssertion
);
1955 COMPILE_ASSERT(sizeof(Interpreter
<UChar
>::BackTrackInfoParenthesesOnce
) == (YarrStackSpaceForBackTrackInfoParenthesesOnce
* sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParenthesesOnce
);
1956 COMPILE_ASSERT(sizeof(Interpreter
<UChar
>::BackTrackInfoParentheses
) == (YarrStackSpaceForBackTrackInfoParentheses
* sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheses
);