2  * Copyright (C) 2008 Apple Inc. All rights reserved. 
   4  * Redistribution and use in source and binary forms, with or without 
   5  * modification, are permitted provided that the following conditions 
   7  * 1. Redistributions of source code must retain the above copyright 
   8  *    notice, this list of conditions and the following disclaimer. 
   9  * 2. Redistributions in binary form must reproduce the above copyright 
  10  *    notice, this list of conditions and the following disclaimer in the 
  11  *    documentation and/or other materials provided with the distribution. 
  13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY 
  14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
  15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 
  16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR 
  17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 
  18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
  19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
  20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
  21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
  22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
  23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.  
  31 #include "CharacterClassConstructor.h" 
  32 #include "Interpreter.h" 
  33 #include "WRECFunctors.h" 
  34 #include "WRECParser.h" 
  35 #include "pcre_internal.h" 
  39 namespace JSC 
{ namespace WREC 
{ 
  41 void Generator::generateEnter() 
  44     // On x86 edi & esi are callee preserved registers. 
  49     // Move the arguments into registers. 
  55     // On gcc the function is regparm(3), so the input, index, and length registers 
  56     // (eax, edx, and ecx respectively) already contain the appropriate values. 
  57     // Just load the fourth argument (output) into edi 
  63 void Generator::generateReturnSuccess() 
  65     ASSERT(returnRegister 
!= index
); 
  66     ASSERT(returnRegister 
!= output
); 
  69     pop(returnRegister
); // match begin 
  70     store32(returnRegister
, output
); 
  71     store32(index
, Address(output
, 4)); // match end 
  73     // Restore callee save registers. 
  81 void Generator::generateSaveIndex() 
  86 void Generator::generateIncrementIndex(Jump
* failure
) 
  90         *failure 
= branch32(Equal
, length
, index
); 
  91     add32(Imm32(1), index
); 
  95 void Generator::generateLoadCharacter(JumpList
& failures
) 
  97     failures
.append(branch32(Equal
, length
, index
)); 
  98     load16(BaseIndex(input
, index
, TimesTwo
), character
); 
 101 // For the sake of end-of-line assertions, we treat one-past-the-end as if it 
 102 // were part of the input string. 
 103 void Generator::generateJumpIfNotEndOfInput(Label target
) 
 105     branch32(LessThanOrEqual
, index
, length
, target
); 
 108 void Generator::generateReturnFailure() 
 111     move(Imm32(-1), returnRegister
); 
 120 void Generator::generateBacktrack1() 
 122     sub32(Imm32(1), index
); 
 125 void Generator::generateBacktrackBackreference(unsigned subpatternId
) 
 127     sub32(Address(output
, (2 * subpatternId 
+ 1) * sizeof(int)), index
); 
 128     add32(Address(output
, (2 * subpatternId
) * sizeof(int)), index
); 
 131 void Generator::generateBackreferenceQuantifier(JumpList
& failures
, Quantifier::Type quantifierType
, unsigned subpatternId
, unsigned min
, unsigned max
) 
 133     GenerateBackreferenceFunctor 
functor(subpatternId
); 
 135     load32(Address(output
, (2 * subpatternId
) * sizeof(int)), character
); 
 136     Jump skipIfEmpty 
= branch32(Equal
, Address(output
, ((2 * subpatternId
) + 1) * sizeof(int)), character
); 
 138     ASSERT(quantifierType 
== Quantifier::Greedy 
|| quantifierType 
== Quantifier::NonGreedy
); 
 139     if (quantifierType 
== Quantifier::Greedy
) 
 140         generateGreedyQuantifier(failures
, functor
, min
, max
); 
 142         generateNonGreedyQuantifier(failures
, functor
, min
, max
); 
 144     skipIfEmpty
.link(this); 
 147 void Generator::generateNonGreedyQuantifier(JumpList
& failures
, GenerateAtomFunctor
& functor
, unsigned min
, unsigned max
) 
 149     JumpList atomFailedList
; 
 150     JumpList alternativeFailedList
; 
 152     // (0) Setup: Save, then init repeatCount. 
 154     move(Imm32(0), repeatCount
); 
 157     // (4) Quantifier failed: No more atom reading possible. 
 158     Label 
quantifierFailed(this); 
 160     failures
.append(jump());  
 162     // (3) Alternative failed: If we can, read another atom, then fall through to (2) to try again. 
 163     Label 
alternativeFailed(this); 
 165     if (max 
!= Quantifier::Infinity
) 
 166         branch32(Equal
, repeatCount
, Imm32(max
), quantifierFailed
); 
 171     Label 
readAtom(this); 
 172     functor
.generateAtom(this, atomFailedList
); 
 173     atomFailedList
.linkTo(quantifierFailed
, this); 
 174     add32(Imm32(1), repeatCount
); 
 176     // (2) Keep reading if we're under the minimum. 
 178         branch32(LessThan
, repeatCount
, Imm32(min
), readAtom
); 
 180     // (3) Test the rest of the alternative. 
 184     m_parser
.parseAlternative(alternativeFailedList
); 
 185     alternativeFailedList
.linkTo(alternativeFailed
, this); 
 191 void Generator::generateGreedyQuantifier(JumpList
& failures
, GenerateAtomFunctor
& functor
, unsigned min
, unsigned max
) 
 196     JumpList doneReadingAtomsList
; 
 197     JumpList alternativeFailedList
; 
 199     // (0) Setup: Save, then init repeatCount. 
 201     move(Imm32(0), repeatCount
); 
 203     // (1) Greedily read as many copies of the atom as possible, then jump to (2). 
 204     Label 
readAtom(this); 
 205     functor
.generateAtom(this, doneReadingAtomsList
); 
 206     add32(Imm32(1), repeatCount
); 
 207     if (max 
== Quantifier::Infinity
) 
 210         doneReadingAtomsList
.append(jump()); 
 212         branch32(NotEqual
, repeatCount
, Imm32(max
), readAtom
); 
 213         doneReadingAtomsList
.append(jump()); 
 216     // (5) Quantifier failed: No more backtracking possible. 
 217     Label 
quantifierFailed(this); 
 219     failures
.append(jump());  
 221     // (4) Alternative failed: Backtrack, then fall through to (2) to try again. 
 222     Label 
alternativeFailed(this); 
 224     functor
.backtrack(this); 
 225     sub32(Imm32(1), repeatCount
); 
 227     // (2) Verify that we have enough atoms. 
 228     doneReadingAtomsList
.link(this); 
 229     branch32(LessThan
, repeatCount
, Imm32(min
), quantifierFailed
); 
 231     // (3) Test the rest of the alternative. 
 233     m_parser
.parseAlternative(alternativeFailedList
); 
 234     alternativeFailedList
.linkTo(alternativeFailed
, this); 
 240 void Generator::generatePatternCharacterSequence(JumpList
& failures
, int* sequence
, size_t count
) 
 242     for (size_t i 
= 0; i 
< count
;) { 
 244             if (generatePatternCharacterPair(failures
, sequence
[i
], sequence
[i 
+ 1])) { 
 250         generatePatternCharacter(failures
, sequence
[i
]); 
 255 bool Generator::generatePatternCharacterPair(JumpList
& failures
, int ch1
, int ch2
) 
 257     if (m_parser
.ignoreCase()) { 
 258         // Non-trivial case folding requires more than one test, so we can't 
 259         // test as a pair with an adjacent character. 
 260         if (!isASCII(ch1
) && Unicode::toLower(ch1
) != Unicode::toUpper(ch1
)) 
 262         if (!isASCII(ch2
) && Unicode::toLower(ch2
) != Unicode::toUpper(ch2
)) 
 266     // Optimistically consume 2 characters. 
 267     add32(Imm32(2), index
); 
 268     failures
.append(branch32(GreaterThan
, index
, length
)); 
 270     // Load the characters we just consumed, offset -2 characters from index. 
 271     load32(BaseIndex(input
, index
, TimesTwo
, -2 * 2), character
); 
 273     if (m_parser
.ignoreCase()) { 
 274         // Convert ASCII alphabet characters to upper case before testing for 
 275         // equality. (ASCII non-alphabet characters don't require upper-casing 
 276         // because they have no uppercase equivalents. Unicode characters don't 
 277         // require upper-casing because we only handle Unicode characters whose 
 278         // upper and lower cases are equal.) 
 280         if (isASCIIAlpha(ch1
)) { 
 286         if (isASCIIAlpha(ch2
)) { 
 291         int mask 
= ch1Mask 
| (ch2Mask 
<< 16); 
 293             or32(Imm32(mask
), character
); 
 295     int pair 
= ch1 
| (ch2 
<< 16); 
 297     failures
.append(branch32(NotEqual
, character
, Imm32(pair
))); 
 301 void Generator::generatePatternCharacter(JumpList
& failures
, int ch
) 
 303     generateLoadCharacter(failures
); 
 305     // used for unicode case insensitive 
 306     bool hasUpper 
= false; 
 309     // if case insensitive match 
 310     if (m_parser
.ignoreCase()) { 
 313         // check for ascii case sensitive characters 
 314         if (isASCIIAlpha(ch
)) { 
 315             or32(Imm32(32), character
); 
 317         } else if (!isASCII(ch
) && ((lower 
= Unicode::toLower(ch
)) != (upper 
= Unicode::toUpper(ch
)))) { 
 318             // handle unicode case sentitive characters - branch to success on upper 
 319             isUpper 
= branch32(Equal
, character
, Imm32(upper
)); 
 325     // checks for ch, or lower case version of ch, if insensitive 
 326     failures
.append(branch32(NotEqual
, character
, Imm32((unsigned short)ch
))); 
 328     if (m_parser
.ignoreCase() && hasUpper
) { 
 329         // for unicode case insensitive matches, branch here if upper matches. 
 333     // on success consume the char 
 334     add32(Imm32(1), index
); 
 337 void Generator::generateCharacterClassInvertedRange(JumpList
& failures
, JumpList
& matchDest
, const CharacterRange
* ranges
, unsigned count
, unsigned* matchIndex
, const UChar
* matches
, unsigned matchCount
) 
 340         // pick which range we're going to generate 
 341         int which 
= count 
>> 1; 
 342         char lo 
= ranges
[which
].begin
; 
 343         char hi 
= ranges
[which
].end
; 
 345         // check if there are any ranges or matches below lo.  If not, just jl to failure - 
 346         // if there is anything else to check, check that first, if it falls through jmp to failure. 
 347         if ((*matchIndex 
< matchCount
) && (matches
[*matchIndex
] < lo
)) { 
 348             Jump loOrAbove 
= branch32(GreaterThanOrEqual
, character
, Imm32((unsigned short)lo
)); 
 350             // generate code for all ranges before this one 
 352                 generateCharacterClassInvertedRange(failures
, matchDest
, ranges
, which
, matchIndex
, matches
, matchCount
); 
 354             while ((*matchIndex 
< matchCount
) && (matches
[*matchIndex
] < lo
)) { 
 355                 matchDest
.append(branch32(Equal
, character
, Imm32((unsigned short)matches
[*matchIndex
]))); 
 358             failures
.append(jump()); 
 360             loOrAbove
.link(this); 
 362             Jump loOrAbove 
= branch32(GreaterThanOrEqual
, character
, Imm32((unsigned short)lo
)); 
 364             generateCharacterClassInvertedRange(failures
, matchDest
, ranges
, which
, matchIndex
, matches
, matchCount
); 
 365             failures
.append(jump()); 
 367             loOrAbove
.link(this); 
 369             failures
.append(branch32(LessThan
, character
, Imm32((unsigned short)lo
))); 
 371         while ((*matchIndex 
< matchCount
) && (matches
[*matchIndex
] <= hi
)) 
 374         matchDest
.append(branch32(LessThanOrEqual
, character
, Imm32((unsigned short)hi
))); 
 375         // fall through to here, the value is above hi. 
 377         // shuffle along & loop around if there are any more matches to handle. 
 378         unsigned next 
= which 
+ 1; 
 384 void Generator::generateCharacterClassInverted(JumpList
& matchDest
, const CharacterClass
& charClass
) 
 387     if (charClass
.numMatchesUnicode 
|| charClass
.numRangesUnicode
) { 
 388         Jump isAscii 
= branch32(LessThanOrEqual
, character
, Imm32(0x7f)); 
 390         if (charClass
.numMatchesUnicode
) { 
 391             for (unsigned i 
= 0; i 
< charClass
.numMatchesUnicode
; ++i
) { 
 392                 UChar ch 
= charClass
.matchesUnicode
[i
]; 
 393                 matchDest
.append(branch32(Equal
, character
, Imm32(ch
))); 
 397         if (charClass
.numRangesUnicode
) { 
 398             for (unsigned i 
= 0; i 
< charClass
.numRangesUnicode
; ++i
) { 
 399                 UChar lo 
= charClass
.rangesUnicode
[i
].begin
; 
 400                 UChar hi 
= charClass
.rangesUnicode
[i
].end
; 
 402                 Jump below 
= branch32(LessThan
, character
, Imm32(lo
)); 
 403                 matchDest
.append(branch32(LessThanOrEqual
, character
, Imm32(hi
))); 
 408         unicodeFail 
= jump(); 
 412     if (charClass
.numRanges
) { 
 413         unsigned matchIndex 
= 0; 
 415         generateCharacterClassInvertedRange(failures
, matchDest
, charClass
.ranges
, charClass
.numRanges
, &matchIndex
, charClass
.matches
, charClass
.numMatches
); 
 416         while (matchIndex 
< charClass
.numMatches
) 
 417             matchDest
.append(branch32(Equal
, character
, Imm32((unsigned short)charClass
.matches
[matchIndex
++]))); 
 420     } else if (charClass
.numMatches
) { 
 421         // optimization: gather 'a','A' etc back together, can mask & test once. 
 422         Vector
<char> matchesAZaz
; 
 424         for (unsigned i 
= 0; i 
< charClass
.numMatches
; ++i
) { 
 425             char ch 
= charClass
.matches
[i
]; 
 426             if (m_parser
.ignoreCase()) { 
 427                 if (isASCIILower(ch
)) { 
 428                     matchesAZaz
.append(ch
); 
 431                 if (isASCIIUpper(ch
)) 
 434             matchDest
.append(branch32(Equal
, character
, Imm32((unsigned short)ch
))); 
 437         if (unsigned countAZaz 
= matchesAZaz
.size()) { 
 438             or32(Imm32(32), character
); 
 439             for (unsigned i 
= 0; i 
< countAZaz
; ++i
) 
 440                 matchDest
.append(branch32(Equal
, character
, Imm32(matchesAZaz
[i
]))); 
 444     if (charClass
.numMatchesUnicode 
|| charClass
.numRangesUnicode
) 
 445         unicodeFail
.link(this); 
 448 void Generator::generateCharacterClass(JumpList
& failures
, const CharacterClass
& charClass
, bool invert
) 
 450     generateLoadCharacter(failures
); 
 453         generateCharacterClassInverted(failures
, charClass
); 
 456         generateCharacterClassInverted(successes
, charClass
); 
 457         failures
.append(jump()); 
 458         successes
.link(this); 
 461     add32(Imm32(1), index
); 
 464 void Generator::generateParenthesesAssertion(JumpList
& failures
) 
 466     JumpList disjunctionFailed
; 
 469     m_parser
.parseDisjunction(disjunctionFailed
); 
 470     Jump success 
= jump(); 
 472     disjunctionFailed
.link(this); 
 474     failures
.append(jump()); 
 480 void Generator::generateParenthesesInvertedAssertion(JumpList
& failures
) 
 482     JumpList disjunctionFailed
; 
 485     m_parser
.parseDisjunction(disjunctionFailed
); 
 487     // If the disjunction succeeded, the inverted assertion failed. 
 489     failures
.append(jump()); 
 491     // If the disjunction failed, the inverted assertion succeeded. 
 492     disjunctionFailed
.link(this); 
 496 void Generator::generateParenthesesNonGreedy(JumpList
& failures
, Label start
, Jump success
, Jump fail
) 
 500     failures
.append(fail
); 
 503 Generator::Jump 
Generator::generateParenthesesResetTrampoline(JumpList
& newFailures
, unsigned subpatternIdBefore
, unsigned subpatternIdAfter
) 
 506     newFailures
.link(this); 
 507     for (unsigned i 
= subpatternIdBefore 
+ 1; i 
<= subpatternIdAfter
; ++i
) { 
 508         store32(Imm32(-1), Address(output
, (2 * i
) * sizeof(int))); 
 509         store32(Imm32(-1), Address(output
, (2 * i 
+ 1) * sizeof(int))); 
 512     Jump newFailJump 
= jump(); 
 518 void Generator::generateAssertionBOL(JumpList
& failures
) 
 520     if (m_parser
.multiline()) { 
 521         JumpList previousIsNewline
; 
 523         // begin of input == success 
 524         previousIsNewline
.append(branch32(Equal
, index
, Imm32(0))); 
 526         // now check prev char against newline characters. 
 527         load16(BaseIndex(input
, index
, TimesTwo
, -2), character
); 
 528         generateCharacterClassInverted(previousIsNewline
, CharacterClass::newline()); 
 530         failures
.append(jump()); 
 532         previousIsNewline
.link(this); 
 534         failures
.append(branch32(NotEqual
, index
, Imm32(0))); 
 537 void Generator::generateAssertionEOL(JumpList
& failures
) 
 539     if (m_parser
.multiline()) { 
 540         JumpList nextIsNewline
; 
 542         generateLoadCharacter(nextIsNewline
); // end of input == success 
 543         generateCharacterClassInverted(nextIsNewline
, CharacterClass::newline()); 
 544         failures
.append(jump()); 
 545         nextIsNewline
.link(this); 
 547         failures
.append(branch32(NotEqual
, length
, index
)); 
 551 void Generator::generateAssertionWordBoundary(JumpList
& failures
, bool invert
) 
 553     JumpList wordBoundary
; 
 554     JumpList notWordBoundary
; 
 556     // (1) Check if the previous value was a word char 
 558     // (1.1) check for begin of input 
 559     Jump atBegin 
= branch32(Equal
, index
, Imm32(0)); 
 560     // (1.2) load the last char, and chck if is word character 
 561     load16(BaseIndex(input
, index
, TimesTwo
, -2), character
); 
 562     JumpList previousIsWord
; 
 563     generateCharacterClassInverted(previousIsWord
, CharacterClass::wordchar()); 
 564     // (1.3) if we get here, previous is not a word char 
 567     // (2) Handle situation where previous was NOT a \w 
 569     generateLoadCharacter(notWordBoundary
); 
 570     generateCharacterClassInverted(wordBoundary
, CharacterClass::wordchar()); 
 571     // (2.1) If we get here, neither chars are word chars 
 572     notWordBoundary
.append(jump()); 
 574     // (3) Handle situation where previous was a \w 
 576     // (3.0) link success in first match to here 
 577     previousIsWord
.link(this); 
 578     generateLoadCharacter(wordBoundary
); 
 579     generateCharacterClassInverted(notWordBoundary
, CharacterClass::wordchar()); 
 580     // (3.1) If we get here, this is an end of a word, within the input. 
 582     // (4) Link everything up 
 585         // handle the fall through case 
 586         wordBoundary
.append(jump()); 
 588         // looking for non word boundaries, so link boundary fails to here. 
 589         notWordBoundary
.link(this); 
 591         failures
.append(wordBoundary
); 
 593         // looking for word boundaries, so link successes here. 
 594         wordBoundary
.link(this); 
 596         failures
.append(notWordBoundary
); 
 600 void Generator::generateBackreference(JumpList
& failures
, unsigned subpatternId
) 
 605     // get the start pos of the backref into repeatCount (multipurpose!) 
 606     load32(Address(output
, (2 * subpatternId
) * sizeof(int)), repeatCount
); 
 608     Jump skipIncrement 
= jump(); 
 609     Label 
topOfLoop(this); 
 611     add32(Imm32(1), index
); 
 612     add32(Imm32(1), repeatCount
); 
 613     skipIncrement
.link(this); 
 615     // check if we're at the end of backref (if we are, success!) 
 616     Jump endOfBackRef 
= branch32(Equal
, Address(output
, ((2 * subpatternId
) + 1) * sizeof(int)), repeatCount
); 
 618     load16(BaseIndex(input
, repeatCount
, MacroAssembler::TimesTwo
), character
); 
 620     // check if we've run out of input (this would be a can o'fail) 
 621     Jump endOfInput 
= branch32(Equal
, length
, index
); 
 623     branch16(Equal
, BaseIndex(input
, index
, TimesTwo
), character
, topOfLoop
); 
 625     endOfInput
.link(this); 
 630     failures
.append(jump()); 
 633     endOfBackRef
.link(this); 
 638 void Generator::terminateAlternative(JumpList
& successes
, JumpList
& failures
) 
 640     successes
.append(jump()); 
 646 void Generator::terminateDisjunction(JumpList
& successes
) 
 648     successes
.link(this); 
 651 } } // namespace JSC::WREC 
 653 #endif // ENABLE(WREC)