yarr/YarrInterpreter.cpp

   1 /*
   2  * Copyright (C) 2009 Apple Inc. All rights reserved.
   3  * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  * 1. Redistributions of source code must retain the above copyright
   9  *    notice, this list of conditions and the following disclaimer.
  10  * 2. Redistributions in binary form must reproduce the above copyright
  11  *    notice, this list of conditions and the following disclaimer in the
  12  *    documentation and/or other materials provided with the distribution.
  13  *
  14  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
  15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
  18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
  22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  25  */
  26
  27 #include "config.h"
  28 #include "YarrInterpreter.h"
  29
  30 #include "Yarr.h"
  31 #include "YarrCanonicalizeUCS2.h"
  32 #include <wtf/BumpPointerAllocator.h>
  33 #include <wtf/DataLog.h>
  34 #include <wtf/text/CString.h>
  35 #include <wtf/text/WTFString.h>
  36
  37 #ifndef NDEBUG
  38 #include <stdio.h>
  39 #endif
  40
  41 using namespace WTF;
  42
  43 namespace JSC { namespace Yarr {
  44
  45 template<typename CharType>
  46 class Interpreter {
  47 public:
  48     struct ParenthesesDisjunctionContext;
  49
  50     struct BackTrackInfoPatternCharacter {
  51         uintptr_t matchAmount;
  52     };
  53     struct BackTrackInfoCharacterClass {
  54         uintptr_t matchAmount;
  55     };
  56     struct BackTrackInfoBackReference {
  57         uintptr_t begin; // Not really needed for greedy quantifiers.
  58         uintptr_t matchAmount; // Not really needed for fixed quantifiers.
  59     };
  60     struct BackTrackInfoAlternative {
  61         uintptr_t offset;
  62     };
  63     struct BackTrackInfoParentheticalAssertion {
  64         uintptr_t begin;
  65     };
  66     struct BackTrackInfoParenthesesOnce {
  67         uintptr_t begin;
  68     };
  69     struct BackTrackInfoParenthesesTerminal {
  70         uintptr_t begin;
  71     };
  72     struct BackTrackInfoParentheses {
  73         uintptr_t matchAmount;
  74         ParenthesesDisjunctionContext* lastContext;
  75     };
  76
  77     static inline void appendParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack, ParenthesesDisjunctionContext* context)
  78     {
  79         context->next = backTrack->lastContext;
  80         backTrack->lastContext = context;
  81         ++backTrack->matchAmount;
  82     }
  83
  84     static inline void popParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack)
  85     {
  86         RELEASE_ASSERT(backTrack->matchAmount);
  87         RELEASE_ASSERT(backTrack->lastContext);
  88         backTrack->lastContext = backTrack->lastContext->next;
  89         --backTrack->matchAmount;
  90     }
  91
  92     struct DisjunctionContext
  93     {
  94         DisjunctionContext()
  95             : term(0)
  96         {
  97         }
  98
  99         void* operator new(size_t, void* where)
 100         {
 101             return where;
 102         }
 103
 104         int term;
 105         unsigned matchBegin;
 106         unsigned matchEnd;
 107         uintptr_t frame[1];
 108     };
 109
 110     DisjunctionContext* allocDisjunctionContext(ByteDisjunction* disjunction)
 111     {
 112         size_t size = sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t);
 113         allocatorPool = allocatorPool->ensureCapacity(size);
 114         RELEASE_ASSERT(allocatorPool);
 115         return new (allocatorPool->alloc(size)) DisjunctionContext();
 116     }
 117
 118     void freeDisjunctionContext(DisjunctionContext* context)
 119     {
 120         allocatorPool = allocatorPool->dealloc(context);
 121     }
 122
 123     struct ParenthesesDisjunctionContext
 124     {
 125         ParenthesesDisjunctionContext(unsigned* output, ByteTerm& term)
 126             : next(0)
 127         {
 128             unsigned firstSubpatternId = term.atom.subpatternId;
 129             unsigned numNestedSubpatterns = term.atom.parenthesesDisjunction->m_numSubpatterns;
 130
 131             for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i) {
 132                 subpatternBackup[i] = output[(firstSubpatternId << 1) + i];
 133                 output[(firstSubpatternId << 1) + i] = offsetNoMatch;
 134             }
 135
 136             new (getDisjunctionContext(term)) DisjunctionContext();
 137         }
 138
 139         void* operator new(size_t, void* where)
 140         {
 141             return where;
 142         }
 143
 144         void restoreOutput(unsigned* output, unsigned firstSubpatternId, unsigned numNestedSubpatterns)
 145         {
 146             for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i)
 147                 output[(firstSubpatternId << 1) + i] = subpatternBackup[i];
 148         }
 149
 150         DisjunctionContext* getDisjunctionContext(ByteTerm& term)
 151         {
 152             return reinterpret_cast<DisjunctionContext*>(&(subpatternBackup[term.atom.parenthesesDisjunction->m_numSubpatterns << 1]));
 153         }
 154
 155         ParenthesesDisjunctionContext* next;
 156         unsigned subpatternBackup[1];
 157     };
 158
 159     ParenthesesDisjunctionContext* allocParenthesesDisjunctionContext(ByteDisjunction* disjunction, unsigned* output, ByteTerm& term)
 160     {
 161         size_t size = sizeof(ParenthesesDisjunctionContext) - sizeof(unsigned) + (term.atom.parenthesesDisjunction->m_numSubpatterns << 1) * sizeof(unsigned) + sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t);
 162         allocatorPool = allocatorPool->ensureCapacity(size);
 163         RELEASE_ASSERT(allocatorPool);
 164         return new (allocatorPool->alloc(size)) ParenthesesDisjunctionContext(output, term);
 165     }
 166
 167     void freeParenthesesDisjunctionContext(ParenthesesDisjunctionContext* context)
 168     {
 169         allocatorPool = allocatorPool->dealloc(context);
 170     }
 171
 172     class InputStream {
 173     public:
 174         InputStream(const CharType* input, unsigned start, unsigned length)
 175             : input(input)
 176             , pos(start)
 177             , length(length)
 178         {
 179         }
 180
 181         void next()
 182         {
 183             ++pos;
 184         }
 185
 186         void rewind(unsigned amount)
 187         {
 188             ASSERT(pos >= amount);
 189             pos -= amount;
 190         }
 191
 192         int read()
 193         {
 194             ASSERT(pos < length);
 195             if (pos < length)
 196                 return input[pos];
 197             return -1;
 198         }
 199
 200         int readPair()
 201         {
 202             ASSERT(pos + 1 < length);
 203             return input[pos] | input[pos + 1] << 16;
 204         }
 205
 206         int readChecked(unsigned negativePositionOffest)
 207         {
 208             RELEASE_ASSERT(pos >= negativePositionOffest);
 209             unsigned p = pos - negativePositionOffest;
 210             ASSERT(p < length);
 211             return input[p];
 212         }
 213
 214         int reread(unsigned from)
 215         {
 216             ASSERT(from < length);
 217             return input[from];
 218         }
 219
 220         int prev()
 221         {
 222             ASSERT(!(pos > length));
 223             if (pos && length)
 224                 return input[pos - 1];
 225             return -1;
 226         }
 227
 228         unsigned getPos()
 229         {
 230             return pos;
 231         }
 232
 233         void setPos(unsigned p)
 234         {
 235             pos = p;
 236         }
 237
 238         bool atStart()
 239         {
 240             return pos == 0;
 241         }
 242
 243         bool atEnd()
 244         {
 245             return pos == length;
 246         }
 247
 248         unsigned end()
 249         {
 250             return length;
 251         }
 252
 253         bool checkInput(unsigned count)
 254         {
 255             if (((pos + count) <= length) && ((pos + count) >= pos)) {
 256                 pos += count;
 257                 return true;
 258             }
 259             return false;
 260         }
 261
 262         void uncheckInput(unsigned count)
 263         {
 264             RELEASE_ASSERT(pos >= count);
 265             pos -= count;
 266         }
 267
 268         bool atStart(unsigned negativePositionOffest)
 269         {
 270             return pos == negativePositionOffest;
 271         }
 272
 273         bool atEnd(unsigned negativePositionOffest)
 274         {
 275             RELEASE_ASSERT(pos >= negativePositionOffest);
 276             return (pos - negativePositionOffest) == length;
 277         }
 278
 279         bool isAvailableInput(unsigned offset)
 280         {
 281             return (((pos + offset) <= length) && ((pos + offset) >= pos));
 282         }
 283
 284     private:
 285         const CharType* input;
 286         unsigned pos;
 287         unsigned length;
 288     };
 289
 290     bool testCharacterClass(CharacterClass* characterClass, int ch)
 291     {
 292         if (ch & 0xFF80) {
 293             for (unsigned i = 0; i < characterClass->m_matchesUnicode.size(); ++i)
 294                 if (ch == characterClass->m_matchesUnicode[i])
 295                     return true;
 296             for (unsigned i = 0; i < characterClass->m_rangesUnicode.size(); ++i)
 297                 if ((ch >= characterClass->m_rangesUnicode[i].begin) && (ch <= characterClass->m_rangesUnicode[i].end))
 298                     return true;
 299         } else {
 300             for (unsigned i = 0; i < characterClass->m_matches.size(); ++i)
 301                 if (ch == characterClass->m_matches[i])
 302                     return true;
 303             for (unsigned i = 0; i < characterClass->m_ranges.size(); ++i)
 304                 if ((ch >= characterClass->m_ranges[i].begin) && (ch <= characterClass->m_ranges[i].end))
 305                     return true;
 306         }
 307
 308         return false;
 309     }
 310
 311     bool checkCharacter(int testChar, unsigned negativeInputOffset)
 312     {
 313         return testChar == input.readChecked(negativeInputOffset);
 314     }
 315
 316     bool checkCasedCharacter(int loChar, int hiChar, unsigned negativeInputOffset)
 317     {
 318         int ch = input.readChecked(negativeInputOffset);
 319         return (loChar == ch) || (hiChar == ch);
 320     }
 321
 322     bool checkCharacterClass(CharacterClass* characterClass, bool invert, unsigned negativeInputOffset)
 323     {
 324         bool match = testCharacterClass(characterClass, input.readChecked(negativeInputOffset));
 325         return invert ? !match : match;
 326     }
 327
 328     bool tryConsumeBackReference(int matchBegin, int matchEnd, unsigned negativeInputOffset)
 329     {
 330         unsigned matchSize = (unsigned)(matchEnd - matchBegin);
 331
 332         if (!input.checkInput(matchSize))
 333             return false;
 334
 335         if (pattern->m_ignoreCase) {
 336             for (unsigned i = 0; i < matchSize; ++i) {
 337                 int oldCh = input.reread(matchBegin + i);
 338                 int ch = input.readChecked(negativeInputOffset + matchSize - i);
 339
 340                 if (oldCh == ch)
 341                     continue;
 342
 343                 // The definition for canonicalize (see ES 5.1, 15.10.2.8) means that
 344                 // unicode values are never allowed to match against ascii ones.
 345                 if (isASCII(oldCh) || isASCII(ch)) {
 346                     if (toASCIIUpper(oldCh) == toASCIIUpper(ch))
 347                         continue;
 348                 } else if (areCanonicallyEquivalent(oldCh, ch))
 349                     continue;
 350
 351                 input.uncheckInput(matchSize);
 352                 return false;
 353             }
 354         } else {
 355             for (unsigned i = 0; i < matchSize; ++i) {
 356                 if (!checkCharacter(input.reread(matchBegin + i), negativeInputOffset + matchSize - i)) {
 357                     input.uncheckInput(matchSize);
 358                     return false;
 359                 }
 360             }
 361         }
 362
 363         return true;
 364     }
 365
 366     bool matchAssertionBOL(ByteTerm& term)
 367     {
 368         return (input.atStart(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition + 1)));
 369     }
 370
 371     bool matchAssertionEOL(ByteTerm& term)
 372     {
 373         if (term.inputPosition)
 374             return (input.atEnd(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition)));
 375
 376         return (input.atEnd()) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.read()));
 377     }
 378
 379     bool matchAssertionWordBoundary(ByteTerm& term)
 380     {
 381         bool prevIsWordchar = !input.atStart(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition + 1));
 382         bool readIsWordchar;
 383         if (term.inputPosition)
 384             readIsWordchar = !input.atEnd(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition));
 385         else
 386             readIsWordchar = !input.atEnd() && testCharacterClass(pattern->wordcharCharacterClass, input.read());
 387
 388         bool wordBoundary = prevIsWordchar != readIsWordchar;
 389         return term.invert() ? !wordBoundary : wordBoundary;
 390     }
 391
 392     bool backtrackPatternCharacter(ByteTerm& term, DisjunctionContext* context)
 393     {
 394         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
 395
 396         switch (term.atom.quantityType) {
 397         case QuantifierFixedCount:
 398             break;
 399
 400         case QuantifierGreedy:
 401             if (backTrack->matchAmount) {
 402                 --backTrack->matchAmount;
 403                 input.uncheckInput(1);
 404                 return true;
 405             }
 406             break;
 407
 408         case QuantifierNonGreedy:
 409             if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
 410                 ++backTrack->matchAmount;
 411                 if (checkCharacter(term.atom.patternCharacter, term.inputPosition + 1))
 412                     return true;
 413             }
 414             input.uncheckInput(backTrack->matchAmount);
 415             break;
 416         }
 417
 418         return false;
 419     }
 420
 421     bool backtrackPatternCasedCharacter(ByteTerm& term, DisjunctionContext* context)
 422     {
 423         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
 424
 425         switch (term.atom.quantityType) {
 426         case QuantifierFixedCount:
 427             break;
 428
 429         case QuantifierGreedy:
 430             if (backTrack->matchAmount) {
 431                 --backTrack->matchAmount;
 432                 input.uncheckInput(1);
 433                 return true;
 434             }
 435             break;
 436
 437         case QuantifierNonGreedy:
 438             if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
 439                 ++backTrack->matchAmount;
 440                 if (checkCasedCharacter(term.atom.casedCharacter.lo, term.atom.casedCharacter.hi, term.inputPosition + 1))
 441                     return true;
 442             }
 443             input.uncheckInput(backTrack->matchAmount);
 444             break;
 445         }
 446
 447         return false;
 448     }
 449
 450     bool matchCharacterClass(ByteTerm& term, DisjunctionContext* context)
 451     {
 452         ASSERT(term.type == ByteTerm::TypeCharacterClass);
 453         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
 454
 455         switch (term.atom.quantityType) {
 456         case QuantifierFixedCount: {
 457             for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) {
 458                 if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - matchAmount))
 459                     return false;
 460             }
 461             return true;
 462         }
 463
 464         case QuantifierGreedy: {
 465             unsigned matchAmount = 0;
 466             while ((matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
 467                 if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + 1)) {
 468                     input.uncheckInput(1);
 469                     break;
 470                 }
 471                 ++matchAmount;
 472             }
 473             backTrack->matchAmount = matchAmount;
 474
 475             return true;
 476         }
 477
 478         case QuantifierNonGreedy:
 479             backTrack->matchAmount = 0;
 480             return true;
 481         }
 482
 483         RELEASE_ASSERT_NOT_REACHED();
 484         return false;
 485     }
 486
 487     bool backtrackCharacterClass(ByteTerm& term, DisjunctionContext* context)
 488     {
 489         ASSERT(term.type == ByteTerm::TypeCharacterClass);
 490         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
 491
 492         switch (term.atom.quantityType) {
 493         case QuantifierFixedCount:
 494             break;
 495
 496         case QuantifierGreedy:
 497             if (backTrack->matchAmount) {
 498                 --backTrack->matchAmount;
 499                 input.uncheckInput(1);
 500                 return true;
 501             }
 502             break;
 503
 504         case QuantifierNonGreedy:
 505             if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
 506                 ++backTrack->matchAmount;
 507                 if (checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + 1))
 508                     return true;
 509             }
 510             input.uncheckInput(backTrack->matchAmount);
 511             break;
 512         }
 513
 514         return false;
 515     }
 516
 517     bool matchBackReference(ByteTerm& term, DisjunctionContext* context)
 518     {
 519         ASSERT(term.type == ByteTerm::TypeBackReference);
 520         BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
 521
 522         unsigned matchBegin = output[(term.atom.subpatternId << 1)];
 523         unsigned matchEnd = output[(term.atom.subpatternId << 1) + 1];
 524
 525         // If the end position of the referenced match hasn't set yet then the backreference in the same parentheses where it references to that.
 526         // In this case the result of match is empty string like when it references to a parentheses with zero-width match.
 527         // Eg.: /(a\1)/
 528         if (matchEnd == offsetNoMatch)
 529             return true;
 530
 531         if (matchBegin == offsetNoMatch)
 532             return true;
 533
 534         ASSERT(matchBegin <= matchEnd);
 535
 536         if (matchBegin == matchEnd)
 537             return true;
 538
 539         switch (term.atom.quantityType) {
 540         case QuantifierFixedCount: {
 541             backTrack->begin = input.getPos();
 542             for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) {
 543                 if (!tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
 544                     input.setPos(backTrack->begin);
 545                     return false;
 546                 }
 547             }
 548             return true;
 549         }
 550
 551         case QuantifierGreedy: {
 552             unsigned matchAmount = 0;
 553             while ((matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition))
 554                 ++matchAmount;
 555             backTrack->matchAmount = matchAmount;
 556             return true;
 557         }
 558
 559         case QuantifierNonGreedy:
 560             backTrack->begin = input.getPos();
 561             backTrack->matchAmount = 0;
 562             return true;
 563         }
 564
 565         RELEASE_ASSERT_NOT_REACHED();
 566         return false;
 567     }
 568
 569     bool backtrackBackReference(ByteTerm& term, DisjunctionContext* context)
 570     {
 571         ASSERT(term.type == ByteTerm::TypeBackReference);
 572         BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
 573
 574         unsigned matchBegin = output[(term.atom.subpatternId << 1)];
 575         unsigned matchEnd = output[(term.atom.subpatternId << 1) + 1];
 576
 577         if (matchBegin == offsetNoMatch)
 578             return false;
 579
 580         ASSERT(matchBegin <= matchEnd);
 581
 582         if (matchBegin == matchEnd)
 583             return false;
 584
 585         switch (term.atom.quantityType) {
 586         case QuantifierFixedCount:
 587             // for quantityCount == 1, could rewind.
 588             input.setPos(backTrack->begin);
 589             break;
 590
 591         case QuantifierGreedy:
 592             if (backTrack->matchAmount) {
 593                 --backTrack->matchAmount;
 594                 input.rewind(matchEnd - matchBegin);
 595                 return true;
 596             }
 597             break;
 598
 599         case QuantifierNonGreedy:
 600             if ((backTrack->matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
 601                 ++backTrack->matchAmount;
 602                 return true;
 603             }
 604             input.setPos(backTrack->begin);
 605             break;
 606         }
 607
 608         return false;
 609     }
 610
 611     void recordParenthesesMatch(ByteTerm& term, ParenthesesDisjunctionContext* context)
 612     {
 613         if (term.capture()) {
 614             unsigned subpatternId = term.atom.subpatternId;
 615             output[(subpatternId << 1)] = context->getDisjunctionContext(term)->matchBegin + term.inputPosition;
 616             output[(subpatternId << 1) + 1] = context->getDisjunctionContext(term)->matchEnd + term.inputPosition;
 617         }
 618     }
 619     void resetMatches(ByteTerm& term, ParenthesesDisjunctionContext* context)
 620     {
 621         unsigned firstSubpatternId = term.atom.subpatternId;
 622         unsigned count = term.atom.parenthesesDisjunction->m_numSubpatterns;
 623         context->restoreOutput(output, firstSubpatternId, count);
 624     }
 625     JSRegExpResult parenthesesDoBacktrack(ByteTerm& term, BackTrackInfoParentheses* backTrack)
 626     {
 627         while (backTrack->matchAmount) {
 628             ParenthesesDisjunctionContext* context = backTrack->lastContext;
 629
 630             JSRegExpResult result = matchDisjunction(term.atom.parenthesesDisjunction, context->getDisjunctionContext(term), true);
 631             if (result == JSRegExpMatch)
 632                 return JSRegExpMatch;
 633
 634             resetMatches(term, context);
 635             popParenthesesDisjunctionContext(backTrack);
 636             freeParenthesesDisjunctionContext(context);
 637
 638             if (result != JSRegExpNoMatch)
 639                 return result;
 640         }
 641
 642         return JSRegExpNoMatch;
 643     }
 644
 645     bool matchParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
 646     {
 647         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
 648         ASSERT(term.atom.quantityCount == 1);
 649
 650         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
 651
 652         switch (term.atom.quantityType) {
 653         case QuantifierGreedy: {
 654             // set this speculatively; if we get to the parens end this will be true.
 655             backTrack->begin = input.getPos();
 656             break;
 657         }
 658         case QuantifierNonGreedy: {
 659             backTrack->begin = notFound;
 660             context->term += term.atom.parenthesesWidth;
 661             return true;
 662         }
 663         case QuantifierFixedCount:
 664             break;
 665         }
 666
 667         if (term.capture()) {
 668             unsigned subpatternId = term.atom.subpatternId;
 669             output[(subpatternId << 1)] = input.getPos() - term.inputPosition;
 670         }
 671
 672         return true;
 673     }
 674
 675     bool matchParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
 676     {
 677         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
 678         ASSERT(term.atom.quantityCount == 1);
 679
 680         if (term.capture()) {
 681             unsigned subpatternId = term.atom.subpatternId;
 682             output[(subpatternId << 1) + 1] = input.getPos() + term.inputPosition;
 683         }
 684
 685         if (term.atom.quantityType == QuantifierFixedCount)
 686             return true;
 687
 688         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
 689         return backTrack->begin != input.getPos();
 690     }
 691
 692     bool backtrackParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
 693     {
 694         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
 695         ASSERT(term.atom.quantityCount == 1);
 696
 697         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
 698
 699         if (term.capture()) {
 700             unsigned subpatternId = term.atom.subpatternId;
 701             output[(subpatternId << 1)] = offsetNoMatch;
 702             output[(subpatternId << 1) + 1] = offsetNoMatch;
 703         }
 704
 705         switch (term.atom.quantityType) {
 706         case QuantifierGreedy:
 707             // if we backtrack to this point, there is another chance - try matching nothing.
 708             ASSERT(backTrack->begin != notFound);
 709             backTrack->begin = notFound;
 710             context->term += term.atom.parenthesesWidth;
 711             return true;
 712         case QuantifierNonGreedy:
 713             ASSERT(backTrack->begin != notFound);
 714         case QuantifierFixedCount:
 715             break;
 716         }
 717
 718         return false;
 719     }
 720
 721     bool backtrackParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
 722     {
 723         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
 724         ASSERT(term.atom.quantityCount == 1);
 725
 726         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
 727
 728         switch (term.atom.quantityType) {
 729         case QuantifierGreedy:
 730             if (backTrack->begin == notFound) {
 731                 context->term -= term.atom.parenthesesWidth;
 732                 return false;
 733             }
 734         case QuantifierNonGreedy:
 735             if (backTrack->begin == notFound) {
 736                 backTrack->begin = input.getPos();
 737                 if (term.capture()) {
 738                     // Technically this access to inputPosition should be accessing the begin term's
 739                     // inputPosition, but for repeats other than fixed these values should be
 740                     // the same anyway! (We don't pre-check for greedy or non-greedy matches.)
 741                     ASSERT((&term - term.atom.parenthesesWidth)->type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
 742                     ASSERT((&term - term.atom.parenthesesWidth)->inputPosition == term.inputPosition);
 743                     unsigned subpatternId = term.atom.subpatternId;
 744                     output[subpatternId << 1] = input.getPos() + term.inputPosition;
 745                 }
 746                 context->term -= term.atom.parenthesesWidth;
 747                 return true;
 748             }
 749         case QuantifierFixedCount:
 750             break;
 751         }
 752
 753         return false;
 754     }
 755
 756     bool matchParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context)
 757     {
 758         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
 759         ASSERT(term.atom.quantityType == QuantifierGreedy);
 760         ASSERT(term.atom.quantityCount == quantifyInfinite);
 761         ASSERT(!term.capture());
 762
 763         BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
 764         backTrack->begin = input.getPos();
 765         return true;
 766     }
 767
 768     bool matchParenthesesTerminalEnd(ByteTerm& term, DisjunctionContext* context)
 769     {
 770         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalEnd);
 771
 772         BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
 773         // Empty match is a failed match.
 774         if (backTrack->begin == input.getPos())
 775             return false;
 776
 777         // Successful match! Okay, what's next? - loop around and try to match moar!
 778         context->term -= (term.atom.parenthesesWidth + 1);
 779         return true;
 780     }
 781
 782     bool backtrackParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context)
 783     {
 784         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
 785         ASSERT(term.atom.quantityType == QuantifierGreedy);
 786         ASSERT(term.atom.quantityCount == quantifyInfinite);
 787         ASSERT(!term.capture());
 788
 789         // If we backtrack to this point, we have failed to match this iteration of the parens.
 790         // Since this is greedy / zero minimum a failed is also accepted as a match!
 791         context->term += term.atom.parenthesesWidth;
 792         return true;
 793     }
 794
 795     bool backtrackParenthesesTerminalEnd(ByteTerm&, DisjunctionContext*)
 796     {
 797         // 'Terminal' parentheses are at the end of the regex, and as such a match past end
 798         // should always be returned as a successful match - we should never backtrack to here.
 799         RELEASE_ASSERT_NOT_REACHED();
 800         return false;
 801     }
 802
 803     bool matchParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
 804     {
 805         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
 806         ASSERT(term.atom.quantityCount == 1);
 807
 808         BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
 809
 810         backTrack->begin = input.getPos();
 811         return true;
 812     }
 813
 814     bool matchParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
 815     {
 816         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
 817         ASSERT(term.atom.quantityCount == 1);
 818
 819         BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
 820
 821         input.setPos(backTrack->begin);
 822
 823         // We've reached the end of the parens; if they are inverted, this is failure.
 824         if (term.invert()) {
 825             context->term -= term.atom.parenthesesWidth;
 826             return false;
 827         }
 828
 829         return true;
 830     }
 831
 832     bool backtrackParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
 833     {
 834         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
 835         ASSERT(term.atom.quantityCount == 1);
 836
 837         // We've failed to match parens; if they are inverted, this is win!
 838         if (term.invert()) {
 839             context->term += term.atom.parenthesesWidth;
 840             return true;
 841         }
 842
 843         return false;
 844     }
 845
 846     bool backtrackParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
 847     {
 848         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
 849         ASSERT(term.atom.quantityCount == 1);
 850
 851         BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
 852
 853         input.setPos(backTrack->begin);
 854
 855         context->term -= term.atom.parenthesesWidth;
 856         return false;
 857     }
 858
 859     JSRegExpResult matchParentheses(ByteTerm& term, DisjunctionContext* context)
 860     {
 861         ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
 862
 863         BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
 864         ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
 865
 866         backTrack->matchAmount = 0;
 867         backTrack->lastContext = 0;
 868
 869         switch (term.atom.quantityType) {
 870         case QuantifierFixedCount: {
 871             // While we haven't yet reached our fixed limit,
 872             while (backTrack->matchAmount < term.atom.quantityCount) {
 873                 // Try to do a match, and it it succeeds, add it to the list.
 874                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
 875                 JSRegExpResult result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
 876                 if (result == JSRegExpMatch)
 877                     appendParenthesesDisjunctionContext(backTrack, context);
 878                 else {
 879                     // The match failed; try to find an alternate point to carry on from.
 880                     resetMatches(term, context);
 881                     freeParenthesesDisjunctionContext(context);
 882
 883                     if (result != JSRegExpNoMatch)
 884                         return result;
 885                     JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
 886                     if (backtrackResult != JSRegExpMatch)
 887                         return backtrackResult;
 888                 }
 889             }
 890
 891             ASSERT(backTrack->matchAmount == term.atom.quantityCount);
 892             ParenthesesDisjunctionContext* context = backTrack->lastContext;
 893             recordParenthesesMatch(term, context);
 894             return JSRegExpMatch;
 895         }
 896
 897         case QuantifierGreedy: {
 898             while (backTrack->matchAmount < term.atom.quantityCount) {
 899                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
 900                 JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
 901                 if (result == JSRegExpMatch)
 902                     appendParenthesesDisjunctionContext(backTrack, context);
 903                 else {
 904                     resetMatches(term, context);
 905                     freeParenthesesDisjunctionContext(context);
 906
 907                     if (result != JSRegExpNoMatch)
 908                         return result;
 909
 910                     break;
 911                 }
 912             }
 913
 914             if (backTrack->matchAmount) {
 915                 ParenthesesDisjunctionContext* context = backTrack->lastContext;
 916                 recordParenthesesMatch(term, context);
 917             }
 918             return JSRegExpMatch;
 919         }
 920
 921         case QuantifierNonGreedy:
 922             return JSRegExpMatch;
 923         }
 924
 925         RELEASE_ASSERT_NOT_REACHED();
 926         return JSRegExpErrorNoMatch;
 927     }
 928
 929     // Rules for backtracking differ depending on whether this is greedy or non-greedy.
 930     //
 931     // Greedy matches never should try just adding more - you should already have done
 932     // the 'more' cases.  Always backtrack, at least a leetle bit.  However cases where
 933     // you backtrack an item off the list needs checking, since we'll never have matched
 934     // the one less case.  Tracking forwards, still add as much as possible.
 935     //
 936     // Non-greedy, we've already done the one less case, so don't match on popping.
 937     // We haven't done the one more case, so always try to add that.
 938     //
 939     JSRegExpResult backtrackParentheses(ByteTerm& term, DisjunctionContext* context)
 940     {
 941         ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
 942
 943         BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
 944         ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
 945
 946         switch (term.atom.quantityType) {
 947         case QuantifierFixedCount: {
 948             ASSERT(backTrack->matchAmount == term.atom.quantityCount);
 949
 950             ParenthesesDisjunctionContext* context = 0;
 951             JSRegExpResult result = parenthesesDoBacktrack(term, backTrack);
 952
 953             if (result != JSRegExpMatch)
 954                 return result;
 955
 956             // While we haven't yet reached our fixed limit,
 957             while (backTrack->matchAmount < term.atom.quantityCount) {
 958                 // Try to do a match, and it it succeeds, add it to the list.
 959                 context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
 960                 result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
 961
 962                 if (result == JSRegExpMatch)
 963                     appendParenthesesDisjunctionContext(backTrack, context);
 964                 else {
 965                     // The match failed; try to find an alternate point to carry on from.
 966                     resetMatches(term, context);
 967                     freeParenthesesDisjunctionContext(context);
 968
 969                     if (result != JSRegExpNoMatch)
 970                         return result;
 971                     JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
 972                     if (backtrackResult != JSRegExpMatch)
 973                         return backtrackResult;
 974                 }
 975             }
 976
 977             ASSERT(backTrack->matchAmount == term.atom.quantityCount);
 978             context = backTrack->lastContext;
 979             recordParenthesesMatch(term, context);
 980             return JSRegExpMatch;
 981         }
 982
 983         case QuantifierGreedy: {
 984             if (!backTrack->matchAmount)
 985                 return JSRegExpNoMatch;
 986
 987             ParenthesesDisjunctionContext* context = backTrack->lastContext;
 988             JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
 989             if (result == JSRegExpMatch) {
 990                 while (backTrack->matchAmount < term.atom.quantityCount) {
 991                     ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
 992                     JSRegExpResult parenthesesResult = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
 993                     if (parenthesesResult == JSRegExpMatch)
 994                         appendParenthesesDisjunctionContext(backTrack, context);
 995                     else {
 996                         resetMatches(term, context);
 997                         freeParenthesesDisjunctionContext(context);
 998
 999                         if (parenthesesResult != JSRegExpNoMatch)
1000                             return parenthesesResult;
1001
1002                         break;
1003                     }
1004                 }
1005             } else {
1006                 resetMatches(term, context);
1007                 popParenthesesDisjunctionContext(backTrack);
1008                 freeParenthesesDisjunctionContext(context);
1009
1010                 if (result != JSRegExpNoMatch)
1011                     return result;
1012             }
1013
1014             if (backTrack->matchAmount) {
1015                 ParenthesesDisjunctionContext* context = backTrack->lastContext;
1016                 recordParenthesesMatch(term, context);
1017             }
1018             return JSRegExpMatch;
1019         }
1020
1021         case QuantifierNonGreedy: {
1022             // If we've not reached the limit, try to add one more match.
1023             if (backTrack->matchAmount < term.atom.quantityCount) {
1024                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
1025                 JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
1026                 if (result == JSRegExpMatch) {
1027                     appendParenthesesDisjunctionContext(backTrack, context);
1028                     recordParenthesesMatch(term, context);
1029                     return JSRegExpMatch;
1030                 }
1031
1032                 resetMatches(term, context);
1033                 freeParenthesesDisjunctionContext(context);
1034
1035                 if (result != JSRegExpNoMatch)
1036                     return result;
1037             }
1038
1039             // Nope - okay backtrack looking for an alternative.
1040             while (backTrack->matchAmount) {
1041                 ParenthesesDisjunctionContext* context = backTrack->lastContext;
1042                 JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
1043                 if (result == JSRegExpMatch) {
1044                     // successful backtrack! we're back in the game!
1045                     if (backTrack->matchAmount) {
1046                         context = backTrack->lastContext;
1047                         recordParenthesesMatch(term, context);
1048                     }
1049                     return JSRegExpMatch;
1050                 }
1051
1052                 // pop a match off the stack
1053                 resetMatches(term, context);
1054                 popParenthesesDisjunctionContext(backTrack);
1055                 freeParenthesesDisjunctionContext(context);
1056
1057                 if (result != JSRegExpNoMatch)
1058                     return result;
1059             }
1060
1061             return JSRegExpNoMatch;
1062         }
1063         }
1064
1065         RELEASE_ASSERT_NOT_REACHED();
1066         return JSRegExpErrorNoMatch;
1067     }
1068
1069     bool matchDotStarEnclosure(ByteTerm& term, DisjunctionContext* context)
1070     {
1071         UNUSED_PARAM(term);
1072         unsigned matchBegin = context->matchBegin;
1073
1074         if (matchBegin) {
1075             for (matchBegin--; true; matchBegin--) {
1076                 if (testCharacterClass(pattern->newlineCharacterClass, input.reread(matchBegin))) {
1077                     ++matchBegin;
1078                     break;
1079                 }
1080
1081                 if (!matchBegin)
1082                     break;
1083             }
1084         }
1085
1086         unsigned matchEnd = input.getPos();
1087
1088         for (; (matchEnd != input.end())
1089              && (!testCharacterClass(pattern->newlineCharacterClass, input.reread(matchEnd))); matchEnd++) { }
1090
1091         if (((matchBegin && term.anchors.m_bol)
1092              || ((matchEnd != input.end()) && term.anchors.m_eol))
1093             && !pattern->m_multiline)
1094             return false;
1095
1096         context->matchBegin = matchBegin;
1097         context->matchEnd = matchEnd;
1098         return true;
1099     }
1100
1101 #define MATCH_NEXT() { ++context->term; goto matchAgain; }
1102 #define BACKTRACK() { --context->term; goto backtrack; }
1103 #define currentTerm() (disjunction->terms[context->term])
1104     JSRegExpResult matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
1105     {
1106         if (!--remainingMatchCount)
1107             return JSRegExpErrorHitLimit;
1108
1109         if (btrack)
1110             BACKTRACK();
1111
1112         context->matchBegin = input.getPos();
1113         context->term = 0;
1114
1115     matchAgain:
1116         ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
1117
1118         switch (currentTerm().type) {
1119         case ByteTerm::TypeSubpatternBegin:
1120             MATCH_NEXT();
1121         case ByteTerm::TypeSubpatternEnd:
1122             context->matchEnd = input.getPos();
1123             return JSRegExpMatch;
1124
1125         case ByteTerm::TypeBodyAlternativeBegin:
1126             MATCH_NEXT();
1127         case ByteTerm::TypeBodyAlternativeDisjunction:
1128         case ByteTerm::TypeBodyAlternativeEnd:
1129             context->matchEnd = input.getPos();
1130             return JSRegExpMatch;
1131
1132         case ByteTerm::TypeAlternativeBegin:
1133             MATCH_NEXT();
1134         case ByteTerm::TypeAlternativeDisjunction:
1135         case ByteTerm::TypeAlternativeEnd: {
1136             int offset = currentTerm().alternative.end;
1137             BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
1138             backTrack->offset = offset;
1139             context->term += offset;
1140             MATCH_NEXT();
1141         }
1142
1143         case ByteTerm::TypeAssertionBOL:
1144             if (matchAssertionBOL(currentTerm()))
1145                 MATCH_NEXT();
1146             BACKTRACK();
1147         case ByteTerm::TypeAssertionEOL:
1148             if (matchAssertionEOL(currentTerm()))
1149                 MATCH_NEXT();
1150             BACKTRACK();
1151         case ByteTerm::TypeAssertionWordBoundary:
1152             if (matchAssertionWordBoundary(currentTerm()))
1153                 MATCH_NEXT();
1154             BACKTRACK();
1155
1156         case ByteTerm::TypePatternCharacterOnce:
1157         case ByteTerm::TypePatternCharacterFixed: {
1158             for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
1159                 if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition - matchAmount))
1160                     BACKTRACK();
1161             }
1162             MATCH_NEXT();
1163         }
1164         case ByteTerm::TypePatternCharacterGreedy: {
1165             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1166             unsigned matchAmount = 0;
1167             while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {
1168                 if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition + 1)) {
1169                     input.uncheckInput(1);
1170                     break;
1171                 }
1172                 ++matchAmount;
1173             }
1174             backTrack->matchAmount = matchAmount;
1175
1176             MATCH_NEXT();
1177         }
1178         case ByteTerm::TypePatternCharacterNonGreedy: {
1179             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1180             backTrack->matchAmount = 0;
1181             MATCH_NEXT();
1182         }
1183
1184         case ByteTerm::TypePatternCasedCharacterOnce:
1185         case ByteTerm::TypePatternCasedCharacterFixed: {
1186             for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
1187                 if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - matchAmount))
1188                     BACKTRACK();
1189             }
1190             MATCH_NEXT();
1191         }
1192         case ByteTerm::TypePatternCasedCharacterGreedy: {
1193             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1194             unsigned matchAmount = 0;
1195             while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {
1196                 if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition + 1)) {
1197                     input.uncheckInput(1);
1198                     break;
1199                 }
1200                 ++matchAmount;
1201             }
1202             backTrack->matchAmount = matchAmount;
1203
1204             MATCH_NEXT();
1205         }
1206         case ByteTerm::TypePatternCasedCharacterNonGreedy: {
1207             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1208             backTrack->matchAmount = 0;
1209             MATCH_NEXT();
1210         }
1211
1212         case ByteTerm::TypeCharacterClass:
1213             if (matchCharacterClass(currentTerm(), context))
1214                 MATCH_NEXT();
1215             BACKTRACK();
1216         case ByteTerm::TypeBackReference:
1217             if (matchBackReference(currentTerm(), context))
1218                 MATCH_NEXT();
1219             BACKTRACK();
1220         case ByteTerm::TypeParenthesesSubpattern: {
1221             JSRegExpResult result = matchParentheses(currentTerm(), context);
1222
1223             if (result == JSRegExpMatch) {
1224                 MATCH_NEXT();
1225             }  else if (result != JSRegExpNoMatch)
1226                 return result;
1227
1228             BACKTRACK();
1229         }
1230         case ByteTerm::TypeParenthesesSubpatternOnceBegin:
1231             if (matchParenthesesOnceBegin(currentTerm(), context))
1232                 MATCH_NEXT();
1233             BACKTRACK();
1234         case ByteTerm::TypeParenthesesSubpatternOnceEnd:
1235             if (matchParenthesesOnceEnd(currentTerm(), context))
1236                 MATCH_NEXT();
1237             BACKTRACK();
1238         case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
1239             if (matchParenthesesTerminalBegin(currentTerm(), context))
1240                 MATCH_NEXT();
1241             BACKTRACK();
1242         case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
1243             if (matchParenthesesTerminalEnd(currentTerm(), context))
1244                 MATCH_NEXT();
1245             BACKTRACK();
1246         case ByteTerm::TypeParentheticalAssertionBegin:
1247             if (matchParentheticalAssertionBegin(currentTerm(), context))
1248                 MATCH_NEXT();
1249             BACKTRACK();
1250         case ByteTerm::TypeParentheticalAssertionEnd:
1251             if (matchParentheticalAssertionEnd(currentTerm(), context))
1252                 MATCH_NEXT();
1253             BACKTRACK();
1254
1255         case ByteTerm::TypeCheckInput:
1256             if (input.checkInput(currentTerm().checkInputCount))
1257                 MATCH_NEXT();
1258             BACKTRACK();
1259
1260         case ByteTerm::TypeUncheckInput:
1261             input.uncheckInput(currentTerm().checkInputCount);
1262             MATCH_NEXT();
1263
1264         case ByteTerm::TypeDotStarEnclosure:
1265             if (matchDotStarEnclosure(currentTerm(), context))
1266                 return JSRegExpMatch;
1267             BACKTRACK();
1268         }
1269
1270         // We should never fall-through to here.
1271         RELEASE_ASSERT_NOT_REACHED();
1272
1273     backtrack:
1274         ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
1275
1276         switch (currentTerm().type) {
1277         case ByteTerm::TypeSubpatternBegin:
1278             return JSRegExpNoMatch;
1279         case ByteTerm::TypeSubpatternEnd:
1280             RELEASE_ASSERT_NOT_REACHED();
1281
1282         case ByteTerm::TypeBodyAlternativeBegin:
1283         case ByteTerm::TypeBodyAlternativeDisjunction: {
1284             int offset = currentTerm().alternative.next;
1285             context->term += offset;
1286             if (offset > 0)
1287                 MATCH_NEXT();
1288
1289             if (input.atEnd())
1290                 return JSRegExpNoMatch;
1291
1292             input.next();
1293
1294             context->matchBegin = input.getPos();
1295
1296             if (currentTerm().alternative.onceThrough)
1297                 context->term += currentTerm().alternative.next;
1298
1299             MATCH_NEXT();
1300         }
1301         case ByteTerm::TypeBodyAlternativeEnd:
1302             RELEASE_ASSERT_NOT_REACHED();
1303
1304         case ByteTerm::TypeAlternativeBegin:
1305         case ByteTerm::TypeAlternativeDisjunction: {
1306             int offset = currentTerm().alternative.next;
1307             context->term += offset;
1308             if (offset > 0)
1309                 MATCH_NEXT();
1310             BACKTRACK();
1311         }
1312         case ByteTerm::TypeAlternativeEnd: {
1313             // We should never backtrack back into an alternative of the main body of the regex.
1314             BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
1315             unsigned offset = backTrack->offset;
1316             context->term -= offset;
1317             BACKTRACK();
1318         }
1319
1320         case ByteTerm::TypeAssertionBOL:
1321         case ByteTerm::TypeAssertionEOL:
1322         case ByteTerm::TypeAssertionWordBoundary:
1323             BACKTRACK();
1324
1325         case ByteTerm::TypePatternCharacterOnce:
1326         case ByteTerm::TypePatternCharacterFixed:
1327         case ByteTerm::TypePatternCharacterGreedy:
1328         case ByteTerm::TypePatternCharacterNonGreedy:
1329             if (backtrackPatternCharacter(currentTerm(), context))
1330                 MATCH_NEXT();
1331             BACKTRACK();
1332         case ByteTerm::TypePatternCasedCharacterOnce:
1333         case ByteTerm::TypePatternCasedCharacterFixed:
1334         case ByteTerm::TypePatternCasedCharacterGreedy:
1335         case ByteTerm::TypePatternCasedCharacterNonGreedy:
1336             if (backtrackPatternCasedCharacter(currentTerm(), context))
1337                 MATCH_NEXT();
1338             BACKTRACK();
1339         case ByteTerm::TypeCharacterClass:
1340             if (backtrackCharacterClass(currentTerm(), context))
1341                 MATCH_NEXT();
1342             BACKTRACK();
1343         case ByteTerm::TypeBackReference:
1344             if (backtrackBackReference(currentTerm(), context))
1345                 MATCH_NEXT();
1346             BACKTRACK();
1347         case ByteTerm::TypeParenthesesSubpattern: {
1348             JSRegExpResult result = backtrackParentheses(currentTerm(), context);
1349
1350             if (result == JSRegExpMatch) {
1351                 MATCH_NEXT();
1352             } else if (result != JSRegExpNoMatch)
1353                 return result;
1354
1355             BACKTRACK();
1356         }
1357         case ByteTerm::TypeParenthesesSubpatternOnceBegin:
1358             if (backtrackParenthesesOnceBegin(currentTerm(), context))
1359                 MATCH_NEXT();
1360             BACKTRACK();
1361         case ByteTerm::TypeParenthesesSubpatternOnceEnd:
1362             if (backtrackParenthesesOnceEnd(currentTerm(), context))
1363                 MATCH_NEXT();
1364             BACKTRACK();
1365         case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
1366             if (backtrackParenthesesTerminalBegin(currentTerm(), context))
1367                 MATCH_NEXT();
1368             BACKTRACK();
1369         case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
1370             if (backtrackParenthesesTerminalEnd(currentTerm(), context))
1371                 MATCH_NEXT();
1372             BACKTRACK();
1373         case ByteTerm::TypeParentheticalAssertionBegin:
1374             if (backtrackParentheticalAssertionBegin(currentTerm(), context))
1375                 MATCH_NEXT();
1376             BACKTRACK();
1377         case ByteTerm::TypeParentheticalAssertionEnd:
1378             if (backtrackParentheticalAssertionEnd(currentTerm(), context))
1379                 MATCH_NEXT();
1380             BACKTRACK();
1381
1382         case ByteTerm::TypeCheckInput:
1383             input.uncheckInput(currentTerm().checkInputCount);
1384             BACKTRACK();
1385
1386         case ByteTerm::TypeUncheckInput:
1387             input.checkInput(currentTerm().checkInputCount);
1388             BACKTRACK();
1389
1390         case ByteTerm::TypeDotStarEnclosure:
1391             RELEASE_ASSERT_NOT_REACHED();
1392         }
1393
1394         RELEASE_ASSERT_NOT_REACHED();
1395         return JSRegExpErrorNoMatch;
1396     }
1397
1398     JSRegExpResult matchNonZeroDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
1399     {
1400         JSRegExpResult result = matchDisjunction(disjunction, context, btrack);
1401
1402         if (result == JSRegExpMatch) {
1403             while (context->matchBegin == context->matchEnd) {
1404                 result = matchDisjunction(disjunction, context, true);
1405                 if (result != JSRegExpMatch)
1406                     return result;
1407             }
1408             return JSRegExpMatch;
1409         }
1410
1411         return result;
1412     }
1413
1414     unsigned interpret()
1415     {
1416         if (!input.isAvailableInput(0))
1417             return offsetNoMatch;
1418
1419         for (unsigned i = 0; i < pattern->m_body->m_numSubpatterns + 1; ++i)
1420             output[i << 1] = offsetNoMatch;
1421
1422         allocatorPool = pattern->m_allocator->startAllocator();
1423         RELEASE_ASSERT(allocatorPool);
1424
1425         DisjunctionContext* context = allocDisjunctionContext(pattern->m_body.get());
1426
1427         JSRegExpResult result = matchDisjunction(pattern->m_body.get(), context, false);
1428         if (result == JSRegExpMatch) {
1429             output[0] = context->matchBegin;
1430             output[1] = context->matchEnd;
1431         }
1432
1433         freeDisjunctionContext(context);
1434
1435         pattern->m_allocator->stopAllocator();
1436
1437         ASSERT((result == JSRegExpMatch) == (output[0] != offsetNoMatch));
1438         return output[0];
1439     }
1440
1441     Interpreter(BytecodePattern* pattern, unsigned* output, const CharType* input, unsigned length, unsigned start)
1442         : pattern(pattern)
1443         , output(output)
1444         , input(input, start, length)
1445         , allocatorPool(0)
1446         , remainingMatchCount(matchLimit)
1447     {
1448     }
1449
1450 private:
1451     BytecodePattern* pattern;
1452     unsigned* output;
1453     InputStream input;
1454     BumpPointerPool* allocatorPool;
1455     unsigned remainingMatchCount;
1456 };
1457
1458 class ByteCompiler {
1459     struct ParenthesesStackEntry {
1460         unsigned beginTerm;
1461         unsigned savedAlternativeIndex;
1462         ParenthesesStackEntry(unsigned beginTerm, unsigned savedAlternativeIndex/*, unsigned subpatternId, bool capture = false*/)
1463             : beginTerm(beginTerm)
1464             , savedAlternativeIndex(savedAlternativeIndex)
1465         {
1466         }
1467     };
1468
1469 public:
1470     ByteCompiler(YarrPattern& pattern)
1471         : m_pattern(pattern)
1472     {
1473         m_currentAlternativeIndex = 0;
1474     }
1475
1476     PassOwnPtr<BytecodePattern> compile(BumpPointerAllocator* allocator)
1477     {
1478         regexBegin(m_pattern.m_numSubpatterns, m_pattern.m_body->m_callFrameSize, m_pattern.m_body->m_alternatives[0]->onceThrough());
1479         emitDisjunction(m_pattern.m_body);
1480         regexEnd();
1481
1482         return adoptPtr(new BytecodePattern(m_bodyDisjunction.release(), m_allParenthesesInfo, m_pattern, allocator));
1483     }
1484
1485     void checkInput(unsigned count)
1486     {
1487         m_bodyDisjunction->terms.append(ByteTerm::CheckInput(count));
1488     }
1489
1490     void uncheckInput(unsigned count)
1491     {
1492         m_bodyDisjunction->terms.append(ByteTerm::UncheckInput(count));
1493     }
1494
1495     void assertionBOL(unsigned inputPosition)
1496     {
1497         m_bodyDisjunction->terms.append(ByteTerm::BOL(inputPosition));
1498     }
1499
1500     void assertionEOL(unsigned inputPosition)
1501     {
1502         m_bodyDisjunction->terms.append(ByteTerm::EOL(inputPosition));
1503     }
1504
1505     void assertionWordBoundary(bool invert, unsigned inputPosition)
1506     {
1507         m_bodyDisjunction->terms.append(ByteTerm::WordBoundary(invert, inputPosition));
1508     }
1509
1510     void atomPatternCharacter(UChar ch, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
1511     {
1512         if (m_pattern.m_ignoreCase) {
1513             UChar lo = Unicode::toLower(ch);
1514             UChar hi = Unicode::toUpper(ch);
1515
1516             if (lo != hi) {
1517                 m_bodyDisjunction->terms.append(ByteTerm(lo, hi, inputPosition, frameLocation, quantityCount, quantityType));
1518                 return;
1519             }
1520         }
1521
1522         m_bodyDisjunction->terms.append(ByteTerm(ch, inputPosition, frameLocation, quantityCount, quantityType));
1523     }
1524
1525     void atomCharacterClass(CharacterClass* characterClass, bool invert, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
1526     {
1527         m_bodyDisjunction->terms.append(ByteTerm(characterClass, invert, inputPosition));
1528
1529         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount.unsafeGet();
1530         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
1531         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1532     }
1533
1534     void atomBackReference(unsigned subpatternId, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
1535     {
1536         ASSERT(subpatternId);
1537
1538         m_bodyDisjunction->terms.append(ByteTerm::BackReference(subpatternId, inputPosition));
1539
1540         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount.unsafeGet();
1541         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
1542         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1543     }
1544
1545     void atomParenthesesOnceBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
1546     {
1547         int beginTerm = m_bodyDisjunction->terms.size();
1548
1549         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
1550         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1551         m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1552         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
1553
1554         m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1555         m_currentAlternativeIndex = beginTerm + 1;
1556     }
1557
1558     void atomParenthesesTerminalBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
1559     {
1560         int beginTerm = m_bodyDisjunction->terms.size();
1561
1562         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalBegin, subpatternId, capture, false, inputPosition));
1563         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1564         m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1565         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
1566
1567         m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1568         m_currentAlternativeIndex = beginTerm + 1;
1569     }
1570
1571     void atomParenthesesSubpatternBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
1572     {
1573         // Errrk! - this is a little crazy, we initially generate as a TypeParenthesesSubpatternOnceBegin,
1574         // then fix this up at the end! - simplifying this should make it much clearer.
1575         // https://bugs.webkit.org/show_bug.cgi?id=50136
1576
1577         int beginTerm = m_bodyDisjunction->terms.size();
1578
1579         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
1580         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1581         m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1582         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
1583
1584         m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1585         m_currentAlternativeIndex = beginTerm + 1;
1586     }
1587
1588     void atomParentheticalAssertionBegin(unsigned subpatternId, bool invert, unsigned frameLocation, unsigned alternativeFrameLocation)
1589     {
1590         int beginTerm = m_bodyDisjunction->terms.size();
1591
1592         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionBegin, subpatternId, false, invert, 0));
1593         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1594         m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1595         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
1596
1597         m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1598         m_currentAlternativeIndex = beginTerm + 1;
1599     }
1600
1601     void atomParentheticalAssertionEnd(unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
1602     {
1603         unsigned beginTerm = popParenthesesStack();
1604         closeAlternative(beginTerm + 1);
1605         unsigned endTerm = m_bodyDisjunction->terms.size();
1606
1607         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParentheticalAssertionBegin);
1608
1609         bool invert = m_bodyDisjunction->terms[beginTerm].invert();
1610         unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
1611
1612         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionEnd, subpatternId, false, invert, inputPosition));
1613         m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1614         m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1615         m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
1616
1617         m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
1618         m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1619         m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet();
1620         m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
1621     }
1622
1623     void assertionDotStarEnclosure(bool bolAnchored, bool eolAnchored)
1624     {
1625         m_bodyDisjunction->terms.append(ByteTerm::DotStarEnclosure(bolAnchored, eolAnchored));
1626     }
1627
1628     unsigned popParenthesesStack()
1629     {
1630         ASSERT(m_parenthesesStack.size());
1631         int stackEnd = m_parenthesesStack.size() - 1;
1632         unsigned beginTerm = m_parenthesesStack[stackEnd].beginTerm;
1633         m_currentAlternativeIndex = m_parenthesesStack[stackEnd].savedAlternativeIndex;
1634         m_parenthesesStack.shrink(stackEnd);
1635
1636         ASSERT(beginTerm < m_bodyDisjunction->terms.size());
1637         ASSERT(m_currentAlternativeIndex < m_bodyDisjunction->terms.size());
1638
1639         return beginTerm;
1640     }
1641
1642 #ifndef NDEBUG
1643     void dumpDisjunction(ByteDisjunction* disjunction)
1644     {
1645         dataLogF("ByteDisjunction(%p):\n\t", disjunction);
1646         for (unsigned i = 0; i < disjunction->terms.size(); ++i)
1647             dataLogF("{ %d } ", disjunction->terms[i].type);
1648         dataLogF("\n");
1649     }
1650 #endif
1651
1652     void closeAlternative(int beginTerm)
1653     {
1654         int origBeginTerm = beginTerm;
1655         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeBegin);
1656         int endIndex = m_bodyDisjunction->terms.size();
1657
1658         unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation;
1659
1660         if (!m_bodyDisjunction->terms[beginTerm].alternative.next)
1661             m_bodyDisjunction->terms.remove(beginTerm);
1662         else {
1663             while (m_bodyDisjunction->terms[beginTerm].alternative.next) {
1664                 beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next;
1665                 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeDisjunction);
1666                 m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;
1667                 m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
1668             }
1669
1670             m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;
1671
1672             m_bodyDisjunction->terms.append(ByteTerm::AlternativeEnd());
1673             m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
1674         }
1675     }
1676
1677     void closeBodyAlternative()
1678     {
1679         int beginTerm = 0;
1680         int origBeginTerm = 0;
1681         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeBegin);
1682         int endIndex = m_bodyDisjunction->terms.size();
1683
1684         unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation;
1685
1686         while (m_bodyDisjunction->terms[beginTerm].alternative.next) {
1687             beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next;
1688             ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeDisjunction);
1689             m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;
1690             m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
1691         }
1692
1693         m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;
1694
1695         m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeEnd());
1696         m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
1697     }
1698
1699     void atomParenthesesSubpatternEnd(unsigned lastSubpatternId, int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType, unsigned callFrameSize = 0)
1700     {
1701         unsigned beginTerm = popParenthesesStack();
1702         closeAlternative(beginTerm + 1);
1703         unsigned endTerm = m_bodyDisjunction->terms.size();
1704
1705         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
1706
1707         ByteTerm& parenthesesBegin = m_bodyDisjunction->terms[beginTerm];
1708
1709         bool capture = parenthesesBegin.capture();
1710         unsigned subpatternId = parenthesesBegin.atom.subpatternId;
1711
1712         unsigned numSubpatterns = lastSubpatternId - subpatternId + 1;
1713         OwnPtr<ByteDisjunction> parenthesesDisjunction = adoptPtr(new ByteDisjunction(numSubpatterns, callFrameSize));
1714
1715         unsigned firstTermInParentheses = beginTerm + 1;
1716         parenthesesDisjunction->terms.reserveInitialCapacity(endTerm - firstTermInParentheses + 2);
1717
1718         parenthesesDisjunction->terms.append(ByteTerm::SubpatternBegin());
1719         for (unsigned termInParentheses = firstTermInParentheses; termInParentheses < endTerm; ++termInParentheses)
1720             parenthesesDisjunction->terms.append(m_bodyDisjunction->terms[termInParentheses]);
1721         parenthesesDisjunction->terms.append(ByteTerm::SubpatternEnd());
1722
1723         m_bodyDisjunction->terms.shrink(beginTerm);
1724
1725         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction.get(), capture, inputPosition));
1726         m_allParenthesesInfo.append(parenthesesDisjunction.release());
1727
1728         m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
1729         m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1730         m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
1731     }
1732
1733     void atomParenthesesOnceEnd(int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
1734     {
1735         unsigned beginTerm = popParenthesesStack();
1736         closeAlternative(beginTerm + 1);
1737         unsigned endTerm = m_bodyDisjunction->terms.size();
1738
1739         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
1740
1741         bool capture = m_bodyDisjunction->terms[beginTerm].capture();
1742         unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
1743
1744         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceEnd, subpatternId, capture, false, inputPosition));
1745         m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1746         m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1747         m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
1748
1749         m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
1750         m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1751         m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet();
1752         m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
1753     }
1754
1755     void atomParenthesesTerminalEnd(int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
1756     {
1757         unsigned beginTerm = popParenthesesStack();
1758         closeAlternative(beginTerm + 1);
1759         unsigned endTerm = m_bodyDisjunction->terms.size();
1760
1761         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
1762
1763         bool capture = m_bodyDisjunction->terms[beginTerm].capture();
1764         unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
1765
1766         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalEnd, subpatternId, capture, false, inputPosition));
1767         m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1768         m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1769         m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
1770
1771         m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
1772         m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1773         m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet();
1774         m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
1775     }
1776
1777     void regexBegin(unsigned numSubpatterns, unsigned callFrameSize, bool onceThrough)
1778     {
1779         m_bodyDisjunction = adoptPtr(new ByteDisjunction(numSubpatterns, callFrameSize));
1780         m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeBegin(onceThrough));
1781         m_bodyDisjunction->terms[0].frameLocation = 0;
1782         m_currentAlternativeIndex = 0;
1783     }
1784
1785     void regexEnd()
1786     {
1787         closeBodyAlternative();
1788     }
1789
1790     void alternativeBodyDisjunction(bool onceThrough)
1791     {
1792         int newAlternativeIndex = m_bodyDisjunction->terms.size();
1793         m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
1794         m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeDisjunction(onceThrough));
1795
1796         m_currentAlternativeIndex = newAlternativeIndex;
1797     }
1798
1799     void alternativeDisjunction()
1800     {
1801         int newAlternativeIndex = m_bodyDisjunction->terms.size();
1802         m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
1803         m_bodyDisjunction->terms.append(ByteTerm::AlternativeDisjunction());
1804
1805         m_currentAlternativeIndex = newAlternativeIndex;
1806     }
1807
1808     void emitDisjunction(PatternDisjunction* disjunction, unsigned inputCountAlreadyChecked = 0, unsigned parenthesesInputCountAlreadyChecked = 0)
1809     {
1810         for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
1811             unsigned currentCountAlreadyChecked = inputCountAlreadyChecked;
1812
1813             PatternAlternative* alternative = disjunction->m_alternatives[alt].get();
1814
1815             if (alt) {
1816                 if (disjunction == m_pattern.m_body)
1817                     alternativeBodyDisjunction(alternative->onceThrough());
1818                 else
1819                     alternativeDisjunction();
1820             }
1821
1822             unsigned minimumSize = alternative->m_minimumSize;
1823             ASSERT(minimumSize >= parenthesesInputCountAlreadyChecked);
1824             unsigned countToCheck = minimumSize - parenthesesInputCountAlreadyChecked;
1825
1826             if (countToCheck) {
1827                 checkInput(countToCheck);
1828                 currentCountAlreadyChecked += countToCheck;
1829             }
1830
1831             for (unsigned i = 0; i < alternative->m_terms.size(); ++i) {
1832                 PatternTerm& term = alternative->m_terms[i];
1833
1834                 switch (term.type) {
1835                 case PatternTerm::TypeAssertionBOL:
1836                     assertionBOL(currentCountAlreadyChecked - term.inputPosition);
1837                     break;
1838
1839                 case PatternTerm::TypeAssertionEOL:
1840                     assertionEOL(currentCountAlreadyChecked - term.inputPosition);
1841                     break;
1842
1843                 case PatternTerm::TypeAssertionWordBoundary:
1844                     assertionWordBoundary(term.invert(), currentCountAlreadyChecked - term.inputPosition);
1845                     break;
1846
1847                 case PatternTerm::TypePatternCharacter:
1848                     atomPatternCharacter(term.patternCharacter, currentCountAlreadyChecked - term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType);
1849                     break;
1850
1851                 case PatternTerm::TypeCharacterClass:
1852                     atomCharacterClass(term.characterClass, term.invert(), currentCountAlreadyChecked- term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType);
1853                     break;
1854
1855                 case PatternTerm::TypeBackReference:
1856                     atomBackReference(term.backReferenceSubpatternId, currentCountAlreadyChecked - term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType);
1857                         break;
1858
1859                 case PatternTerm::TypeForwardReference:
1860                     break;
1861
1862                 case PatternTerm::TypeParenthesesSubpattern: {
1863                     unsigned disjunctionAlreadyCheckedCount = 0;
1864                     if (term.quantityCount == 1 && !term.parentheses.isCopy) {
1865                         unsigned alternativeFrameLocation = term.frameLocation;
1866                         // For QuantifierFixedCount we pre-check the minimum size; for greedy/non-greedy we reserve a slot in the frame.
1867                         if (term.quantityType == QuantifierFixedCount)
1868                             disjunctionAlreadyCheckedCount = term.parentheses.disjunction->m_minimumSize;
1869                         else
1870                             alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
1871                         unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
1872                         atomParenthesesOnceBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, alternativeFrameLocation);
1873                         emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount);
1874                         atomParenthesesOnceEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType);
1875                     } else if (term.parentheses.isTerminal) {
1876                         unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
1877                         atomParenthesesTerminalBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, term.frameLocation + YarrStackSpaceForBackTrackInfoParenthesesOnce);
1878                         emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount);
1879                         atomParenthesesTerminalEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType);
1880                     } else {
1881                         unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
1882                         atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, 0);
1883                         emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0);
1884                         atomParenthesesSubpatternEnd(term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
1885                     }
1886                     break;
1887                 }
1888
1889                 case PatternTerm::TypeParentheticalAssertion: {
1890                     unsigned alternativeFrameLocation = term.frameLocation + YarrStackSpaceForBackTrackInfoParentheticalAssertion;
1891
1892                     ASSERT(currentCountAlreadyChecked >= static_cast<unsigned>(term.inputPosition));
1893                     unsigned positiveInputOffset = currentCountAlreadyChecked - static_cast<unsigned>(term.inputPosition);
1894                     unsigned uncheckAmount = 0;
1895                     if (positiveInputOffset > term.parentheses.disjunction->m_minimumSize) {
1896                         uncheckAmount = positiveInputOffset - term.parentheses.disjunction->m_minimumSize;
1897                         uncheckInput(uncheckAmount);
1898                         currentCountAlreadyChecked -= uncheckAmount;
1899                     }
1900
1901                     atomParentheticalAssertionBegin(term.parentheses.subpatternId, term.invert(), term.frameLocation, alternativeFrameLocation);
1902                     emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, positiveInputOffset - uncheckAmount);
1903                     atomParentheticalAssertionEnd(0, term.frameLocation, term.quantityCount, term.quantityType);
1904                     if (uncheckAmount) {
1905                         checkInput(uncheckAmount);
1906                         currentCountAlreadyChecked += uncheckAmount;
1907                     }
1908                     break;
1909                 }
1910
1911                 case PatternTerm::TypeDotStarEnclosure:
1912                     assertionDotStarEnclosure(term.anchors.bolAnchor, term.anchors.eolAnchor);
1913                     break;
1914                 }
1915             }
1916         }
1917     }
1918
1919 private:
1920     YarrPattern& m_pattern;
1921     OwnPtr<ByteDisjunction> m_bodyDisjunction;
1922     unsigned m_currentAlternativeIndex;
1923     Vector<ParenthesesStackEntry> m_parenthesesStack;
1924     Vector<OwnPtr<ByteDisjunction> > m_allParenthesesInfo;
1925 };
1926
1927 PassOwnPtr<BytecodePattern> byteCompile(YarrPattern& pattern, BumpPointerAllocator* allocator)
1928 {
1929     return ByteCompiler(pattern).compile(allocator);
1930 }
1931
1932 unsigned interpret(BytecodePattern* bytecode, const String& input, unsigned start, unsigned* output)
1933 {
1934     if (input.is8Bit())
1935         return Interpreter<LChar>(bytecode, output, input.characters8(), input.length(), start).interpret();
1936     return Interpreter<UChar>(bytecode, output, input.characters16(), input.length(), start).interpret();
1937 }
1938
1939 unsigned interpret(BytecodePattern* bytecode, const LChar* input, unsigned length, unsigned start, unsigned* output)
1940 {
1941     return Interpreter<LChar>(bytecode, output, input, length, start).interpret();
1942 }
1943
1944 unsigned interpret(BytecodePattern* bytecode, const UChar* input, unsigned length, unsigned start, unsigned* output)
1945 {
1946     return Interpreter<UChar>(bytecode, output, input, length, start).interpret();
1947 }
1948
1949 // These should be the same for both UChar & LChar.
1950 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoPatternCharacter) == (YarrStackSpaceForBackTrackInfoPatternCharacter * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoPatternCharacter);
1951 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoCharacterClass) == (YarrStackSpaceForBackTrackInfoCharacterClass * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoCharacterClass);
1952 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoBackReference) == (YarrStackSpaceForBackTrackInfoBackReference * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoBackReference);
1953 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoAlternative) == (YarrStackSpaceForBackTrackInfoAlternative * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoAlternative);
1954 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParentheticalAssertion) == (YarrStackSpaceForBackTrackInfoParentheticalAssertion * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheticalAssertion);
1955 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParenthesesOnce) == (YarrStackSpaceForBackTrackInfoParenthesesOnce * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParenthesesOnce);
1956 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParentheses) == (YarrStackSpaceForBackTrackInfoParentheses * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheses);
1957
1958
1959 } }