yarr/YarrInterpreter.cpp

   1 /*
   2  * Copyright (C) 2009 Apple Inc. All rights reserved.
   3  * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  * 1. Redistributions of source code must retain the above copyright
   9  *    notice, this list of conditions and the following disclaimer.
  10  * 2. Redistributions in binary form must reproduce the above copyright
  11  *    notice, this list of conditions and the following disclaimer in the
  12  *    documentation and/or other materials provided with the distribution.
  13  *
  14  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
  15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
  18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
  22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  25  */
  26
  27 #include "config.h"
  28 #include "YarrInterpreter.h"
  29
  30 #include "UString.h"
  31 #include "Yarr.h"
  32 #include "YarrCanonicalizeUCS2.h"
  33 #include <wtf/BumpPointerAllocator.h>
  34 #include <wtf/DataLog.h>
  35 #include <wtf/text/CString.h>
  36
  37 #ifndef NDEBUG
  38 #include <stdio.h>
  39 #endif
  40
  41 using namespace WTF;
  42
  43 namespace JSC { namespace Yarr {
  44
  45 template<typename CharType>
  46 class Interpreter {
  47 public:
  48     struct ParenthesesDisjunctionContext;
  49
  50     struct BackTrackInfoPatternCharacter {
  51         uintptr_t matchAmount;
  52     };
  53     struct BackTrackInfoCharacterClass {
  54         uintptr_t matchAmount;
  55     };
  56     struct BackTrackInfoBackReference {
  57         uintptr_t begin; // Not really needed for greedy quantifiers.
  58         uintptr_t matchAmount; // Not really needed for fixed quantifiers.
  59     };
  60     struct BackTrackInfoAlternative {
  61         uintptr_t offset;
  62     };
  63     struct BackTrackInfoParentheticalAssertion {
  64         uintptr_t begin;
  65     };
  66     struct BackTrackInfoParenthesesOnce {
  67         uintptr_t begin;
  68     };
  69     struct BackTrackInfoParenthesesTerminal {
  70         uintptr_t begin;
  71     };
  72     struct BackTrackInfoParentheses {
  73         uintptr_t matchAmount;
  74         ParenthesesDisjunctionContext* lastContext;
  75     };
  76
  77     static inline void appendParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack, ParenthesesDisjunctionContext* context)
  78     {
  79         context->next = backTrack->lastContext;
  80         backTrack->lastContext = context;
  81         ++backTrack->matchAmount;
  82     }
  83
  84     static inline void popParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack)
  85     {
  86         ASSERT(backTrack->matchAmount);
  87         ASSERT(backTrack->lastContext);
  88         backTrack->lastContext = backTrack->lastContext->next;
  89         --backTrack->matchAmount;
  90     }
  91
  92     struct DisjunctionContext
  93     {
  94         DisjunctionContext()
  95             : term(0)
  96         {
  97         }
  98
  99         void* operator new(size_t, void* where)
 100         {
 101             return where;
 102         }
 103
 104         int term;
 105         unsigned matchBegin;
 106         unsigned matchEnd;
 107         uintptr_t frame[1];
 108     };
 109
 110     DisjunctionContext* allocDisjunctionContext(ByteDisjunction* disjunction)
 111     {
 112         size_t size = sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t);
 113         allocatorPool = allocatorPool->ensureCapacity(size);
 114         if (!allocatorPool)
 115             CRASH();
 116         return new (allocatorPool->alloc(size)) DisjunctionContext();
 117     }
 118
 119     void freeDisjunctionContext(DisjunctionContext* context)
 120     {
 121         allocatorPool = allocatorPool->dealloc(context);
 122     }
 123
 124     struct ParenthesesDisjunctionContext
 125     {
 126         ParenthesesDisjunctionContext(unsigned* output, ByteTerm& term)
 127             : next(0)
 128         {
 129             unsigned firstSubpatternId = term.atom.subpatternId;
 130             unsigned numNestedSubpatterns = term.atom.parenthesesDisjunction->m_numSubpatterns;
 131
 132             for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i) {
 133                 subpatternBackup[i] = output[(firstSubpatternId << 1) + i];
 134                 output[(firstSubpatternId << 1) + i] = offsetNoMatch;
 135             }
 136
 137             new (getDisjunctionContext(term)) DisjunctionContext();
 138         }
 139
 140         void* operator new(size_t, void* where)
 141         {
 142             return where;
 143         }
 144
 145         void restoreOutput(unsigned* output, unsigned firstSubpatternId, unsigned numNestedSubpatterns)
 146         {
 147             for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i)
 148                 output[(firstSubpatternId << 1) + i] = subpatternBackup[i];
 149         }
 150
 151         DisjunctionContext* getDisjunctionContext(ByteTerm& term)
 152         {
 153             return reinterpret_cast<DisjunctionContext*>(&(subpatternBackup[term.atom.parenthesesDisjunction->m_numSubpatterns << 1]));
 154         }
 155
 156         ParenthesesDisjunctionContext* next;
 157         unsigned subpatternBackup[1];
 158     };
 159
 160     ParenthesesDisjunctionContext* allocParenthesesDisjunctionContext(ByteDisjunction* disjunction, unsigned* output, ByteTerm& term)
 161     {
 162         size_t size = sizeof(ParenthesesDisjunctionContext) - sizeof(unsigned) + (term.atom.parenthesesDisjunction->m_numSubpatterns << 1) * sizeof(unsigned) + sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t);
 163         allocatorPool = allocatorPool->ensureCapacity(size);
 164         if (!allocatorPool)
 165             CRASH();
 166         return new (allocatorPool->alloc(size)) ParenthesesDisjunctionContext(output, term);
 167     }
 168
 169     void freeParenthesesDisjunctionContext(ParenthesesDisjunctionContext* context)
 170     {
 171         allocatorPool = allocatorPool->dealloc(context);
 172     }
 173
 174     class InputStream {
 175     public:
 176         InputStream(const CharType* input, unsigned start, unsigned length)
 177             : input(input)
 178             , pos(start)
 179             , length(length)
 180         {
 181         }
 182
 183         void next()
 184         {
 185             ++pos;
 186         }
 187
 188         void rewind(unsigned amount)
 189         {
 190             ASSERT(pos >= amount);
 191             pos -= amount;
 192         }
 193
 194         int read()
 195         {
 196             ASSERT(pos < length);
 197             if (pos < length)
 198                 return input[pos];
 199             return -1;
 200         }
 201
 202         int readPair()
 203         {
 204             ASSERT(pos + 1 < length);
 205             return input[pos] | input[pos + 1] << 16;
 206         }
 207
 208         int readChecked(unsigned negativePositionOffest)
 209         {
 210             if (pos < negativePositionOffest)
 211                 CRASH();
 212             unsigned p = pos - negativePositionOffest;
 213             ASSERT(p < length);
 214             return input[p];
 215         }
 216
 217         int reread(unsigned from)
 218         {
 219             ASSERT(from < length);
 220             return input[from];
 221         }
 222
 223         int prev()
 224         {
 225             ASSERT(!(pos > length));
 226             if (pos && length)
 227                 return input[pos - 1];
 228             return -1;
 229         }
 230
 231         unsigned getPos()
 232         {
 233             return pos;
 234         }
 235
 236         void setPos(unsigned p)
 237         {
 238             pos = p;
 239         }
 240
 241         bool atStart()
 242         {
 243             return pos == 0;
 244         }
 245
 246         bool atEnd()
 247         {
 248             return pos == length;
 249         }
 250
 251         unsigned end()
 252         {
 253             return length;
 254         }
 255
 256         bool checkInput(unsigned count)
 257         {
 258             if (((pos + count) <= length) && ((pos + count) >= pos)) {
 259                 pos += count;
 260                 return true;
 261             }
 262             return false;
 263         }
 264
 265         void uncheckInput(unsigned count)
 266         {
 267             if (pos < count)
 268                 CRASH();
 269             pos -= count;
 270         }
 271
 272         bool atStart(unsigned negativePositionOffest)
 273         {
 274             return pos == negativePositionOffest;
 275         }
 276
 277         bool atEnd(unsigned negativePositionOffest)
 278         {
 279             if (pos < negativePositionOffest)
 280                 CRASH();
 281             return (pos - negativePositionOffest) == length;
 282         }
 283
 284         bool isAvailableInput(unsigned offset)
 285         {
 286             return (((pos + offset) <= length) && ((pos + offset) >= pos));
 287         }
 288
 289     private:
 290         const CharType* input;
 291         unsigned pos;
 292         unsigned length;
 293     };
 294
 295     bool testCharacterClass(CharacterClass* characterClass, int ch)
 296     {
 297         if (ch & 0xFF80) {
 298             for (unsigned i = 0; i < characterClass->m_matchesUnicode.size(); ++i)
 299                 if (ch == characterClass->m_matchesUnicode[i])
 300                     return true;
 301             for (unsigned i = 0; i < characterClass->m_rangesUnicode.size(); ++i)
 302                 if ((ch >= characterClass->m_rangesUnicode[i].begin) && (ch <= characterClass->m_rangesUnicode[i].end))
 303                     return true;
 304         } else {
 305             for (unsigned i = 0; i < characterClass->m_matches.size(); ++i)
 306                 if (ch == characterClass->m_matches[i])
 307                     return true;
 308             for (unsigned i = 0; i < characterClass->m_ranges.size(); ++i)
 309                 if ((ch >= characterClass->m_ranges[i].begin) && (ch <= characterClass->m_ranges[i].end))
 310                     return true;
 311         }
 312
 313         return false;
 314     }
 315
 316     bool checkCharacter(int testChar, unsigned negativeInputOffset)
 317     {
 318         return testChar == input.readChecked(negativeInputOffset);
 319     }
 320
 321     bool checkCasedCharacter(int loChar, int hiChar, unsigned negativeInputOffset)
 322     {
 323         int ch = input.readChecked(negativeInputOffset);
 324         return (loChar == ch) || (hiChar == ch);
 325     }
 326
 327     bool checkCharacterClass(CharacterClass* characterClass, bool invert, unsigned negativeInputOffset)
 328     {
 329         bool match = testCharacterClass(characterClass, input.readChecked(negativeInputOffset));
 330         return invert ? !match : match;
 331     }
 332
 333     bool tryConsumeBackReference(int matchBegin, int matchEnd, unsigned negativeInputOffset)
 334     {
 335         unsigned matchSize = (unsigned)(matchEnd - matchBegin);
 336
 337         if (!input.checkInput(matchSize))
 338             return false;
 339
 340         if (pattern->m_ignoreCase) {
 341             for (unsigned i = 0; i < matchSize; ++i) {
 342                 int oldCh = input.reread(matchBegin + i);
 343                 int ch = input.readChecked(negativeInputOffset + matchSize - i);
 344
 345                 if (oldCh == ch)
 346                     continue;
 347
 348                 // The definition for canonicalize (see ES 5.1, 15.10.2.8) means that
 349                 // unicode values are never allowed to match against ascii ones.
 350                 if (isASCII(oldCh) || isASCII(ch)) {
 351                     if (toASCIIUpper(oldCh) == toASCIIUpper(ch))
 352                         continue;
 353                 } else if (areCanonicallyEquivalent(oldCh, ch))
 354                     continue;
 355
 356                 input.uncheckInput(matchSize);
 357                 return false;
 358             }
 359         } else {
 360             for (unsigned i = 0; i < matchSize; ++i) {
 361                 if (!checkCharacter(input.reread(matchBegin + i), negativeInputOffset + matchSize - i)) {
 362                     input.uncheckInput(matchSize);
 363                     return false;
 364                 }
 365             }
 366         }
 367
 368         return true;
 369     }
 370
 371     bool matchAssertionBOL(ByteTerm& term)
 372     {
 373         return (input.atStart(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition + 1)));
 374     }
 375
 376     bool matchAssertionEOL(ByteTerm& term)
 377     {
 378         if (term.inputPosition)
 379             return (input.atEnd(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition)));
 380
 381         return (input.atEnd()) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.read()));
 382     }
 383
 384     bool matchAssertionWordBoundary(ByteTerm& term)
 385     {
 386         bool prevIsWordchar = !input.atStart(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition + 1));
 387         bool readIsWordchar;
 388         if (term.inputPosition)
 389             readIsWordchar = !input.atEnd(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition));
 390         else
 391             readIsWordchar = !input.atEnd() && testCharacterClass(pattern->wordcharCharacterClass, input.read());
 392
 393         bool wordBoundary = prevIsWordchar != readIsWordchar;
 394         return term.invert() ? !wordBoundary : wordBoundary;
 395     }
 396
 397     bool backtrackPatternCharacter(ByteTerm& term, DisjunctionContext* context)
 398     {
 399         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
 400
 401         switch (term.atom.quantityType) {
 402         case QuantifierFixedCount:
 403             break;
 404
 405         case QuantifierGreedy:
 406             if (backTrack->matchAmount) {
 407                 --backTrack->matchAmount;
 408                 input.uncheckInput(1);
 409                 return true;
 410             }
 411             break;
 412
 413         case QuantifierNonGreedy:
 414             if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
 415                 ++backTrack->matchAmount;
 416                 if (checkCharacter(term.atom.patternCharacter, term.inputPosition + 1))
 417                     return true;
 418             }
 419             input.uncheckInput(backTrack->matchAmount);
 420             break;
 421         }
 422
 423         return false;
 424     }
 425
 426     bool backtrackPatternCasedCharacter(ByteTerm& term, DisjunctionContext* context)
 427     {
 428         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
 429
 430         switch (term.atom.quantityType) {
 431         case QuantifierFixedCount:
 432             break;
 433
 434         case QuantifierGreedy:
 435             if (backTrack->matchAmount) {
 436                 --backTrack->matchAmount;
 437                 input.uncheckInput(1);
 438                 return true;
 439             }
 440             break;
 441
 442         case QuantifierNonGreedy:
 443             if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
 444                 ++backTrack->matchAmount;
 445                 if (checkCasedCharacter(term.atom.casedCharacter.lo, term.atom.casedCharacter.hi, term.inputPosition + 1))
 446                     return true;
 447             }
 448             input.uncheckInput(backTrack->matchAmount);
 449             break;
 450         }
 451
 452         return false;
 453     }
 454
 455     bool matchCharacterClass(ByteTerm& term, DisjunctionContext* context)
 456     {
 457         ASSERT(term.type == ByteTerm::TypeCharacterClass);
 458         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
 459
 460         switch (term.atom.quantityType) {
 461         case QuantifierFixedCount: {
 462             for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) {
 463                 if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - matchAmount))
 464                     return false;
 465             }
 466             return true;
 467         }
 468
 469         case QuantifierGreedy: {
 470             unsigned matchAmount = 0;
 471             while ((matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
 472                 if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + 1)) {
 473                     input.uncheckInput(1);
 474                     break;
 475                 }
 476                 ++matchAmount;
 477             }
 478             backTrack->matchAmount = matchAmount;
 479
 480             return true;
 481         }
 482
 483         case QuantifierNonGreedy:
 484             backTrack->matchAmount = 0;
 485             return true;
 486         }
 487
 488         ASSERT_NOT_REACHED();
 489         return false;
 490     }
 491
 492     bool backtrackCharacterClass(ByteTerm& term, DisjunctionContext* context)
 493     {
 494         ASSERT(term.type == ByteTerm::TypeCharacterClass);
 495         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
 496
 497         switch (term.atom.quantityType) {
 498         case QuantifierFixedCount:
 499             break;
 500
 501         case QuantifierGreedy:
 502             if (backTrack->matchAmount) {
 503                 --backTrack->matchAmount;
 504                 input.uncheckInput(1);
 505                 return true;
 506             }
 507             break;
 508
 509         case QuantifierNonGreedy:
 510             if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
 511                 ++backTrack->matchAmount;
 512                 if (checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + 1))
 513                     return true;
 514             }
 515             input.uncheckInput(backTrack->matchAmount);
 516             break;
 517         }
 518
 519         return false;
 520     }
 521
 522     bool matchBackReference(ByteTerm& term, DisjunctionContext* context)
 523     {
 524         ASSERT(term.type == ByteTerm::TypeBackReference);
 525         BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
 526
 527         unsigned matchBegin = output[(term.atom.subpatternId << 1)];
 528         unsigned matchEnd = output[(term.atom.subpatternId << 1) + 1];
 529
 530         // If the end position of the referenced match hasn't set yet then the backreference in the same parentheses where it references to that.
 531         // In this case the result of match is empty string like when it references to a parentheses with zero-width match.
 532         // Eg.: /(a\1)/
 533         if (matchEnd == offsetNoMatch)
 534             return true;
 535
 536         if (matchBegin == offsetNoMatch)
 537             return true;
 538
 539         ASSERT(matchBegin <= matchEnd);
 540
 541         if (matchBegin == matchEnd)
 542             return true;
 543
 544         switch (term.atom.quantityType) {
 545         case QuantifierFixedCount: {
 546             backTrack->begin = input.getPos();
 547             for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) {
 548                 if (!tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
 549                     input.setPos(backTrack->begin);
 550                     return false;
 551                 }
 552             }
 553             return true;
 554         }
 555
 556         case QuantifierGreedy: {
 557             unsigned matchAmount = 0;
 558             while ((matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition))
 559                 ++matchAmount;
 560             backTrack->matchAmount = matchAmount;
 561             return true;
 562         }
 563
 564         case QuantifierNonGreedy:
 565             backTrack->begin = input.getPos();
 566             backTrack->matchAmount = 0;
 567             return true;
 568         }
 569
 570         ASSERT_NOT_REACHED();
 571         return false;
 572     }
 573
 574     bool backtrackBackReference(ByteTerm& term, DisjunctionContext* context)
 575     {
 576         ASSERT(term.type == ByteTerm::TypeBackReference);
 577         BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
 578
 579         unsigned matchBegin = output[(term.atom.subpatternId << 1)];
 580         unsigned matchEnd = output[(term.atom.subpatternId << 1) + 1];
 581
 582         if (matchBegin == offsetNoMatch)
 583             return false;
 584
 585         ASSERT(matchBegin <= matchEnd);
 586
 587         if (matchBegin == matchEnd)
 588             return false;
 589
 590         switch (term.atom.quantityType) {
 591         case QuantifierFixedCount:
 592             // for quantityCount == 1, could rewind.
 593             input.setPos(backTrack->begin);
 594             break;
 595
 596         case QuantifierGreedy:
 597             if (backTrack->matchAmount) {
 598                 --backTrack->matchAmount;
 599                 input.rewind(matchEnd - matchBegin);
 600                 return true;
 601             }
 602             break;
 603
 604         case QuantifierNonGreedy:
 605             if ((backTrack->matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
 606                 ++backTrack->matchAmount;
 607                 return true;
 608             }
 609             input.setPos(backTrack->begin);
 610             break;
 611         }
 612
 613         return false;
 614     }
 615
 616     void recordParenthesesMatch(ByteTerm& term, ParenthesesDisjunctionContext* context)
 617     {
 618         if (term.capture()) {
 619             unsigned subpatternId = term.atom.subpatternId;
 620             output[(subpatternId << 1)] = context->getDisjunctionContext(term)->matchBegin + term.inputPosition;
 621             output[(subpatternId << 1) + 1] = context->getDisjunctionContext(term)->matchEnd + term.inputPosition;
 622         }
 623     }
 624     void resetMatches(ByteTerm& term, ParenthesesDisjunctionContext* context)
 625     {
 626         unsigned firstSubpatternId = term.atom.subpatternId;
 627         unsigned count = term.atom.parenthesesDisjunction->m_numSubpatterns;
 628         context->restoreOutput(output, firstSubpatternId, count);
 629     }
 630     JSRegExpResult parenthesesDoBacktrack(ByteTerm& term, BackTrackInfoParentheses* backTrack)
 631     {
 632         while (backTrack->matchAmount) {
 633             ParenthesesDisjunctionContext* context = backTrack->lastContext;
 634
 635             JSRegExpResult result = matchDisjunction(term.atom.parenthesesDisjunction, context->getDisjunctionContext(term), true);
 636             if (result == JSRegExpMatch)
 637                 return JSRegExpMatch;
 638
 639             resetMatches(term, context);
 640             popParenthesesDisjunctionContext(backTrack);
 641             freeParenthesesDisjunctionContext(context);
 642
 643             if (result != JSRegExpNoMatch)
 644                 return result;
 645         }
 646
 647         return JSRegExpNoMatch;
 648     }
 649
 650     bool matchParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
 651     {
 652         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
 653         ASSERT(term.atom.quantityCount == 1);
 654
 655         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
 656
 657         switch (term.atom.quantityType) {
 658         case QuantifierGreedy: {
 659             // set this speculatively; if we get to the parens end this will be true.
 660             backTrack->begin = input.getPos();
 661             break;
 662         }
 663         case QuantifierNonGreedy: {
 664             backTrack->begin = notFound;
 665             context->term += term.atom.parenthesesWidth;
 666             return true;
 667         }
 668         case QuantifierFixedCount:
 669             break;
 670         }
 671
 672         if (term.capture()) {
 673             unsigned subpatternId = term.atom.subpatternId;
 674             output[(subpatternId << 1)] = input.getPos() - term.inputPosition;
 675         }
 676
 677         return true;
 678     }
 679
 680     bool matchParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
 681     {
 682         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
 683         ASSERT(term.atom.quantityCount == 1);
 684
 685         if (term.capture()) {
 686             unsigned subpatternId = term.atom.subpatternId;
 687             output[(subpatternId << 1) + 1] = input.getPos() + term.inputPosition;
 688         }
 689
 690         if (term.atom.quantityType == QuantifierFixedCount)
 691             return true;
 692
 693         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
 694         return backTrack->begin != input.getPos();
 695     }
 696
 697     bool backtrackParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
 698     {
 699         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
 700         ASSERT(term.atom.quantityCount == 1);
 701
 702         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
 703
 704         if (term.capture()) {
 705             unsigned subpatternId = term.atom.subpatternId;
 706             output[(subpatternId << 1)] = offsetNoMatch;
 707             output[(subpatternId << 1) + 1] = offsetNoMatch;
 708         }
 709
 710         switch (term.atom.quantityType) {
 711         case QuantifierGreedy:
 712             // if we backtrack to this point, there is another chance - try matching nothing.
 713             ASSERT(backTrack->begin != notFound);
 714             backTrack->begin = notFound;
 715             context->term += term.atom.parenthesesWidth;
 716             return true;
 717         case QuantifierNonGreedy:
 718             ASSERT(backTrack->begin != notFound);
 719         case QuantifierFixedCount:
 720             break;
 721         }
 722
 723         return false;
 724     }
 725
 726     bool backtrackParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
 727     {
 728         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
 729         ASSERT(term.atom.quantityCount == 1);
 730
 731         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
 732
 733         switch (term.atom.quantityType) {
 734         case QuantifierGreedy:
 735             if (backTrack->begin == notFound) {
 736                 context->term -= term.atom.parenthesesWidth;
 737                 return false;
 738             }
 739         case QuantifierNonGreedy:
 740             if (backTrack->begin == notFound) {
 741                 backTrack->begin = input.getPos();
 742                 if (term.capture()) {
 743                     // Technically this access to inputPosition should be accessing the begin term's
 744                     // inputPosition, but for repeats other than fixed these values should be
 745                     // the same anyway! (We don't pre-check for greedy or non-greedy matches.)
 746                     ASSERT((&term - term.atom.parenthesesWidth)->type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
 747                     ASSERT((&term - term.atom.parenthesesWidth)->inputPosition == term.inputPosition);
 748                     unsigned subpatternId = term.atom.subpatternId;
 749                     output[subpatternId << 1] = input.getPos() + term.inputPosition;
 750                 }
 751                 context->term -= term.atom.parenthesesWidth;
 752                 return true;
 753             }
 754         case QuantifierFixedCount:
 755             break;
 756         }
 757
 758         return false;
 759     }
 760
 761     bool matchParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context)
 762     {
 763         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
 764         ASSERT(term.atom.quantityType == QuantifierGreedy);
 765         ASSERT(term.atom.quantityCount == quantifyInfinite);
 766         ASSERT(!term.capture());
 767
 768         BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
 769         backTrack->begin = input.getPos();
 770         return true;
 771     }
 772
 773     bool matchParenthesesTerminalEnd(ByteTerm& term, DisjunctionContext* context)
 774     {
 775         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalEnd);
 776
 777         BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
 778         // Empty match is a failed match.
 779         if (backTrack->begin == input.getPos())
 780             return false;
 781
 782         // Successful match! Okay, what's next? - loop around and try to match moar!
 783         context->term -= (term.atom.parenthesesWidth + 1);
 784         return true;
 785     }
 786
 787     bool backtrackParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context)
 788     {
 789         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
 790         ASSERT(term.atom.quantityType == QuantifierGreedy);
 791         ASSERT(term.atom.quantityCount == quantifyInfinite);
 792         ASSERT(!term.capture());
 793
 794         // If we backtrack to this point, we have failed to match this iteration of the parens.
 795         // Since this is greedy / zero minimum a failed is also accepted as a match!
 796         context->term += term.atom.parenthesesWidth;
 797         return true;
 798     }
 799
 800     bool backtrackParenthesesTerminalEnd(ByteTerm&, DisjunctionContext*)
 801     {
 802         // 'Terminal' parentheses are at the end of the regex, and as such a match past end
 803         // should always be returned as a successful match - we should never backtrack to here.
 804         ASSERT_NOT_REACHED();
 805         return false;
 806     }
 807
 808     bool matchParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
 809     {
 810         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
 811         ASSERT(term.atom.quantityCount == 1);
 812
 813         BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
 814
 815         backTrack->begin = input.getPos();
 816         return true;
 817     }
 818
 819     bool matchParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
 820     {
 821         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
 822         ASSERT(term.atom.quantityCount == 1);
 823
 824         BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
 825
 826         input.setPos(backTrack->begin);
 827
 828         // We've reached the end of the parens; if they are inverted, this is failure.
 829         if (term.invert()) {
 830             context->term -= term.atom.parenthesesWidth;
 831             return false;
 832         }
 833
 834         return true;
 835     }
 836
 837     bool backtrackParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
 838     {
 839         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
 840         ASSERT(term.atom.quantityCount == 1);
 841
 842         // We've failed to match parens; if they are inverted, this is win!
 843         if (term.invert()) {
 844             context->term += term.atom.parenthesesWidth;
 845             return true;
 846         }
 847
 848         return false;
 849     }
 850
 851     bool backtrackParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
 852     {
 853         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
 854         ASSERT(term.atom.quantityCount == 1);
 855
 856         BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
 857
 858         input.setPos(backTrack->begin);
 859
 860         context->term -= term.atom.parenthesesWidth;
 861         return false;
 862     }
 863
 864     JSRegExpResult matchParentheses(ByteTerm& term, DisjunctionContext* context)
 865     {
 866         ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
 867
 868         BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
 869         ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
 870
 871         backTrack->matchAmount = 0;
 872         backTrack->lastContext = 0;
 873
 874         switch (term.atom.quantityType) {
 875         case QuantifierFixedCount: {
 876             // While we haven't yet reached our fixed limit,
 877             while (backTrack->matchAmount < term.atom.quantityCount) {
 878                 // Try to do a match, and it it succeeds, add it to the list.
 879                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
 880                 JSRegExpResult result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
 881                 if (result == JSRegExpMatch)
 882                     appendParenthesesDisjunctionContext(backTrack, context);
 883                 else {
 884                     // The match failed; try to find an alternate point to carry on from.
 885                     resetMatches(term, context);
 886                     freeParenthesesDisjunctionContext(context);
 887
 888                     if (result != JSRegExpNoMatch)
 889                         return result;
 890                     JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
 891                     if (backtrackResult != JSRegExpMatch)
 892                         return backtrackResult;
 893                 }
 894             }
 895
 896             ASSERT(backTrack->matchAmount == term.atom.quantityCount);
 897             ParenthesesDisjunctionContext* context = backTrack->lastContext;
 898             recordParenthesesMatch(term, context);
 899             return JSRegExpMatch;
 900         }
 901
 902         case QuantifierGreedy: {
 903             while (backTrack->matchAmount < term.atom.quantityCount) {
 904                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
 905                 JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
 906                 if (result == JSRegExpMatch)
 907                     appendParenthesesDisjunctionContext(backTrack, context);
 908                 else {
 909                     resetMatches(term, context);
 910                     freeParenthesesDisjunctionContext(context);
 911
 912                     if (result != JSRegExpNoMatch)
 913                         return result;
 914
 915                     break;
 916                 }
 917             }
 918
 919             if (backTrack->matchAmount) {
 920                 ParenthesesDisjunctionContext* context = backTrack->lastContext;
 921                 recordParenthesesMatch(term, context);
 922             }
 923             return JSRegExpMatch;
 924         }
 925
 926         case QuantifierNonGreedy:
 927             return JSRegExpMatch;
 928         }
 929
 930         ASSERT_NOT_REACHED();
 931         return JSRegExpErrorNoMatch;
 932     }
 933
 934     // Rules for backtracking differ depending on whether this is greedy or non-greedy.
 935     //
 936     // Greedy matches never should try just adding more - you should already have done
 937     // the 'more' cases.  Always backtrack, at least a leetle bit.  However cases where
 938     // you backtrack an item off the list needs checking, since we'll never have matched
 939     // the one less case.  Tracking forwards, still add as much as possible.
 940     //
 941     // Non-greedy, we've already done the one less case, so don't match on popping.
 942     // We haven't done the one more case, so always try to add that.
 943     //
 944     JSRegExpResult backtrackParentheses(ByteTerm& term, DisjunctionContext* context)
 945     {
 946         ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
 947
 948         BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
 949         ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
 950
 951         switch (term.atom.quantityType) {
 952         case QuantifierFixedCount: {
 953             ASSERT(backTrack->matchAmount == term.atom.quantityCount);
 954
 955             ParenthesesDisjunctionContext* context = 0;
 956             JSRegExpResult result = parenthesesDoBacktrack(term, backTrack);
 957
 958             if (result != JSRegExpMatch)
 959                 return result;
 960
 961             // While we haven't yet reached our fixed limit,
 962             while (backTrack->matchAmount < term.atom.quantityCount) {
 963                 // Try to do a match, and it it succeeds, add it to the list.
 964                 context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
 965                 result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
 966
 967                 if (result == JSRegExpMatch)
 968                     appendParenthesesDisjunctionContext(backTrack, context);
 969                 else {
 970                     // The match failed; try to find an alternate point to carry on from.
 971                     resetMatches(term, context);
 972                     freeParenthesesDisjunctionContext(context);
 973
 974                     if (result != JSRegExpNoMatch)
 975                         return result;
 976                     JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
 977                     if (backtrackResult != JSRegExpMatch)
 978                         return backtrackResult;
 979                 }
 980             }
 981
 982             ASSERT(backTrack->matchAmount == term.atom.quantityCount);
 983             context = backTrack->lastContext;
 984             recordParenthesesMatch(term, context);
 985             return JSRegExpMatch;
 986         }
 987
 988         case QuantifierGreedy: {
 989             if (!backTrack->matchAmount)
 990                 return JSRegExpNoMatch;
 991
 992             ParenthesesDisjunctionContext* context = backTrack->lastContext;
 993             JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
 994             if (result == JSRegExpMatch) {
 995                 while (backTrack->matchAmount < term.atom.quantityCount) {
 996                     ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
 997                     JSRegExpResult parenthesesResult = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
 998                     if (parenthesesResult == JSRegExpMatch)
 999                         appendParenthesesDisjunctionContext(backTrack, context);
1000                     else {
1001                         resetMatches(term, context);
1002                         freeParenthesesDisjunctionContext(context);
1003
1004                         if (parenthesesResult != JSRegExpNoMatch)
1005                             return parenthesesResult;
1006
1007                         break;
1008                     }
1009                 }
1010             } else {
1011                 resetMatches(term, context);
1012                 popParenthesesDisjunctionContext(backTrack);
1013                 freeParenthesesDisjunctionContext(context);
1014
1015                 if (result != JSRegExpNoMatch)
1016                     return result;
1017             }
1018
1019             if (backTrack->matchAmount) {
1020                 ParenthesesDisjunctionContext* context = backTrack->lastContext;
1021                 recordParenthesesMatch(term, context);
1022             }
1023             return JSRegExpMatch;
1024         }
1025
1026         case QuantifierNonGreedy: {
1027             // If we've not reached the limit, try to add one more match.
1028             if (backTrack->matchAmount < term.atom.quantityCount) {
1029                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
1030                 JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
1031                 if (result == JSRegExpMatch) {
1032                     appendParenthesesDisjunctionContext(backTrack, context);
1033                     recordParenthesesMatch(term, context);
1034                     return JSRegExpMatch;
1035                 }
1036
1037                 resetMatches(term, context);
1038                 freeParenthesesDisjunctionContext(context);
1039
1040                 if (result != JSRegExpNoMatch)
1041                     return result;
1042             }
1043
1044             // Nope - okay backtrack looking for an alternative.
1045             while (backTrack->matchAmount) {
1046                 ParenthesesDisjunctionContext* context = backTrack->lastContext;
1047                 JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
1048                 if (result == JSRegExpMatch) {
1049                     // successful backtrack! we're back in the game!
1050                     if (backTrack->matchAmount) {
1051                         context = backTrack->lastContext;
1052                         recordParenthesesMatch(term, context);
1053                     }
1054                     return JSRegExpMatch;
1055                 }
1056
1057                 // pop a match off the stack
1058                 resetMatches(term, context);
1059                 popParenthesesDisjunctionContext(backTrack);
1060                 freeParenthesesDisjunctionContext(context);
1061
1062                 if (result != JSRegExpNoMatch)
1063                     return result;
1064             }
1065
1066             return JSRegExpNoMatch;
1067         }
1068         }
1069
1070         ASSERT_NOT_REACHED();
1071         return JSRegExpErrorNoMatch;
1072     }
1073
1074     bool matchDotStarEnclosure(ByteTerm& term, DisjunctionContext* context)
1075     {
1076         UNUSED_PARAM(term);
1077         unsigned matchBegin = context->matchBegin;
1078
1079         if (matchBegin) {
1080             for (matchBegin--; true; matchBegin--) {
1081                 if (testCharacterClass(pattern->newlineCharacterClass, input.reread(matchBegin))) {
1082                     ++matchBegin;
1083                     break;
1084                 }
1085
1086                 if (!matchBegin)
1087                     break;
1088             }
1089         }
1090
1091         unsigned matchEnd = input.getPos();
1092
1093         for (; (matchEnd != input.end())
1094              && (!testCharacterClass(pattern->newlineCharacterClass, input.reread(matchEnd))); matchEnd++) { }
1095
1096         if (((matchBegin && term.anchors.m_bol)
1097              || ((matchEnd != input.end()) && term.anchors.m_eol))
1098             && !pattern->m_multiline)
1099             return false;
1100
1101         context->matchBegin = matchBegin;
1102         context->matchEnd = matchEnd;
1103         return true;
1104     }
1105
1106 #define MATCH_NEXT() { ++context->term; goto matchAgain; }
1107 #define BACKTRACK() { --context->term; goto backtrack; }
1108 #define currentTerm() (disjunction->terms[context->term])
1109     JSRegExpResult matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
1110     {
1111         if (!--remainingMatchCount)
1112             return JSRegExpErrorHitLimit;
1113
1114         if (btrack)
1115             BACKTRACK();
1116
1117         context->matchBegin = input.getPos();
1118         context->term = 0;
1119
1120     matchAgain:
1121         ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
1122
1123         switch (currentTerm().type) {
1124         case ByteTerm::TypeSubpatternBegin:
1125             MATCH_NEXT();
1126         case ByteTerm::TypeSubpatternEnd:
1127             context->matchEnd = input.getPos();
1128             return JSRegExpMatch;
1129
1130         case ByteTerm::TypeBodyAlternativeBegin:
1131             MATCH_NEXT();
1132         case ByteTerm::TypeBodyAlternativeDisjunction:
1133         case ByteTerm::TypeBodyAlternativeEnd:
1134             context->matchEnd = input.getPos();
1135             return JSRegExpMatch;
1136
1137         case ByteTerm::TypeAlternativeBegin:
1138             MATCH_NEXT();
1139         case ByteTerm::TypeAlternativeDisjunction:
1140         case ByteTerm::TypeAlternativeEnd: {
1141             int offset = currentTerm().alternative.end;
1142             BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
1143             backTrack->offset = offset;
1144             context->term += offset;
1145             MATCH_NEXT();
1146         }
1147
1148         case ByteTerm::TypeAssertionBOL:
1149             if (matchAssertionBOL(currentTerm()))
1150                 MATCH_NEXT();
1151             BACKTRACK();
1152         case ByteTerm::TypeAssertionEOL:
1153             if (matchAssertionEOL(currentTerm()))
1154                 MATCH_NEXT();
1155             BACKTRACK();
1156         case ByteTerm::TypeAssertionWordBoundary:
1157             if (matchAssertionWordBoundary(currentTerm()))
1158                 MATCH_NEXT();
1159             BACKTRACK();
1160
1161         case ByteTerm::TypePatternCharacterOnce:
1162         case ByteTerm::TypePatternCharacterFixed: {
1163             for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
1164                 if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition - matchAmount))
1165                     BACKTRACK();
1166             }
1167             MATCH_NEXT();
1168         }
1169         case ByteTerm::TypePatternCharacterGreedy: {
1170             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1171             unsigned matchAmount = 0;
1172             while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {
1173                 if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition + 1)) {
1174                     input.uncheckInput(1);
1175                     break;
1176                 }
1177                 ++matchAmount;
1178             }
1179             backTrack->matchAmount = matchAmount;
1180
1181             MATCH_NEXT();
1182         }
1183         case ByteTerm::TypePatternCharacterNonGreedy: {
1184             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1185             backTrack->matchAmount = 0;
1186             MATCH_NEXT();
1187         }
1188
1189         case ByteTerm::TypePatternCasedCharacterOnce:
1190         case ByteTerm::TypePatternCasedCharacterFixed: {
1191             for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
1192                 if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - matchAmount))
1193                     BACKTRACK();
1194             }
1195             MATCH_NEXT();
1196         }
1197         case ByteTerm::TypePatternCasedCharacterGreedy: {
1198             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1199             unsigned matchAmount = 0;
1200             while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {
1201                 if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition + 1)) {
1202                     input.uncheckInput(1);
1203                     break;
1204                 }
1205                 ++matchAmount;
1206             }
1207             backTrack->matchAmount = matchAmount;
1208
1209             MATCH_NEXT();
1210         }
1211         case ByteTerm::TypePatternCasedCharacterNonGreedy: {
1212             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1213             backTrack->matchAmount = 0;
1214             MATCH_NEXT();
1215         }
1216
1217         case ByteTerm::TypeCharacterClass:
1218             if (matchCharacterClass(currentTerm(), context))
1219                 MATCH_NEXT();
1220             BACKTRACK();
1221         case ByteTerm::TypeBackReference:
1222             if (matchBackReference(currentTerm(), context))
1223                 MATCH_NEXT();
1224             BACKTRACK();
1225         case ByteTerm::TypeParenthesesSubpattern: {
1226             JSRegExpResult result = matchParentheses(currentTerm(), context);
1227
1228             if (result == JSRegExpMatch) {
1229                 MATCH_NEXT();
1230             }  else if (result != JSRegExpNoMatch)
1231                 return result;
1232
1233             BACKTRACK();
1234         }
1235         case ByteTerm::TypeParenthesesSubpatternOnceBegin:
1236             if (matchParenthesesOnceBegin(currentTerm(), context))
1237                 MATCH_NEXT();
1238             BACKTRACK();
1239         case ByteTerm::TypeParenthesesSubpatternOnceEnd:
1240             if (matchParenthesesOnceEnd(currentTerm(), context))
1241                 MATCH_NEXT();
1242             BACKTRACK();
1243         case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
1244             if (matchParenthesesTerminalBegin(currentTerm(), context))
1245                 MATCH_NEXT();
1246             BACKTRACK();
1247         case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
1248             if (matchParenthesesTerminalEnd(currentTerm(), context))
1249                 MATCH_NEXT();
1250             BACKTRACK();
1251         case ByteTerm::TypeParentheticalAssertionBegin:
1252             if (matchParentheticalAssertionBegin(currentTerm(), context))
1253                 MATCH_NEXT();
1254             BACKTRACK();
1255         case ByteTerm::TypeParentheticalAssertionEnd:
1256             if (matchParentheticalAssertionEnd(currentTerm(), context))
1257                 MATCH_NEXT();
1258             BACKTRACK();
1259
1260         case ByteTerm::TypeCheckInput:
1261             if (input.checkInput(currentTerm().checkInputCount))
1262                 MATCH_NEXT();
1263             BACKTRACK();
1264
1265         case ByteTerm::TypeUncheckInput:
1266             input.uncheckInput(currentTerm().checkInputCount);
1267             MATCH_NEXT();
1268
1269         case ByteTerm::TypeDotStarEnclosure:
1270             if (matchDotStarEnclosure(currentTerm(), context))
1271                 return JSRegExpMatch;
1272             BACKTRACK();
1273         }
1274
1275         // We should never fall-through to here.
1276         ASSERT_NOT_REACHED();
1277
1278     backtrack:
1279         ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
1280
1281         switch (currentTerm().type) {
1282         case ByteTerm::TypeSubpatternBegin:
1283             return JSRegExpNoMatch;
1284         case ByteTerm::TypeSubpatternEnd:
1285             ASSERT_NOT_REACHED();
1286
1287         case ByteTerm::TypeBodyAlternativeBegin:
1288         case ByteTerm::TypeBodyAlternativeDisjunction: {
1289             int offset = currentTerm().alternative.next;
1290             context->term += offset;
1291             if (offset > 0)
1292                 MATCH_NEXT();
1293
1294             if (input.atEnd())
1295                 return JSRegExpNoMatch;
1296
1297             input.next();
1298
1299             context->matchBegin = input.getPos();
1300
1301             if (currentTerm().alternative.onceThrough)
1302                 context->term += currentTerm().alternative.next;
1303
1304             MATCH_NEXT();
1305         }
1306         case ByteTerm::TypeBodyAlternativeEnd:
1307             ASSERT_NOT_REACHED();
1308
1309         case ByteTerm::TypeAlternativeBegin:
1310         case ByteTerm::TypeAlternativeDisjunction: {
1311             int offset = currentTerm().alternative.next;
1312             context->term += offset;
1313             if (offset > 0)
1314                 MATCH_NEXT();
1315             BACKTRACK();
1316         }
1317         case ByteTerm::TypeAlternativeEnd: {
1318             // We should never backtrack back into an alternative of the main body of the regex.
1319             BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
1320             unsigned offset = backTrack->offset;
1321             context->term -= offset;
1322             BACKTRACK();
1323         }
1324
1325         case ByteTerm::TypeAssertionBOL:
1326         case ByteTerm::TypeAssertionEOL:
1327         case ByteTerm::TypeAssertionWordBoundary:
1328             BACKTRACK();
1329
1330         case ByteTerm::TypePatternCharacterOnce:
1331         case ByteTerm::TypePatternCharacterFixed:
1332         case ByteTerm::TypePatternCharacterGreedy:
1333         case ByteTerm::TypePatternCharacterNonGreedy:
1334             if (backtrackPatternCharacter(currentTerm(), context))
1335                 MATCH_NEXT();
1336             BACKTRACK();
1337         case ByteTerm::TypePatternCasedCharacterOnce:
1338         case ByteTerm::TypePatternCasedCharacterFixed:
1339         case ByteTerm::TypePatternCasedCharacterGreedy:
1340         case ByteTerm::TypePatternCasedCharacterNonGreedy:
1341             if (backtrackPatternCasedCharacter(currentTerm(), context))
1342                 MATCH_NEXT();
1343             BACKTRACK();
1344         case ByteTerm::TypeCharacterClass:
1345             if (backtrackCharacterClass(currentTerm(), context))
1346                 MATCH_NEXT();
1347             BACKTRACK();
1348         case ByteTerm::TypeBackReference:
1349             if (backtrackBackReference(currentTerm(), context))
1350                 MATCH_NEXT();
1351             BACKTRACK();
1352         case ByteTerm::TypeParenthesesSubpattern: {
1353             JSRegExpResult result = backtrackParentheses(currentTerm(), context);
1354
1355             if (result == JSRegExpMatch) {
1356                 MATCH_NEXT();
1357             } else if (result != JSRegExpNoMatch)
1358                 return result;
1359
1360             BACKTRACK();
1361         }
1362         case ByteTerm::TypeParenthesesSubpatternOnceBegin:
1363             if (backtrackParenthesesOnceBegin(currentTerm(), context))
1364                 MATCH_NEXT();
1365             BACKTRACK();
1366         case ByteTerm::TypeParenthesesSubpatternOnceEnd:
1367             if (backtrackParenthesesOnceEnd(currentTerm(), context))
1368                 MATCH_NEXT();
1369             BACKTRACK();
1370         case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
1371             if (backtrackParenthesesTerminalBegin(currentTerm(), context))
1372                 MATCH_NEXT();
1373             BACKTRACK();
1374         case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
1375             if (backtrackParenthesesTerminalEnd(currentTerm(), context))
1376                 MATCH_NEXT();
1377             BACKTRACK();
1378         case ByteTerm::TypeParentheticalAssertionBegin:
1379             if (backtrackParentheticalAssertionBegin(currentTerm(), context))
1380                 MATCH_NEXT();
1381             BACKTRACK();
1382         case ByteTerm::TypeParentheticalAssertionEnd:
1383             if (backtrackParentheticalAssertionEnd(currentTerm(), context))
1384                 MATCH_NEXT();
1385             BACKTRACK();
1386
1387         case ByteTerm::TypeCheckInput:
1388             input.uncheckInput(currentTerm().checkInputCount);
1389             BACKTRACK();
1390
1391         case ByteTerm::TypeUncheckInput:
1392             input.checkInput(currentTerm().checkInputCount);
1393             BACKTRACK();
1394
1395         case ByteTerm::TypeDotStarEnclosure:
1396             ASSERT_NOT_REACHED();
1397         }
1398
1399         ASSERT_NOT_REACHED();
1400         return JSRegExpErrorNoMatch;
1401     }
1402
1403     JSRegExpResult matchNonZeroDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
1404     {
1405         JSRegExpResult result = matchDisjunction(disjunction, context, btrack);
1406
1407         if (result == JSRegExpMatch) {
1408             while (context->matchBegin == context->matchEnd) {
1409                 result = matchDisjunction(disjunction, context, true);
1410                 if (result != JSRegExpMatch)
1411                     return result;
1412             }
1413             return JSRegExpMatch;
1414         }
1415
1416         return result;
1417     }
1418
1419     unsigned interpret()
1420     {
1421         if (!input.isAvailableInput(0))
1422             return offsetNoMatch;
1423
1424         for (unsigned i = 0; i < pattern->m_body->m_numSubpatterns + 1; ++i)
1425             output[i << 1] = offsetNoMatch;
1426
1427         allocatorPool = pattern->m_allocator->startAllocator();
1428         if (!allocatorPool)
1429             CRASH();
1430
1431         DisjunctionContext* context = allocDisjunctionContext(pattern->m_body.get());
1432
1433         JSRegExpResult result = matchDisjunction(pattern->m_body.get(), context, false);
1434         if (result == JSRegExpMatch) {
1435             output[0] = context->matchBegin;
1436             output[1] = context->matchEnd;
1437         }
1438
1439         freeDisjunctionContext(context);
1440
1441         pattern->m_allocator->stopAllocator();
1442
1443         ASSERT((result == JSRegExpMatch) == (output[0] != offsetNoMatch));
1444         return output[0];
1445     }
1446
1447     Interpreter(BytecodePattern* pattern, unsigned* output, const CharType* input, unsigned length, unsigned start)
1448         : pattern(pattern)
1449         , output(output)
1450         , input(input, start, length)
1451         , allocatorPool(0)
1452         , remainingMatchCount(matchLimit)
1453     {
1454     }
1455
1456 private:
1457     BytecodePattern* pattern;
1458     unsigned* output;
1459     InputStream input;
1460     BumpPointerPool* allocatorPool;
1461     unsigned remainingMatchCount;
1462 };
1463
1464
1465
1466 class ByteCompiler {
1467     struct ParenthesesStackEntry {
1468         unsigned beginTerm;
1469         unsigned savedAlternativeIndex;
1470         ParenthesesStackEntry(unsigned beginTerm, unsigned savedAlternativeIndex/*, unsigned subpatternId, bool capture = false*/)
1471             : beginTerm(beginTerm)
1472             , savedAlternativeIndex(savedAlternativeIndex)
1473         {
1474         }
1475     };
1476
1477 public:
1478     ByteCompiler(YarrPattern& pattern)
1479         : m_pattern(pattern)
1480     {
1481         m_currentAlternativeIndex = 0;
1482     }
1483
1484     PassOwnPtr<BytecodePattern> compile(BumpPointerAllocator* allocator)
1485     {
1486         regexBegin(m_pattern.m_numSubpatterns, m_pattern.m_body->m_callFrameSize, m_pattern.m_body->m_alternatives[0]->onceThrough());
1487         emitDisjunction(m_pattern.m_body);
1488         regexEnd();
1489
1490         return adoptPtr(new BytecodePattern(m_bodyDisjunction.release(), m_allParenthesesInfo, m_pattern, allocator));
1491     }
1492
1493     void checkInput(unsigned count)
1494     {
1495         m_bodyDisjunction->terms.append(ByteTerm::CheckInput(count));
1496     }
1497
1498     void uncheckInput(unsigned count)
1499     {
1500         m_bodyDisjunction->terms.append(ByteTerm::UncheckInput(count));
1501     }
1502
1503     void assertionBOL(unsigned inputPosition)
1504     {
1505         m_bodyDisjunction->terms.append(ByteTerm::BOL(inputPosition));
1506     }
1507
1508     void assertionEOL(unsigned inputPosition)
1509     {
1510         m_bodyDisjunction->terms.append(ByteTerm::EOL(inputPosition));
1511     }
1512
1513     void assertionWordBoundary(bool invert, unsigned inputPosition)
1514     {
1515         m_bodyDisjunction->terms.append(ByteTerm::WordBoundary(invert, inputPosition));
1516     }
1517
1518     void atomPatternCharacter(UChar ch, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
1519     {
1520         if (m_pattern.m_ignoreCase) {
1521             UChar lo = Unicode::toLower(ch);
1522             UChar hi = Unicode::toUpper(ch);
1523
1524             if (lo != hi) {
1525                 m_bodyDisjunction->terms.append(ByteTerm(lo, hi, inputPosition, frameLocation, quantityCount, quantityType));
1526                 return;
1527             }
1528         }
1529
1530         m_bodyDisjunction->terms.append(ByteTerm(ch, inputPosition, frameLocation, quantityCount, quantityType));
1531     }
1532
1533     void atomCharacterClass(CharacterClass* characterClass, bool invert, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
1534     {
1535         m_bodyDisjunction->terms.append(ByteTerm(characterClass, invert, inputPosition));
1536
1537         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount.unsafeGet();
1538         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
1539         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1540     }
1541
1542     void atomBackReference(unsigned subpatternId, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
1543     {
1544         ASSERT(subpatternId);
1545
1546         m_bodyDisjunction->terms.append(ByteTerm::BackReference(subpatternId, inputPosition));
1547
1548         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount.unsafeGet();
1549         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
1550         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1551     }
1552
1553     void atomParenthesesOnceBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
1554     {
1555         int beginTerm = m_bodyDisjunction->terms.size();
1556
1557         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
1558         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1559         m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1560         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
1561
1562         m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1563         m_currentAlternativeIndex = beginTerm + 1;
1564     }
1565
1566     void atomParenthesesTerminalBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
1567     {
1568         int beginTerm = m_bodyDisjunction->terms.size();
1569
1570         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalBegin, subpatternId, capture, false, inputPosition));
1571         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1572         m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1573         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
1574
1575         m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1576         m_currentAlternativeIndex = beginTerm + 1;
1577     }
1578
1579     void atomParenthesesSubpatternBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
1580     {
1581         // Errrk! - this is a little crazy, we initially generate as a TypeParenthesesSubpatternOnceBegin,
1582         // then fix this up at the end! - simplifying this should make it much clearer.
1583         // https://bugs.webkit.org/show_bug.cgi?id=50136
1584
1585         int beginTerm = m_bodyDisjunction->terms.size();
1586
1587         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
1588         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1589         m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1590         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
1591
1592         m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1593         m_currentAlternativeIndex = beginTerm + 1;
1594     }
1595
1596     void atomParentheticalAssertionBegin(unsigned subpatternId, bool invert, unsigned frameLocation, unsigned alternativeFrameLocation)
1597     {
1598         int beginTerm = m_bodyDisjunction->terms.size();
1599
1600         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionBegin, subpatternId, false, invert, 0));
1601         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1602         m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1603         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
1604
1605         m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1606         m_currentAlternativeIndex = beginTerm + 1;
1607     }
1608
1609     void atomParentheticalAssertionEnd(unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
1610     {
1611         unsigned beginTerm = popParenthesesStack();
1612         closeAlternative(beginTerm + 1);
1613         unsigned endTerm = m_bodyDisjunction->terms.size();
1614
1615         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParentheticalAssertionBegin);
1616
1617         bool invert = m_bodyDisjunction->terms[beginTerm].invert();
1618         unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
1619
1620         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionEnd, subpatternId, false, invert, inputPosition));
1621         m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1622         m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1623         m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
1624
1625         m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
1626         m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1627         m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet();
1628         m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
1629     }
1630
1631     void assertionDotStarEnclosure(bool bolAnchored, bool eolAnchored)
1632     {
1633         m_bodyDisjunction->terms.append(ByteTerm::DotStarEnclosure(bolAnchored, eolAnchored));
1634     }
1635
1636     unsigned popParenthesesStack()
1637     {
1638         ASSERT(m_parenthesesStack.size());
1639         int stackEnd = m_parenthesesStack.size() - 1;
1640         unsigned beginTerm = m_parenthesesStack[stackEnd].beginTerm;
1641         m_currentAlternativeIndex = m_parenthesesStack[stackEnd].savedAlternativeIndex;
1642         m_parenthesesStack.shrink(stackEnd);
1643
1644         ASSERT(beginTerm < m_bodyDisjunction->terms.size());
1645         ASSERT(m_currentAlternativeIndex < m_bodyDisjunction->terms.size());
1646
1647         return beginTerm;
1648     }
1649
1650 #ifndef NDEBUG
1651     void dumpDisjunction(ByteDisjunction* disjunction)
1652     {
1653         dataLog("ByteDisjunction(%p):\n\t", disjunction);
1654         for (unsigned i = 0; i < disjunction->terms.size(); ++i)
1655             dataLog("{ %d } ", disjunction->terms[i].type);
1656         dataLog("\n");
1657     }
1658 #endif
1659
1660     void closeAlternative(int beginTerm)
1661     {
1662         int origBeginTerm = beginTerm;
1663         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeBegin);
1664         int endIndex = m_bodyDisjunction->terms.size();
1665
1666         unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation;
1667
1668         if (!m_bodyDisjunction->terms[beginTerm].alternative.next)
1669             m_bodyDisjunction->terms.remove(beginTerm);
1670         else {
1671             while (m_bodyDisjunction->terms[beginTerm].alternative.next) {
1672                 beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next;
1673                 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeDisjunction);
1674                 m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;
1675                 m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
1676             }
1677
1678             m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;
1679
1680             m_bodyDisjunction->terms.append(ByteTerm::AlternativeEnd());
1681             m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
1682         }
1683     }
1684
1685     void closeBodyAlternative()
1686     {
1687         int beginTerm = 0;
1688         int origBeginTerm = 0;
1689         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeBegin);
1690         int endIndex = m_bodyDisjunction->terms.size();
1691
1692         unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation;
1693
1694         while (m_bodyDisjunction->terms[beginTerm].alternative.next) {
1695             beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next;
1696             ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeDisjunction);
1697             m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;
1698             m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
1699         }
1700
1701         m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;
1702
1703         m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeEnd());
1704         m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
1705     }
1706
1707     void atomParenthesesSubpatternEnd(unsigned lastSubpatternId, int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType, unsigned callFrameSize = 0)
1708     {
1709         unsigned beginTerm = popParenthesesStack();
1710         closeAlternative(beginTerm + 1);
1711         unsigned endTerm = m_bodyDisjunction->terms.size();
1712
1713         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
1714
1715         ByteTerm& parenthesesBegin = m_bodyDisjunction->terms[beginTerm];
1716
1717         bool capture = parenthesesBegin.capture();
1718         unsigned subpatternId = parenthesesBegin.atom.subpatternId;
1719
1720         unsigned numSubpatterns = lastSubpatternId - subpatternId + 1;
1721         ByteDisjunction* parenthesesDisjunction = new ByteDisjunction(numSubpatterns, callFrameSize);
1722
1723         parenthesesDisjunction->terms.append(ByteTerm::SubpatternBegin());
1724         for (unsigned termInParentheses = beginTerm + 1; termInParentheses < endTerm; ++termInParentheses)
1725             parenthesesDisjunction->terms.append(m_bodyDisjunction->terms[termInParentheses]);
1726         parenthesesDisjunction->terms.append(ByteTerm::SubpatternEnd());
1727
1728         m_bodyDisjunction->terms.shrink(beginTerm);
1729
1730         m_allParenthesesInfo.append(parenthesesDisjunction);
1731         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction, capture, inputPosition));
1732
1733         m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
1734         m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1735         m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
1736     }
1737
1738     void atomParenthesesOnceEnd(int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
1739     {
1740         unsigned beginTerm = popParenthesesStack();
1741         closeAlternative(beginTerm + 1);
1742         unsigned endTerm = m_bodyDisjunction->terms.size();
1743
1744         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
1745
1746         bool capture = m_bodyDisjunction->terms[beginTerm].capture();
1747         unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
1748
1749         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceEnd, subpatternId, capture, false, inputPosition));
1750         m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1751         m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1752         m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
1753
1754         m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
1755         m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1756         m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet();
1757         m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
1758     }
1759
1760     void atomParenthesesTerminalEnd(int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
1761     {
1762         unsigned beginTerm = popParenthesesStack();
1763         closeAlternative(beginTerm + 1);
1764         unsigned endTerm = m_bodyDisjunction->terms.size();
1765
1766         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
1767
1768         bool capture = m_bodyDisjunction->terms[beginTerm].capture();
1769         unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
1770
1771         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalEnd, subpatternId, capture, false, inputPosition));
1772         m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1773         m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1774         m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
1775
1776         m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
1777         m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1778         m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet();
1779         m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
1780     }
1781
1782     void regexBegin(unsigned numSubpatterns, unsigned callFrameSize, bool onceThrough)
1783     {
1784         m_bodyDisjunction = adoptPtr(new ByteDisjunction(numSubpatterns, callFrameSize));
1785         m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeBegin(onceThrough));
1786         m_bodyDisjunction->terms[0].frameLocation = 0;
1787         m_currentAlternativeIndex = 0;
1788     }
1789
1790     void regexEnd()
1791     {
1792         closeBodyAlternative();
1793     }
1794
1795     void alternativeBodyDisjunction(bool onceThrough)
1796     {
1797         int newAlternativeIndex = m_bodyDisjunction->terms.size();
1798         m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
1799         m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeDisjunction(onceThrough));
1800
1801         m_currentAlternativeIndex = newAlternativeIndex;
1802     }
1803
1804     void alternativeDisjunction()
1805     {
1806         int newAlternativeIndex = m_bodyDisjunction->terms.size();
1807         m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
1808         m_bodyDisjunction->terms.append(ByteTerm::AlternativeDisjunction());
1809
1810         m_currentAlternativeIndex = newAlternativeIndex;
1811     }
1812
1813     void emitDisjunction(PatternDisjunction* disjunction, unsigned inputCountAlreadyChecked = 0, unsigned parenthesesInputCountAlreadyChecked = 0)
1814     {
1815         for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
1816             unsigned currentCountAlreadyChecked = inputCountAlreadyChecked;
1817
1818             PatternAlternative* alternative = disjunction->m_alternatives[alt];
1819
1820             if (alt) {
1821                 if (disjunction == m_pattern.m_body)
1822                     alternativeBodyDisjunction(alternative->onceThrough());
1823                 else
1824                     alternativeDisjunction();
1825             }
1826
1827             unsigned minimumSize = alternative->m_minimumSize;
1828             ASSERT(minimumSize >= parenthesesInputCountAlreadyChecked);
1829             unsigned countToCheck = minimumSize - parenthesesInputCountAlreadyChecked;
1830
1831             if (countToCheck) {
1832                 checkInput(countToCheck);
1833                 currentCountAlreadyChecked += countToCheck;
1834             }
1835
1836             for (unsigned i = 0; i < alternative->m_terms.size(); ++i) {
1837                 PatternTerm& term = alternative->m_terms[i];
1838
1839                 switch (term.type) {
1840                 case PatternTerm::TypeAssertionBOL:
1841                     assertionBOL(currentCountAlreadyChecked - term.inputPosition);
1842                     break;
1843
1844                 case PatternTerm::TypeAssertionEOL:
1845                     assertionEOL(currentCountAlreadyChecked - term.inputPosition);
1846                     break;
1847
1848                 case PatternTerm::TypeAssertionWordBoundary:
1849                     assertionWordBoundary(term.invert(), currentCountAlreadyChecked - term.inputPosition);
1850                     break;
1851
1852                 case PatternTerm::TypePatternCharacter:
1853                     atomPatternCharacter(term.patternCharacter, currentCountAlreadyChecked - term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType);
1854                     break;
1855
1856                 case PatternTerm::TypeCharacterClass:
1857                     atomCharacterClass(term.characterClass, term.invert(), currentCountAlreadyChecked- term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType);
1858                     break;
1859
1860                 case PatternTerm::TypeBackReference:
1861                     atomBackReference(term.backReferenceSubpatternId, currentCountAlreadyChecked - term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType);
1862                         break;
1863
1864                 case PatternTerm::TypeForwardReference:
1865                     break;
1866
1867                 case PatternTerm::TypeParenthesesSubpattern: {
1868                     unsigned disjunctionAlreadyCheckedCount = 0;
1869                     if (term.quantityCount == 1 && !term.parentheses.isCopy) {
1870                         unsigned alternativeFrameLocation = term.frameLocation;
1871                         // For QuantifierFixedCount we pre-check the minimum size; for greedy/non-greedy we reserve a slot in the frame.
1872                         if (term.quantityType == QuantifierFixedCount)
1873                             disjunctionAlreadyCheckedCount = term.parentheses.disjunction->m_minimumSize;
1874                         else
1875                             alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
1876                         unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
1877                         atomParenthesesOnceBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, alternativeFrameLocation);
1878                         emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount);
1879                         atomParenthesesOnceEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType);
1880                     } else if (term.parentheses.isTerminal) {
1881                         unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
1882                         atomParenthesesTerminalBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, term.frameLocation + YarrStackSpaceForBackTrackInfoParenthesesOnce);
1883                         emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount);
1884                         atomParenthesesTerminalEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType);
1885                     } else {
1886                         unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
1887                         atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, 0);
1888                         emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0);
1889                         atomParenthesesSubpatternEnd(term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
1890                     }
1891                     break;
1892                 }
1893
1894                 case PatternTerm::TypeParentheticalAssertion: {
1895                     unsigned alternativeFrameLocation = term.frameLocation + YarrStackSpaceForBackTrackInfoParentheticalAssertion;
1896
1897                     ASSERT(currentCountAlreadyChecked >= static_cast<unsigned>(term.inputPosition));
1898                     unsigned positiveInputOffset = currentCountAlreadyChecked - static_cast<unsigned>(term.inputPosition);
1899                     unsigned uncheckAmount = 0;
1900                     if (positiveInputOffset > term.parentheses.disjunction->m_minimumSize) {
1901                         uncheckAmount = positiveInputOffset - term.parentheses.disjunction->m_minimumSize;
1902                         uncheckInput(uncheckAmount);
1903                         currentCountAlreadyChecked -= uncheckAmount;
1904                     }
1905
1906                     atomParentheticalAssertionBegin(term.parentheses.subpatternId, term.invert(), term.frameLocation, alternativeFrameLocation);
1907                     emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, positiveInputOffset - uncheckAmount);
1908                     atomParentheticalAssertionEnd(0, term.frameLocation, term.quantityCount, term.quantityType);
1909                     if (uncheckAmount) {
1910                         checkInput(uncheckAmount);
1911                         currentCountAlreadyChecked += uncheckAmount;
1912                     }
1913                     break;
1914                 }
1915
1916                 case PatternTerm::TypeDotStarEnclosure:
1917                     assertionDotStarEnclosure(term.anchors.bolAnchor, term.anchors.eolAnchor);
1918                     break;
1919                 }
1920             }
1921         }
1922     }
1923
1924 private:
1925     YarrPattern& m_pattern;
1926     OwnPtr<ByteDisjunction> m_bodyDisjunction;
1927     unsigned m_currentAlternativeIndex;
1928     Vector<ParenthesesStackEntry> m_parenthesesStack;
1929     Vector<ByteDisjunction*> m_allParenthesesInfo;
1930 };
1931
1932 PassOwnPtr<BytecodePattern> byteCompile(YarrPattern& pattern, BumpPointerAllocator* allocator)
1933 {
1934     return ByteCompiler(pattern).compile(allocator);
1935 }
1936
1937 unsigned interpret(BytecodePattern* bytecode, const UString& input, unsigned start, unsigned* output)
1938 {
1939     if (input.is8Bit())
1940         return Interpreter<LChar>(bytecode, output, input.characters8(), input.length(), start).interpret();
1941     return Interpreter<UChar>(bytecode, output, input.characters16(), input.length(), start).interpret();
1942 }
1943
1944 unsigned interpret(BytecodePattern* bytecode, const LChar* input, unsigned length, unsigned start, unsigned* output)
1945 {
1946     return Interpreter<LChar>(bytecode, output, input, length, start).interpret();
1947 }
1948
1949 unsigned interpret(BytecodePattern* bytecode, const UChar* input, unsigned length, unsigned start, unsigned* output)
1950 {
1951     return Interpreter<UChar>(bytecode, output, input, length, start).interpret();
1952 }
1953
1954 // These should be the same for both UChar & LChar.
1955 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoPatternCharacter) == (YarrStackSpaceForBackTrackInfoPatternCharacter * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoPatternCharacter);
1956 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoCharacterClass) == (YarrStackSpaceForBackTrackInfoCharacterClass * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoCharacterClass);
1957 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoBackReference) == (YarrStackSpaceForBackTrackInfoBackReference * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoBackReference);
1958 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoAlternative) == (YarrStackSpaceForBackTrackInfoAlternative * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoAlternative);
1959 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParentheticalAssertion) == (YarrStackSpaceForBackTrackInfoParentheticalAssertion * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheticalAssertion);
1960 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParenthesesOnce) == (YarrStackSpaceForBackTrackInfoParenthesesOnce * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParenthesesOnce);
1961 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParentheses) == (YarrStackSpaceForBackTrackInfoParentheses * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheses);
1962
1963
1964 } }