X-Git-Url: https://git.saurik.com/apple/javascriptcore.git/blobdiff_plain/14957cd040308e3eeec43d26bae5d76da13fcd85..217a6308cd6a1dc049a0bb69263bd4c91f91c4d0:/yarr/YarrInterpreter.cpp

diff --git a/yarr/YarrInterpreter.cpp b/yarr/YarrInterpreter.cpp
index 903c692..f0312ea 100644
--- a/yarr/YarrInterpreter.cpp
+++ b/yarr/YarrInterpreter.cpp
@@ -28,7 +28,11 @@
 #include "YarrInterpreter.h"
 
 #include "Yarr.h"
+#include "YarrCanonicalizeUCS2.h"
 #include <wtf/BumpPointerAllocator.h>
+#include <wtf/DataLog.h>
+#include <wtf/text/CString.h>
+#include <wtf/text/WTFString.h>
 
 #ifndef NDEBUG
 #include <stdio.h>
@@ -38,6 +42,7 @@ using namespace WTF;
 
 namespace JSC { namespace Yarr {
 
+template<typename CharType>
 class Interpreter {
 public:
     struct ParenthesesDisjunctionContext;
@@ -78,8 +83,8 @@ public:
 
     static inline void popParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack)
     {
-        ASSERT(backTrack->matchAmount);
-        ASSERT(backTrack->lastContext);
+        RELEASE_ASSERT(backTrack->matchAmount);
+        RELEASE_ASSERT(backTrack->lastContext);
         backTrack->lastContext = backTrack->lastContext->next;
         --backTrack->matchAmount;
     }
@@ -106,9 +111,8 @@ public:
     {
         size_t size = sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t);
         allocatorPool = allocatorPool->ensureCapacity(size);
-        if (!allocatorPool)
-            CRASH();
-        return new(allocatorPool->alloc(size)) DisjunctionContext();
+        RELEASE_ASSERT(allocatorPool);
+        return new (allocatorPool->alloc(size)) DisjunctionContext();
     }
 
     void freeDisjunctionContext(DisjunctionContext* context)
@@ -118,7 +122,7 @@ public:
 
     struct ParenthesesDisjunctionContext
     {
-        ParenthesesDisjunctionContext(int* output, ByteTerm& term)
+        ParenthesesDisjunctionContext(unsigned* output, ByteTerm& term)
             : next(0)
         {
             unsigned firstSubpatternId = term.atom.subpatternId;
@@ -126,10 +130,10 @@ public:
 
             for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i) {
                 subpatternBackup[i] = output[(firstSubpatternId << 1) + i];
-                output[(firstSubpatternId << 1) + i] = -1;
+                output[(firstSubpatternId << 1) + i] = offsetNoMatch;
             }
 
-            new(getDisjunctionContext(term)) DisjunctionContext();
+            new (getDisjunctionContext(term)) DisjunctionContext();
         }
 
         void* operator new(size_t, void* where)
@@ -137,7 +141,7 @@ public:
             return where;
         }
 
-        void restoreOutput(int* output, unsigned firstSubpatternId, unsigned numNestedSubpatterns)
+        void restoreOutput(unsigned* output, unsigned firstSubpatternId, unsigned numNestedSubpatterns)
         {
             for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i)
                 output[(firstSubpatternId << 1) + i] = subpatternBackup[i];
@@ -149,16 +153,15 @@ public:
         }
 
         ParenthesesDisjunctionContext* next;
-        int subpatternBackup[1];
+        unsigned subpatternBackup[1];
     };
 
-    ParenthesesDisjunctionContext* allocParenthesesDisjunctionContext(ByteDisjunction* disjunction, int* output, ByteTerm& term)
+    ParenthesesDisjunctionContext* allocParenthesesDisjunctionContext(ByteDisjunction* disjunction, unsigned* output, ByteTerm& term)
     {
-        size_t size = sizeof(ParenthesesDisjunctionContext) - sizeof(int) + (term.atom.parenthesesDisjunction->m_numSubpatterns << 1) * sizeof(int) + sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t);
+        size_t size = sizeof(ParenthesesDisjunctionContext) - sizeof(unsigned) + (term.atom.parenthesesDisjunction->m_numSubpatterns << 1) * sizeof(unsigned) + sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t);
         allocatorPool = allocatorPool->ensureCapacity(size);
-        if (!allocatorPool)
-            CRASH();
-        return new(allocatorPool->alloc(size)) ParenthesesDisjunctionContext(output, term);
+        RELEASE_ASSERT(allocatorPool);
+        return new (allocatorPool->alloc(size)) ParenthesesDisjunctionContext(output, term);
     }
 
     void freeParenthesesDisjunctionContext(ParenthesesDisjunctionContext* context)
@@ -168,7 +171,7 @@ public:
 
     class InputStream {
     public:
-        InputStream(const UChar* input, unsigned start, unsigned length)
+        InputStream(const CharType* input, unsigned start, unsigned length)
             : input(input)
             , pos(start)
             , length(length)
@@ -200,11 +203,10 @@ public:
             return input[pos] | input[pos + 1] << 16;
         }
 
-        int readChecked(int position)
+        int readChecked(unsigned negativePositionOffest)
         {
-            ASSERT(position < 0);
-            ASSERT(static_cast<unsigned>(-position) <= pos);
-            unsigned p = pos + position;
+            RELEASE_ASSERT(pos >= negativePositionOffest);
+            unsigned p = pos - negativePositionOffest;
             ASSERT(p < length);
             return input[p];
         }
@@ -248,37 +250,39 @@ public:
             return length;
         }
 
-        bool checkInput(int count)
+        bool checkInput(unsigned count)
         {
-            if ((pos + count) <= length) {
+            if (((pos + count) <= length) && ((pos + count) >= pos)) {
                 pos += count;
                 return true;
             }
             return false;
         }
 
-        void uncheckInput(int count)
+        void uncheckInput(unsigned count)
         {
+            RELEASE_ASSERT(pos >= count);
             pos -= count;
         }
 
-        bool atStart(int position)
+        bool atStart(unsigned negativePositionOffest)
         {
-            return (pos + position) == 0;
+            return pos == negativePositionOffest;
         }
 
-        bool atEnd(int position)
+        bool atEnd(unsigned negativePositionOffest)
         {
-            return (pos + position) == length;
+            RELEASE_ASSERT(pos >= negativePositionOffest);
+            return (pos - negativePositionOffest) == length;
         }
 
-        bool isNotAvailableInput(int position)
+        bool isAvailableInput(unsigned offset)
         {
-            return (pos + position) > length;
+            return (((pos + offset) <= length) && ((pos + offset) >= pos));
         }
 
     private:
-        const UChar* input;
+        const CharType* input;
         unsigned pos;
         unsigned length;
     };
@@ -304,45 +308,52 @@ public:
         return false;
     }
 
-    bool checkCharacter(int testChar, int inputPosition)
+    bool checkCharacter(int testChar, unsigned negativeInputOffset)
     {
-        return testChar == input.readChecked(inputPosition);
+        return testChar == input.readChecked(negativeInputOffset);
     }
 
-    bool checkCasedCharacter(int loChar, int hiChar, int inputPosition)
+    bool checkCasedCharacter(int loChar, int hiChar, unsigned negativeInputOffset)
     {
-        int ch = input.readChecked(inputPosition);
+        int ch = input.readChecked(negativeInputOffset);
         return (loChar == ch) || (hiChar == ch);
     }
 
-    bool checkCharacterClass(CharacterClass* characterClass, bool invert, int inputPosition)
+    bool checkCharacterClass(CharacterClass* characterClass, bool invert, unsigned negativeInputOffset)
     {
-        bool match = testCharacterClass(characterClass, input.readChecked(inputPosition));
+        bool match = testCharacterClass(characterClass, input.readChecked(negativeInputOffset));
         return invert ? !match : match;
     }
 
-    bool tryConsumeBackReference(int matchBegin, int matchEnd, int inputOffset)
+    bool tryConsumeBackReference(int matchBegin, int matchEnd, unsigned negativeInputOffset)
     {
-        int matchSize = matchEnd - matchBegin;
+        unsigned matchSize = (unsigned)(matchEnd - matchBegin);
 
         if (!input.checkInput(matchSize))
             return false;
 
         if (pattern->m_ignoreCase) {
-            for (int i = 0; i < matchSize; ++i) {
-                int ch = input.reread(matchBegin + i);
-
-                int lo = Unicode::toLower(ch);
-                int hi = Unicode::toUpper(ch);
-
-                if ((lo != hi) ? (!checkCasedCharacter(lo, hi, inputOffset - matchSize + i)) : (!checkCharacter(ch, inputOffset - matchSize + i))) {
-                    input.uncheckInput(matchSize);
-                    return false;
-                }
+            for (unsigned i = 0; i < matchSize; ++i) {
+                int oldCh = input.reread(matchBegin + i);
+                int ch = input.readChecked(negativeInputOffset + matchSize - i);
+
+                if (oldCh == ch)
+                    continue;
+
+                // The definition for canonicalize (see ES 5.1, 15.10.2.8) means that
+                // unicode values are never allowed to match against ascii ones.
+                if (isASCII(oldCh) || isASCII(ch)) {
+                    if (toASCIIUpper(oldCh) == toASCIIUpper(ch))
+                        continue;
+                } else if (areCanonicallyEquivalent(oldCh, ch))
+                    continue;
+
+                input.uncheckInput(matchSize);
+                return false;
             }
         } else {
-            for (int i = 0; i < matchSize; ++i) {
-                if (!checkCharacter(input.reread(matchBegin + i), inputOffset - matchSize + i)) {
+            for (unsigned i = 0; i < matchSize; ++i) {
+                if (!checkCharacter(input.reread(matchBegin + i), negativeInputOffset + matchSize - i)) {
                     input.uncheckInput(matchSize);
                     return false;
                 }
@@ -354,7 +365,7 @@ public:
 
     bool matchAssertionBOL(ByteTerm& term)
     {
-        return (input.atStart(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition - 1)));
+        return (input.atStart(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition + 1)));
     }
 
     bool matchAssertionEOL(ByteTerm& term)
@@ -367,7 +378,7 @@ public:
 
     bool matchAssertionWordBoundary(ByteTerm& term)
     {
-        bool prevIsWordchar = !input.atStart(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition - 1));
+        bool prevIsWordchar = !input.atStart(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition + 1));
         bool readIsWordchar;
         if (term.inputPosition)
             readIsWordchar = !input.atEnd(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition));
@@ -397,7 +408,7 @@ public:
         case QuantifierNonGreedy:
             if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
                 ++backTrack->matchAmount;
-                if (checkCharacter(term.atom.patternCharacter, term.inputPosition - 1))
+                if (checkCharacter(term.atom.patternCharacter, term.inputPosition + 1))
                     return true;
             }
             input.uncheckInput(backTrack->matchAmount);
@@ -426,7 +437,7 @@ public:
         case QuantifierNonGreedy:
             if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
                 ++backTrack->matchAmount;
-                if (checkCasedCharacter(term.atom.casedCharacter.lo, term.atom.casedCharacter.hi, term.inputPosition - 1))
+                if (checkCasedCharacter(term.atom.casedCharacter.lo, term.atom.casedCharacter.hi, term.inputPosition + 1))
                     return true;
             }
             input.uncheckInput(backTrack->matchAmount);
@@ -444,7 +455,7 @@ public:
         switch (term.atom.quantityType) {
         case QuantifierFixedCount: {
             for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) {
-                if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + matchAmount))
+                if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - matchAmount))
                     return false;
             }
             return true;
@@ -453,7 +464,7 @@ public:
         case QuantifierGreedy: {
             unsigned matchAmount = 0;
             while ((matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
-                if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - 1)) {
+                if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + 1)) {
                     input.uncheckInput(1);
                     break;
                 }
@@ -469,7 +480,7 @@ public:
             return true;
         }
 
-        ASSERT_NOT_REACHED();
+        RELEASE_ASSERT_NOT_REACHED();
         return false;
     }
 
@@ -493,7 +504,7 @@ public:
         case QuantifierNonGreedy:
             if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
                 ++backTrack->matchAmount;
-                if (checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - 1))
+                if (checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + 1))
                     return true;
             }
             input.uncheckInput(backTrack->matchAmount);
@@ -508,16 +519,19 @@ public:
         ASSERT(term.type == ByteTerm::TypeBackReference);
         BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
 
-        int matchBegin = output[(term.atom.subpatternId << 1)];
-        int matchEnd = output[(term.atom.subpatternId << 1) + 1];
+        unsigned matchBegin = output[(term.atom.subpatternId << 1)];
+        unsigned matchEnd = output[(term.atom.subpatternId << 1) + 1];
 
         // If the end position of the referenced match hasn't set yet then the backreference in the same parentheses where it references to that.
         // In this case the result of match is empty string like when it references to a parentheses with zero-width match.
         // Eg.: /(a\1)/
-        if (matchEnd == -1)
+        if (matchEnd == offsetNoMatch)
+            return true;
+
+        if (matchBegin == offsetNoMatch)
             return true;
 
-        ASSERT((matchBegin == -1) || (matchBegin <= matchEnd));
+        ASSERT(matchBegin <= matchEnd);
 
         if (matchBegin == matchEnd)
             return true;
@@ -548,7 +562,7 @@ public:
             return true;
         }
 
-        ASSERT_NOT_REACHED();
+        RELEASE_ASSERT_NOT_REACHED();
         return false;
     }
 
@@ -557,9 +571,13 @@ public:
         ASSERT(term.type == ByteTerm::TypeBackReference);
         BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
 
-        int matchBegin = output[(term.atom.subpatternId << 1)];
-        int matchEnd = output[(term.atom.subpatternId << 1) + 1];
-        ASSERT((matchBegin == -1) || (matchBegin <= matchEnd));
+        unsigned matchBegin = output[(term.atom.subpatternId << 1)];
+        unsigned matchEnd = output[(term.atom.subpatternId << 1) + 1];
+
+        if (matchBegin == offsetNoMatch)
+            return false;
+
+        ASSERT(matchBegin <= matchEnd);
 
         if (matchBegin == matchEnd)
             return false;
@@ -648,7 +666,7 @@ public:
 
         if (term.capture()) {
             unsigned subpatternId = term.atom.subpatternId;
-            output[(subpatternId << 1)] = input.getPos() + term.inputPosition;
+            output[(subpatternId << 1)] = input.getPos() - term.inputPosition;
         }
 
         return true;
@@ -680,8 +698,8 @@ public:
 
         if (term.capture()) {
             unsigned subpatternId = term.atom.subpatternId;
-            output[(subpatternId << 1)] = -1;
-            output[(subpatternId << 1) + 1] = -1;
+            output[(subpatternId << 1)] = offsetNoMatch;
+            output[(subpatternId << 1) + 1] = offsetNoMatch;
         }
 
         switch (term.atom.quantityType) {
@@ -778,7 +796,7 @@ public:
     {
         // 'Terminal' parentheses are at the end of the regex, and as such a match past end
         // should always be returned as a successful match - we should never backtrack to here.
-        ASSERT_NOT_REACHED();
+        RELEASE_ASSERT_NOT_REACHED();
         return false;
     }
 
@@ -862,12 +880,11 @@ public:
                     resetMatches(term, context);
                     freeParenthesesDisjunctionContext(context);
 
-                    if (result == JSRegExpNoMatch) {
-                        JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
-                        if (backtrackResult != JSRegExpMatch)
-                            return backtrackResult;
-                    } else
+                    if (result != JSRegExpNoMatch)
                         return result;
+                    JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
+                    if (backtrackResult != JSRegExpMatch)
+                        return backtrackResult;
                 }
             }
 
@@ -905,7 +922,7 @@ public:
             return JSRegExpMatch;
         }
 
-        ASSERT_NOT_REACHED();
+        RELEASE_ASSERT_NOT_REACHED();
         return JSRegExpErrorNoMatch;
     }
 
@@ -949,12 +966,11 @@ public:
                     resetMatches(term, context);
                     freeParenthesesDisjunctionContext(context);
 
-                    if (result == JSRegExpNoMatch) {
-                        JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
-                        if (backtrackResult != JSRegExpMatch)
-                            return backtrackResult;
-                    } else
+                    if (result != JSRegExpNoMatch)
                         return result;
+                    JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
+                    if (backtrackResult != JSRegExpMatch)
+                        return backtrackResult;
                 }
             }
 
@@ -1038,14 +1054,15 @@ public:
                 popParenthesesDisjunctionContext(backTrack);
                 freeParenthesesDisjunctionContext(context);
 
-                return result;
+                if (result != JSRegExpNoMatch)
+                    return result;
             }
 
             return JSRegExpNoMatch;
         }
         }
 
-        ASSERT_NOT_REACHED();
+        RELEASE_ASSERT_NOT_REACHED();
         return JSRegExpErrorNoMatch;
     }
 
@@ -1139,7 +1156,7 @@ public:
         case ByteTerm::TypePatternCharacterOnce:
         case ByteTerm::TypePatternCharacterFixed: {
             for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
-                if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition + matchAmount))
+                if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition - matchAmount))
                     BACKTRACK();
             }
             MATCH_NEXT();
@@ -1148,7 +1165,7 @@ public:
             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
             unsigned matchAmount = 0;
             while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {
-                if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition - 1)) {
+                if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition + 1)) {
                     input.uncheckInput(1);
                     break;
                 }
@@ -1167,7 +1184,7 @@ public:
         case ByteTerm::TypePatternCasedCharacterOnce:
         case ByteTerm::TypePatternCasedCharacterFixed: {
             for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
-                if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition + matchAmount))
+                if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - matchAmount))
                     BACKTRACK();
             }
             MATCH_NEXT();
@@ -1176,7 +1193,7 @@ public:
             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
             unsigned matchAmount = 0;
             while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {
-                if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - 1)) {
+                if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition + 1)) {
                     input.uncheckInput(1);
                     break;
                 }
@@ -1251,7 +1268,7 @@ public:
         }
 
         // We should never fall-through to here.
-        ASSERT_NOT_REACHED();
+        RELEASE_ASSERT_NOT_REACHED();
 
     backtrack:
         ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
@@ -1260,7 +1277,7 @@ public:
         case ByteTerm::TypeSubpatternBegin:
             return JSRegExpNoMatch;
         case ByteTerm::TypeSubpatternEnd:
-            ASSERT_NOT_REACHED();
+            RELEASE_ASSERT_NOT_REACHED();
 
         case ByteTerm::TypeBodyAlternativeBegin:
         case ByteTerm::TypeBodyAlternativeDisjunction: {
@@ -1282,7 +1299,7 @@ public:
             MATCH_NEXT();
         }
         case ByteTerm::TypeBodyAlternativeEnd:
-            ASSERT_NOT_REACHED();
+            RELEASE_ASSERT_NOT_REACHED();
 
         case ByteTerm::TypeAlternativeBegin:
         case ByteTerm::TypeAlternativeDisjunction: {
@@ -1371,10 +1388,10 @@ public:
             BACKTRACK();
 
         case ByteTerm::TypeDotStarEnclosure:
-            ASSERT_NOT_REACHED();
+            RELEASE_ASSERT_NOT_REACHED();
         }
 
-        ASSERT_NOT_REACHED();
+        RELEASE_ASSERT_NOT_REACHED();
         return JSRegExpErrorNoMatch;
     }
 
@@ -1394,14 +1411,16 @@ public:
         return result;
     }
 
-    int interpret()
+    unsigned interpret()
     {
-        allocatorPool = pattern->m_allocator->startAllocator();
-        if (!allocatorPool)
-            CRASH();
+        if (!input.isAvailableInput(0))
+            return offsetNoMatch;
 
-        for (unsigned i = 0; i < ((pattern->m_body->m_numSubpatterns + 1) << 1); ++i)
-            output[i] = -1;
+        for (unsigned i = 0; i < pattern->m_body->m_numSubpatterns + 1; ++i)
+            output[i << 1] = offsetNoMatch;
+
+        allocatorPool = pattern->m_allocator->startAllocator();
+        RELEASE_ASSERT(allocatorPool);
 
         DisjunctionContext* context = allocDisjunctionContext(pattern->m_body.get());
 
@@ -1415,15 +1434,14 @@ public:
 
         pattern->m_allocator->stopAllocator();
 
-        // RegExp.cpp currently expects all error to be converted to -1.
-        ASSERT((result == JSRegExpMatch) == (output[0] != -1));
+        ASSERT((result == JSRegExpMatch) == (output[0] != offsetNoMatch));
         return output[0];
     }
 
-    Interpreter(BytecodePattern* pattern, int* output, const UChar* inputChar, unsigned start, unsigned length)
+    Interpreter(BytecodePattern* pattern, unsigned* output, const CharType* input, unsigned length, unsigned start)
         : pattern(pattern)
         , output(output)
-        , input(inputChar, start, length)
+        , input(input, start, length)
         , allocatorPool(0)
         , remainingMatchCount(matchLimit)
     {
@@ -1431,14 +1449,12 @@ public:
 
 private:
     BytecodePattern* pattern;
-    int* output;
+    unsigned* output;
     InputStream input;
     BumpPointerPool* allocatorPool;
     unsigned remainingMatchCount;
 };
 
-
-
 class ByteCompiler {
     struct ParenthesesStackEntry {
         unsigned beginTerm;
@@ -1476,22 +1492,22 @@ public:
         m_bodyDisjunction->terms.append(ByteTerm::UncheckInput(count));
     }
     
-    void assertionBOL(int inputPosition)
+    void assertionBOL(unsigned inputPosition)
     {
         m_bodyDisjunction->terms.append(ByteTerm::BOL(inputPosition));
     }
 
-    void assertionEOL(int inputPosition)
+    void assertionEOL(unsigned inputPosition)
     {
         m_bodyDisjunction->terms.append(ByteTerm::EOL(inputPosition));
     }
 
-    void assertionWordBoundary(bool invert, int inputPosition)
+    void assertionWordBoundary(bool invert, unsigned inputPosition)
     {
         m_bodyDisjunction->terms.append(ByteTerm::WordBoundary(invert, inputPosition));
     }
 
-    void atomPatternCharacter(UChar ch, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
+    void atomPatternCharacter(UChar ch, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
     {
         if (m_pattern.m_ignoreCase) {
             UChar lo = Unicode::toLower(ch);
@@ -1506,27 +1522,27 @@ public:
         m_bodyDisjunction->terms.append(ByteTerm(ch, inputPosition, frameLocation, quantityCount, quantityType));
     }
 
-    void atomCharacterClass(CharacterClass* characterClass, bool invert, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
+    void atomCharacterClass(CharacterClass* characterClass, bool invert, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
     {
         m_bodyDisjunction->terms.append(ByteTerm(characterClass, invert, inputPosition));
 
-        m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount;
+        m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount.unsafeGet();
         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
     }
 
-    void atomBackReference(unsigned subpatternId, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
+    void atomBackReference(unsigned subpatternId, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
     {
         ASSERT(subpatternId);
 
         m_bodyDisjunction->terms.append(ByteTerm::BackReference(subpatternId, inputPosition));
 
-        m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount;
+        m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount.unsafeGet();
         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
     }
 
-    void atomParenthesesOnceBegin(unsigned subpatternId, bool capture, int inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
+    void atomParenthesesOnceBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
     {
         int beginTerm = m_bodyDisjunction->terms.size();
 
@@ -1539,7 +1555,7 @@ public:
         m_currentAlternativeIndex = beginTerm + 1;
     }
 
-    void atomParenthesesTerminalBegin(unsigned subpatternId, bool capture, int inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
+    void atomParenthesesTerminalBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
     {
         int beginTerm = m_bodyDisjunction->terms.size();
 
@@ -1552,7 +1568,7 @@ public:
         m_currentAlternativeIndex = beginTerm + 1;
     }
 
-    void atomParenthesesSubpatternBegin(unsigned subpatternId, bool capture, int inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
+    void atomParenthesesSubpatternBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
     {
         // Errrk! - this is a little crazy, we initially generate as a TypeParenthesesSubpatternOnceBegin,
         // then fix this up at the end! - simplifying this should make it much clearer.
@@ -1582,7 +1598,7 @@ public:
         m_currentAlternativeIndex = beginTerm + 1;
     }
 
-    void atomParentheticalAssertionEnd(int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
+    void atomParentheticalAssertionEnd(unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
     {
         unsigned beginTerm = popParenthesesStack();
         closeAlternative(beginTerm + 1);
@@ -1598,9 +1614,9 @@ public:
         m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
         m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
 
-        m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount;
+        m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
         m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
-        m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount;
+        m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet();
         m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
     }
 
@@ -1626,10 +1642,10 @@ public:
 #ifndef NDEBUG
     void dumpDisjunction(ByteDisjunction* disjunction)
     {
-        printf("ByteDisjunction(%p):\n\t", disjunction);
+        dataLogF("ByteDisjunction(%p):\n\t", disjunction);
         for (unsigned i = 0; i < disjunction->terms.size(); ++i)
-            printf("{ %d } ", disjunction->terms[i].type);
-        printf("\n");
+            dataLogF("{ %d } ", disjunction->terms[i].type);
+        dataLogF("\n");
     }
 #endif
 
@@ -1680,7 +1696,7 @@ public:
         m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
     }
 
-    void atomParenthesesSubpatternEnd(unsigned lastSubpatternId, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType, unsigned callFrameSize = 0)
+    void atomParenthesesSubpatternEnd(unsigned lastSubpatternId, int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType, unsigned callFrameSize = 0)
     {
         unsigned beginTerm = popParenthesesStack();
         closeAlternative(beginTerm + 1);
@@ -1694,24 +1710,27 @@ public:
         unsigned subpatternId = parenthesesBegin.atom.subpatternId;
 
         unsigned numSubpatterns = lastSubpatternId - subpatternId + 1;
-        ByteDisjunction* parenthesesDisjunction = new ByteDisjunction(numSubpatterns, callFrameSize);
+        OwnPtr<ByteDisjunction> parenthesesDisjunction = adoptPtr(new ByteDisjunction(numSubpatterns, callFrameSize));
+
+        unsigned firstTermInParentheses = beginTerm + 1;
+        parenthesesDisjunction->terms.reserveInitialCapacity(endTerm - firstTermInParentheses + 2);
 
         parenthesesDisjunction->terms.append(ByteTerm::SubpatternBegin());
-        for (unsigned termInParentheses = beginTerm + 1; termInParentheses < endTerm; ++termInParentheses)
+        for (unsigned termInParentheses = firstTermInParentheses; termInParentheses < endTerm; ++termInParentheses)
             parenthesesDisjunction->terms.append(m_bodyDisjunction->terms[termInParentheses]);
         parenthesesDisjunction->terms.append(ByteTerm::SubpatternEnd());
 
         m_bodyDisjunction->terms.shrink(beginTerm);
 
-        m_allParenthesesInfo.append(parenthesesDisjunction);
-        m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction, capture, inputPosition));
+        m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction.get(), capture, inputPosition));
+        m_allParenthesesInfo.append(parenthesesDisjunction.release());
 
-        m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount;
+        m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
         m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
         m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
     }
 
-    void atomParenthesesOnceEnd(int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
+    void atomParenthesesOnceEnd(int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
     {
         unsigned beginTerm = popParenthesesStack();
         closeAlternative(beginTerm + 1);
@@ -1727,13 +1746,13 @@ public:
         m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
         m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
 
-        m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount;
+        m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
         m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
-        m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount;
+        m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet();
         m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
     }
 
-    void atomParenthesesTerminalEnd(int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
+    void atomParenthesesTerminalEnd(int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
     {
         unsigned beginTerm = popParenthesesStack();
         closeAlternative(beginTerm + 1);
@@ -1749,9 +1768,9 @@ public:
         m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
         m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
 
-        m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount;
+        m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
         m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
-        m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount;
+        m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet();
         m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
     }
 
@@ -1791,7 +1810,7 @@ public:
         for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
             unsigned currentCountAlreadyChecked = inputCountAlreadyChecked;
 
-            PatternAlternative* alternative = disjunction->m_alternatives[alt];
+            PatternAlternative* alternative = disjunction->m_alternatives[alt].get();
 
             if (alt) {
                 if (disjunction == m_pattern.m_body)
@@ -1814,27 +1833,27 @@ public:
 
                 switch (term.type) {
                 case PatternTerm::TypeAssertionBOL:
-                    assertionBOL(term.inputPosition - currentCountAlreadyChecked);
+                    assertionBOL(currentCountAlreadyChecked - term.inputPosition);
                     break;
 
                 case PatternTerm::TypeAssertionEOL:
-                    assertionEOL(term.inputPosition - currentCountAlreadyChecked);
+                    assertionEOL(currentCountAlreadyChecked - term.inputPosition);
                     break;
 
                 case PatternTerm::TypeAssertionWordBoundary:
-                    assertionWordBoundary(term.invert(), term.inputPosition - currentCountAlreadyChecked);
+                    assertionWordBoundary(term.invert(), currentCountAlreadyChecked - term.inputPosition);
                     break;
 
                 case PatternTerm::TypePatternCharacter:
-                    atomPatternCharacter(term.patternCharacter, term.inputPosition - currentCountAlreadyChecked, term.frameLocation, term.quantityCount, term.quantityType);
+                    atomPatternCharacter(term.patternCharacter, currentCountAlreadyChecked - term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType);
                     break;
 
                 case PatternTerm::TypeCharacterClass:
-                    atomCharacterClass(term.characterClass, term.invert(), term.inputPosition - currentCountAlreadyChecked, term.frameLocation, term.quantityCount, term.quantityType);
+                    atomCharacterClass(term.characterClass, term.invert(), currentCountAlreadyChecked- term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType);
                     break;
 
                 case PatternTerm::TypeBackReference:
-                    atomBackReference(term.backReferenceSubpatternId, term.inputPosition - currentCountAlreadyChecked, term.frameLocation, term.quantityCount, term.quantityType);
+                    atomBackReference(term.backReferenceSubpatternId, currentCountAlreadyChecked - term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType);
                         break;
 
                 case PatternTerm::TypeForwardReference:
@@ -1850,17 +1869,17 @@ public:
                         else
                             alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
                         unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
-                        atomParenthesesOnceBegin(term.parentheses.subpatternId, term.capture(), delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, alternativeFrameLocation);
+                        atomParenthesesOnceBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, alternativeFrameLocation);
                         emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount);
                         atomParenthesesOnceEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType);
                     } else if (term.parentheses.isTerminal) {
                         unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
-                        atomParenthesesTerminalBegin(term.parentheses.subpatternId, term.capture(), delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, term.frameLocation + YarrStackSpaceForBackTrackInfoParenthesesOnce);
+                        atomParenthesesTerminalBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, term.frameLocation + YarrStackSpaceForBackTrackInfoParenthesesOnce);
                         emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount);
                         atomParenthesesTerminalEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType);
                     } else {
                         unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
-                        atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.capture(), delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, 0);
+                        atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, 0);
                         emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0);
                         atomParenthesesSubpatternEnd(term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
                     }
@@ -1902,7 +1921,7 @@ private:
     OwnPtr<ByteDisjunction> m_bodyDisjunction;
     unsigned m_currentAlternativeIndex;
     Vector<ParenthesesStackEntry> m_parenthesesStack;
-    Vector<ByteDisjunction*> m_allParenthesesInfo;
+    Vector<OwnPtr<ByteDisjunction> > m_allParenthesesInfo;
 };
 
 PassOwnPtr<BytecodePattern> byteCompile(YarrPattern& pattern, BumpPointerAllocator* allocator)
@@ -1910,18 +1929,31 @@ PassOwnPtr<BytecodePattern> byteCompile(YarrPattern& pattern, BumpPointerAllocat
     return ByteCompiler(pattern).compile(allocator);
 }
 
-int interpret(BytecodePattern* bytecode, const UChar* input, unsigned start, unsigned length, int* output)
+unsigned interpret(BytecodePattern* bytecode, const String& input, unsigned start, unsigned* output)
+{
+    if (input.is8Bit())
+        return Interpreter<LChar>(bytecode, output, input.characters8(), input.length(), start).interpret();
+    return Interpreter<UChar>(bytecode, output, input.characters16(), input.length(), start).interpret();
+}
+
+unsigned interpret(BytecodePattern* bytecode, const LChar* input, unsigned length, unsigned start, unsigned* output)
+{
+    return Interpreter<LChar>(bytecode, output, input, length, start).interpret();
+}
+
+unsigned interpret(BytecodePattern* bytecode, const UChar* input, unsigned length, unsigned start, unsigned* output)
 {
-    return Interpreter(bytecode, output, input, start, length).interpret();
+    return Interpreter<UChar>(bytecode, output, input, length, start).interpret();
 }
 
-COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoPatternCharacter) == (YarrStackSpaceForBackTrackInfoPatternCharacter * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoPatternCharacter);
-COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoCharacterClass) == (YarrStackSpaceForBackTrackInfoCharacterClass * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoCharacterClass);
-COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoBackReference) == (YarrStackSpaceForBackTrackInfoBackReference * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoBackReference);
-COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoAlternative) == (YarrStackSpaceForBackTrackInfoAlternative * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoAlternative);
-COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParentheticalAssertion) == (YarrStackSpaceForBackTrackInfoParentheticalAssertion * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheticalAssertion);
-COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParenthesesOnce) == (YarrStackSpaceForBackTrackInfoParenthesesOnce * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParenthesesOnce);
-COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParentheses) == (YarrStackSpaceForBackTrackInfoParentheses * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheses);
+// These should be the same for both UChar & LChar.
+COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoPatternCharacter) == (YarrStackSpaceForBackTrackInfoPatternCharacter * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoPatternCharacter);
+COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoCharacterClass) == (YarrStackSpaceForBackTrackInfoCharacterClass * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoCharacterClass);
+COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoBackReference) == (YarrStackSpaceForBackTrackInfoBackReference * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoBackReference);
+COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoAlternative) == (YarrStackSpaceForBackTrackInfoAlternative * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoAlternative);
+COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParentheticalAssertion) == (YarrStackSpaceForBackTrackInfoParentheticalAssertion * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheticalAssertion);
+COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParenthesesOnce) == (YarrStackSpaceForBackTrackInfoParenthesesOnce * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParenthesesOnce);
+COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParentheses) == (YarrStackSpaceForBackTrackInfoParentheses * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheses);
 
 
 } }