JavaScriptCore-7600.1.4.17.5.tar.gz

[apple/javascriptcore.git] / yarr / YarrPattern.cpp
diff --git a/yarr/YarrPattern.cpp b/yarr/YarrPattern.cpp

index bf150c120dc4d939353f40a720373c3a31904cf9..56dd5e56267ba6347694a48c962f75d8cf86698b 100644 (file)
--- a/yarr/YarrPattern.cpp
+++ b/yarr/YarrPattern.cpp
@@ -1,5 +1,5 @@
  /*
  /*
- * Copyright (C) 2009 Apple Inc. All rights reserved.
+ * Copyright (C) 2009, 2013 Apple Inc. All rights reserved.
   * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
   *
   * Redistribution and use in source and binary forms, with or without
   * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
   *
   * Redistribution and use in source and binary forms, with or without
@@ -28,6 +28,7 @@
  #include "YarrPattern.h"
  
  #include "Yarr.h"
  #include "YarrPattern.h"
  
  #include "Yarr.h"
+#include "YarrCanonicalizeUCS2.h"
  #include "YarrParser.h"
  #include <wtf/Vector.h>
  
  #include "YarrParser.h"
  #include <wtf/Vector.h>
  
@@ -66,32 +67,43 @@ public:
  
      void putChar(UChar ch)
      {
  
      void putChar(UChar ch)
      {
+        // Handle ascii cases.
          if (ch <= 0x7f) {
              if (m_isCaseInsensitive && isASCIIAlpha(ch)) {
                  addSorted(m_matches, toASCIIUpper(ch));
                  addSorted(m_matches, toASCIILower(ch));
              } else
                  addSorted(m_matches, ch);
          if (ch <= 0x7f) {
              if (m_isCaseInsensitive && isASCIIAlpha(ch)) {
                  addSorted(m_matches, toASCIIUpper(ch));
                  addSorted(m_matches, toASCIILower(ch));
              } else
                  addSorted(m_matches, ch);
-        } else {
-            UChar upper, lower;
-            if (m_isCaseInsensitive && ((upper = Unicode::toUpper(ch)) != (lower = Unicode::toLower(ch)))) {
-                addSorted(m_matchesUnicode, upper);
-                addSorted(m_matchesUnicode, lower);
-            } else
-                addSorted(m_matchesUnicode, ch);
+            return;
          }
          }
-    }
  
  
-    // returns true if this character has another case, and 'ch' is the upper case form.
-    static inline bool isUnicodeUpper(UChar ch)
-    {
-        return ch != Unicode::toLower(ch);
+        // Simple case, not a case-insensitive match.
+        if (!m_isCaseInsensitive) {
+            addSorted(m_matchesUnicode, ch);
+            return;
+        }
+
+        // Add multiple matches, if necessary.
+        const UCS2CanonicalizationRange* info = rangeInfoFor(ch);
+        if (info->type == CanonicalizeUnique)
+            addSorted(m_matchesUnicode, ch);
+        else
+            putUnicodeIgnoreCase(ch, info);
      }
  
      }
  
-    // returns true if this character has another case, and 'ch' is the lower case form.
-    static inline bool isUnicodeLower(UChar ch)
+    void putUnicodeIgnoreCase(UChar ch, const UCS2CanonicalizationRange* info)
      {
      {
-        return ch != Unicode::toUpper(ch);
+        ASSERT(m_isCaseInsensitive);
+        ASSERT(ch > 0x7f);
+        ASSERT(ch >= info->begin && ch <= info->end);
+        ASSERT(info->type != CanonicalizeUnique);
+        if (info->type == CanonicalizeSet) {
+            for (const uint16_t* set = characterSetInfo[info->value]; (ch = *set); ++set)
+                addSorted(m_matchesUnicode, ch);
+        } else {
+            addSorted(m_matchesUnicode, ch);
+            addSorted(m_matchesUnicode, getCanonicalPair(info, ch));
+        }
      }
  
      void putRange(UChar lo, UChar hi)
      }
  
      void putRange(UChar lo, UChar hi)
@@ -108,50 +120,71 @@ public:
                      addSortedRange(m_ranges, std::max(asciiLo, 'a')+('A'-'a'), std::min(asciiHi, 'z')+('A'-'a'));
              }
          }
                      addSortedRange(m_ranges, std::max(asciiLo, 'a')+('A'-'a'), std::min(asciiHi, 'z')+('A'-'a'));
              }
          }
-        if (hi >= 0x80) {
-            uint32_t unicodeCurr = std::max(lo, (UChar)0x80);
-            addSortedRange(m_rangesUnicode, unicodeCurr, hi);
-            
-            if (m_isCaseInsensitive) {
-                while (unicodeCurr <= hi) {
-                    // If the upper bound of the range (hi) is 0xffff, the increments to
-                    // unicodeCurr in this loop may take it to 0x10000.  This is fine
-                    // (if so we won't re-enter the loop, since the loop condition above
-                    // will definitely fail) - but this does mean we cannot use a UChar
-                    // to represent unicodeCurr, we must use a 32-bit value instead.
-                    ASSERT(unicodeCurr <= 0xffff);
-
-                    if (isUnicodeUpper(unicodeCurr)) {
-                        UChar lowerCaseRangeBegin = Unicode::toLower(unicodeCurr);
-                        UChar lowerCaseRangeEnd = lowerCaseRangeBegin;
-                        while ((++unicodeCurr <= hi) && isUnicodeUpper(unicodeCurr) && (Unicode::toLower(unicodeCurr) == (lowerCaseRangeEnd + 1)))
-                            lowerCaseRangeEnd++;
-                        addSortedRange(m_rangesUnicode, lowerCaseRangeBegin, lowerCaseRangeEnd);
-                    } else if (isUnicodeLower(unicodeCurr)) {
-                        UChar upperCaseRangeBegin = Unicode::toUpper(unicodeCurr);
-                        UChar upperCaseRangeEnd = upperCaseRangeBegin;
-                        while ((++unicodeCurr <= hi) && isUnicodeLower(unicodeCurr) && (Unicode::toUpper(unicodeCurr) == (upperCaseRangeEnd + 1)))
-                            upperCaseRangeEnd++;
-                        addSortedRange(m_rangesUnicode, upperCaseRangeBegin, upperCaseRangeEnd);
-                    } else
-                        ++unicodeCurr;
-                }
+        if (hi <= 0x7f)
+            return;
+
+        lo = std::max(lo, (UChar)0x80);
+        addSortedRange(m_rangesUnicode, lo, hi);
+        
+        if (!m_isCaseInsensitive)
+            return;
+
+        const UCS2CanonicalizationRange* info = rangeInfoFor(lo);
+        while (true) {
+            // Handle the range [lo .. end]
+            UChar end = std::min<UChar>(info->end, hi);
+
+            switch (info->type) {
+            case CanonicalizeUnique:
+                // Nothing to do - no canonical equivalents.
+                break;
+            case CanonicalizeSet: {
+                UChar ch;
+                for (const uint16_t* set = characterSetInfo[info->value]; (ch = *set); ++set)
+                    addSorted(m_matchesUnicode, ch);
+                break;
              }
              }
-        }
+            case CanonicalizeRangeLo:
+                addSortedRange(m_rangesUnicode, lo + info->value, end + info->value);
+                break;
+            case CanonicalizeRangeHi:
+                addSortedRange(m_rangesUnicode, lo - info->value, end - info->value);
+                break;
+            case CanonicalizeAlternatingAligned:
+                // Use addSortedRange since there is likely an abutting range to combine with.
+                if (lo & 1)
+                    addSortedRange(m_rangesUnicode, lo - 1, lo - 1);
+                if (!(end & 1))
+                    addSortedRange(m_rangesUnicode, end + 1, end + 1);
+                break;
+            case CanonicalizeAlternatingUnaligned:
+                // Use addSortedRange since there is likely an abutting range to combine with.
+                if (!(lo & 1))
+                    addSortedRange(m_rangesUnicode, lo - 1, lo - 1);
+                if (end & 1)
+                    addSortedRange(m_rangesUnicode, end + 1, end + 1);
+                break;
+            }
+
+            if (hi == end)
+                return;
+
+            ++info;
+            lo = info->begin;
+        };
+
      }
  
      }
  
-    CharacterClass* charClass()
+    PassOwnPtr<CharacterClass> charClass()
      {
      {
-        CharacterClass* characterClass = new CharacterClass(0);
-
-        characterClass->m_matches.append(m_matches);
-        characterClass->m_ranges.append(m_ranges);
-        characterClass->m_matchesUnicode.append(m_matchesUnicode);
-        characterClass->m_rangesUnicode.append(m_rangesUnicode);
+        OwnPtr<CharacterClass> characterClass = adoptPtr(new CharacterClass);
  
  
-        reset();
+        characterClass->m_matches.swap(m_matches);
+        characterClass->m_ranges.swap(m_ranges);
+        characterClass->m_matchesUnicode.swap(m_matchesUnicode);
+        characterClass->m_rangesUnicode.swap(m_rangesUnicode);
  
  
-        return characterClass;
+        return characterClass.release();
      }
  
  private:
      }
  
  private:
@@ -241,9 +274,10 @@ public:
          , m_characterClassConstructor(pattern.m_ignoreCase)
          , m_invertParentheticalAssertion(false)
      {
          , m_characterClassConstructor(pattern.m_ignoreCase)
          , m_invertParentheticalAssertion(false)
      {
-        m_pattern.m_body = new PatternDisjunction();
-        m_alternative = m_pattern.m_body->addNewAlternative();
-        m_pattern.m_disjunctions.append(m_pattern.m_body);
+        OwnPtr<PatternDisjunction> body = adoptPtr(new PatternDisjunction);
+        m_pattern.m_body = body.get();
+        m_alternative = body->addNewAlternative();
+        m_pattern.m_disjunctions.append(body.release());
      }
  
      ~YarrPatternConstructor()
      }
  
      ~YarrPatternConstructor()
@@ -255,9 +289,10 @@ public:
          m_pattern.reset();
          m_characterClassConstructor.reset();
  
          m_pattern.reset();
          m_characterClassConstructor.reset();
  
-        m_pattern.m_body = new PatternDisjunction();
-        m_alternative = m_pattern.m_body->addNewAlternative();
-        m_pattern.m_disjunctions.append(m_pattern.m_body);
+        OwnPtr<PatternDisjunction> body = adoptPtr(new PatternDisjunction);
+        m_pattern.m_body = body.get();
+        m_alternative = body->addNewAlternative();
+        m_pattern.m_disjunctions.append(body.release());
      }
      
      void assertionBOL()
      }
      
      void assertionBOL()
@@ -282,12 +317,21 @@ public:
      {
          // We handle case-insensitive checking of unicode characters which do have both
          // cases by handling them as if they were defined using a CharacterClass.
      {
          // We handle case-insensitive checking of unicode characters which do have both
          // cases by handling them as if they were defined using a CharacterClass.
-        if (m_pattern.m_ignoreCase && !isASCII(ch) && (Unicode::toUpper(ch) != Unicode::toLower(ch))) {
-            atomCharacterClassBegin();
-            atomCharacterClassAtom(ch);
-            atomCharacterClassEnd();
-        } else
+        if (!m_pattern.m_ignoreCase || isASCII(ch)) {
              m_alternative->m_terms.append(PatternTerm(ch));
              m_alternative->m_terms.append(PatternTerm(ch));
+            return;
+        }
+
+        const UCS2CanonicalizationRange* info = rangeInfoFor(ch);
+        if (info->type == CanonicalizeUnique) {
+            m_alternative->m_terms.append(PatternTerm(ch));
+            return;
+        }
+
+        m_characterClassConstructor.putUnicodeIgnoreCase(ch, info);
+        OwnPtr<CharacterClass> newCharacterClass = m_characterClassConstructor.charClass();
+        m_alternative->m_terms.append(PatternTerm(newCharacterClass.get(), false));
+        m_pattern.m_userCharacterClasses.append(newCharacterClass.release());
      }
  
      void atomBuiltInCharacterClass(BuiltInCharacterClassID classID, bool invert)
      }
  
      void atomBuiltInCharacterClass(BuiltInCharacterClassID classID, bool invert)
@@ -341,15 +385,15 @@ public:
              break;
          
          default:
              break;
          
          default:
-            ASSERT_NOT_REACHED();
+            RELEASE_ASSERT_NOT_REACHED();
          }
      }
  
      void atomCharacterClassEnd()
      {
          }
      }
  
      void atomCharacterClassEnd()
      {
-        CharacterClass* newCharacterClass = m_characterClassConstructor.charClass();
-        m_pattern.m_userCharacterClasses.append(newCharacterClass);
-        m_alternative->m_terms.append(PatternTerm(newCharacterClass, m_invertCharacterClass));
+        OwnPtr<CharacterClass> newCharacterClass = m_characterClassConstructor.charClass();
+        m_alternative->m_terms.append(PatternTerm(newCharacterClass.get(), m_invertCharacterClass));
+        m_pattern.m_userCharacterClasses.append(newCharacterClass.release());
      }
  
      void atomParenthesesSubpatternBegin(bool capture = true)
      }
  
      void atomParenthesesSubpatternBegin(bool capture = true)
@@ -358,19 +402,19 @@ public:
          if (capture)
              m_pattern.m_numSubpatterns++;
  
          if (capture)
              m_pattern.m_numSubpatterns++;
  
-        PatternDisjunction* parenthesesDisjunction = new PatternDisjunction(m_alternative);
-        m_pattern.m_disjunctions.append(parenthesesDisjunction);
-        m_alternative->m_terms.append(PatternTerm(PatternTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction, capture, false));
+        OwnPtr<PatternDisjunction> parenthesesDisjunction = adoptPtr(new PatternDisjunction(m_alternative));
+        m_alternative->m_terms.append(PatternTerm(PatternTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction.get(), capture, false));
          m_alternative = parenthesesDisjunction->addNewAlternative();
          m_alternative = parenthesesDisjunction->addNewAlternative();
+        m_pattern.m_disjunctions.append(parenthesesDisjunction.release());
      }
  
      void atomParentheticalAssertionBegin(bool invert = false)
      {
      }
  
      void atomParentheticalAssertionBegin(bool invert = false)
      {
-        PatternDisjunction* parenthesesDisjunction = new PatternDisjunction(m_alternative);
-        m_pattern.m_disjunctions.append(parenthesesDisjunction);
-        m_alternative->m_terms.append(PatternTerm(PatternTerm::TypeParentheticalAssertion, m_pattern.m_numSubpatterns + 1, parenthesesDisjunction, false, invert));
+        OwnPtr<PatternDisjunction> parenthesesDisjunction = adoptPtr(new PatternDisjunction(m_alternative));
+        m_alternative->m_terms.append(PatternTerm(PatternTerm::TypeParentheticalAssertion, m_pattern.m_numSubpatterns + 1, parenthesesDisjunction.get(), false, invert));
          m_alternative = parenthesesDisjunction->addNewAlternative();
          m_invertParentheticalAssertion = invert;
          m_alternative = parenthesesDisjunction->addNewAlternative();
          m_invertParentheticalAssertion = invert;
+        m_pattern.m_disjunctions.append(parenthesesDisjunction.release());
      }
  
      void atomParenthesesEnd()
      }
  
      void atomParenthesesEnd()
@@ -435,23 +479,27 @@ public:
      // skip alternatives with m_startsWithBOL set true.
      PatternDisjunction* copyDisjunction(PatternDisjunction* disjunction, bool filterStartsWithBOL = false)
      {
      // skip alternatives with m_startsWithBOL set true.
      PatternDisjunction* copyDisjunction(PatternDisjunction* disjunction, bool filterStartsWithBOL = false)
      {
-        PatternDisjunction* newDisjunction = 0;
+        OwnPtr<PatternDisjunction> newDisjunction;
          for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
          for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
-            PatternAlternative* alternative = disjunction->m_alternatives[alt];
+            PatternAlternative* alternative = disjunction->m_alternatives[alt].get();
              if (!filterStartsWithBOL || !alternative->m_startsWithBOL) {
                  if (!newDisjunction) {
              if (!filterStartsWithBOL || !alternative->m_startsWithBOL) {
                  if (!newDisjunction) {
-                    newDisjunction = new PatternDisjunction();
+                    newDisjunction = adoptPtr(new PatternDisjunction());
                      newDisjunction->m_parent = disjunction->m_parent;
                  }
                  PatternAlternative* newAlternative = newDisjunction->addNewAlternative();
                      newDisjunction->m_parent = disjunction->m_parent;
                  }
                  PatternAlternative* newAlternative = newDisjunction->addNewAlternative();
+                newAlternative->m_terms.reserveInitialCapacity(alternative->m_terms.size());
                  for (unsigned i = 0; i < alternative->m_terms.size(); ++i)
                      newAlternative->m_terms.append(copyTerm(alternative->m_terms[i], filterStartsWithBOL));
              }
          }
          
                  for (unsigned i = 0; i < alternative->m_terms.size(); ++i)
                      newAlternative->m_terms.append(copyTerm(alternative->m_terms[i], filterStartsWithBOL));
              }
          }
          
-        if (newDisjunction)
-            m_pattern.m_disjunctions.append(newDisjunction);
-        return newDisjunction;
+        if (!newDisjunction)
+            return 0;
+
+        PatternDisjunction* copiedDisjunction = newDisjunction.get();
+        m_pattern.m_disjunctions.append(newDisjunction.release());
+        return copiedDisjunction;
      }
      
      PatternTerm copyTerm(PatternTerm& term, bool filterStartsWithBOL = false)
      }
      
      PatternTerm copyTerm(PatternTerm& term, bool filterStartsWithBOL = false)
@@ -478,14 +526,23 @@ public:
          ASSERT(term.type > PatternTerm::TypeAssertionWordBoundary);
          ASSERT((term.quantityCount == 1) && (term.quantityType == QuantifierFixedCount));
  
          ASSERT(term.type > PatternTerm::TypeAssertionWordBoundary);
          ASSERT((term.quantityCount == 1) && (term.quantityType == QuantifierFixedCount));
  
-        // For any assertion with a zero minimum, not matching is valid and has no effect,
-        // remove it.  Otherwise, we need to match as least once, but there is no point
-        // matching more than once, so remove the quantifier.  It is not entirely clear
-        // from the spec whether or not this behavior is correct, but I believe this
-        // matches Firefox. :-/
          if (term.type == PatternTerm::TypeParentheticalAssertion) {
          if (term.type == PatternTerm::TypeParentheticalAssertion) {
+            // If an assertion is quantified with a minimum count of zero, it can simply be removed.
+            // This arises from the RepeatMatcher behaviour in the spec. Matching an assertion never
+            // results in any input being consumed, however the continuation passed to the assertion
+            // (called in steps, 8c and 9 of the RepeatMatcher definition, ES5.1 15.10.2.5) will
+            // reject all zero length matches (see step 2.1). A match from the continuation of the
+            // expression will still be accepted regardless (via steps 8a and 11) - the upshot of all
+            // this is that matches from the assertion are not required, and won't be accepted anyway,
+            // so no need to ever run it.
              if (!min)
                  m_alternative->removeLastTerm();
              if (!min)
                  m_alternative->removeLastTerm();
+            // We never need to run an assertion more than once. Subsequent interations will be run
+            // with the same start index (since assertions are non-capturing) and the same captures
+            // (per step 4 of RepeatMatcher in ES5.1 15.10.2.5), and as such will always produce the
+            // same result and captures. If the first match succeeds then the subsequent (min - 1)
+            // matches will too. Any additional optional matches will fail (on the same basis as the
+            // minimum zero quantified assertions, above), but this will still result in a match.
              return;
          }
  
              return;
          }
  
@@ -604,11 +661,13 @@ public:
          bool hasFixedSize = true;
  
          for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
          bool hasFixedSize = true;
  
          for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
-            PatternAlternative* alternative = disjunction->m_alternatives[alt];
+            PatternAlternative* alternative = disjunction->m_alternatives[alt].get();
              unsigned currentAlternativeCallFrameSize = setupAlternativeOffsets(alternative, initialCallFrameSize, initialInputPosition);
              unsigned currentAlternativeCallFrameSize = setupAlternativeOffsets(alternative, initialCallFrameSize, initialInputPosition);
-            minimumInputSize = min(minimumInputSize, alternative->m_minimumSize);
-            maximumCallFrameSize = max(maximumCallFrameSize, currentAlternativeCallFrameSize);
+            minimumInputSize = std::min(minimumInputSize, alternative->m_minimumSize);
+            maximumCallFrameSize = std::max(maximumCallFrameSize, currentAlternativeCallFrameSize);
              hasFixedSize &= alternative->m_hasFixedSize;
              hasFixedSize &= alternative->m_hasFixedSize;
+            if (alternative->m_minimumSize > INT_MAX)
+                m_pattern.m_containsUnsignedLengthPattern = true;
          }
          
          ASSERT(minimumInputSize != UINT_MAX);
          }
          
          ASSERT(minimumInputSize != UINT_MAX);
@@ -639,7 +698,7 @@ public:
          if (m_pattern.m_numSubpatterns)
              return;
  
          if (m_pattern.m_numSubpatterns)
              return;
  
-        Vector<PatternAlternative*>& alternatives = m_pattern.m_body->m_alternatives;
+        Vector<OwnPtr<PatternAlternative>>& alternatives = m_pattern.m_body->m_alternatives;
          for (size_t i = 0; i < alternatives.size(); ++i) {
              Vector<PatternTerm>& terms = alternatives[i]->m_terms;
              if (terms.size()) {
          for (size_t i = 0; i < alternatives.size(); ++i) {
              Vector<PatternTerm>& terms = alternatives[i]->m_terms;
              if (terms.size()) {
@@ -674,7 +733,7 @@ public:
          if (loopDisjunction) {
              // Move alternatives from loopDisjunction to disjunction
              for (unsigned alt = 0; alt < loopDisjunction->m_alternatives.size(); ++alt)
          if (loopDisjunction) {
              // Move alternatives from loopDisjunction to disjunction
              for (unsigned alt = 0; alt < loopDisjunction->m_alternatives.size(); ++alt)
-                disjunction->m_alternatives.append(loopDisjunction->m_alternatives[alt]);
+                disjunction->m_alternatives.append(loopDisjunction->m_alternatives[alt].release());
                  
              loopDisjunction->m_alternatives.clear();
          }
                  
              loopDisjunction->m_alternatives.clear();
          }
@@ -693,7 +752,7 @@ public:
              if (term.type == PatternTerm::TypeParenthesesSubpattern) {
                  PatternDisjunction* nestedDisjunction = term.parentheses.disjunction;
                  for (unsigned alt = 0; alt < nestedDisjunction->m_alternatives.size(); ++alt) {
              if (term.type == PatternTerm::TypeParenthesesSubpattern) {
                  PatternDisjunction* nestedDisjunction = term.parentheses.disjunction;
                  for (unsigned alt = 0; alt < nestedDisjunction->m_alternatives.size(); ++alt) {
-                    if (containsCapturingTerms(nestedDisjunction->m_alternatives[alt], 0, nestedDisjunction->m_alternatives[alt]->m_terms.size() - 1))
+                    if (containsCapturingTerms(nestedDisjunction->m_alternatives[alt].get(), 0, nestedDisjunction->m_alternatives[alt]->m_terms.size() - 1))
                          return true;
                  }
              }
                          return true;
                  }
              }
@@ -709,11 +768,11 @@ public:
      // beginning and the end of the match.
      void optimizeDotStarWrappedExpressions()
      {
      // beginning and the end of the match.
      void optimizeDotStarWrappedExpressions()
      {
-        Vector<PatternAlternative*>& alternatives = m_pattern.m_body->m_alternatives;
+        Vector<OwnPtr<PatternAlternative>>& alternatives = m_pattern.m_body->m_alternatives;
          if (alternatives.size() != 1)
              return;
  
          if (alternatives.size() != 1)
              return;
  
-        PatternAlternative* alternative = alternatives[0];
+        PatternAlternative* alternative = alternatives[0].get();
          Vector<PatternTerm>& terms = alternative->m_terms;
          if (terms.size() >= 3) {
              bool startsWithBOL = false;
          Vector<PatternTerm>& terms = alternative->m_terms;
          if (terms.size() >= 3) {
              bool startsWithBOL = false;
@@ -769,7 +828,7 @@ private:
      bool m_invertParentheticalAssertion;
  };
  
      bool m_invertParentheticalAssertion;
  };
  
-const char* YarrPattern::compile(const UString& patternString)
+const char* YarrPattern::compile(const String& patternString)
  {
      YarrPatternConstructor constructor(*this);
  
  {
      YarrPatternConstructor constructor(*this);
  
@@ -802,11 +861,12 @@ const char* YarrPattern::compile(const UString& patternString)
      return 0;
  }
  
      return 0;
  }
  
-YarrPattern::YarrPattern(const UString& pattern, bool ignoreCase, bool multiline, const char** error)
+YarrPattern::YarrPattern(const String& pattern, bool ignoreCase, bool multiline, const char** error)
      : m_ignoreCase(ignoreCase)
      , m_multiline(multiline)
      , m_containsBackreferences(false)
      , m_containsBOL(false)
      : m_ignoreCase(ignoreCase)
      , m_multiline(multiline)
      , m_containsBackreferences(false)
      , m_containsBOL(false)
+    , m_containsUnsignedLengthPattern(false)
      , m_numSubpatterns(0)
      , m_maxBackReference(0)
      , newlineCached(0)
      , m_numSubpatterns(0)
      , m_maxBackReference(0)
      , newlineCached(0)