/*
- * Copyright (C) 2009 Apple Inc. All rights reserved.
+ * Copyright (C) 2009, 2013 Apple Inc. All rights reserved.
* Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
*
* Redistribution and use in source and binary forms, with or without
}
// Add multiple matches, if necessary.
- UCS2CanonicalizationRange* info = rangeInfoFor(ch);
+ const UCS2CanonicalizationRange* info = rangeInfoFor(ch);
if (info->type == CanonicalizeUnique)
addSorted(m_matchesUnicode, ch);
else
putUnicodeIgnoreCase(ch, info);
}
- void putUnicodeIgnoreCase(UChar ch, UCS2CanonicalizationRange* info)
+ void putUnicodeIgnoreCase(UChar ch, const UCS2CanonicalizationRange* info)
{
ASSERT(m_isCaseInsensitive);
ASSERT(ch > 0x7f);
ASSERT(ch >= info->begin && ch <= info->end);
ASSERT(info->type != CanonicalizeUnique);
if (info->type == CanonicalizeSet) {
- for (uint16_t* set = characterSetInfo[info->value]; (ch = *set); ++set)
+ for (const uint16_t* set = characterSetInfo[info->value]; (ch = *set); ++set)
addSorted(m_matchesUnicode, ch);
} else {
addSorted(m_matchesUnicode, ch);
if (!m_isCaseInsensitive)
return;
- UCS2CanonicalizationRange* info = rangeInfoFor(lo);
+ const UCS2CanonicalizationRange* info = rangeInfoFor(lo);
while (true) {
// Handle the range [lo .. end]
UChar end = std::min<UChar>(info->end, hi);
break;
case CanonicalizeSet: {
UChar ch;
- for (uint16_t* set = characterSetInfo[info->value]; (ch = *set); ++set)
+ for (const uint16_t* set = characterSetInfo[info->value]; (ch = *set); ++set)
addSorted(m_matchesUnicode, ch);
break;
}
}
- CharacterClass* charClass()
+ PassOwnPtr<CharacterClass> charClass()
{
- CharacterClass* characterClass = new CharacterClass(0);
+ OwnPtr<CharacterClass> characterClass = adoptPtr(new CharacterClass);
characterClass->m_matches.swap(m_matches);
characterClass->m_ranges.swap(m_ranges);
characterClass->m_matchesUnicode.swap(m_matchesUnicode);
characterClass->m_rangesUnicode.swap(m_rangesUnicode);
- return characterClass;
+ return characterClass.release();
}
private:
, m_characterClassConstructor(pattern.m_ignoreCase)
, m_invertParentheticalAssertion(false)
{
- m_pattern.m_body = new PatternDisjunction();
- m_alternative = m_pattern.m_body->addNewAlternative();
- m_pattern.m_disjunctions.append(m_pattern.m_body);
+ OwnPtr<PatternDisjunction> body = adoptPtr(new PatternDisjunction);
+ m_pattern.m_body = body.get();
+ m_alternative = body->addNewAlternative();
+ m_pattern.m_disjunctions.append(body.release());
}
~YarrPatternConstructor()
m_pattern.reset();
m_characterClassConstructor.reset();
- m_pattern.m_body = new PatternDisjunction();
- m_alternative = m_pattern.m_body->addNewAlternative();
- m_pattern.m_disjunctions.append(m_pattern.m_body);
+ OwnPtr<PatternDisjunction> body = adoptPtr(new PatternDisjunction);
+ m_pattern.m_body = body.get();
+ m_alternative = body->addNewAlternative();
+ m_pattern.m_disjunctions.append(body.release());
}
void assertionBOL()
return;
}
- UCS2CanonicalizationRange* info = rangeInfoFor(ch);
+ const UCS2CanonicalizationRange* info = rangeInfoFor(ch);
if (info->type == CanonicalizeUnique) {
m_alternative->m_terms.append(PatternTerm(ch));
return;
}
m_characterClassConstructor.putUnicodeIgnoreCase(ch, info);
- CharacterClass* newCharacterClass = m_characterClassConstructor.charClass();
- m_pattern.m_userCharacterClasses.append(newCharacterClass);
- m_alternative->m_terms.append(PatternTerm(newCharacterClass, false));
+ OwnPtr<CharacterClass> newCharacterClass = m_characterClassConstructor.charClass();
+ m_alternative->m_terms.append(PatternTerm(newCharacterClass.get(), false));
+ m_pattern.m_userCharacterClasses.append(newCharacterClass.release());
}
void atomBuiltInCharacterClass(BuiltInCharacterClassID classID, bool invert)
break;
default:
- ASSERT_NOT_REACHED();
+ RELEASE_ASSERT_NOT_REACHED();
}
}
void atomCharacterClassEnd()
{
- CharacterClass* newCharacterClass = m_characterClassConstructor.charClass();
- m_pattern.m_userCharacterClasses.append(newCharacterClass);
- m_alternative->m_terms.append(PatternTerm(newCharacterClass, m_invertCharacterClass));
+ OwnPtr<CharacterClass> newCharacterClass = m_characterClassConstructor.charClass();
+ m_alternative->m_terms.append(PatternTerm(newCharacterClass.get(), m_invertCharacterClass));
+ m_pattern.m_userCharacterClasses.append(newCharacterClass.release());
}
void atomParenthesesSubpatternBegin(bool capture = true)
if (capture)
m_pattern.m_numSubpatterns++;
- PatternDisjunction* parenthesesDisjunction = new PatternDisjunction(m_alternative);
- m_pattern.m_disjunctions.append(parenthesesDisjunction);
- m_alternative->m_terms.append(PatternTerm(PatternTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction, capture, false));
+ OwnPtr<PatternDisjunction> parenthesesDisjunction = adoptPtr(new PatternDisjunction(m_alternative));
+ m_alternative->m_terms.append(PatternTerm(PatternTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction.get(), capture, false));
m_alternative = parenthesesDisjunction->addNewAlternative();
+ m_pattern.m_disjunctions.append(parenthesesDisjunction.release());
}
void atomParentheticalAssertionBegin(bool invert = false)
{
- PatternDisjunction* parenthesesDisjunction = new PatternDisjunction(m_alternative);
- m_pattern.m_disjunctions.append(parenthesesDisjunction);
- m_alternative->m_terms.append(PatternTerm(PatternTerm::TypeParentheticalAssertion, m_pattern.m_numSubpatterns + 1, parenthesesDisjunction, false, invert));
+ OwnPtr<PatternDisjunction> parenthesesDisjunction = adoptPtr(new PatternDisjunction(m_alternative));
+ m_alternative->m_terms.append(PatternTerm(PatternTerm::TypeParentheticalAssertion, m_pattern.m_numSubpatterns + 1, parenthesesDisjunction.get(), false, invert));
m_alternative = parenthesesDisjunction->addNewAlternative();
m_invertParentheticalAssertion = invert;
+ m_pattern.m_disjunctions.append(parenthesesDisjunction.release());
}
void atomParenthesesEnd()
// skip alternatives with m_startsWithBOL set true.
PatternDisjunction* copyDisjunction(PatternDisjunction* disjunction, bool filterStartsWithBOL = false)
{
- PatternDisjunction* newDisjunction = 0;
+ OwnPtr<PatternDisjunction> newDisjunction;
for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
- PatternAlternative* alternative = disjunction->m_alternatives[alt];
+ PatternAlternative* alternative = disjunction->m_alternatives[alt].get();
if (!filterStartsWithBOL || !alternative->m_startsWithBOL) {
if (!newDisjunction) {
- newDisjunction = new PatternDisjunction();
+ newDisjunction = adoptPtr(new PatternDisjunction());
newDisjunction->m_parent = disjunction->m_parent;
}
PatternAlternative* newAlternative = newDisjunction->addNewAlternative();
+ newAlternative->m_terms.reserveInitialCapacity(alternative->m_terms.size());
for (unsigned i = 0; i < alternative->m_terms.size(); ++i)
newAlternative->m_terms.append(copyTerm(alternative->m_terms[i], filterStartsWithBOL));
}
}
- if (newDisjunction)
- m_pattern.m_disjunctions.append(newDisjunction);
- return newDisjunction;
+ if (!newDisjunction)
+ return 0;
+
+ PatternDisjunction* copiedDisjunction = newDisjunction.get();
+ m_pattern.m_disjunctions.append(newDisjunction.release());
+ return copiedDisjunction;
}
PatternTerm copyTerm(PatternTerm& term, bool filterStartsWithBOL = false)
bool hasFixedSize = true;
for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
- PatternAlternative* alternative = disjunction->m_alternatives[alt];
+ PatternAlternative* alternative = disjunction->m_alternatives[alt].get();
unsigned currentAlternativeCallFrameSize = setupAlternativeOffsets(alternative, initialCallFrameSize, initialInputPosition);
- minimumInputSize = min(minimumInputSize, alternative->m_minimumSize);
- maximumCallFrameSize = max(maximumCallFrameSize, currentAlternativeCallFrameSize);
+ minimumInputSize = std::min(minimumInputSize, alternative->m_minimumSize);
+ maximumCallFrameSize = std::max(maximumCallFrameSize, currentAlternativeCallFrameSize);
hasFixedSize &= alternative->m_hasFixedSize;
+ if (alternative->m_minimumSize > INT_MAX)
+ m_pattern.m_containsUnsignedLengthPattern = true;
}
ASSERT(minimumInputSize != UINT_MAX);
if (m_pattern.m_numSubpatterns)
return;
- Vector<PatternAlternative*>& alternatives = m_pattern.m_body->m_alternatives;
+ Vector<OwnPtr<PatternAlternative>>& alternatives = m_pattern.m_body->m_alternatives;
for (size_t i = 0; i < alternatives.size(); ++i) {
Vector<PatternTerm>& terms = alternatives[i]->m_terms;
if (terms.size()) {
if (loopDisjunction) {
// Move alternatives from loopDisjunction to disjunction
for (unsigned alt = 0; alt < loopDisjunction->m_alternatives.size(); ++alt)
- disjunction->m_alternatives.append(loopDisjunction->m_alternatives[alt]);
+ disjunction->m_alternatives.append(loopDisjunction->m_alternatives[alt].release());
loopDisjunction->m_alternatives.clear();
}
if (term.type == PatternTerm::TypeParenthesesSubpattern) {
PatternDisjunction* nestedDisjunction = term.parentheses.disjunction;
for (unsigned alt = 0; alt < nestedDisjunction->m_alternatives.size(); ++alt) {
- if (containsCapturingTerms(nestedDisjunction->m_alternatives[alt], 0, nestedDisjunction->m_alternatives[alt]->m_terms.size() - 1))
+ if (containsCapturingTerms(nestedDisjunction->m_alternatives[alt].get(), 0, nestedDisjunction->m_alternatives[alt]->m_terms.size() - 1))
return true;
}
}
// beginning and the end of the match.
void optimizeDotStarWrappedExpressions()
{
- Vector<PatternAlternative*>& alternatives = m_pattern.m_body->m_alternatives;
+ Vector<OwnPtr<PatternAlternative>>& alternatives = m_pattern.m_body->m_alternatives;
if (alternatives.size() != 1)
return;
- PatternAlternative* alternative = alternatives[0];
+ PatternAlternative* alternative = alternatives[0].get();
Vector<PatternTerm>& terms = alternative->m_terms;
if (terms.size() >= 3) {
bool startsWithBOL = false;
bool m_invertParentheticalAssertion;
};
-const char* YarrPattern::compile(const UString& patternString)
+const char* YarrPattern::compile(const String& patternString)
{
YarrPatternConstructor constructor(*this);
return 0;
}
-YarrPattern::YarrPattern(const UString& pattern, bool ignoreCase, bool multiline, const char** error)
+YarrPattern::YarrPattern(const String& pattern, bool ignoreCase, bool multiline, const char** error)
: m_ignoreCase(ignoreCase)
, m_multiline(multiline)
, m_containsBackreferences(false)
, m_containsBOL(false)
+ , m_containsUnsignedLengthPattern(false)
, m_numSubpatterns(0)
, m_maxBackReference(0)
, newlineCached(0)