X-Git-Url: https://git.saurik.com/apple/javascriptcore.git/blobdiff_plain/6fe7ccc865dc7d7541b93c5bcaf6368d2c98a174..ed1e77d3adeb83d26fd1dfb16dd84cabdcefd250:/runtime/RegExp.cpp?ds=sidebyside diff --git a/runtime/RegExp.cpp b/runtime/RegExp.cpp index b0f6760..af1f2fa 100644 --- a/runtime/RegExp.cpp +++ b/runtime/RegExp.cpp @@ -24,23 +24,25 @@ #include "RegExp.h" #include "Lexer.h" +#include "JSCInlines.h" #include "RegExpCache.h" -#include "yarr/Yarr.h" -#include "yarr/YarrJIT.h" -#include -#include -#include +#include "Yarr.h" +#include "YarrJIT.h" #include -#include - #define REGEXP_FUNC_TEST_DATA_GEN 0 +#if REGEXP_FUNC_TEST_DATA_GEN +#include +#include +#include +#endif + namespace JSC { -const ClassInfo RegExp::s_info = { "RegExp", 0, 0, 0, CREATE_METHOD_TABLE(RegExp) }; +const ClassInfo RegExp::s_info = { "RegExp", 0, 0, CREATE_METHOD_TABLE(RegExp) }; -RegExpFlags regExpFlags(const UString& string) +RegExpFlags regExpFlags(const String& string) { RegExpFlags flags = NoFlags; @@ -83,7 +85,7 @@ public: ~RegExpFunctionalTestCollector(); - void outputOneTest(RegExp*, UString, int, int*, int); + void outputOneTest(RegExp*, String, int, int*, int); void clearRegExp(RegExp* regExp) { if (regExp == m_lastRegExp) @@ -93,7 +95,7 @@ public: private: RegExpFunctionalTestCollector(); - void outputEscapedUString(const UString&, bool escapeSlash = false); + void outputEscapedString(const String&, bool escapeSlash = false); static RegExpFunctionalTestCollector* s_instance; FILE* m_file; @@ -111,12 +113,12 @@ RegExpFunctionalTestCollector* RegExpFunctionalTestCollector::get() return s_instance; } -void RegExpFunctionalTestCollector::outputOneTest(RegExp* regExp, UString s, int startOffset, int* ovector, int result) +void RegExpFunctionalTestCollector::outputOneTest(RegExp* regExp, const String& s, int startOffset, int* ovector, int result) { if ((!m_lastRegExp) || (m_lastRegExp != regExp)) { m_lastRegExp = regExp; fputc('/', m_file); - outputEscapedUString(regExp->pattern(), true); + outputEscapedString(regExp->pattern(), true); fputc('/', m_file); if (regExp->global()) fputc('g', m_file); @@ -128,7 +130,7 @@ void RegExpFunctionalTestCollector::outputOneTest(RegExp* regExp, UString s, int } fprintf(m_file, " \""); - outputEscapedUString(s); + outputEscapedString(s); fprintf(m_file, "\", %d, %d, (", startOffset, result); for (unsigned i = 0; i <= regExp->numSubpatterns(); i++) { int subpatternBegin = ovector[i * 2]; @@ -159,7 +161,7 @@ RegExpFunctionalTestCollector::~RegExpFunctionalTestCollector() s_instance = 0; } -void RegExpFunctionalTestCollector::outputEscapedUString(const UString& s, bool escapeSlash) +void RegExpFunctionalTestCollector::outputEscapedString(const String& s, bool escapeSlash) { int len = s.length(); @@ -217,23 +219,27 @@ void RegExpFunctionalTestCollector::outputEscapedUString(const UString& s, bool } #endif -RegExp::RegExp(JSGlobalData& globalData, const UString& patternString, RegExpFlags flags) - : JSCell(globalData, globalData.regExpStructure.get()) +RegExp::RegExp(VM& vm, const String& patternString, RegExpFlags flags) + : JSCell(vm, vm.regExpStructure.get()) , m_state(NotCompiled) , m_patternString(patternString) , m_flags(flags) , m_constructionError(0) , m_numSubpatterns(0) #if ENABLE(REGEXP_TRACING) + , m_rtMatchOnlyTotalSubjectStringLen(0.0) + , m_rtMatchTotalSubjectStringLen(0.0) + , m_rtMatchOnlyCallCount(0) + , m_rtMatchOnlyFoundCount(0) , m_rtMatchCallCount(0) , m_rtMatchFoundCount(0) #endif { } -void RegExp::finishCreation(JSGlobalData& globalData) +void RegExp::finishCreation(VM& vm) { - Base::finishCreation(globalData); + Base::finishCreation(vm); Yarr::YarrPattern pattern(m_patternString, ignoreCase(), multiline(), &m_constructionError); if (m_constructionError) m_state = ParseError; @@ -243,64 +249,60 @@ void RegExp::finishCreation(JSGlobalData& globalData) void RegExp::destroy(JSCell* cell) { - RegExp* thisObject = jsCast(cell); + RegExp* thisObject = static_cast(cell); #if REGEXP_FUNC_TEST_DATA_GEN RegExpFunctionalTestCollector::get()->clearRegExp(this); #endif thisObject->RegExp::~RegExp(); } -RegExp* RegExp::createWithoutCaching(JSGlobalData& globalData, const UString& patternString, RegExpFlags flags) +RegExp* RegExp::createWithoutCaching(VM& vm, const String& patternString, RegExpFlags flags) { - RegExp* regExp = new (NotNull, allocateCell(globalData.heap)) RegExp(globalData, patternString, flags); - regExp->finishCreation(globalData); + RegExp* regExp = new (NotNull, allocateCell(vm.heap)) RegExp(vm, patternString, flags); + regExp->finishCreation(vm); return regExp; } -RegExp* RegExp::create(JSGlobalData& globalData, const UString& patternString, RegExpFlags flags) +RegExp* RegExp::create(VM& vm, const String& patternString, RegExpFlags flags) { - return globalData.regExpCache()->lookupOrCreate(patternString, flags); + return vm.regExpCache()->lookupOrCreate(patternString, flags); } -void RegExp::compile(JSGlobalData* globalData, Yarr::YarrCharSize charSize) +void RegExp::compile(VM* vm, Yarr::YarrCharSize charSize) { Yarr::YarrPattern pattern(m_patternString, ignoreCase(), multiline(), &m_constructionError); if (m_constructionError) { - ASSERT_NOT_REACHED(); + RELEASE_ASSERT_NOT_REACHED(); +#if COMPILER_QUIRK(CONSIDERS_UNREACHABLE_CODE) m_state = ParseError; return; +#endif } ASSERT(m_numSubpatterns == pattern.m_numSubpatterns); if (!hasCode()) { ASSERT(m_state == NotCompiled); - globalData->regExpCache()->addToStrongCache(this); + vm->regExpCache()->addToStrongCache(this); m_state = ByteCode; } #if ENABLE(YARR_JIT) - if (!pattern.m_containsBackreferences && globalData->canUseRegExpJIT()) { - Yarr::jitCompile(pattern, charSize, globalData, m_regExpJITCode); -#if ENABLE(YARR_JIT_DEBUG) - if (!m_regExpJITCode.isFallBack()) - m_state = JITCode; - else - m_state = ByteCode; -#else + if (!pattern.m_containsBackreferences && !pattern.containsUnsignedLengthPattern() && vm->canUseRegExpJIT()) { + Yarr::jitCompile(pattern, charSize, vm, m_regExpJITCode); if (!m_regExpJITCode.isFallBack()) { m_state = JITCode; return; } -#endif } #else UNUSED_PARAM(charSize); #endif - m_regExpBytecode = Yarr::byteCompile(pattern, &globalData->m_regExpAllocator); + m_state = ByteCode; + m_regExpBytecode = Yarr::byteCompile(pattern, &vm->m_regExpAllocator); } -void RegExp::compileIfNecessary(JSGlobalData& globalData, Yarr::YarrCharSize charSize) +void RegExp::compileIfNecessary(VM& vm, Yarr::YarrCharSize charSize) { if (hasCode()) { #if ENABLE(YARR_JIT) @@ -315,17 +317,18 @@ void RegExp::compileIfNecessary(JSGlobalData& globalData, Yarr::YarrCharSize cha #endif } - compile(&globalData, charSize); + compile(&vm, charSize); } -int RegExp::match(JSGlobalData& globalData, const UString& s, unsigned startOffset, Vector& ovector) +int RegExp::match(VM& vm, const String& s, unsigned startOffset, Vector& ovector) { #if ENABLE(REGEXP_TRACING) m_rtMatchCallCount++; + m_rtMatchTotalSubjectStringLen += (double)(s.length() - startOffset); #endif ASSERT(m_state != ParseError); - compileIfNecessary(globalData, s.is8Bit() ? Yarr::Char8 : Yarr::Char16); + compileIfNecessary(vm, s.is8Bit() ? Yarr::Char8 : Yarr::Char16); int offsetVectorSize = (m_numSubpatterns + 1) * 2; ovector.resize(offsetVectorSize); @@ -350,8 +353,8 @@ int RegExp::match(JSGlobalData& globalData, const UString& s, unsigned startOffs // The offset vector handling needs to change as well. // Right now we convert a match where the offsets overflowed into match failure. // There are two places in WebCore that call the interpreter directly that need to - // have their offsets changed to int as well. They are platform/text/RegularExpression.cpp - // and inspector/ContentSearchUtils.cpp. + // have their offsets changed to int as well. They are yarr/RegularExpression.cpp + // and inspector/ContentSearchUtilities.cpp if (s.length() > INT_MAX) { bool overflowed = false; @@ -384,45 +387,41 @@ int RegExp::match(JSGlobalData& globalData, const UString& s, unsigned startOffs return result; } -void RegExp::compileMatchOnly(JSGlobalData* globalData, Yarr::YarrCharSize charSize) +void RegExp::compileMatchOnly(VM* vm, Yarr::YarrCharSize charSize) { Yarr::YarrPattern pattern(m_patternString, ignoreCase(), multiline(), &m_constructionError); if (m_constructionError) { - ASSERT_NOT_REACHED(); + RELEASE_ASSERT_NOT_REACHED(); +#if COMPILER_QUIRK(CONSIDERS_UNREACHABLE_CODE) m_state = ParseError; return; +#endif } ASSERT(m_numSubpatterns == pattern.m_numSubpatterns); if (!hasCode()) { ASSERT(m_state == NotCompiled); - globalData->regExpCache()->addToStrongCache(this); + vm->regExpCache()->addToStrongCache(this); m_state = ByteCode; } #if ENABLE(YARR_JIT) - if (!pattern.m_containsBackreferences && globalData->canUseRegExpJIT()) { - Yarr::jitCompile(pattern, charSize, globalData, m_regExpJITCode, Yarr::MatchOnly); -#if ENABLE(YARR_JIT_DEBUG) - if (!m_regExpJITCode.isFallBack()) - m_state = JITCode; - else - m_state = ByteCode; -#else + if (!pattern.m_containsBackreferences && !pattern.containsUnsignedLengthPattern() && vm->canUseRegExpJIT()) { + Yarr::jitCompile(pattern, charSize, vm, m_regExpJITCode, Yarr::MatchOnly); if (!m_regExpJITCode.isFallBack()) { m_state = JITCode; return; } -#endif } #else UNUSED_PARAM(charSize); #endif - m_regExpBytecode = Yarr::byteCompile(pattern, &globalData->m_regExpAllocator); + m_state = ByteCode; + m_regExpBytecode = Yarr::byteCompile(pattern, &vm->m_regExpAllocator); } -void RegExp::compileIfNecessaryMatchOnly(JSGlobalData& globalData, Yarr::YarrCharSize charSize) +void RegExp::compileIfNecessaryMatchOnly(VM& vm, Yarr::YarrCharSize charSize) { if (hasCode()) { #if ENABLE(YARR_JIT) @@ -437,17 +436,18 @@ void RegExp::compileIfNecessaryMatchOnly(JSGlobalData& globalData, Yarr::YarrCha #endif } - compileMatchOnly(&globalData, charSize); + compileMatchOnly(&vm, charSize); } -MatchResult RegExp::match(JSGlobalData& globalData, const UString& s, unsigned startOffset) +MatchResult RegExp::match(VM& vm, const String& s, unsigned startOffset) { #if ENABLE(REGEXP_TRACING) - m_rtMatchCallCount++; + m_rtMatchOnlyCallCount++; + m_rtMatchOnlyTotalSubjectStringLen += (double)(s.length() - startOffset); #endif ASSERT(m_state != ParseError); - compileIfNecessaryMatchOnly(globalData, s.is8Bit() ? Yarr::Char8 : Yarr::Char16); + compileIfNecessaryMatchOnly(vm, s.is8Bit() ? Yarr::Char8 : Yarr::Char16); #if ENABLE(YARR_JIT) if (m_state == JITCode) { @@ -456,7 +456,7 @@ MatchResult RegExp::match(JSGlobalData& globalData, const UString& s, unsigned s m_regExpJITCode.execute(s.characters16(), startOffset, s.length()); #if ENABLE(REGEXP_TRACING) if (!result) - m_rtMatchFoundCount++; + m_rtMatchOnlyFoundCount++; #endif return result; } @@ -474,7 +474,7 @@ MatchResult RegExp::match(JSGlobalData& globalData, const UString& s, unsigned s if (r >= 0) { #if ENABLE(REGEXP_TRACING) - m_rtMatchFoundCount++; + m_rtMatchOnlyFoundCount++; #endif return MatchResult(r, reinterpret_cast(offsetVector)[1]); } @@ -490,11 +490,11 @@ void RegExp::invalidateCode() #if ENABLE(YARR_JIT) m_regExpJITCode.clear(); #endif - m_regExpBytecode.clear(); + m_regExpBytecode = nullptr; } #if ENABLE(YARR_JIT_DEBUG) -void RegExp::matchCompareWithInterpreter(const UString& s, int startOffset, int* offsetVector, int jitResult) +void RegExp::matchCompareWithInterpreter(const String& s, int startOffset, int* offsetVector, int jitResult) { int offsetVectorSize = (m_numSubpatterns + 1) * 2; Vector interpreterOvector; @@ -520,24 +520,24 @@ void RegExp::matchCompareWithInterpreter(const UString& s, int startOffset, int* differences++; if (differences) { - dataLog("RegExp Discrepency for /%s/\n string input ", pattern().utf8().data()); + dataLogF("RegExp Discrepency for /%s/\n string input ", pattern().utf8().data()); unsigned segmentLen = s.length() - static_cast(startOffset); - dataLog((segmentLen < 150) ? "\"%s\"\n" : "\"%148s...\"\n", s.utf8().data() + startOffset); + dataLogF((segmentLen < 150) ? "\"%s\"\n" : "\"%148s...\"\n", s.utf8().data() + startOffset); if (jitResult != interpreterResult) { - dataLog(" JIT result = %d, blah interpreted result = %d\n", jitResult, interpreterResult); + dataLogF(" JIT result = %d, blah interpreted result = %d\n", jitResult, interpreterResult); differences--; } else { - dataLog(" Correct result = %d\n", jitResult); + dataLogF(" Correct result = %d\n", jitResult); } if (differences) { for (unsigned j = 2, i = 0; i < m_numSubpatterns; j +=2, i++) { if (offsetVector[j] != interpreterOffsetVector[j]) - dataLog(" JIT offset[%d] = %d, interpreted offset[%d] = %d\n", j, offsetVector[j], j, interpreterOffsetVector[j]); + dataLogF(" JIT offset[%d] = %d, interpreted offset[%d] = %d\n", j, offsetVector[j], j, interpreterOffsetVector[j]); if ((offsetVector[j] >= 0) && (offsetVector[j+1] != interpreterOffsetVector[j+1])) - dataLog(" JIT offset[%d] = %d, interpreted offset[%d] = %d\n", j+1, offsetVector[j+1], j+1, interpreterOffsetVector[j+1]); + dataLogF(" JIT offset[%d] = %d, interpreted offset[%d] = %d\n", j+1, offsetVector[j+1], j+1, interpreterOffsetVector[j+1]); } } } @@ -561,16 +561,32 @@ void RegExp::matchCompareWithInterpreter(const UString& s, int startOffset, int* Yarr::YarrCodeBlock& codeBlock = m_regExpJITCode; const size_t jitAddrSize = 20; - char jitAddr[jitAddrSize]; - if (m_state == JITCode) - snprintf(jitAddr, jitAddrSize, "fallback"); - else - snprintf(jitAddr, jitAddrSize, "0x%014lx", reinterpret_cast(codeBlock.getAddr())); + char jit8BitMatchOnlyAddr[jitAddrSize]; + char jit16BitMatchOnlyAddr[jitAddrSize]; + char jit8BitMatchAddr[jitAddrSize]; + char jit16BitMatchAddr[jitAddrSize]; + if (m_state == ByteCode) { + snprintf(jit8BitMatchOnlyAddr, jitAddrSize, "fallback "); + snprintf(jit16BitMatchOnlyAddr, jitAddrSize, "---- "); + snprintf(jit8BitMatchAddr, jitAddrSize, "fallback "); + snprintf(jit16BitMatchAddr, jitAddrSize, "---- "); + } else { + snprintf(jit8BitMatchOnlyAddr, jitAddrSize, "0x%014lx", reinterpret_cast(codeBlock.get8BitMatchOnlyAddr())); + snprintf(jit16BitMatchOnlyAddr, jitAddrSize, "0x%014lx", reinterpret_cast(codeBlock.get16BitMatchOnlyAddr())); + snprintf(jit8BitMatchAddr, jitAddrSize, "0x%014lx", reinterpret_cast(codeBlock.get8BitMatchAddr())); + snprintf(jit16BitMatchAddr, jitAddrSize, "0x%014lx", reinterpret_cast(codeBlock.get16BitMatchAddr())); + } #else - const char* jitAddr = "JIT Off"; + const char* jit8BitMatchOnlyAddr = "JIT Off"; + const char* jit16BitMatchOnlyAddr = ""; + const char* jit8BitMatchAddr = "JIT Off"; + const char* jit16BitMatchAddr = ""; #endif + unsigned averageMatchOnlyStringLen = (unsigned)(m_rtMatchOnlyTotalSubjectStringLen / m_rtMatchOnlyCallCount); + unsigned averageMatchStringLen = (unsigned)(m_rtMatchTotalSubjectStringLen / m_rtMatchCallCount); - printf("%-40.40s %16.16s %10d %10d\n", formattedPattern, jitAddr, m_rtMatchCallCount, m_rtMatchFoundCount); + printf("%-40.40s %16.16s %16.16s %10d %10d %10u\n", formattedPattern, jit8BitMatchOnlyAddr, jit16BitMatchOnlyAddr, m_rtMatchOnlyCallCount, m_rtMatchOnlyFoundCount, averageMatchOnlyStringLen); + printf(" %16.16s %16.16s %10d %10d %10u\n", jit8BitMatchAddr, jit16BitMatchAddr, m_rtMatchCallCount, m_rtMatchFoundCount, averageMatchStringLen); } #endif