X-Git-Url: https://git.saurik.com/apple/javascriptcore.git/blobdiff_plain/14957cd040308e3eeec43d26bae5d76da13fcd85..ed1e77d3adeb83d26fd1dfb16dd84cabdcefd250:/runtime/RegExp.cpp?ds=inline diff --git a/runtime/RegExp.cpp b/runtime/RegExp.cpp index 9211a90..af1f2fa 100644 --- a/runtime/RegExp.cpp +++ b/runtime/RegExp.cpp @@ -24,25 +24,30 @@ #include "RegExp.h" #include "Lexer.h" +#include "JSCInlines.h" #include "RegExpCache.h" -#include "yarr/Yarr.h" -#include "yarr/YarrJIT.h" +#include "Yarr.h" +#include "YarrJIT.h" +#include + +#define REGEXP_FUNC_TEST_DATA_GEN 0 + +#if REGEXP_FUNC_TEST_DATA_GEN #include #include #include -#include -#include +#endif namespace JSC { -const ClassInfo RegExp::s_info = { "RegExp", 0, 0, 0 }; +const ClassInfo RegExp::s_info = { "RegExp", 0, 0, CREATE_METHOD_TABLE(RegExp) }; -RegExpFlags regExpFlags(const UString& string) +RegExpFlags regExpFlags(const String& string) { RegExpFlags flags = NoFlags; for (unsigned i = 0; i < string.length(); ++i) { - switch (string.characters()[i]) { + switch (string[i]) { case 'g': if (flags & FlagGlobal) return InvalidFlags; @@ -68,26 +73,173 @@ RegExpFlags regExpFlags(const UString& string) return flags; } - -struct RegExpRepresentation { -#if ENABLE(YARR_JIT) - Yarr::YarrCodeBlock m_regExpJITCode; -#endif - OwnPtr m_regExpBytecode; + +#if REGEXP_FUNC_TEST_DATA_GEN +class RegExpFunctionalTestCollector { + // This class is not thread safe. +protected: + static const char* const s_fileName; + +public: + static RegExpFunctionalTestCollector* get(); + + ~RegExpFunctionalTestCollector(); + + void outputOneTest(RegExp*, String, int, int*, int); + void clearRegExp(RegExp* regExp) + { + if (regExp == m_lastRegExp) + m_lastRegExp = 0; + } + +private: + RegExpFunctionalTestCollector(); + + void outputEscapedString(const String&, bool escapeSlash = false); + + static RegExpFunctionalTestCollector* s_instance; + FILE* m_file; + RegExp* m_lastRegExp; }; -RegExp::RegExp(JSGlobalData* globalData, const UString& patternString, RegExpFlags flags) - : JSCell(*globalData, globalData->regExpStructure.get()) +const char* const RegExpFunctionalTestCollector::s_fileName = "/tmp/RegExpTestsData"; +RegExpFunctionalTestCollector* RegExpFunctionalTestCollector::s_instance = 0; + +RegExpFunctionalTestCollector* RegExpFunctionalTestCollector::get() +{ + if (!s_instance) + s_instance = new RegExpFunctionalTestCollector(); + + return s_instance; +} + +void RegExpFunctionalTestCollector::outputOneTest(RegExp* regExp, const String& s, int startOffset, int* ovector, int result) +{ + if ((!m_lastRegExp) || (m_lastRegExp != regExp)) { + m_lastRegExp = regExp; + fputc('/', m_file); + outputEscapedString(regExp->pattern(), true); + fputc('/', m_file); + if (regExp->global()) + fputc('g', m_file); + if (regExp->ignoreCase()) + fputc('i', m_file); + if (regExp->multiline()) + fputc('m', m_file); + fprintf(m_file, "\n"); + } + + fprintf(m_file, " \""); + outputEscapedString(s); + fprintf(m_file, "\", %d, %d, (", startOffset, result); + for (unsigned i = 0; i <= regExp->numSubpatterns(); i++) { + int subpatternBegin = ovector[i * 2]; + int subpatternEnd = ovector[i * 2 + 1]; + if (subpatternBegin == -1) + subpatternEnd = -1; + fprintf(m_file, "%d, %d", subpatternBegin, subpatternEnd); + if (i < regExp->numSubpatterns()) + fputs(", ", m_file); + } + + fprintf(m_file, ")\n"); + fflush(m_file); +} + +RegExpFunctionalTestCollector::RegExpFunctionalTestCollector() +{ + m_file = fopen(s_fileName, "r+"); + if (!m_file) + m_file = fopen(s_fileName, "w+"); + + fseek(m_file, 0L, SEEK_END); +} + +RegExpFunctionalTestCollector::~RegExpFunctionalTestCollector() +{ + fclose(m_file); + s_instance = 0; +} + +void RegExpFunctionalTestCollector::outputEscapedString(const String& s, bool escapeSlash) +{ + int len = s.length(); + + for (int i = 0; i < len; ++i) { + UChar c = s[i]; + + switch (c) { + case '\0': + fputs("\\0", m_file); + break; + case '\a': + fputs("\\a", m_file); + break; + case '\b': + fputs("\\b", m_file); + break; + case '\f': + fputs("\\f", m_file); + break; + case '\n': + fputs("\\n", m_file); + break; + case '\r': + fputs("\\r", m_file); + break; + case '\t': + fputs("\\t", m_file); + break; + case '\v': + fputs("\\v", m_file); + break; + case '/': + if (escapeSlash) + fputs("\\/", m_file); + else + fputs("/", m_file); + break; + case '\"': + fputs("\\\"", m_file); + break; + case '\\': + fputs("\\\\", m_file); + break; + case '\?': + fputs("\?", m_file); + break; + default: + if (c > 0x7f) + fprintf(m_file, "\\u%04x", c); + else + fputc(c, m_file); + break; + } + } +} +#endif + +RegExp::RegExp(VM& vm, const String& patternString, RegExpFlags flags) + : JSCell(vm, vm.regExpStructure.get()) , m_state(NotCompiled) , m_patternString(patternString) , m_flags(flags) , m_constructionError(0) , m_numSubpatterns(0) #if ENABLE(REGEXP_TRACING) + , m_rtMatchOnlyTotalSubjectStringLen(0.0) + , m_rtMatchTotalSubjectStringLen(0.0) + , m_rtMatchOnlyCallCount(0) + , m_rtMatchOnlyFoundCount(0) , m_rtMatchCallCount(0) , m_rtMatchFoundCount(0) #endif { +} + +void RegExp::finishCreation(VM& vm) +{ + Base::finishCreation(vm); Yarr::YarrPattern pattern(m_patternString, ignoreCase(), multiline(), &m_constructionError); if (m_constructionError) m_state = ParseError; @@ -95,119 +247,254 @@ RegExp::RegExp(JSGlobalData* globalData, const UString& patternString, RegExpFla m_numSubpatterns = pattern.m_numSubpatterns; } -RegExp::~RegExp() +void RegExp::destroy(JSCell* cell) +{ + RegExp* thisObject = static_cast(cell); +#if REGEXP_FUNC_TEST_DATA_GEN + RegExpFunctionalTestCollector::get()->clearRegExp(this); +#endif + thisObject->RegExp::~RegExp(); +} + +RegExp* RegExp::createWithoutCaching(VM& vm, const String& patternString, RegExpFlags flags) { + RegExp* regExp = new (NotNull, allocateCell(vm.heap)) RegExp(vm, patternString, flags); + regExp->finishCreation(vm); + return regExp; } -RegExp* RegExp::create(JSGlobalData* globalData, const UString& patternString, RegExpFlags flags) +RegExp* RegExp::create(VM& vm, const String& patternString, RegExpFlags flags) { - return globalData->regExpCache()->lookupOrCreate(patternString, flags); + return vm.regExpCache()->lookupOrCreate(patternString, flags); } -void RegExp::compile(JSGlobalData* globalData) +void RegExp::compile(VM* vm, Yarr::YarrCharSize charSize) { - ASSERT(m_state == NotCompiled); - m_representation = adoptPtr(new RegExpRepresentation); - m_state = Compiling; Yarr::YarrPattern pattern(m_patternString, ignoreCase(), multiline(), &m_constructionError); if (m_constructionError) { - ASSERT_NOT_REACHED(); + RELEASE_ASSERT_NOT_REACHED(); +#if COMPILER_QUIRK(CONSIDERS_UNREACHABLE_CODE) m_state = ParseError; return; +#endif } - - globalData->regExpCache()->addToStrongCache(this); - ASSERT(m_numSubpatterns == pattern.m_numSubpatterns); + if (!hasCode()) { + ASSERT(m_state == NotCompiled); + vm->regExpCache()->addToStrongCache(this); + m_state = ByteCode; + } + #if ENABLE(YARR_JIT) - if (!pattern.m_containsBackreferences && globalData->canUseJIT()) { - Yarr::jitCompile(pattern, globalData, m_representation->m_regExpJITCode); -#if ENABLE(YARR_JIT_DEBUG) - if (!m_representation->m_regExpJITCode.isFallBack()) - m_state = JITCode; - else - m_state = ByteCode; -#else - if (!m_representation->m_regExpJITCode.isFallBack()) { + if (!pattern.m_containsBackreferences && !pattern.containsUnsignedLengthPattern() && vm->canUseRegExpJIT()) { + Yarr::jitCompile(pattern, charSize, vm, m_regExpJITCode); + if (!m_regExpJITCode.isFallBack()) { m_state = JITCode; return; } -#endif } +#else + UNUSED_PARAM(charSize); #endif - m_representation->m_regExpBytecode = Yarr::byteCompile(pattern, &globalData->m_regExpAllocator); - m_state = ByteCode; + m_regExpBytecode = Yarr::byteCompile(pattern, &vm->m_regExpAllocator); } -int RegExp::match(JSGlobalData& globalData, const UString& s, int startOffset, Vector* ovector) +void RegExp::compileIfNecessary(VM& vm, Yarr::YarrCharSize charSize) { - if (startOffset < 0) - startOffset = 0; + if (hasCode()) { +#if ENABLE(YARR_JIT) + if (m_state != JITCode) + return; + if ((charSize == Yarr::Char8) && (m_regExpJITCode.has8BitCode())) + return; + if ((charSize == Yarr::Char16) && (m_regExpJITCode.has16BitCode())) + return; +#else + return; +#endif + } + + compile(&vm, charSize); +} +int RegExp::match(VM& vm, const String& s, unsigned startOffset, Vector& ovector) +{ #if ENABLE(REGEXP_TRACING) m_rtMatchCallCount++; + m_rtMatchTotalSubjectStringLen += (double)(s.length() - startOffset); #endif - if (static_cast(startOffset) > s.length() || s.isNull()) - return -1; + ASSERT(m_state != ParseError); + compileIfNecessary(vm, s.is8Bit() ? Yarr::Char8 : Yarr::Char16); - if (m_state != ParseError) { - compileIfNecessary(globalData); + int offsetVectorSize = (m_numSubpatterns + 1) * 2; + ovector.resize(offsetVectorSize); + int* offsetVector = ovector.data(); - int offsetVectorSize = (m_numSubpatterns + 1) * 2; - int* offsetVector; - Vector nonReturnedOvector; - if (ovector) { - ovector->resize(offsetVectorSize); - offsetVector = ovector->data(); - } else { - nonReturnedOvector.resize(offsetVectorSize); - offsetVector = nonReturnedOvector.data(); + int result; +#if ENABLE(YARR_JIT) + if (m_state == JITCode) { + if (s.is8Bit()) + result = m_regExpJITCode.execute(s.characters8(), startOffset, s.length(), offsetVector).start; + else + result = m_regExpJITCode.execute(s.characters16(), startOffset, s.length(), offsetVector).start; +#if ENABLE(YARR_JIT_DEBUG) + matchCompareWithInterpreter(s, startOffset, offsetVector, result); +#endif + } else +#endif + result = Yarr::interpret(m_regExpBytecode.get(), s, startOffset, reinterpret_cast(offsetVector)); + + // FIXME: The YARR engine should handle unsigned or size_t length matches. + // The YARR Interpreter is "unsigned" clean, while the YARR JIT hasn't been addressed. + // The offset vector handling needs to change as well. + // Right now we convert a match where the offsets overflowed into match failure. + // There are two places in WebCore that call the interpreter directly that need to + // have their offsets changed to int as well. They are yarr/RegularExpression.cpp + // and inspector/ContentSearchUtilities.cpp + if (s.length() > INT_MAX) { + bool overflowed = false; + + if (result < -1) + overflowed = true; + + for (unsigned i = 0; i <= m_numSubpatterns; i++) { + if ((offsetVector[i*2] < -1) || ((offsetVector[i*2] >= 0) && (offsetVector[i*2+1] < -1))) { + overflowed = true; + offsetVector[i*2] = -1; + offsetVector[i*2+1] = -1; + } } - ASSERT(offsetVector); - // Initialize offsetVector with the return value (index 0) and the - // first subpattern start indicies (even index values) set to -1. - // No need to init the subpattern end indicies. - for (unsigned j = 0, i = 0; i < m_numSubpatterns + 1; j += 2, i++) - offsetVector[j] = -1; + if (overflowed) + result = -1; + } + + ASSERT(result >= -1); + +#if REGEXP_FUNC_TEST_DATA_GEN + RegExpFunctionalTestCollector::get()->outputOneTest(this, s, startOffset, offsetVector, result); +#endif + +#if ENABLE(REGEXP_TRACING) + if (result != -1) + m_rtMatchFoundCount++; +#endif + + return result; +} + +void RegExp::compileMatchOnly(VM* vm, Yarr::YarrCharSize charSize) +{ + Yarr::YarrPattern pattern(m_patternString, ignoreCase(), multiline(), &m_constructionError); + if (m_constructionError) { + RELEASE_ASSERT_NOT_REACHED(); +#if COMPILER_QUIRK(CONSIDERS_UNREACHABLE_CODE) + m_state = ParseError; + return; +#endif + } + ASSERT(m_numSubpatterns == pattern.m_numSubpatterns); + + if (!hasCode()) { + ASSERT(m_state == NotCompiled); + vm->regExpCache()->addToStrongCache(this); + m_state = ByteCode; + } - int result; #if ENABLE(YARR_JIT) - if (m_state == JITCode) { - result = Yarr::execute(m_representation->m_regExpJITCode, s.characters(), startOffset, s.length(), offsetVector); -#if ENABLE(YARR_JIT_DEBUG) - matchCompareWithInterpreter(s, startOffset, offsetVector, result); + if (!pattern.m_containsBackreferences && !pattern.containsUnsignedLengthPattern() && vm->canUseRegExpJIT()) { + Yarr::jitCompile(pattern, charSize, vm, m_regExpJITCode, Yarr::MatchOnly); + if (!m_regExpJITCode.isFallBack()) { + m_state = JITCode; + return; + } + } +#else + UNUSED_PARAM(charSize); #endif - } else + + m_state = ByteCode; + m_regExpBytecode = Yarr::byteCompile(pattern, &vm->m_regExpAllocator); +} + +void RegExp::compileIfNecessaryMatchOnly(VM& vm, Yarr::YarrCharSize charSize) +{ + if (hasCode()) { +#if ENABLE(YARR_JIT) + if (m_state != JITCode) + return; + if ((charSize == Yarr::Char8) && (m_regExpJITCode.has8BitCodeMatchOnly())) + return; + if ((charSize == Yarr::Char16) && (m_regExpJITCode.has16BitCodeMatchOnly())) + return; +#else + return; #endif - result = Yarr::interpret(m_representation->m_regExpBytecode.get(), s.characters(), startOffset, s.length(), offsetVector); - ASSERT(result >= -1); + } + + compileMatchOnly(&vm, charSize); +} +MatchResult RegExp::match(VM& vm, const String& s, unsigned startOffset) +{ #if ENABLE(REGEXP_TRACING) - if (result != -1) - m_rtMatchFoundCount++; + m_rtMatchOnlyCallCount++; + m_rtMatchOnlyTotalSubjectStringLen += (double)(s.length() - startOffset); #endif + ASSERT(m_state != ParseError); + compileIfNecessaryMatchOnly(vm, s.is8Bit() ? Yarr::Char8 : Yarr::Char16); + +#if ENABLE(YARR_JIT) + if (m_state == JITCode) { + MatchResult result = s.is8Bit() ? + m_regExpJITCode.execute(s.characters8(), startOffset, s.length()) : + m_regExpJITCode.execute(s.characters16(), startOffset, s.length()); +#if ENABLE(REGEXP_TRACING) + if (!result) + m_rtMatchOnlyFoundCount++; +#endif return result; } +#endif - return -1; + int offsetVectorSize = (m_numSubpatterns + 1) * 2; + int* offsetVector; + Vector nonReturnedOvector; + nonReturnedOvector.resize(offsetVectorSize); + offsetVector = nonReturnedOvector.data(); + int r = Yarr::interpret(m_regExpBytecode.get(), s, startOffset, reinterpret_cast(offsetVector)); +#if REGEXP_FUNC_TEST_DATA_GEN + RegExpFunctionalTestCollector::get()->outputOneTest(this, s, startOffset, offsetVector, result); +#endif + + if (r >= 0) { +#if ENABLE(REGEXP_TRACING) + m_rtMatchOnlyFoundCount++; +#endif + return MatchResult(r, reinterpret_cast(offsetVector)[1]); + } + + return MatchResult::failed(); } void RegExp::invalidateCode() { - if (!m_representation || m_state == Compiling) + if (!hasCode()) return; m_state = NotCompiled; - m_representation.clear(); +#if ENABLE(YARR_JIT) + m_regExpJITCode.clear(); +#endif + m_regExpBytecode = nullptr; } #if ENABLE(YARR_JIT_DEBUG) -void RegExp::matchCompareWithInterpreter(const UString& s, int startOffset, int* offsetVector, int jitResult) +void RegExp::matchCompareWithInterpreter(const String& s, int startOffset, int* offsetVector, int jitResult) { int offsetVectorSize = (m_numSubpatterns + 1) * 2; Vector interpreterOvector; @@ -222,7 +509,7 @@ void RegExp::matchCompareWithInterpreter(const UString& s, int startOffset, int* for (unsigned j = 0, i = 0; i < m_numSubpatterns + 1; j += 2, i++) interpreterOffsetVector[j] = -1; - interpreterResult = Yarr::interpret(m_representation->m_regExpBytecode.get(), s.characters(), startOffset, s.length(), interpreterOffsetVector); + interpreterResult = Yarr::interpret(m_regExpBytecode.get(), s, startOffset, interpreterOffsetVector); if (jitResult != interpreterResult) differences++; @@ -233,24 +520,24 @@ void RegExp::matchCompareWithInterpreter(const UString& s, int startOffset, int* differences++; if (differences) { - fprintf(stderr, "RegExp Discrepency for /%s/\n string input ", pattern().utf8().data()); + dataLogF("RegExp Discrepency for /%s/\n string input ", pattern().utf8().data()); unsigned segmentLen = s.length() - static_cast(startOffset); - fprintf(stderr, (segmentLen < 150) ? "\"%s\"\n" : "\"%148s...\"\n", s.utf8().data() + startOffset); + dataLogF((segmentLen < 150) ? "\"%s\"\n" : "\"%148s...\"\n", s.utf8().data() + startOffset); if (jitResult != interpreterResult) { - fprintf(stderr, " JIT result = %d, blah interpreted result = %d\n", jitResult, interpreterResult); + dataLogF(" JIT result = %d, blah interpreted result = %d\n", jitResult, interpreterResult); differences--; } else { - fprintf(stderr, " Correct result = %d\n", jitResult); + dataLogF(" Correct result = %d\n", jitResult); } if (differences) { for (unsigned j = 2, i = 0; i < m_numSubpatterns; j +=2, i++) { if (offsetVector[j] != interpreterOffsetVector[j]) - fprintf(stderr, " JIT offset[%d] = %d, interpreted offset[%d] = %d\n", j, offsetVector[j], j, interpreterOffsetVector[j]); + dataLogF(" JIT offset[%d] = %d, interpreted offset[%d] = %d\n", j, offsetVector[j], j, interpreterOffsetVector[j]); if ((offsetVector[j] >= 0) && (offsetVector[j+1] != interpreterOffsetVector[j+1])) - fprintf(stderr, " JIT offset[%d] = %d, interpreted offset[%d] = %d\n", j+1, offsetVector[j+1], j+1, interpreterOffsetVector[j+1]); + dataLogF(" JIT offset[%d] = %d, interpreted offset[%d] = %d\n", j+1, offsetVector[j+1], j+1, interpreterOffsetVector[j+1]); } } } @@ -271,20 +558,36 @@ void RegExp::matchCompareWithInterpreter(const UString& s, int startOffset, int* snprintf(formattedPattern, 41, (pattLen <= 38) ? "/%.38s/" : "/%.36s...", rawPattern); #if ENABLE(YARR_JIT) - Yarr::YarrCodeBlock& codeBlock = m_representation->m_regExpJITCode; + Yarr::YarrCodeBlock& codeBlock = m_regExpJITCode; const size_t jitAddrSize = 20; - char jitAddr[jitAddrSize]; - if (m_state == JITCode) - snprintf(jitAddr, jitAddrSize, "fallback"); - else - snprintf(jitAddr, jitAddrSize, "0x%014lx", reinterpret_cast(codeBlock.getAddr())); + char jit8BitMatchOnlyAddr[jitAddrSize]; + char jit16BitMatchOnlyAddr[jitAddrSize]; + char jit8BitMatchAddr[jitAddrSize]; + char jit16BitMatchAddr[jitAddrSize]; + if (m_state == ByteCode) { + snprintf(jit8BitMatchOnlyAddr, jitAddrSize, "fallback "); + snprintf(jit16BitMatchOnlyAddr, jitAddrSize, "---- "); + snprintf(jit8BitMatchAddr, jitAddrSize, "fallback "); + snprintf(jit16BitMatchAddr, jitAddrSize, "---- "); + } else { + snprintf(jit8BitMatchOnlyAddr, jitAddrSize, "0x%014lx", reinterpret_cast(codeBlock.get8BitMatchOnlyAddr())); + snprintf(jit16BitMatchOnlyAddr, jitAddrSize, "0x%014lx", reinterpret_cast(codeBlock.get16BitMatchOnlyAddr())); + snprintf(jit8BitMatchAddr, jitAddrSize, "0x%014lx", reinterpret_cast(codeBlock.get8BitMatchAddr())); + snprintf(jit16BitMatchAddr, jitAddrSize, "0x%014lx", reinterpret_cast(codeBlock.get16BitMatchAddr())); + } #else - const char* jitAddr = "JIT Off"; + const char* jit8BitMatchOnlyAddr = "JIT Off"; + const char* jit16BitMatchOnlyAddr = ""; + const char* jit8BitMatchAddr = "JIT Off"; + const char* jit16BitMatchAddr = ""; #endif + unsigned averageMatchOnlyStringLen = (unsigned)(m_rtMatchOnlyTotalSubjectStringLen / m_rtMatchOnlyCallCount); + unsigned averageMatchStringLen = (unsigned)(m_rtMatchTotalSubjectStringLen / m_rtMatchCallCount); - printf("%-40.40s %16.16s %10d %10d\n", formattedPattern, jitAddr, m_rtMatchCallCount, m_rtMatchFoundCount); + printf("%-40.40s %16.16s %16.16s %10d %10d %10u\n", formattedPattern, jit8BitMatchOnlyAddr, jit16BitMatchOnlyAddr, m_rtMatchOnlyCallCount, m_rtMatchOnlyFoundCount, averageMatchOnlyStringLen); + printf(" %16.16s %16.16s %10d %10d %10u\n", jit8BitMatchAddr, jit16BitMatchAddr, m_rtMatchCallCount, m_rtMatchFoundCount, averageMatchStringLen); } #endif - + } // namespace JSC