#include <wtf/Assertions.h>
#include <wtf/OwnArrayPtr.h>
+
+#define REGEXP_FUNC_TEST_DATA_GEN 0
+
namespace JSC {
-const ClassInfo RegExp::s_info = { "RegExp", 0, 0, 0 };
+const ClassInfo RegExp::s_info = { "RegExp", 0, 0, 0, CREATE_METHOD_TABLE(RegExp) };
RegExpFlags regExpFlags(const UString& string)
{
RegExpFlags flags = NoFlags;
for (unsigned i = 0; i < string.length(); ++i) {
- switch (string.characters()[i]) {
+ switch (string[i]) {
case 'g':
if (flags & FlagGlobal)
return InvalidFlags;
return flags;
}
-
-struct RegExpRepresentation {
-#if ENABLE(YARR_JIT)
- Yarr::YarrCodeBlock m_regExpJITCode;
-#endif
- OwnPtr<Yarr::BytecodePattern> m_regExpBytecode;
+
+#if REGEXP_FUNC_TEST_DATA_GEN
+// Debug-only helper (compiled when REGEXP_FUNC_TEST_DATA_GEN is non-zero) that
+// records every regular expression match attempt to the file named by
+// s_fileName. A single instance is created on demand by get().
+class RegExpFunctionalTestCollector {
+ // This class is not thread safe.
+protected:
+ // Path of the output data file.
+ static const char* const s_fileName;
+
+public:
+ // Returns the shared collector, creating it on first use.
+ static RegExpFunctionalTestCollector* get();
+
+ ~RegExpFunctionalTestCollector();
+
+ // Writes one record: (regexp, subject string, start offset, offset vector, result).
+ void outputOneTest(RegExp*, UString, int, int*, int);
+ // Forgets regExp if it is the expression whose header was written last, so a
+ // stale pointer is never compared against a later RegExp.
+ void clearRegExp(RegExp* regExp)
+ {
+ if (regExp == m_lastRegExp)
+ m_lastRegExp = 0;
+ }
+
+private:
+ // Private: instances are only created through get().
+ RegExpFunctionalTestCollector();
+
+ // Writes a string to m_file with special characters escaped; '/' is escaped
+ // only when escapeSlash is true.
+ void outputEscapedUString(const UString&, bool escapeSlash = false);
+
+ static RegExpFunctionalTestCollector* s_instance;
+ // Output stream for the data file.
+ FILE* m_file;
+ // Expression whose "/pattern/flags" header line was written most recently.
+ RegExp* m_lastRegExp;
};
-RegExp::RegExp(JSGlobalData* globalData, const UString& patternString, RegExpFlags flags)
- : JSCell(*globalData, globalData->regExpStructure.get())
+// Hard-coded location of the file that collected test cases are written to.
+const char* const RegExpFunctionalTestCollector::s_fileName = "/tmp/RegExpTestsData";
+// The collector returned by get(); null until get() creates it.
+RegExpFunctionalTestCollector* RegExpFunctionalTestCollector::s_instance = 0;
+
+// Returns the process-wide collector, constructing it the first time it is
+// requested. Not synchronized; the class is documented as not thread safe.
+RegExpFunctionalTestCollector* RegExpFunctionalTestCollector::get()
+{
+    if (s_instance)
+        return s_instance;
+
+    s_instance = new RegExpFunctionalTestCollector();
+    return s_instance;
+}
+
+// Appends one match record to the data file.
+//
+// regExp      - expression that was executed; a "/pattern/flags" header line
+//               is written whenever it differs from the previously logged one.
+// s           - subject string the expression was run against.
+// startOffset - offset in s at which matching started.
+// ovector     - begin/end offset pairs, one pair for the whole match followed
+//               by one pair per subpattern (numSubpatterns() + 1 pairs read).
+// result      - value returned by the match.
+void RegExpFunctionalTestCollector::outputOneTest(RegExp* regExp, UString s, int startOffset, int* ovector, int result)
+{
+    // m_file is null if the data file could not be opened. Passing a null
+    // stream to the stdio functions is undefined behavior, so drop the record.
+    if (!m_file)
+        return;
+
+    if ((!m_lastRegExp) || (m_lastRegExp != regExp)) {
+        m_lastRegExp = regExp;
+        fputc('/', m_file);
+        outputEscapedUString(regExp->pattern(), true);
+        fputc('/', m_file);
+        if (regExp->global())
+            fputc('g', m_file);
+        if (regExp->ignoreCase())
+            fputc('i', m_file);
+        if (regExp->multiline())
+            fputc('m', m_file);
+        fprintf(m_file, "\n");
+    }
+
+    fprintf(m_file, " \"");
+    outputEscapedUString(s);
+    fprintf(m_file, "\", %d, %d, (", startOffset, result);
+    for (unsigned i = 0; i <= regExp->numSubpatterns(); i++) {
+        int subpatternBegin = ovector[i * 2];
+        int subpatternEnd = ovector[i * 2 + 1];
+        // An unmatched subpattern has begin == -1; its end slot is not
+        // meaningful, so normalize it to -1 as well.
+        if (subpatternBegin == -1)
+            subpatternEnd = -1;
+        fprintf(m_file, "%d, %d", subpatternBegin, subpatternEnd);
+        if (i < regExp->numSubpatterns())
+            fputs(", ", m_file);
+    }
+
+    fprintf(m_file, ")\n");
+    // Flush after every record so the file is complete even if the process dies.
+    fflush(m_file);
+}
+
+// Opens the data file so that new records are appended after any existing
+// content. m_lastRegExp starts out null so the first record always gets a
+// pattern header; leaving it uninitialized would make outputOneTest() and
+// clearRegExp() compare against an indeterminate pointer.
+RegExpFunctionalTestCollector::RegExpFunctionalTestCollector()
+    : m_file(0)
+    , m_lastRegExp(0)
+{
+    // "r+" preserves an existing file; if that fails (for example because the
+    // file does not exist yet) fall back to "w+", which creates it.
+    m_file = fopen(s_fileName, "r+");
+    if (!m_file)
+        m_file = fopen(s_fileName, "w+");
+
+    // Both opens can fail; seeking on a null stream is undefined behavior.
+    if (!m_file)
+        return;
+
+    fseek(m_file, 0L, SEEK_END);
+}
+
+// Closes the data file and clears the singleton pointer so that a later get()
+// creates a fresh collector.
+RegExpFunctionalTestCollector::~RegExpFunctionalTestCollector()
+{
+    // fopen() may have failed in the constructor; fclose() on a null stream
+    // is undefined behavior.
+    if (m_file)
+        fclose(m_file);
+    s_instance = 0;
+}
+
+// Writes s to m_file one UChar at a time. Characters with a dedicated escape
+// are written as that escape, characters above 0x7f as \uXXXX, and everything
+// else verbatim. '/' is written as "\/" only when escapeSlash is true.
+void RegExpFunctionalTestCollector::outputEscapedUString(const UString& s, bool escapeSlash)
+{
+    const int length = s.length();
+    int index = 0;
+
+    while (index < length) {
+        const UChar unit = s[index++];
+
+        // Pick the fixed replacement text, if this character has one.
+        const char* replacement = 0;
+        switch (unit) {
+        case '\0':
+            replacement = "\\0";
+            break;
+        case '\a':
+            replacement = "\\a";
+            break;
+        case '\b':
+            replacement = "\\b";
+            break;
+        case '\f':
+            replacement = "\\f";
+            break;
+        case '\n':
+            replacement = "\\n";
+            break;
+        case '\r':
+            replacement = "\\r";
+            break;
+        case '\t':
+            replacement = "\\t";
+            break;
+        case '\v':
+            replacement = "\\v";
+            break;
+        case '/':
+            replacement = escapeSlash ? "\\/" : "/";
+            break;
+        case '\"':
+            replacement = "\\\"";
+            break;
+        case '\\':
+            replacement = "\\\\";
+            break;
+        case '\?':
+            replacement = "\?";
+            break;
+        default:
+            break;
+        }
+
+        if (replacement) {
+            fputs(replacement, m_file);
+            continue;
+        }
+
+        // No fixed replacement: non-ASCII becomes a \u escape, ASCII is copied.
+        if (unit > 0x7f)
+            fprintf(m_file, "\\u%04x", unit);
+        else
+            fputc(unit, m_file);
+    }
+}
+#endif
+
+RegExp::RegExp(JSGlobalData& globalData, const UString& patternString, RegExpFlags flags)
+ : JSCell(globalData, globalData.regExpStructure.get())
, m_state(NotCompiled)
, m_patternString(patternString)
, m_flags(flags)
, m_rtMatchFoundCount(0)
#endif
{
+}
+
+// Second construction phase, invoked by createWithoutCaching() right after the
+// constructor. Parses the pattern once to record a syntax error (if any) and
+// the number of subpatterns; no executable code is generated here.
+void RegExp::finishCreation(JSGlobalData& globalData)
+{
+ Base::finishCreation(globalData);
+ // The parser reports a syntax error through m_constructionError.
Yarr::YarrPattern pattern(m_patternString, ignoreCase(), multiline(), &m_constructionError);
if (m_constructionError)
m_state = ParseError;
m_numSubpatterns = pattern.m_numSubpatterns;
}
-RegExp::~RegExp()
+// Runs the RegExp destructor on the object stored in cell.
+// When test-data generation is enabled, the collector is first told to forget
+// this object so it never compares against a dangling RegExp pointer.
+void RegExp::destroy(JSCell* cell)
{
+    RegExp* thisObject = jsCast<RegExp*>(cell);
+#if REGEXP_FUNC_TEST_DATA_GEN
+    // The object being destroyed is the one passed in as cell, so it must be
+    // referenced through thisObject rather than `this`.
+    RegExpFunctionalTestCollector::get()->clearRegExp(thisObject);
+#endif
+    thisObject->RegExp::~RegExp();
}
-RegExp* RegExp::create(JSGlobalData* globalData, const UString& patternString, RegExpFlags flags)
+// Allocates a new RegExp cell on globalData's heap, constructs it in place and
+// runs finishCreation() on it. Unlike create(), this does not go through the
+// regexp cache.
+RegExp* RegExp::createWithoutCaching(JSGlobalData& globalData, const UString& patternString, RegExpFlags flags)
{
- return globalData->regExpCache()->lookupOrCreate(patternString, flags);
+ RegExp* regExp = new (NotNull, allocateCell<RegExp>(globalData.heap)) RegExp(globalData, patternString, flags);
+ regExp->finishCreation(globalData);
+ return regExp;
}
-void RegExp::compile(JSGlobalData* globalData)
+// Returns the RegExp for (patternString, flags) by delegating to the regexp
+// cache's lookupOrCreate().
+RegExp* RegExp::create(JSGlobalData& globalData, const UString& patternString, RegExpFlags flags)
+{
+ return globalData.regExpCache()->lookupOrCreate(patternString, flags);
+}
+
+// Generates executable code for this expression for the given character size.
+// With the YARR JIT enabled and usable for the pattern, JIT code is produced;
+// otherwise, or when the JIT falls back, the pattern is compiled to bytecode.
+void RegExp::compile(JSGlobalData* globalData, Yarr::YarrCharSize charSize)
{
- ASSERT(m_state == NotCompiled);
- m_representation = adoptPtr(new RegExpRepresentation);
- m_state = Compiling;
Yarr::YarrPattern pattern(m_patternString, ignoreCase(), multiline(), &m_constructionError);
if (m_constructionError) {
ASSERT_NOT_REACHED();
m_state = ParseError;
return;
}
-
- globalData->regExpCache()->addToStrongCache(this);
-
ASSERT(m_numSubpatterns == pattern.m_numSubpatterns);
+ // First compilation of this object: add it to the strong cache and assume
+ // bytecode until the JIT step below says otherwise.
+ if (!hasCode()) {
+ ASSERT(m_state == NotCompiled);
+ globalData->regExpCache()->addToStrongCache(this);
+ m_state = ByteCode;
+ }
+
#if ENABLE(YARR_JIT)
- if (!pattern.m_containsBackreferences && globalData->canUseJIT()) {
- Yarr::jitCompile(pattern, globalData, m_representation->m_regExpJITCode);
+ // Patterns containing backreferences are never handed to the JIT.
+ if (!pattern.m_containsBackreferences && globalData->canUseRegExpJIT()) {
+ Yarr::jitCompile(pattern, charSize, globalData, m_regExpJITCode);
#if ENABLE(YARR_JIT_DEBUG)
- if (!m_representation->m_regExpJITCode.isFallBack())
+ // Debug builds do not return here, so bytecode is also built below.
+ if (!m_regExpJITCode.isFallBack())
m_state = JITCode;
else
m_state = ByteCode;
#else
- if (!m_representation->m_regExpJITCode.isFallBack()) {
+ // JIT succeeded: no bytecode is needed.
+ if (!m_regExpJITCode.isFallBack()) {
m_state = JITCode;
return;
}
#endif
}
+#else
+ UNUSED_PARAM(charSize);
#endif
- m_representation->m_regExpBytecode = Yarr::byteCompile(pattern, &globalData->m_regExpAllocator);
-
- m_state = ByteCode;
+ m_regExpBytecode = Yarr::byteCompile(pattern, &globalData->m_regExpAllocator);
}
-int RegExp::match(JSGlobalData& globalData, const UString& s, int startOffset, Vector<int, 32>* ovector)
+// Calls compile() unless usable code already exists. Existing non-JIT code is
+// always reused; existing JIT code is reused only if it has a variant for the
+// requested character size.
+void RegExp::compileIfNecessary(JSGlobalData& globalData, Yarr::YarrCharSize charSize)
{
- if (startOffset < 0)
- startOffset = 0;
+ if (hasCode()) {
+#if ENABLE(YARR_JIT)
+ // Non-JIT code needs no per-character-size variant.
+ if (m_state != JITCode)
+ return;
+ if ((charSize == Yarr::Char8) && (m_regExpJITCode.has8BitCode()))
+ return;
+ if ((charSize == Yarr::Char16) && (m_regExpJITCode.has16BitCode()))
+ return;
+#else
+ return;
+#endif
+ }
+ compile(&globalData, charSize);
+}
+
+// Runs this expression against s starting at startOffset and reports
+// subpattern positions through ovector, which is resized to hold two ints
+// (begin, end) for the whole match and for each subpattern.
+// Returns the start of the match, or -1 when there is no match. The expression
+// must not be in the ParseError state.
+int RegExp::match(JSGlobalData& globalData, const UString& s, unsigned startOffset, Vector<int, 32>& ovector)
+{
#if ENABLE(REGEXP_TRACING)
m_rtMatchCallCount++;
#endif
- if (static_cast<unsigned>(startOffset) > s.length() || s.isNull())
- return -1;
+ ASSERT(m_state != ParseError);
+ // Make sure code exists for the character width of the subject string.
+ compileIfNecessary(globalData, s.is8Bit() ? Yarr::Char8 : Yarr::Char16);
- if (m_state != ParseError) {
- compileIfNecessary(globalData);
+ int offsetVectorSize = (m_numSubpatterns + 1) * 2;
+ ovector.resize(offsetVectorSize);
+ int* offsetVector = ovector.data();
- int offsetVectorSize = (m_numSubpatterns + 1) * 2;
- int* offsetVector;
- Vector<int, 32> nonReturnedOvector;
- if (ovector) {
- ovector->resize(offsetVectorSize);
- offsetVector = ovector->data();
- } else {
- nonReturnedOvector.resize(offsetVectorSize);
- offsetVector = nonReturnedOvector.data();
+ int result;
+#if ENABLE(YARR_JIT)
+ if (m_state == JITCode) {
+ if (s.is8Bit())
+ result = m_regExpJITCode.execute(s.characters8(), startOffset, s.length(), offsetVector).start;
+ else
+ result = m_regExpJITCode.execute(s.characters16(), startOffset, s.length(), offsetVector).start;
+#if ENABLE(YARR_JIT_DEBUG)
+ matchCompareWithInterpreter(s, startOffset, offsetVector, result);
+#endif
+ } else
+#endif
+ result = Yarr::interpret(m_regExpBytecode.get(), s, startOffset, reinterpret_cast<unsigned*>(offsetVector));
+
+ // FIXME: The YARR engine should handle unsigned or size_t length matches.
+ // The YARR Interpreter is "unsigned" clean, while the YARR JIT hasn't been addressed.
+ // The offset vector handling needs to change as well.
+ // Right now we convert a match where the offsets overflowed into match failure.
+ // There are two places in WebCore that call the interpreter directly that need to
+ // have their offsets changed to int as well. They are platform/text/RegularExpression.cpp
+ // and inspector/ContentSearchUtils.cpp.
+ if (s.length() > INT_MAX) {
+ bool overflowed = false;
+
+ // Any value below -1 cannot be a valid offset or the "no match" marker.
+ if (result < -1)
+ overflowed = true;
+
+ for (unsigned i = 0; i <= m_numSubpatterns; i++) {
+ if ((offsetVector[i*2] < -1) || ((offsetVector[i*2] >= 0) && (offsetVector[i*2+1] < -1))) {
+ overflowed = true;
+ offsetVector[i*2] = -1;
+ offsetVector[i*2+1] = -1;
+ }
}
- ASSERT(offsetVector);
- // Initialize offsetVector with the return value (index 0) and the
- // first subpattern start indicies (even index values) set to -1.
- // No need to init the subpattern end indicies.
- for (unsigned j = 0, i = 0; i < m_numSubpatterns + 1; j += 2, i++)
- offsetVector[j] = -1;
+ if (overflowed)
+ result = -1;
+ }
+
+ ASSERT(result >= -1);
+
+#if REGEXP_FUNC_TEST_DATA_GEN
+ RegExpFunctionalTestCollector::get()->outputOneTest(this, s, startOffset, offsetVector, result);
+#endif
+
+#if ENABLE(REGEXP_TRACING)
+ if (result != -1)
+ m_rtMatchFoundCount++;
+#endif
+
+ return result;
+}
+
+// Same flow as compile(), except that the JIT is asked for the Yarr::MatchOnly
+// variant of the code.
+void RegExp::compileMatchOnly(JSGlobalData* globalData, Yarr::YarrCharSize charSize)
+{
+ Yarr::YarrPattern pattern(m_patternString, ignoreCase(), multiline(), &m_constructionError);
+ if (m_constructionError) {
+ ASSERT_NOT_REACHED();
+ m_state = ParseError;
+ return;
+ }
+ ASSERT(m_numSubpatterns == pattern.m_numSubpatterns);
+
+ // First compilation of this object: add it to the strong cache and assume
+ // bytecode until the JIT step below says otherwise.
+ if (!hasCode()) {
+ ASSERT(m_state == NotCompiled);
+ globalData->regExpCache()->addToStrongCache(this);
+ m_state = ByteCode;
+ }
- int result;
#if ENABLE(YARR_JIT)
- if (m_state == JITCode) {
- result = Yarr::execute(m_representation->m_regExpJITCode, s.characters(), startOffset, s.length(), offsetVector);
+ // Patterns containing backreferences are never handed to the JIT.
+ if (!pattern.m_containsBackreferences && globalData->canUseRegExpJIT()) {
+ Yarr::jitCompile(pattern, charSize, globalData, m_regExpJITCode, Yarr::MatchOnly);
#if ENABLE(YARR_JIT_DEBUG)
- matchCompareWithInterpreter(s, startOffset, offsetVector, result);
+ // Debug builds do not return here, so bytecode is also built below.
+ if (!m_regExpJITCode.isFallBack())
+ m_state = JITCode;
+ else
+ m_state = ByteCode;
+#else
+ // JIT succeeded: no bytecode is needed.
+ if (!m_regExpJITCode.isFallBack()) {
+ m_state = JITCode;
+ return;
+ }
#endif
- } else
+ }
+#else
+ UNUSED_PARAM(charSize);
#endif
- result = Yarr::interpret(m_representation->m_regExpBytecode.get(), s.characters(), startOffset, s.length(), offsetVector);
- ASSERT(result >= -1);
+ m_regExpBytecode = Yarr::byteCompile(pattern, &globalData->m_regExpAllocator);
+}
+
+// Makes sure match-only code exists for the requested character size,
+// compiling it on demand through compileMatchOnly().
+void RegExp::compileIfNecessaryMatchOnly(JSGlobalData& globalData, Yarr::YarrCharSize charSize)
+{
+    if (hasCode()) {
+#if ENABLE(YARR_JIT)
+        // Existing code can be reused unless it is JIT code that lacks a
+        // match-only variant for this character size. The || chain keeps the
+        // original evaluation order.
+        const bool reusable = (m_state != JITCode)
+            || ((charSize == Yarr::Char8) && (m_regExpJITCode.has8BitCodeMatchOnly()))
+            || ((charSize == Yarr::Char16) && (m_regExpJITCode.has16BitCodeMatchOnly()));
+        if (reusable)
+            return;
+#else
+        return;
+#endif
+    }
+
+    compileMatchOnly(&globalData, charSize);
+}
+
+// Match-only overload: runs this expression against s starting at startOffset
+// and returns only the extent of the overall match (no subpattern offsets are
+// handed back). Returns MatchResult::failed() when there is no match. The
+// expression must not be in the ParseError state.
+MatchResult RegExp::match(JSGlobalData& globalData, const UString& s, unsigned startOffset)
+{
#if ENABLE(REGEXP_TRACING)
- if (result != -1)
- m_rtMatchFoundCount++;
+    m_rtMatchCallCount++;
#endif
+    ASSERT(m_state != ParseError);
+    // Make sure match-only code exists for the subject's character width.
+    compileIfNecessaryMatchOnly(globalData, s.is8Bit() ? Yarr::Char8 : Yarr::Char16);
+
+#if ENABLE(YARR_JIT)
+    if (m_state == JITCode) {
+        MatchResult result = s.is8Bit() ?
+            m_regExpJITCode.execute(s.characters8(), startOffset, s.length()) :
+            m_regExpJITCode.execute(s.characters16(), startOffset, s.length());
+#if ENABLE(REGEXP_TRACING)
+        // Count successful matches only, as the other found-count sites do.
+        // NOTE(review): assumes !result is true for a failed match - confirm
+        // against MatchResult.
+        if (!!result)
+            m_rtMatchFoundCount++;
+#endif
return result;
}
+#endif
+
+    // Interpreter path: the interpreter still needs a full offset vector even
+    // though only the overall match is returned.
+    int offsetVectorSize = (m_numSubpatterns + 1) * 2;
+    Vector<int, 32> nonReturnedOvector;
+    nonReturnedOvector.resize(offsetVectorSize);
+    int* offsetVector = nonReturnedOvector.data();
+    int r = Yarr::interpret(m_regExpBytecode.get(), s, startOffset, reinterpret_cast<unsigned*>(offsetVector));
+#if REGEXP_FUNC_TEST_DATA_GEN
+    // The interpreter result in this scope is r; `result` exists only inside
+    // the JIT branch above.
+    RegExpFunctionalTestCollector::get()->outputOneTest(this, s, startOffset, offsetVector, r);
+#endif
+
+    if (r >= 0) {
+#if ENABLE(REGEXP_TRACING)
+        m_rtMatchFoundCount++;
+#endif
+        // Slot 1 of the offset vector holds the end of the overall match.
+        return MatchResult(r, reinterpret_cast<unsigned*>(offsetVector)[1]);
+    }
- return -1;
+    return MatchResult::failed();
}
+// Discards any generated code (JIT code when the JIT is enabled, and bytecode)
+// and returns the object to the NotCompiled state. Does nothing when there is
+// no code.
void RegExp::invalidateCode()
{
- if (!m_representation || m_state == Compiling)
+ if (!hasCode())
return;
m_state = NotCompiled;
- m_representation.clear();
+#if ENABLE(YARR_JIT)
+ m_regExpJITCode.clear();
+#endif
+ m_regExpBytecode.clear();
}
#if ENABLE(YARR_JIT_DEBUG)
for (unsigned j = 0, i = 0; i < m_numSubpatterns + 1; j += 2, i++)
interpreterOffsetVector[j] = -1;
- interpreterResult = Yarr::interpret(m_representation->m_regExpBytecode.get(), s.characters(), startOffset, s.length(), interpreterOffsetVector);
+ interpreterResult = Yarr::interpret(m_regExpBytecode.get(), s, startOffset, interpreterOffsetVector);
if (jitResult != interpreterResult)
differences++;
differences++;
if (differences) {
- fprintf(stderr, "RegExp Discrepency for /%s/\n string input ", pattern().utf8().data());
+ dataLog("RegExp Discrepency for /%s/\n string input ", pattern().utf8().data());
unsigned segmentLen = s.length() - static_cast<unsigned>(startOffset);
- fprintf(stderr, (segmentLen < 150) ? "\"%s\"\n" : "\"%148s...\"\n", s.utf8().data() + startOffset);
+ dataLog((segmentLen < 150) ? "\"%s\"\n" : "\"%148s...\"\n", s.utf8().data() + startOffset);
if (jitResult != interpreterResult) {
- fprintf(stderr, " JIT result = %d, blah interpreted result = %d\n", jitResult, interpreterResult);
+ dataLog(" JIT result = %d, blah interpreted result = %d\n", jitResult, interpreterResult);
differences--;
} else {
- fprintf(stderr, " Correct result = %d\n", jitResult);
+ dataLog(" Correct result = %d\n", jitResult);
}
if (differences) {
for (unsigned j = 2, i = 0; i < m_numSubpatterns; j +=2, i++) {
if (offsetVector[j] != interpreterOffsetVector[j])
- fprintf(stderr, " JIT offset[%d] = %d, interpreted offset[%d] = %d\n", j, offsetVector[j], j, interpreterOffsetVector[j]);
+ dataLog(" JIT offset[%d] = %d, interpreted offset[%d] = %d\n", j, offsetVector[j], j, interpreterOffsetVector[j]);
if ((offsetVector[j] >= 0) && (offsetVector[j+1] != interpreterOffsetVector[j+1]))
- fprintf(stderr, " JIT offset[%d] = %d, interpreted offset[%d] = %d\n", j+1, offsetVector[j+1], j+1, interpreterOffsetVector[j+1]);
+ dataLog(" JIT offset[%d] = %d, interpreted offset[%d] = %d\n", j+1, offsetVector[j+1], j+1, interpreterOffsetVector[j+1]);
}
}
}
snprintf(formattedPattern, 41, (pattLen <= 38) ? "/%.38s/" : "/%.36s...", rawPattern);
#if ENABLE(YARR_JIT)
- Yarr::YarrCodeBlock& codeBlock = m_representation->m_regExpJITCode;
+ Yarr::YarrCodeBlock& codeBlock = m_regExpJITCode;
const size_t jitAddrSize = 20;
char jitAddr[jitAddrSize];
printf("%-40.40s %16.16s %10d %10d\n", formattedPattern, jitAddr, m_rtMatchCallCount, m_rtMatchFoundCount);
}
#endif
-
+
} // namespace JSC