* Copyright (C) 1999-2001, 2004 Harri Porten (porten@kde.org)
* Copyright (c) 2007, 2008 Apple Inc. All rights reserved.
* Copyright (C) 2009 Torch Mobile, Inc.
+ * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
#include "config.h"
#include "RegExp.h"
+
#include "Lexer.h"
+#include "RegExpCache.h"
+#include "yarr/Yarr.h"
+#include "yarr/YarrJIT.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wtf/Assertions.h>
#include <wtf/OwnArrayPtr.h>
-
-#if ENABLE(YARR)
-
-#include "yarr/RegexCompiler.h"
-#if ENABLE(YARR_JIT)
-#include "yarr/RegexJIT.h"
-#else
-#include "yarr/RegexInterpreter.h"
-#endif
-
-#else
-
-#if ENABLE(WREC)
-#include "JIT.h"
-#include "WRECGenerator.h"
-#endif
-#include <pcre/pcre.h>
-
-#endif
-
namespace JSC {
-#if ENABLE(WREC)
-using namespace WREC;
-#endif
+const ClassInfo RegExp::s_info = { "RegExp", 0, 0, 0 };
-inline RegExp::RegExp(JSGlobalData* globalData, const UString& pattern)
- : m_pattern(pattern)
- , m_flagBits(0)
- , m_constructionError(0)
- , m_numSubpatterns(0)
+RegExpFlags regExpFlags(const UString& string)
{
- compile(globalData);
-}
+ RegExpFlags flags = NoFlags;
+
+ for (unsigned i = 0; i < string.length(); ++i) {
+ switch (string.characters()[i]) {
+ case 'g':
+ if (flags & FlagGlobal)
+ return InvalidFlags;
+ flags = static_cast<RegExpFlags>(flags | FlagGlobal);
+ break;
+
+ case 'i':
+ if (flags & FlagIgnoreCase)
+ return InvalidFlags;
+ flags = static_cast<RegExpFlags>(flags | FlagIgnoreCase);
+ break;
+
+ case 'm':
+ if (flags & FlagMultiline)
+ return InvalidFlags;
+ flags = static_cast<RegExpFlags>(flags | FlagMultiline);
+ break;
+
+ default:
+ return InvalidFlags;
+ }
+ }
-inline RegExp::RegExp(JSGlobalData* globalData, const UString& pattern, const UString& flags)
- : m_pattern(pattern)
- , m_flagBits(0)
+ return flags;
+}
+
+struct RegExpRepresentation {
+#if ENABLE(YARR_JIT)
+ Yarr::YarrCodeBlock m_regExpJITCode;
+#endif
+ OwnPtr<Yarr::BytecodePattern> m_regExpBytecode;
+};
+
+RegExp::RegExp(JSGlobalData* globalData, const UString& patternString, RegExpFlags flags)
+ : JSCell(*globalData, globalData->regExpStructure.get())
+ , m_state(NotCompiled)
+ , m_patternString(patternString)
+ , m_flags(flags)
, m_constructionError(0)
, m_numSubpatterns(0)
+#if ENABLE(REGEXP_TRACING)
+ , m_rtMatchCallCount(0)
+ , m_rtMatchFoundCount(0)
+#endif
{
- // NOTE: The global flag is handled on a case-by-case basis by functions like
- // String::match and RegExpObject::match.
- if (flags.find('g') != -1)
- m_flagBits |= Global;
- if (flags.find('i') != -1)
- m_flagBits |= IgnoreCase;
- if (flags.find('m') != -1)
- m_flagBits |= Multiline;
-
- compile(globalData);
+ Yarr::YarrPattern pattern(m_patternString, ignoreCase(), multiline(), &m_constructionError);
+ if (m_constructionError)
+ m_state = ParseError;
+ else
+ m_numSubpatterns = pattern.m_numSubpatterns;
}
-#if !ENABLE(YARR)
RegExp::~RegExp()
{
- jsRegExpFree(m_regExp);
}
-#endif
-PassRefPtr<RegExp> RegExp::create(JSGlobalData* globalData, const UString& pattern)
+RegExp* RegExp::create(JSGlobalData* globalData, const UString& patternString, RegExpFlags flags)
{
- return adoptRef(new RegExp(globalData, pattern));
+ return globalData->regExpCache()->lookupOrCreate(patternString, flags);
}
-PassRefPtr<RegExp> RegExp::create(JSGlobalData* globalData, const UString& pattern, const UString& flags)
+void RegExp::compile(JSGlobalData* globalData)
{
- return adoptRef(new RegExp(globalData, pattern, flags));
-}
+ ASSERT(m_state == NotCompiled);
+ m_representation = adoptPtr(new RegExpRepresentation);
+ m_state = Compiling;
+ Yarr::YarrPattern pattern(m_patternString, ignoreCase(), multiline(), &m_constructionError);
+ if (m_constructionError) {
+ ASSERT_NOT_REACHED();
+ m_state = ParseError;
+ return;
+ }
-#if ENABLE(YARR)
+ globalData->regExpCache()->addToStrongCache(this);
+
+ ASSERT(m_numSubpatterns == pattern.m_numSubpatterns);
-void RegExp::compile(JSGlobalData* globalData)
-{
#if ENABLE(YARR_JIT)
- Yarr::jitCompileRegex(globalData, m_regExpJITCode, m_pattern, m_numSubpatterns, m_constructionError, ignoreCase(), multiline());
+ if (!pattern.m_containsBackreferences && globalData->canUseJIT()) {
+ Yarr::jitCompile(pattern, globalData, m_representation->m_regExpJITCode);
+#if ENABLE(YARR_JIT_DEBUG)
+ if (!m_representation->m_regExpJITCode.isFallBack())
+ m_state = JITCode;
+ else
+ m_state = ByteCode;
#else
- UNUSED_PARAM(globalData);
- m_regExpBytecode.set(Yarr::byteCompileRegex(m_pattern, m_numSubpatterns, m_constructionError, ignoreCase(), multiline()));
+ if (!m_representation->m_regExpJITCode.isFallBack()) {
+ m_state = JITCode;
+ return;
+ }
#endif
+ }
+#endif
+
+ m_representation->m_regExpBytecode = Yarr::byteCompile(pattern, &globalData->m_regExpAllocator);
+
+ m_state = ByteCode;
}
-int RegExp::match(const UString& s, int startOffset, Vector<int, 32>* ovector)
+int RegExp::match(JSGlobalData& globalData, const UString& s, int startOffset, Vector<int, 32>* ovector)
{
if (startOffset < 0)
startOffset = 0;
- if (ovector)
- ovector->clear();
- if (startOffset > s.size() || s.isNull())
+#if ENABLE(REGEXP_TRACING)
+ m_rtMatchCallCount++;
+#endif
+
+ if (static_cast<unsigned>(startOffset) > s.length() || s.isNull())
return -1;
-#if ENABLE(YARR_JIT)
- if (!!m_regExpJITCode) {
-#else
- if (m_regExpBytecode) {
-#endif
- int offsetVectorSize = (m_numSubpatterns + 1) * 3; // FIXME: should be 2 - but adding temporary fallback to pcre.
+ if (m_state != ParseError) {
+ compileIfNecessary(globalData);
+
+ int offsetVectorSize = (m_numSubpatterns + 1) * 2;
int* offsetVector;
Vector<int, 32> nonReturnedOvector;
if (ovector) {
}
ASSERT(offsetVector);
- for (int j = 0; j < offsetVectorSize; ++j)
+ // Initialize offsetVector with the return value (index 0) and the
+ // first subpattern start indicies (even index values) set to -1.
+ // No need to init the subpattern end indicies.
+ for (unsigned j = 0, i = 0; i < m_numSubpatterns + 1; j += 2, i++)
offsetVector[j] = -1;
-
+ int result;
#if ENABLE(YARR_JIT)
- int result = Yarr::executeRegex(m_regExpJITCode, s.data(), startOffset, s.size(), offsetVector, offsetVectorSize);
-#else
- int result = Yarr::interpretRegex(m_regExpBytecode.get(), s.data(), startOffset, s.size(), offsetVector);
+ if (m_state == JITCode) {
+ result = Yarr::execute(m_representation->m_regExpJITCode, s.characters(), startOffset, s.length(), offsetVector);
+#if ENABLE(YARR_JIT_DEBUG)
+ matchCompareWithInterpreter(s, startOffset, offsetVector, result);
+#endif
+ } else
#endif
+ result = Yarr::interpret(m_representation->m_regExpBytecode.get(), s.characters(), startOffset, s.length(), offsetVector);
+ ASSERT(result >= -1);
- if (result < 0) {
-#ifndef NDEBUG
- // TODO: define up a symbol, rather than magic -1
- if (result != -1)
- fprintf(stderr, "jsRegExpExecute failed with result %d\n", result);
+#if ENABLE(REGEXP_TRACING)
+ if (result != -1)
+ m_rtMatchFoundCount++;
#endif
- if (ovector)
- ovector->clear();
- }
+
return result;
}
return -1;
}
-#else
-
-void RegExp::compile(JSGlobalData* globalData)
+void RegExp::invalidateCode()
{
- m_regExp = 0;
-#if ENABLE(WREC)
- m_wrecFunction = Generator::compileRegExp(globalData, m_pattern, &m_numSubpatterns, &m_constructionError, m_executablePool, ignoreCase(), multiline());
- if (m_wrecFunction || m_constructionError)
+ if (!m_representation || m_state == Compiling)
return;
- // Fall through to non-WREC case.
-#else
- UNUSED_PARAM(globalData);
-#endif
-
- JSRegExpIgnoreCaseOption ignoreCaseOption = ignoreCase() ? JSRegExpIgnoreCase : JSRegExpDoNotIgnoreCase;
- JSRegExpMultilineOption multilineOption = multiline() ? JSRegExpMultiline : JSRegExpSingleLine;
- m_regExp = jsRegExpCompile(reinterpret_cast<const UChar*>(m_pattern.data()), m_pattern.size(), ignoreCaseOption, multilineOption, &m_numSubpatterns, &m_constructionError);
+ m_state = NotCompiled;
+ m_representation.clear();
}
-int RegExp::match(const UString& s, int startOffset, Vector<int, 32>* ovector)
+#if ENABLE(YARR_JIT_DEBUG)
+void RegExp::matchCompareWithInterpreter(const UString& s, int startOffset, int* offsetVector, int jitResult)
{
- if (startOffset < 0)
- startOffset = 0;
- if (ovector)
- ovector->clear();
-
- if (startOffset > s.size() || s.isNull())
- return -1;
-
-#if ENABLE(WREC)
- if (m_wrecFunction) {
- int offsetVectorSize = (m_numSubpatterns + 1) * 2;
- int* offsetVector;
- Vector<int, 32> nonReturnedOvector;
- if (ovector) {
- ovector->resize(offsetVectorSize);
- offsetVector = ovector->data();
+ int offsetVectorSize = (m_numSubpatterns + 1) * 2;
+ Vector<int, 32> interpreterOvector;
+ interpreterOvector.resize(offsetVectorSize);
+ int* interpreterOffsetVector = interpreterOvector.data();
+ int interpreterResult = 0;
+ int differences = 0;
+
+ // Initialize interpreterOffsetVector with the return value (index 0) and the
+ // first subpattern start indicies (even index values) set to -1.
+ // No need to init the subpattern end indicies.
+ for (unsigned j = 0, i = 0; i < m_numSubpatterns + 1; j += 2, i++)
+ interpreterOffsetVector[j] = -1;
+
+ interpreterResult = Yarr::interpret(m_representation->m_regExpBytecode.get(), s.characters(), startOffset, s.length(), interpreterOffsetVector);
+
+ if (jitResult != interpreterResult)
+ differences++;
+
+ for (unsigned j = 2, i = 0; i < m_numSubpatterns; j +=2, i++)
+ if ((offsetVector[j] != interpreterOffsetVector[j])
+ || ((offsetVector[j] >= 0) && (offsetVector[j+1] != interpreterOffsetVector[j+1])))
+ differences++;
+
+ if (differences) {
+ fprintf(stderr, "RegExp Discrepency for /%s/\n string input ", pattern().utf8().data());
+ unsigned segmentLen = s.length() - static_cast<unsigned>(startOffset);
+
+ fprintf(stderr, (segmentLen < 150) ? "\"%s\"\n" : "\"%148s...\"\n", s.utf8().data() + startOffset);
+
+ if (jitResult != interpreterResult) {
+ fprintf(stderr, " JIT result = %d, blah interpreted result = %d\n", jitResult, interpreterResult);
+ differences--;
} else {
- nonReturnedOvector.resize(offsetVectorSize);
- offsetVector = nonReturnedOvector.data();
+ fprintf(stderr, " Correct result = %d\n", jitResult);
}
- ASSERT(offsetVector);
- for (int j = 0; j < offsetVectorSize; ++j)
- offsetVector[j] = -1;
- int result = m_wrecFunction(s.data(), startOffset, s.size(), offsetVector);
-
- if (result < 0) {
-#ifndef NDEBUG
- // TODO: define up a symbol, rather than magic -1
- if (result != -1)
- fprintf(stderr, "jsRegExpExecute failed with result %d\n", result);
-#endif
- if (ovector)
- ovector->clear();
+ if (differences) {
+ for (unsigned j = 2, i = 0; i < m_numSubpatterns; j +=2, i++) {
+ if (offsetVector[j] != interpreterOffsetVector[j])
+ fprintf(stderr, " JIT offset[%d] = %d, interpreted offset[%d] = %d\n", j, offsetVector[j], j, interpreterOffsetVector[j]);
+ if ((offsetVector[j] >= 0) && (offsetVector[j+1] != interpreterOffsetVector[j+1]))
+ fprintf(stderr, " JIT offset[%d] = %d, interpreted offset[%d] = %d\n", j+1, offsetVector[j+1], j+1, interpreterOffsetVector[j+1]);
+ }
}
- return result;
- } else
+ }
+}
#endif
- if (m_regExp) {
- // Set up the offset vector for the result.
- // First 2/3 used for result, the last third used by PCRE.
- int* offsetVector;
- int offsetVectorSize;
- int fixedSizeOffsetVector[3];
- if (!ovector) {
- offsetVectorSize = 3;
- offsetVector = fixedSizeOffsetVector;
- } else {
- offsetVectorSize = (m_numSubpatterns + 1) * 3;
- ovector->resize(offsetVectorSize);
- offsetVector = ovector->data();
- }
- int numMatches = jsRegExpExecute(m_regExp, reinterpret_cast<const UChar*>(s.data()), s.size(), startOffset, offsetVector, offsetVectorSize);
-
- if (numMatches < 0) {
-#ifndef NDEBUG
- if (numMatches != JSRegExpErrorNoMatch)
- fprintf(stderr, "jsRegExpExecute failed with result %d\n", numMatches);
-#endif
- if (ovector)
- ovector->clear();
- return -1;
- }
+#if ENABLE(REGEXP_TRACING)
+ void RegExp::printTraceData()
+ {
+ char formattedPattern[41];
+ char rawPattern[41];
- return offsetVector[0];
- }
+ strncpy(rawPattern, pattern().utf8().data(), 40);
+ rawPattern[40]= '\0';
- return -1;
-}
+ int pattLen = strlen(rawPattern);
+
+ snprintf(formattedPattern, 41, (pattLen <= 38) ? "/%.38s/" : "/%.36s...", rawPattern);
+#if ENABLE(YARR_JIT)
+ Yarr::YarrCodeBlock& codeBlock = m_representation->m_regExpJITCode;
+
+ const size_t jitAddrSize = 20;
+ char jitAddr[jitAddrSize];
+ if (m_state == JITCode)
+ snprintf(jitAddr, jitAddrSize, "fallback");
+ else
+ snprintf(jitAddr, jitAddrSize, "0x%014lx", reinterpret_cast<unsigned long int>(codeBlock.getAddr()));
+#else
+ const char* jitAddr = "JIT Off";
#endif
+ printf("%-40.40s %16.16s %10d %10d\n", formattedPattern, jitAddr, m_rtMatchCallCount, m_rtMatchFoundCount);
+ }
+#endif
+
} // namespace JSC