]> git.saurik.com Git - apple/javascriptcore.git/blame - runtime/RegExp.cpp
JavaScriptCore-903.5.tar.gz
[apple/javascriptcore.git] / runtime / RegExp.cpp
CommitLineData
9dae56ea
A
1/*
2 * Copyright (C) 1999-2001, 2004 Harri Porten (porten@kde.org)
3 * Copyright (c) 2007, 2008 Apple Inc. All rights reserved.
ba379fdc 4 * Copyright (C) 2009 Torch Mobile, Inc.
14957cd0 5 * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
9dae56ea
A
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with this library; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 *
21 */
22
23#include "config.h"
24#include "RegExp.h"
14957cd0 25
9dae56ea 26#include "Lexer.h"
14957cd0
A
27#include "RegExpCache.h"
28#include "yarr/Yarr.h"
29#include "yarr/YarrJIT.h"
9dae56ea
A
30#include <stdio.h>
31#include <stdlib.h>
32#include <string.h>
33#include <wtf/Assertions.h>
34#include <wtf/OwnArrayPtr.h>
35
36namespace JSC {
37
14957cd0
A
38const ClassInfo RegExp::s_info = { "RegExp", 0, 0, 0 };
39
40RegExpFlags regExpFlags(const UString& string)
9dae56ea 41{
14957cd0
A
42 RegExpFlags flags = NoFlags;
43
44 for (unsigned i = 0; i < string.length(); ++i) {
45 switch (string.characters()[i]) {
46 case 'g':
47 if (flags & FlagGlobal)
48 return InvalidFlags;
49 flags = static_cast<RegExpFlags>(flags | FlagGlobal);
50 break;
51
52 case 'i':
53 if (flags & FlagIgnoreCase)
54 return InvalidFlags;
55 flags = static_cast<RegExpFlags>(flags | FlagIgnoreCase);
56 break;
57
58 case 'm':
59 if (flags & FlagMultiline)
60 return InvalidFlags;
61 flags = static_cast<RegExpFlags>(flags | FlagMultiline);
62 break;
63
64 default:
65 return InvalidFlags;
66 }
67 }
9dae56ea 68
14957cd0
A
69 return flags;
70}
71
72struct RegExpRepresentation {
73#if ENABLE(YARR_JIT)
74 Yarr::YarrCodeBlock m_regExpJITCode;
75#endif
76 OwnPtr<Yarr::BytecodePattern> m_regExpBytecode;
77};
78
79RegExp::RegExp(JSGlobalData* globalData, const UString& patternString, RegExpFlags flags)
80 : JSCell(*globalData, globalData->regExpStructure.get())
81 , m_state(NotCompiled)
82 , m_patternString(patternString)
83 , m_flags(flags)
9dae56ea
A
84 , m_constructionError(0)
85 , m_numSubpatterns(0)
14957cd0
A
86#if ENABLE(REGEXP_TRACING)
87 , m_rtMatchCallCount(0)
88 , m_rtMatchFoundCount(0)
89#endif
9dae56ea 90{
14957cd0
A
91 Yarr::YarrPattern pattern(m_patternString, ignoreCase(), multiline(), &m_constructionError);
92 if (m_constructionError)
93 m_state = ParseError;
94 else
95 m_numSubpatterns = pattern.m_numSubpatterns;
ba379fdc
A
96}
97
ba379fdc
A
98RegExp::~RegExp()
99{
ba379fdc 100}
ba379fdc 101
14957cd0 102RegExp* RegExp::create(JSGlobalData* globalData, const UString& patternString, RegExpFlags flags)
ba379fdc 103{
14957cd0 104 return globalData->regExpCache()->lookupOrCreate(patternString, flags);
9dae56ea
A
105}
106
14957cd0 107void RegExp::compile(JSGlobalData* globalData)
9dae56ea 108{
14957cd0
A
109 ASSERT(m_state == NotCompiled);
110 m_representation = adoptPtr(new RegExpRepresentation);
111 m_state = Compiling;
112 Yarr::YarrPattern pattern(m_patternString, ignoreCase(), multiline(), &m_constructionError);
113 if (m_constructionError) {
114 ASSERT_NOT_REACHED();
115 m_state = ParseError;
116 return;
117 }
9dae56ea 118
14957cd0
A
119 globalData->regExpCache()->addToStrongCache(this);
120
121 ASSERT(m_numSubpatterns == pattern.m_numSubpatterns);
ba379fdc 122
ba379fdc 123#if ENABLE(YARR_JIT)
14957cd0
A
124 if (!pattern.m_containsBackreferences && globalData->canUseJIT()) {
125 Yarr::jitCompile(pattern, globalData, m_representation->m_regExpJITCode);
126#if ENABLE(YARR_JIT_DEBUG)
127 if (!m_representation->m_regExpJITCode.isFallBack())
128 m_state = JITCode;
129 else
130 m_state = ByteCode;
ba379fdc 131#else
14957cd0
A
132 if (!m_representation->m_regExpJITCode.isFallBack()) {
133 m_state = JITCode;
134 return;
135 }
136#endif
137 }
ba379fdc 138#endif
14957cd0
A
139
140 m_representation->m_regExpBytecode = Yarr::byteCompile(pattern, &globalData->m_regExpAllocator);
141
142 m_state = ByteCode;
9dae56ea
A
143}
144
14957cd0 145int RegExp::match(JSGlobalData& globalData, const UString& s, int startOffset, Vector<int, 32>* ovector)
ba379fdc
A
146{
147 if (startOffset < 0)
148 startOffset = 0;
ba379fdc 149
14957cd0
A
150#if ENABLE(REGEXP_TRACING)
151 m_rtMatchCallCount++;
152#endif
153
154 if (static_cast<unsigned>(startOffset) > s.length() || s.isNull())
ba379fdc
A
155 return -1;
156
14957cd0
A
157 if (m_state != ParseError) {
158 compileIfNecessary(globalData);
159
160 int offsetVectorSize = (m_numSubpatterns + 1) * 2;
ba379fdc
A
161 int* offsetVector;
162 Vector<int, 32> nonReturnedOvector;
163 if (ovector) {
164 ovector->resize(offsetVectorSize);
165 offsetVector = ovector->data();
166 } else {
167 nonReturnedOvector.resize(offsetVectorSize);
168 offsetVector = nonReturnedOvector.data();
169 }
170
171 ASSERT(offsetVector);
14957cd0
A
172 // Initialize offsetVector with the return value (index 0) and the
173 // first subpattern start indicies (even index values) set to -1.
174 // No need to init the subpattern end indicies.
175 for (unsigned j = 0, i = 0; i < m_numSubpatterns + 1; j += 2, i++)
ba379fdc
A
176 offsetVector[j] = -1;
177
14957cd0 178 int result;
ba379fdc 179#if ENABLE(YARR_JIT)
14957cd0
A
180 if (m_state == JITCode) {
181 result = Yarr::execute(m_representation->m_regExpJITCode, s.characters(), startOffset, s.length(), offsetVector);
182#if ENABLE(YARR_JIT_DEBUG)
183 matchCompareWithInterpreter(s, startOffset, offsetVector, result);
ba379fdc 184#endif
14957cd0 185 } else
ba379fdc 186#endif
14957cd0
A
187 result = Yarr::interpret(m_representation->m_regExpBytecode.get(), s.characters(), startOffset, s.length(), offsetVector);
188 ASSERT(result >= -1);
4e4e5a6f 189
14957cd0
A
190#if ENABLE(REGEXP_TRACING)
191 if (result != -1)
192 m_rtMatchFoundCount++;
193#endif
4e4e5a6f 194
ba379fdc
A
195 return result;
196 }
197
198 return -1;
199}
200
14957cd0 201void RegExp::invalidateCode()
ba379fdc 202{
14957cd0
A
203 if (!m_representation || m_state == Compiling)
204 return;
205 m_state = NotCompiled;
206 m_representation.clear();
ba379fdc
A
207}
208
14957cd0
A
209#if ENABLE(YARR_JIT_DEBUG)
210void RegExp::matchCompareWithInterpreter(const UString& s, int startOffset, int* offsetVector, int jitResult)
9dae56ea 211{
14957cd0
A
212 int offsetVectorSize = (m_numSubpatterns + 1) * 2;
213 Vector<int, 32> interpreterOvector;
214 interpreterOvector.resize(offsetVectorSize);
215 int* interpreterOffsetVector = interpreterOvector.data();
216 int interpreterResult = 0;
217 int differences = 0;
218
219 // Initialize interpreterOffsetVector with the return value (index 0) and the
220 // first subpattern start indicies (even index values) set to -1.
221 // No need to init the subpattern end indicies.
222 for (unsigned j = 0, i = 0; i < m_numSubpatterns + 1; j += 2, i++)
223 interpreterOffsetVector[j] = -1;
224
225 interpreterResult = Yarr::interpret(m_representation->m_regExpBytecode.get(), s.characters(), startOffset, s.length(), interpreterOffsetVector);
226
227 if (jitResult != interpreterResult)
228 differences++;
229
230 for (unsigned j = 2, i = 0; i < m_numSubpatterns; j +=2, i++)
231 if ((offsetVector[j] != interpreterOffsetVector[j])
232 || ((offsetVector[j] >= 0) && (offsetVector[j+1] != interpreterOffsetVector[j+1])))
233 differences++;
234
235 if (differences) {
236 fprintf(stderr, "RegExp Discrepency for /%s/\n string input ", pattern().utf8().data());
237 unsigned segmentLen = s.length() - static_cast<unsigned>(startOffset);
238
239 fprintf(stderr, (segmentLen < 150) ? "\"%s\"\n" : "\"%148s...\"\n", s.utf8().data() + startOffset);
240
241 if (jitResult != interpreterResult) {
242 fprintf(stderr, " JIT result = %d, blah interpreted result = %d\n", jitResult, interpreterResult);
243 differences--;
9dae56ea 244 } else {
14957cd0 245 fprintf(stderr, " Correct result = %d\n", jitResult);
9dae56ea
A
246 }
247
14957cd0
A
248 if (differences) {
249 for (unsigned j = 2, i = 0; i < m_numSubpatterns; j +=2, i++) {
250 if (offsetVector[j] != interpreterOffsetVector[j])
251 fprintf(stderr, " JIT offset[%d] = %d, interpreted offset[%d] = %d\n", j, offsetVector[j], j, interpreterOffsetVector[j]);
252 if ((offsetVector[j] >= 0) && (offsetVector[j+1] != interpreterOffsetVector[j+1]))
253 fprintf(stderr, " JIT offset[%d] = %d, interpreted offset[%d] = %d\n", j+1, offsetVector[j+1], j+1, interpreterOffsetVector[j+1]);
254 }
9dae56ea 255 }
9dae56ea 256 }
9dae56ea 257}
14957cd0
A
258#endif
259
260#if ENABLE(REGEXP_TRACING)
261 void RegExp::printTraceData()
262 {
263 char formattedPattern[41];
264 char rawPattern[41];
265
266 strncpy(rawPattern, pattern().utf8().data(), 40);
267 rawPattern[40]= '\0';
9dae56ea 268
14957cd0
A
269 int pattLen = strlen(rawPattern);
270
271 snprintf(formattedPattern, 41, (pattLen <= 38) ? "/%.38s/" : "/%.36s...", rawPattern);
272
273#if ENABLE(YARR_JIT)
274 Yarr::YarrCodeBlock& codeBlock = m_representation->m_regExpJITCode;
275
276 const size_t jitAddrSize = 20;
277 char jitAddr[jitAddrSize];
278 if (m_state == JITCode)
279 snprintf(jitAddr, jitAddrSize, "fallback");
280 else
281 snprintf(jitAddr, jitAddrSize, "0x%014lx", reinterpret_cast<unsigned long int>(codeBlock.getAddr()));
282#else
283 const char* jitAddr = "JIT Off";
ba379fdc
A
284#endif
285
14957cd0
A
286 printf("%-40.40s %16.16s %10d %10d\n", formattedPattern, jitAddr, m_rtMatchCallCount, m_rtMatchFoundCount);
287 }
288#endif
289
9dae56ea 290} // namespace JSC