]> git.saurik.com Git - apple/javascriptcore.git/blob - runtime/RegExp.cpp
9211a90ede39214b4f2ce4cd91d44d071696ac81
[apple/javascriptcore.git] / runtime / RegExp.cpp
1 /*
2 * Copyright (C) 1999-2001, 2004 Harri Porten (porten@kde.org)
3 * Copyright (c) 2007, 2008 Apple Inc. All rights reserved.
4 * Copyright (C) 2009 Torch Mobile, Inc.
5 * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with this library; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 *
21 */
22
23 #include "config.h"
24 #include "RegExp.h"
25
26 #include "Lexer.h"
27 #include "RegExpCache.h"
28 #include "yarr/Yarr.h"
29 #include "yarr/YarrJIT.h"
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <wtf/Assertions.h>
34 #include <wtf/OwnArrayPtr.h>
35
36 namespace JSC {
37
38 const ClassInfo RegExp::s_info = { "RegExp", 0, 0, 0 };
39
40 RegExpFlags regExpFlags(const UString& string)
41 {
42 RegExpFlags flags = NoFlags;
43
44 for (unsigned i = 0; i < string.length(); ++i) {
45 switch (string.characters()[i]) {
46 case 'g':
47 if (flags & FlagGlobal)
48 return InvalidFlags;
49 flags = static_cast<RegExpFlags>(flags | FlagGlobal);
50 break;
51
52 case 'i':
53 if (flags & FlagIgnoreCase)
54 return InvalidFlags;
55 flags = static_cast<RegExpFlags>(flags | FlagIgnoreCase);
56 break;
57
58 case 'm':
59 if (flags & FlagMultiline)
60 return InvalidFlags;
61 flags = static_cast<RegExpFlags>(flags | FlagMultiline);
62 break;
63
64 default:
65 return InvalidFlags;
66 }
67 }
68
69 return flags;
70 }
71
72 struct RegExpRepresentation {
73 #if ENABLE(YARR_JIT)
74 Yarr::YarrCodeBlock m_regExpJITCode;
75 #endif
76 OwnPtr<Yarr::BytecodePattern> m_regExpBytecode;
77 };
78
79 RegExp::RegExp(JSGlobalData* globalData, const UString& patternString, RegExpFlags flags)
80 : JSCell(*globalData, globalData->regExpStructure.get())
81 , m_state(NotCompiled)
82 , m_patternString(patternString)
83 , m_flags(flags)
84 , m_constructionError(0)
85 , m_numSubpatterns(0)
86 #if ENABLE(REGEXP_TRACING)
87 , m_rtMatchCallCount(0)
88 , m_rtMatchFoundCount(0)
89 #endif
90 {
91 Yarr::YarrPattern pattern(m_patternString, ignoreCase(), multiline(), &m_constructionError);
92 if (m_constructionError)
93 m_state = ParseError;
94 else
95 m_numSubpatterns = pattern.m_numSubpatterns;
96 }
97
98 RegExp::~RegExp()
99 {
100 }
101
102 RegExp* RegExp::create(JSGlobalData* globalData, const UString& patternString, RegExpFlags flags)
103 {
104 return globalData->regExpCache()->lookupOrCreate(patternString, flags);
105 }
106
107 void RegExp::compile(JSGlobalData* globalData)
108 {
109 ASSERT(m_state == NotCompiled);
110 m_representation = adoptPtr(new RegExpRepresentation);
111 m_state = Compiling;
112 Yarr::YarrPattern pattern(m_patternString, ignoreCase(), multiline(), &m_constructionError);
113 if (m_constructionError) {
114 ASSERT_NOT_REACHED();
115 m_state = ParseError;
116 return;
117 }
118
119 globalData->regExpCache()->addToStrongCache(this);
120
121 ASSERT(m_numSubpatterns == pattern.m_numSubpatterns);
122
123 #if ENABLE(YARR_JIT)
124 if (!pattern.m_containsBackreferences && globalData->canUseJIT()) {
125 Yarr::jitCompile(pattern, globalData, m_representation->m_regExpJITCode);
126 #if ENABLE(YARR_JIT_DEBUG)
127 if (!m_representation->m_regExpJITCode.isFallBack())
128 m_state = JITCode;
129 else
130 m_state = ByteCode;
131 #else
132 if (!m_representation->m_regExpJITCode.isFallBack()) {
133 m_state = JITCode;
134 return;
135 }
136 #endif
137 }
138 #endif
139
140 m_representation->m_regExpBytecode = Yarr::byteCompile(pattern, &globalData->m_regExpAllocator);
141
142 m_state = ByteCode;
143 }
144
145 int RegExp::match(JSGlobalData& globalData, const UString& s, int startOffset, Vector<int, 32>* ovector)
146 {
147 if (startOffset < 0)
148 startOffset = 0;
149
150 #if ENABLE(REGEXP_TRACING)
151 m_rtMatchCallCount++;
152 #endif
153
154 if (static_cast<unsigned>(startOffset) > s.length() || s.isNull())
155 return -1;
156
157 if (m_state != ParseError) {
158 compileIfNecessary(globalData);
159
160 int offsetVectorSize = (m_numSubpatterns + 1) * 2;
161 int* offsetVector;
162 Vector<int, 32> nonReturnedOvector;
163 if (ovector) {
164 ovector->resize(offsetVectorSize);
165 offsetVector = ovector->data();
166 } else {
167 nonReturnedOvector.resize(offsetVectorSize);
168 offsetVector = nonReturnedOvector.data();
169 }
170
171 ASSERT(offsetVector);
172 // Initialize offsetVector with the return value (index 0) and the
173 // first subpattern start indicies (even index values) set to -1.
174 // No need to init the subpattern end indicies.
175 for (unsigned j = 0, i = 0; i < m_numSubpatterns + 1; j += 2, i++)
176 offsetVector[j] = -1;
177
178 int result;
179 #if ENABLE(YARR_JIT)
180 if (m_state == JITCode) {
181 result = Yarr::execute(m_representation->m_regExpJITCode, s.characters(), startOffset, s.length(), offsetVector);
182 #if ENABLE(YARR_JIT_DEBUG)
183 matchCompareWithInterpreter(s, startOffset, offsetVector, result);
184 #endif
185 } else
186 #endif
187 result = Yarr::interpret(m_representation->m_regExpBytecode.get(), s.characters(), startOffset, s.length(), offsetVector);
188 ASSERT(result >= -1);
189
190 #if ENABLE(REGEXP_TRACING)
191 if (result != -1)
192 m_rtMatchFoundCount++;
193 #endif
194
195 return result;
196 }
197
198 return -1;
199 }
200
201 void RegExp::invalidateCode()
202 {
203 if (!m_representation || m_state == Compiling)
204 return;
205 m_state = NotCompiled;
206 m_representation.clear();
207 }
208
209 #if ENABLE(YARR_JIT_DEBUG)
210 void RegExp::matchCompareWithInterpreter(const UString& s, int startOffset, int* offsetVector, int jitResult)
211 {
212 int offsetVectorSize = (m_numSubpatterns + 1) * 2;
213 Vector<int, 32> interpreterOvector;
214 interpreterOvector.resize(offsetVectorSize);
215 int* interpreterOffsetVector = interpreterOvector.data();
216 int interpreterResult = 0;
217 int differences = 0;
218
219 // Initialize interpreterOffsetVector with the return value (index 0) and the
220 // first subpattern start indicies (even index values) set to -1.
221 // No need to init the subpattern end indicies.
222 for (unsigned j = 0, i = 0; i < m_numSubpatterns + 1; j += 2, i++)
223 interpreterOffsetVector[j] = -1;
224
225 interpreterResult = Yarr::interpret(m_representation->m_regExpBytecode.get(), s.characters(), startOffset, s.length(), interpreterOffsetVector);
226
227 if (jitResult != interpreterResult)
228 differences++;
229
230 for (unsigned j = 2, i = 0; i < m_numSubpatterns; j +=2, i++)
231 if ((offsetVector[j] != interpreterOffsetVector[j])
232 || ((offsetVector[j] >= 0) && (offsetVector[j+1] != interpreterOffsetVector[j+1])))
233 differences++;
234
235 if (differences) {
236 fprintf(stderr, "RegExp Discrepency for /%s/\n string input ", pattern().utf8().data());
237 unsigned segmentLen = s.length() - static_cast<unsigned>(startOffset);
238
239 fprintf(stderr, (segmentLen < 150) ? "\"%s\"\n" : "\"%148s...\"\n", s.utf8().data() + startOffset);
240
241 if (jitResult != interpreterResult) {
242 fprintf(stderr, " JIT result = %d, blah interpreted result = %d\n", jitResult, interpreterResult);
243 differences--;
244 } else {
245 fprintf(stderr, " Correct result = %d\n", jitResult);
246 }
247
248 if (differences) {
249 for (unsigned j = 2, i = 0; i < m_numSubpatterns; j +=2, i++) {
250 if (offsetVector[j] != interpreterOffsetVector[j])
251 fprintf(stderr, " JIT offset[%d] = %d, interpreted offset[%d] = %d\n", j, offsetVector[j], j, interpreterOffsetVector[j]);
252 if ((offsetVector[j] >= 0) && (offsetVector[j+1] != interpreterOffsetVector[j+1]))
253 fprintf(stderr, " JIT offset[%d] = %d, interpreted offset[%d] = %d\n", j+1, offsetVector[j+1], j+1, interpreterOffsetVector[j+1]);
254 }
255 }
256 }
257 }
258 #endif
259
260 #if ENABLE(REGEXP_TRACING)
261 void RegExp::printTraceData()
262 {
263 char formattedPattern[41];
264 char rawPattern[41];
265
266 strncpy(rawPattern, pattern().utf8().data(), 40);
267 rawPattern[40]= '\0';
268
269 int pattLen = strlen(rawPattern);
270
271 snprintf(formattedPattern, 41, (pattLen <= 38) ? "/%.38s/" : "/%.36s...", rawPattern);
272
273 #if ENABLE(YARR_JIT)
274 Yarr::YarrCodeBlock& codeBlock = m_representation->m_regExpJITCode;
275
276 const size_t jitAddrSize = 20;
277 char jitAddr[jitAddrSize];
278 if (m_state == JITCode)
279 snprintf(jitAddr, jitAddrSize, "fallback");
280 else
281 snprintf(jitAddr, jitAddrSize, "0x%014lx", reinterpret_cast<unsigned long int>(codeBlock.getAddr()));
282 #else
283 const char* jitAddr = "JIT Off";
284 #endif
285
286 printf("%-40.40s %16.16s %10d %10d\n", formattedPattern, jitAddr, m_rtMatchCallCount, m_rtMatchFoundCount);
287 }
288 #endif
289
290 } // namespace JSC