]>
Commit | Line | Data |
---|---|---|
9dae56ea A |
1 | /* |
2 | * Copyright (C) 1999-2001, 2004 Harri Porten (porten@kde.org) | |
3 | * Copyright (c) 2007, 2008 Apple Inc. All rights reserved. | |
ba379fdc | 4 | * Copyright (C) 2009 Torch Mobile, Inc. |
14957cd0 | 5 | * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged |
9dae56ea A |
6 | * |
7 | * This library is free software; you can redistribute it and/or | |
8 | * modify it under the terms of the GNU Lesser General Public | |
9 | * License as published by the Free Software Foundation; either | |
10 | * version 2 of the License, or (at your option) any later version. | |
11 | * | |
12 | * This library is distributed in the hope that it will be useful, | |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | * Lesser General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU Lesser General Public | |
18 | * License along with this library; if not, write to the Free Software | |
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
20 | * | |
21 | */ | |
22 | ||
23 | #include "config.h" | |
24 | #include "RegExp.h" | |
14957cd0 | 25 | |
9dae56ea | 26 | #include "Lexer.h" |
81345200 | 27 | #include "JSCInlines.h" |
14957cd0 | 28 | #include "RegExpCache.h" |
93a37866 A |
29 | #include "Yarr.h" |
30 | #include "YarrJIT.h" | |
9dae56ea | 31 | #include <wtf/Assertions.h> |
9dae56ea | 32 | |
6fe7ccc8 A |
33 | #define REGEXP_FUNC_TEST_DATA_GEN 0 |
34 | ||
81345200 A |
35 | #if REGEXP_FUNC_TEST_DATA_GEN |
36 | #include <stdio.h> | |
37 | #include <stdlib.h> | |
38 | #include <string.h> | |
39 | #endif | |
40 | ||
9dae56ea A |
41 | namespace JSC { |
42 | ||
6fe7ccc8 | 43 | const ClassInfo RegExp::s_info = { "RegExp", 0, 0, 0, CREATE_METHOD_TABLE(RegExp) }; |
14957cd0 | 44 | |
93a37866 | 45 | RegExpFlags regExpFlags(const String& string) |
9dae56ea | 46 | { |
14957cd0 A |
47 | RegExpFlags flags = NoFlags; |
48 | ||
49 | for (unsigned i = 0; i < string.length(); ++i) { | |
6fe7ccc8 | 50 | switch (string[i]) { |
14957cd0 A |
51 | case 'g': |
52 | if (flags & FlagGlobal) | |
53 | return InvalidFlags; | |
54 | flags = static_cast<RegExpFlags>(flags | FlagGlobal); | |
55 | break; | |
56 | ||
57 | case 'i': | |
58 | if (flags & FlagIgnoreCase) | |
59 | return InvalidFlags; | |
60 | flags = static_cast<RegExpFlags>(flags | FlagIgnoreCase); | |
61 | break; | |
62 | ||
63 | case 'm': | |
64 | if (flags & FlagMultiline) | |
65 | return InvalidFlags; | |
66 | flags = static_cast<RegExpFlags>(flags | FlagMultiline); | |
67 | break; | |
68 | ||
69 | default: | |
70 | return InvalidFlags; | |
71 | } | |
72 | } | |
9dae56ea | 73 | |
14957cd0 A |
74 | return flags; |
75 | } | |
6fe7ccc8 A |
76 | |
77 | #if REGEXP_FUNC_TEST_DATA_GEN | |
78 | class RegExpFunctionalTestCollector { | |
79 | // This class is not thread safe. | |
80 | protected: | |
81 | static const char* const s_fileName; | |
82 | ||
83 | public: | |
84 | static RegExpFunctionalTestCollector* get(); | |
85 | ||
86 | ~RegExpFunctionalTestCollector(); | |
87 | ||
93a37866 | 88 | void outputOneTest(RegExp*, String, int, int*, int); |
6fe7ccc8 A |
89 | void clearRegExp(RegExp* regExp) |
90 | { | |
91 | if (regExp == m_lastRegExp) | |
92 | m_lastRegExp = 0; | |
93 | } | |
94 | ||
95 | private: | |
96 | RegExpFunctionalTestCollector(); | |
97 | ||
93a37866 | 98 | void outputEscapedString(const String&, bool escapeSlash = false); |
6fe7ccc8 A |
99 | |
100 | static RegExpFunctionalTestCollector* s_instance; | |
101 | FILE* m_file; | |
102 | RegExp* m_lastRegExp; | |
14957cd0 A |
103 | }; |
104 | ||
6fe7ccc8 A |
105 | const char* const RegExpFunctionalTestCollector::s_fileName = "/tmp/RegExpTestsData"; |
106 | RegExpFunctionalTestCollector* RegExpFunctionalTestCollector::s_instance = 0; | |
107 | ||
108 | RegExpFunctionalTestCollector* RegExpFunctionalTestCollector::get() | |
109 | { | |
110 | if (!s_instance) | |
111 | s_instance = new RegExpFunctionalTestCollector(); | |
112 | ||
113 | return s_instance; | |
114 | } | |
115 | ||
93a37866 | 116 | void RegExpFunctionalTestCollector::outputOneTest(RegExp* regExp, String s, int startOffset, int* ovector, int result) |
6fe7ccc8 A |
117 | { |
118 | if ((!m_lastRegExp) || (m_lastRegExp != regExp)) { | |
119 | m_lastRegExp = regExp; | |
120 | fputc('/', m_file); | |
93a37866 | 121 | outputEscapedString(regExp->pattern(), true); |
6fe7ccc8 A |
122 | fputc('/', m_file); |
123 | if (regExp->global()) | |
124 | fputc('g', m_file); | |
125 | if (regExp->ignoreCase()) | |
126 | fputc('i', m_file); | |
127 | if (regExp->multiline()) | |
128 | fputc('m', m_file); | |
129 | fprintf(m_file, "\n"); | |
130 | } | |
131 | ||
132 | fprintf(m_file, " \""); | |
93a37866 | 133 | outputEscapedString(s); |
6fe7ccc8 A |
134 | fprintf(m_file, "\", %d, %d, (", startOffset, result); |
135 | for (unsigned i = 0; i <= regExp->numSubpatterns(); i++) { | |
136 | int subpatternBegin = ovector[i * 2]; | |
137 | int subpatternEnd = ovector[i * 2 + 1]; | |
138 | if (subpatternBegin == -1) | |
139 | subpatternEnd = -1; | |
140 | fprintf(m_file, "%d, %d", subpatternBegin, subpatternEnd); | |
141 | if (i < regExp->numSubpatterns()) | |
142 | fputs(", ", m_file); | |
143 | } | |
144 | ||
145 | fprintf(m_file, ")\n"); | |
146 | fflush(m_file); | |
147 | } | |
148 | ||
149 | RegExpFunctionalTestCollector::RegExpFunctionalTestCollector() | |
150 | { | |
151 | m_file = fopen(s_fileName, "r+"); | |
152 | if (!m_file) | |
153 | m_file = fopen(s_fileName, "w+"); | |
154 | ||
155 | fseek(m_file, 0L, SEEK_END); | |
156 | } | |
157 | ||
158 | RegExpFunctionalTestCollector::~RegExpFunctionalTestCollector() | |
159 | { | |
160 | fclose(m_file); | |
161 | s_instance = 0; | |
162 | } | |
163 | ||
93a37866 | 164 | void RegExpFunctionalTestCollector::outputEscapedString(const String& s, bool escapeSlash) |
6fe7ccc8 A |
165 | { |
166 | int len = s.length(); | |
167 | ||
168 | for (int i = 0; i < len; ++i) { | |
169 | UChar c = s[i]; | |
170 | ||
171 | switch (c) { | |
172 | case '\0': | |
173 | fputs("\\0", m_file); | |
174 | break; | |
175 | case '\a': | |
176 | fputs("\\a", m_file); | |
177 | break; | |
178 | case '\b': | |
179 | fputs("\\b", m_file); | |
180 | break; | |
181 | case '\f': | |
182 | fputs("\\f", m_file); | |
183 | break; | |
184 | case '\n': | |
185 | fputs("\\n", m_file); | |
186 | break; | |
187 | case '\r': | |
188 | fputs("\\r", m_file); | |
189 | break; | |
190 | case '\t': | |
191 | fputs("\\t", m_file); | |
192 | break; | |
193 | case '\v': | |
194 | fputs("\\v", m_file); | |
195 | break; | |
196 | case '/': | |
197 | if (escapeSlash) | |
198 | fputs("\\/", m_file); | |
199 | else | |
200 | fputs("/", m_file); | |
201 | break; | |
202 | case '\"': | |
203 | fputs("\\\"", m_file); | |
204 | break; | |
205 | case '\\': | |
206 | fputs("\\\\", m_file); | |
207 | break; | |
208 | case '\?': | |
209 | fputs("\?", m_file); | |
210 | break; | |
211 | default: | |
212 | if (c > 0x7f) | |
213 | fprintf(m_file, "\\u%04x", c); | |
214 | else | |
215 | fputc(c, m_file); | |
216 | break; | |
217 | } | |
218 | } | |
219 | } | |
220 | #endif | |
221 | ||
93a37866 A |
222 | RegExp::RegExp(VM& vm, const String& patternString, RegExpFlags flags) |
223 | : JSCell(vm, vm.regExpStructure.get()) | |
14957cd0 A |
224 | , m_state(NotCompiled) |
225 | , m_patternString(patternString) | |
226 | , m_flags(flags) | |
9dae56ea A |
227 | , m_constructionError(0) |
228 | , m_numSubpatterns(0) | |
14957cd0 | 229 | #if ENABLE(REGEXP_TRACING) |
81345200 A |
230 | , m_rtMatchOnlyTotalSubjectStringLen(0.0) |
231 | , m_rtMatchTotalSubjectStringLen(0.0) | |
232 | , m_rtMatchOnlyCallCount(0) | |
233 | , m_rtMatchOnlyFoundCount(0) | |
14957cd0 A |
234 | , m_rtMatchCallCount(0) |
235 | , m_rtMatchFoundCount(0) | |
236 | #endif | |
9dae56ea | 237 | { |
6fe7ccc8 A |
238 | } |
239 | ||
93a37866 | 240 | void RegExp::finishCreation(VM& vm) |
6fe7ccc8 | 241 | { |
93a37866 | 242 | Base::finishCreation(vm); |
14957cd0 A |
243 | Yarr::YarrPattern pattern(m_patternString, ignoreCase(), multiline(), &m_constructionError); |
244 | if (m_constructionError) | |
245 | m_state = ParseError; | |
246 | else | |
247 | m_numSubpatterns = pattern.m_numSubpatterns; | |
ba379fdc A |
248 | } |
249 | ||
6fe7ccc8 | 250 | void RegExp::destroy(JSCell* cell) |
ba379fdc | 251 | { |
93a37866 | 252 | RegExp* thisObject = static_cast<RegExp*>(cell); |
6fe7ccc8 A |
253 | #if REGEXP_FUNC_TEST_DATA_GEN |
254 | RegExpFunctionalTestCollector::get()->clearRegExp(this); | |
255 | #endif | |
256 | thisObject->RegExp::~RegExp(); | |
ba379fdc | 257 | } |
ba379fdc | 258 | |
93a37866 | 259 | RegExp* RegExp::createWithoutCaching(VM& vm, const String& patternString, RegExpFlags flags) |
ba379fdc | 260 | { |
93a37866 A |
261 | RegExp* regExp = new (NotNull, allocateCell<RegExp>(vm.heap)) RegExp(vm, patternString, flags); |
262 | regExp->finishCreation(vm); | |
6fe7ccc8 | 263 | return regExp; |
9dae56ea A |
264 | } |
265 | ||
93a37866 | 266 | RegExp* RegExp::create(VM& vm, const String& patternString, RegExpFlags flags) |
6fe7ccc8 | 267 | { |
93a37866 | 268 | return vm.regExpCache()->lookupOrCreate(patternString, flags); |
6fe7ccc8 A |
269 | } |
270 | ||
93a37866 | 271 | void RegExp::compile(VM* vm, Yarr::YarrCharSize charSize) |
9dae56ea | 272 | { |
14957cd0 A |
273 | Yarr::YarrPattern pattern(m_patternString, ignoreCase(), multiline(), &m_constructionError); |
274 | if (m_constructionError) { | |
93a37866 | 275 | RELEASE_ASSERT_NOT_REACHED(); |
14957cd0 A |
276 | m_state = ParseError; |
277 | return; | |
278 | } | |
14957cd0 | 279 | ASSERT(m_numSubpatterns == pattern.m_numSubpatterns); |
ba379fdc | 280 | |
6fe7ccc8 A |
281 | if (!hasCode()) { |
282 | ASSERT(m_state == NotCompiled); | |
93a37866 | 283 | vm->regExpCache()->addToStrongCache(this); |
6fe7ccc8 A |
284 | m_state = ByteCode; |
285 | } | |
286 | ||
ba379fdc | 287 | #if ENABLE(YARR_JIT) |
81345200 | 288 | if (!pattern.m_containsBackreferences && !pattern.containsUnsignedLengthPattern() && vm->canUseRegExpJIT()) { |
93a37866 | 289 | Yarr::jitCompile(pattern, charSize, vm, m_regExpJITCode); |
14957cd0 | 290 | #if ENABLE(YARR_JIT_DEBUG) |
6fe7ccc8 | 291 | if (!m_regExpJITCode.isFallBack()) |
14957cd0 A |
292 | m_state = JITCode; |
293 | else | |
294 | m_state = ByteCode; | |
ba379fdc | 295 | #else |
6fe7ccc8 | 296 | if (!m_regExpJITCode.isFallBack()) { |
14957cd0 A |
297 | m_state = JITCode; |
298 | return; | |
299 | } | |
300 | #endif | |
301 | } | |
6fe7ccc8 A |
302 | #else |
303 | UNUSED_PARAM(charSize); | |
ba379fdc | 304 | #endif |
14957cd0 | 305 | |
93a37866 | 306 | m_regExpBytecode = Yarr::byteCompile(pattern, &vm->m_regExpAllocator); |
9dae56ea A |
307 | } |
308 | ||
93a37866 | 309 | void RegExp::compileIfNecessary(VM& vm, Yarr::YarrCharSize charSize) |
ba379fdc | 310 | { |
6fe7ccc8 A |
311 | if (hasCode()) { |
312 | #if ENABLE(YARR_JIT) | |
313 | if (m_state != JITCode) | |
314 | return; | |
315 | if ((charSize == Yarr::Char8) && (m_regExpJITCode.has8BitCode())) | |
316 | return; | |
317 | if ((charSize == Yarr::Char16) && (m_regExpJITCode.has16BitCode())) | |
318 | return; | |
319 | #else | |
320 | return; | |
321 | #endif | |
322 | } | |
ba379fdc | 323 | |
93a37866 | 324 | compile(&vm, charSize); |
6fe7ccc8 A |
325 | } |
326 | ||
93a37866 | 327 | int RegExp::match(VM& vm, const String& s, unsigned startOffset, Vector<int, 32>& ovector) |
6fe7ccc8 | 328 | { |
14957cd0 A |
329 | #if ENABLE(REGEXP_TRACING) |
330 | m_rtMatchCallCount++; | |
81345200 | 331 | m_rtMatchTotalSubjectStringLen += (double)(s.length() - startOffset); |
14957cd0 A |
332 | #endif |
333 | ||
6fe7ccc8 | 334 | ASSERT(m_state != ParseError); |
93a37866 | 335 | compileIfNecessary(vm, s.is8Bit() ? Yarr::Char8 : Yarr::Char16); |
ba379fdc | 336 | |
6fe7ccc8 A |
337 | int offsetVectorSize = (m_numSubpatterns + 1) * 2; |
338 | ovector.resize(offsetVectorSize); | |
339 | int* offsetVector = ovector.data(); | |
14957cd0 | 340 | |
6fe7ccc8 A |
341 | int result; |
342 | #if ENABLE(YARR_JIT) | |
343 | if (m_state == JITCode) { | |
344 | if (s.is8Bit()) | |
345 | result = m_regExpJITCode.execute(s.characters8(), startOffset, s.length(), offsetVector).start; | |
346 | else | |
347 | result = m_regExpJITCode.execute(s.characters16(), startOffset, s.length(), offsetVector).start; | |
348 | #if ENABLE(YARR_JIT_DEBUG) | |
349 | matchCompareWithInterpreter(s, startOffset, offsetVector, result); | |
350 | #endif | |
351 | } else | |
352 | #endif | |
353 | result = Yarr::interpret(m_regExpBytecode.get(), s, startOffset, reinterpret_cast<unsigned*>(offsetVector)); | |
354 | ||
355 | // FIXME: The YARR engine should handle unsigned or size_t length matches. | |
356 | // The YARR Interpreter is "unsigned" clean, while the YARR JIT hasn't been addressed. | |
357 | // The offset vector handling needs to change as well. | |
358 | // Right now we convert a match where the offsets overflowed into match failure. | |
359 | // There are two places in WebCore that call the interpreter directly that need to | |
81345200 A |
360 | // have their offsets changed to int as well. They are yarr/RegularExpression.cpp |
361 | // and inspector/ContentSearchUtilities.cpp | |
6fe7ccc8 A |
362 | if (s.length() > INT_MAX) { |
363 | bool overflowed = false; | |
364 | ||
365 | if (result < -1) | |
366 | overflowed = true; | |
367 | ||
368 | for (unsigned i = 0; i <= m_numSubpatterns; i++) { | |
369 | if ((offsetVector[i*2] < -1) || ((offsetVector[i*2] >= 0) && (offsetVector[i*2+1] < -1))) { | |
370 | overflowed = true; | |
371 | offsetVector[i*2] = -1; | |
372 | offsetVector[i*2+1] = -1; | |
373 | } | |
ba379fdc A |
374 | } |
375 | ||
6fe7ccc8 A |
376 | if (overflowed) |
377 | result = -1; | |
378 | } | |
379 | ||
380 | ASSERT(result >= -1); | |
381 | ||
382 | #if REGEXP_FUNC_TEST_DATA_GEN | |
383 | RegExpFunctionalTestCollector::get()->outputOneTest(this, s, startOffset, offsetVector, result); | |
384 | #endif | |
385 | ||
386 | #if ENABLE(REGEXP_TRACING) | |
387 | if (result != -1) | |
388 | m_rtMatchFoundCount++; | |
389 | #endif | |
390 | ||
391 | return result; | |
392 | } | |
393 | ||
93a37866 | 394 | void RegExp::compileMatchOnly(VM* vm, Yarr::YarrCharSize charSize) |
6fe7ccc8 A |
395 | { |
396 | Yarr::YarrPattern pattern(m_patternString, ignoreCase(), multiline(), &m_constructionError); | |
397 | if (m_constructionError) { | |
93a37866 | 398 | RELEASE_ASSERT_NOT_REACHED(); |
6fe7ccc8 A |
399 | m_state = ParseError; |
400 | return; | |
401 | } | |
402 | ASSERT(m_numSubpatterns == pattern.m_numSubpatterns); | |
403 | ||
404 | if (!hasCode()) { | |
405 | ASSERT(m_state == NotCompiled); | |
93a37866 | 406 | vm->regExpCache()->addToStrongCache(this); |
6fe7ccc8 A |
407 | m_state = ByteCode; |
408 | } | |
ba379fdc | 409 | |
ba379fdc | 410 | #if ENABLE(YARR_JIT) |
81345200 | 411 | if (!pattern.m_containsBackreferences && !pattern.containsUnsignedLengthPattern() && vm->canUseRegExpJIT()) { |
93a37866 | 412 | Yarr::jitCompile(pattern, charSize, vm, m_regExpJITCode, Yarr::MatchOnly); |
14957cd0 | 413 | #if ENABLE(YARR_JIT_DEBUG) |
6fe7ccc8 A |
414 | if (!m_regExpJITCode.isFallBack()) |
415 | m_state = JITCode; | |
416 | else | |
417 | m_state = ByteCode; | |
418 | #else | |
419 | if (!m_regExpJITCode.isFallBack()) { | |
420 | m_state = JITCode; | |
421 | return; | |
422 | } | |
ba379fdc | 423 | #endif |
6fe7ccc8 A |
424 | } |
425 | #else | |
426 | UNUSED_PARAM(charSize); | |
ba379fdc | 427 | #endif |
4e4e5a6f | 428 | |
93a37866 | 429 | m_regExpBytecode = Yarr::byteCompile(pattern, &vm->m_regExpAllocator); |
6fe7ccc8 A |
430 | } |
431 | ||
93a37866 | 432 | void RegExp::compileIfNecessaryMatchOnly(VM& vm, Yarr::YarrCharSize charSize) |
6fe7ccc8 A |
433 | { |
434 | if (hasCode()) { | |
435 | #if ENABLE(YARR_JIT) | |
436 | if (m_state != JITCode) | |
437 | return; | |
438 | if ((charSize == Yarr::Char8) && (m_regExpJITCode.has8BitCodeMatchOnly())) | |
439 | return; | |
440 | if ((charSize == Yarr::Char16) && (m_regExpJITCode.has16BitCodeMatchOnly())) | |
441 | return; | |
442 | #else | |
443 | return; | |
444 | #endif | |
445 | } | |
446 | ||
93a37866 | 447 | compileMatchOnly(&vm, charSize); |
6fe7ccc8 A |
448 | } |
449 | ||
93a37866 | 450 | MatchResult RegExp::match(VM& vm, const String& s, unsigned startOffset) |
6fe7ccc8 | 451 | { |
14957cd0 | 452 | #if ENABLE(REGEXP_TRACING) |
81345200 A |
453 | m_rtMatchOnlyCallCount++; |
454 | m_rtMatchOnlyTotalSubjectStringLen += (double)(s.length() - startOffset); | |
14957cd0 | 455 | #endif |
4e4e5a6f | 456 | |
6fe7ccc8 | 457 | ASSERT(m_state != ParseError); |
93a37866 | 458 | compileIfNecessaryMatchOnly(vm, s.is8Bit() ? Yarr::Char8 : Yarr::Char16); |
6fe7ccc8 A |
459 | |
460 | #if ENABLE(YARR_JIT) | |
461 | if (m_state == JITCode) { | |
462 | MatchResult result = s.is8Bit() ? | |
463 | m_regExpJITCode.execute(s.characters8(), startOffset, s.length()) : | |
464 | m_regExpJITCode.execute(s.characters16(), startOffset, s.length()); | |
465 | #if ENABLE(REGEXP_TRACING) | |
466 | if (!result) | |
81345200 | 467 | m_rtMatchOnlyFoundCount++; |
6fe7ccc8 | 468 | #endif |
ba379fdc A |
469 | return result; |
470 | } | |
6fe7ccc8 A |
471 | #endif |
472 | ||
473 | int offsetVectorSize = (m_numSubpatterns + 1) * 2; | |
474 | int* offsetVector; | |
475 | Vector<int, 32> nonReturnedOvector; | |
476 | nonReturnedOvector.resize(offsetVectorSize); | |
477 | offsetVector = nonReturnedOvector.data(); | |
478 | int r = Yarr::interpret(m_regExpBytecode.get(), s, startOffset, reinterpret_cast<unsigned*>(offsetVector)); | |
479 | #if REGEXP_FUNC_TEST_DATA_GEN | |
480 | RegExpFunctionalTestCollector::get()->outputOneTest(this, s, startOffset, offsetVector, result); | |
481 | #endif | |
482 | ||
483 | if (r >= 0) { | |
484 | #if ENABLE(REGEXP_TRACING) | |
81345200 | 485 | m_rtMatchOnlyFoundCount++; |
6fe7ccc8 A |
486 | #endif |
487 | return MatchResult(r, reinterpret_cast<unsigned*>(offsetVector)[1]); | |
488 | } | |
ba379fdc | 489 | |
6fe7ccc8 | 490 | return MatchResult::failed(); |
ba379fdc A |
491 | } |
492 | ||
14957cd0 | 493 | void RegExp::invalidateCode() |
ba379fdc | 494 | { |
6fe7ccc8 | 495 | if (!hasCode()) |
14957cd0 A |
496 | return; |
497 | m_state = NotCompiled; | |
6fe7ccc8 A |
498 | #if ENABLE(YARR_JIT) |
499 | m_regExpJITCode.clear(); | |
500 | #endif | |
501 | m_regExpBytecode.clear(); | |
ba379fdc A |
502 | } |
503 | ||
14957cd0 | 504 | #if ENABLE(YARR_JIT_DEBUG) |
93a37866 | 505 | void RegExp::matchCompareWithInterpreter(const String& s, int startOffset, int* offsetVector, int jitResult) |
9dae56ea | 506 | { |
14957cd0 A |
507 | int offsetVectorSize = (m_numSubpatterns + 1) * 2; |
508 | Vector<int, 32> interpreterOvector; | |
509 | interpreterOvector.resize(offsetVectorSize); | |
510 | int* interpreterOffsetVector = interpreterOvector.data(); | |
511 | int interpreterResult = 0; | |
512 | int differences = 0; | |
513 | ||
514 | // Initialize interpreterOffsetVector with the return value (index 0) and the | |
515 | // first subpattern start indicies (even index values) set to -1. | |
516 | // No need to init the subpattern end indicies. | |
517 | for (unsigned j = 0, i = 0; i < m_numSubpatterns + 1; j += 2, i++) | |
518 | interpreterOffsetVector[j] = -1; | |
519 | ||
6fe7ccc8 | 520 | interpreterResult = Yarr::interpret(m_regExpBytecode.get(), s, startOffset, interpreterOffsetVector); |
14957cd0 A |
521 | |
522 | if (jitResult != interpreterResult) | |
523 | differences++; | |
524 | ||
525 | for (unsigned j = 2, i = 0; i < m_numSubpatterns; j +=2, i++) | |
526 | if ((offsetVector[j] != interpreterOffsetVector[j]) | |
527 | || ((offsetVector[j] >= 0) && (offsetVector[j+1] != interpreterOffsetVector[j+1]))) | |
528 | differences++; | |
529 | ||
530 | if (differences) { | |
93a37866 | 531 | dataLogF("RegExp Discrepency for /%s/\n string input ", pattern().utf8().data()); |
14957cd0 A |
532 | unsigned segmentLen = s.length() - static_cast<unsigned>(startOffset); |
533 | ||
93a37866 | 534 | dataLogF((segmentLen < 150) ? "\"%s\"\n" : "\"%148s...\"\n", s.utf8().data() + startOffset); |
14957cd0 A |
535 | |
536 | if (jitResult != interpreterResult) { | |
93a37866 | 537 | dataLogF(" JIT result = %d, blah interpreted result = %d\n", jitResult, interpreterResult); |
14957cd0 | 538 | differences--; |
9dae56ea | 539 | } else { |
93a37866 | 540 | dataLogF(" Correct result = %d\n", jitResult); |
9dae56ea A |
541 | } |
542 | ||
14957cd0 A |
543 | if (differences) { |
544 | for (unsigned j = 2, i = 0; i < m_numSubpatterns; j +=2, i++) { | |
545 | if (offsetVector[j] != interpreterOffsetVector[j]) | |
93a37866 | 546 | dataLogF(" JIT offset[%d] = %d, interpreted offset[%d] = %d\n", j, offsetVector[j], j, interpreterOffsetVector[j]); |
14957cd0 | 547 | if ((offsetVector[j] >= 0) && (offsetVector[j+1] != interpreterOffsetVector[j+1])) |
93a37866 | 548 | dataLogF(" JIT offset[%d] = %d, interpreted offset[%d] = %d\n", j+1, offsetVector[j+1], j+1, interpreterOffsetVector[j+1]); |
14957cd0 | 549 | } |
9dae56ea | 550 | } |
9dae56ea | 551 | } |
9dae56ea | 552 | } |
14957cd0 A |
553 | #endif |
554 | ||
555 | #if ENABLE(REGEXP_TRACING) | |
556 | void RegExp::printTraceData() | |
557 | { | |
558 | char formattedPattern[41]; | |
559 | char rawPattern[41]; | |
560 | ||
561 | strncpy(rawPattern, pattern().utf8().data(), 40); | |
562 | rawPattern[40]= '\0'; | |
9dae56ea | 563 | |
14957cd0 A |
564 | int pattLen = strlen(rawPattern); |
565 | ||
566 | snprintf(formattedPattern, 41, (pattLen <= 38) ? "/%.38s/" : "/%.36s...", rawPattern); | |
567 | ||
568 | #if ENABLE(YARR_JIT) | |
6fe7ccc8 | 569 | Yarr::YarrCodeBlock& codeBlock = m_regExpJITCode; |
14957cd0 A |
570 | |
571 | const size_t jitAddrSize = 20; | |
81345200 A |
572 | char jit8BitMatchOnlyAddr[jitAddrSize]; |
573 | char jit16BitMatchOnlyAddr[jitAddrSize]; | |
574 | char jit8BitMatchAddr[jitAddrSize]; | |
575 | char jit16BitMatchAddr[jitAddrSize]; | |
576 | if (m_state == ByteCode) { | |
577 | snprintf(jit8BitMatchOnlyAddr, jitAddrSize, "fallback "); | |
578 | snprintf(jit16BitMatchOnlyAddr, jitAddrSize, "---- "); | |
579 | snprintf(jit8BitMatchAddr, jitAddrSize, "fallback "); | |
580 | snprintf(jit16BitMatchAddr, jitAddrSize, "---- "); | |
581 | } else { | |
582 | snprintf(jit8BitMatchOnlyAddr, jitAddrSize, "0x%014lx", reinterpret_cast<unsigned long int>(codeBlock.get8BitMatchOnlyAddr())); | |
583 | snprintf(jit16BitMatchOnlyAddr, jitAddrSize, "0x%014lx", reinterpret_cast<unsigned long int>(codeBlock.get16BitMatchOnlyAddr())); | |
584 | snprintf(jit8BitMatchAddr, jitAddrSize, "0x%014lx", reinterpret_cast<unsigned long int>(codeBlock.get8BitMatchAddr())); | |
585 | snprintf(jit16BitMatchAddr, jitAddrSize, "0x%014lx", reinterpret_cast<unsigned long int>(codeBlock.get16BitMatchAddr())); | |
586 | } | |
14957cd0 | 587 | #else |
81345200 A |
588 | const char* jit8BitMatchOnlyAddr = "JIT Off"; |
589 | const char* jit16BitMatchOnlyAddr = ""; | |
590 | const char* jit8BitMatchAddr = "JIT Off"; | |
591 | const char* jit16BitMatchAddr = ""; | |
ba379fdc | 592 | #endif |
81345200 A |
593 | unsigned averageMatchOnlyStringLen = (unsigned)(m_rtMatchOnlyTotalSubjectStringLen / m_rtMatchOnlyCallCount); |
594 | unsigned averageMatchStringLen = (unsigned)(m_rtMatchTotalSubjectStringLen / m_rtMatchCallCount); | |
ba379fdc | 595 | |
81345200 A |
596 | printf("%-40.40s %16.16s %16.16s %10d %10d %10u\n", formattedPattern, jit8BitMatchOnlyAddr, jit16BitMatchOnlyAddr, m_rtMatchOnlyCallCount, m_rtMatchOnlyFoundCount, averageMatchOnlyStringLen); |
597 | printf(" %16.16s %16.16s %10d %10d %10u\n", jit8BitMatchAddr, jit16BitMatchAddr, m_rtMatchCallCount, m_rtMatchFoundCount, averageMatchStringLen); | |
14957cd0 A |
598 | } |
599 | #endif | |
6fe7ccc8 | 600 | |
9dae56ea | 601 | } // namespace JSC |