]>
Commit | Line | Data |
---|---|---|
9dae56ea A |
1 | /* |
2 | * Copyright (C) 1999-2001, 2004 Harri Porten (porten@kde.org) | |
3 | * Copyright (c) 2007, 2008 Apple Inc. All rights reserved. | |
ba379fdc | 4 | * Copyright (C) 2009 Torch Mobile, Inc. |
14957cd0 | 5 | * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged |
9dae56ea A |
6 | * |
7 | * This library is free software; you can redistribute it and/or | |
8 | * modify it under the terms of the GNU Lesser General Public | |
9 | * License as published by the Free Software Foundation; either | |
10 | * version 2 of the License, or (at your option) any later version. | |
11 | * | |
12 | * This library is distributed in the hope that it will be useful, | |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | * Lesser General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU Lesser General Public | |
18 | * License along with this library; if not, write to the Free Software | |
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
20 | * | |
21 | */ | |
22 | ||
23 | #include "config.h" | |
24 | #include "RegExp.h" | |
14957cd0 | 25 | |
9dae56ea | 26 | #include "Lexer.h" |
14957cd0 A |
27 | #include "RegExpCache.h" |
28 | #include "yarr/Yarr.h" | |
29 | #include "yarr/YarrJIT.h" | |
9dae56ea A |
30 | #include <stdio.h> |
31 | #include <stdlib.h> | |
32 | #include <string.h> | |
33 | #include <wtf/Assertions.h> | |
34 | #include <wtf/OwnArrayPtr.h> | |
35 | ||
36 | namespace JSC { | |
37 | ||
14957cd0 A |
38 | const ClassInfo RegExp::s_info = { "RegExp", 0, 0, 0 }; |
39 | ||
40 | RegExpFlags regExpFlags(const UString& string) | |
9dae56ea | 41 | { |
14957cd0 A |
42 | RegExpFlags flags = NoFlags; |
43 | ||
44 | for (unsigned i = 0; i < string.length(); ++i) { | |
45 | switch (string.characters()[i]) { | |
46 | case 'g': | |
47 | if (flags & FlagGlobal) | |
48 | return InvalidFlags; | |
49 | flags = static_cast<RegExpFlags>(flags | FlagGlobal); | |
50 | break; | |
51 | ||
52 | case 'i': | |
53 | if (flags & FlagIgnoreCase) | |
54 | return InvalidFlags; | |
55 | flags = static_cast<RegExpFlags>(flags | FlagIgnoreCase); | |
56 | break; | |
57 | ||
58 | case 'm': | |
59 | if (flags & FlagMultiline) | |
60 | return InvalidFlags; | |
61 | flags = static_cast<RegExpFlags>(flags | FlagMultiline); | |
62 | break; | |
63 | ||
64 | default: | |
65 | return InvalidFlags; | |
66 | } | |
67 | } | |
9dae56ea | 68 | |
14957cd0 A |
69 | return flags; |
70 | } | |
71 | ||
72 | struct RegExpRepresentation { | |
73 | #if ENABLE(YARR_JIT) | |
74 | Yarr::YarrCodeBlock m_regExpJITCode; | |
75 | #endif | |
76 | OwnPtr<Yarr::BytecodePattern> m_regExpBytecode; | |
77 | }; | |
78 | ||
79 | RegExp::RegExp(JSGlobalData* globalData, const UString& patternString, RegExpFlags flags) | |
80 | : JSCell(*globalData, globalData->regExpStructure.get()) | |
81 | , m_state(NotCompiled) | |
82 | , m_patternString(patternString) | |
83 | , m_flags(flags) | |
9dae56ea A |
84 | , m_constructionError(0) |
85 | , m_numSubpatterns(0) | |
14957cd0 A |
86 | #if ENABLE(REGEXP_TRACING) |
87 | , m_rtMatchCallCount(0) | |
88 | , m_rtMatchFoundCount(0) | |
89 | #endif | |
9dae56ea | 90 | { |
14957cd0 A |
91 | Yarr::YarrPattern pattern(m_patternString, ignoreCase(), multiline(), &m_constructionError); |
92 | if (m_constructionError) | |
93 | m_state = ParseError; | |
94 | else | |
95 | m_numSubpatterns = pattern.m_numSubpatterns; | |
ba379fdc A |
96 | } |
97 | ||
ba379fdc A |
98 | RegExp::~RegExp() |
99 | { | |
ba379fdc | 100 | } |
ba379fdc | 101 | |
14957cd0 | 102 | RegExp* RegExp::create(JSGlobalData* globalData, const UString& patternString, RegExpFlags flags) |
ba379fdc | 103 | { |
14957cd0 | 104 | return globalData->regExpCache()->lookupOrCreate(patternString, flags); |
9dae56ea A |
105 | } |
106 | ||
14957cd0 | 107 | void RegExp::compile(JSGlobalData* globalData) |
9dae56ea | 108 | { |
14957cd0 A |
109 | ASSERT(m_state == NotCompiled); |
110 | m_representation = adoptPtr(new RegExpRepresentation); | |
111 | m_state = Compiling; | |
112 | Yarr::YarrPattern pattern(m_patternString, ignoreCase(), multiline(), &m_constructionError); | |
113 | if (m_constructionError) { | |
114 | ASSERT_NOT_REACHED(); | |
115 | m_state = ParseError; | |
116 | return; | |
117 | } | |
9dae56ea | 118 | |
14957cd0 A |
119 | globalData->regExpCache()->addToStrongCache(this); |
120 | ||
121 | ASSERT(m_numSubpatterns == pattern.m_numSubpatterns); | |
ba379fdc | 122 | |
ba379fdc | 123 | #if ENABLE(YARR_JIT) |
14957cd0 A |
124 | if (!pattern.m_containsBackreferences && globalData->canUseJIT()) { |
125 | Yarr::jitCompile(pattern, globalData, m_representation->m_regExpJITCode); | |
126 | #if ENABLE(YARR_JIT_DEBUG) | |
127 | if (!m_representation->m_regExpJITCode.isFallBack()) | |
128 | m_state = JITCode; | |
129 | else | |
130 | m_state = ByteCode; | |
ba379fdc | 131 | #else |
14957cd0 A |
132 | if (!m_representation->m_regExpJITCode.isFallBack()) { |
133 | m_state = JITCode; | |
134 | return; | |
135 | } | |
136 | #endif | |
137 | } | |
ba379fdc | 138 | #endif |
14957cd0 A |
139 | |
140 | m_representation->m_regExpBytecode = Yarr::byteCompile(pattern, &globalData->m_regExpAllocator); | |
141 | ||
142 | m_state = ByteCode; | |
9dae56ea A |
143 | } |
144 | ||
14957cd0 | 145 | int RegExp::match(JSGlobalData& globalData, const UString& s, int startOffset, Vector<int, 32>* ovector) |
ba379fdc A |
146 | { |
147 | if (startOffset < 0) | |
148 | startOffset = 0; | |
ba379fdc | 149 | |
14957cd0 A |
150 | #if ENABLE(REGEXP_TRACING) |
151 | m_rtMatchCallCount++; | |
152 | #endif | |
153 | ||
154 | if (static_cast<unsigned>(startOffset) > s.length() || s.isNull()) | |
ba379fdc A |
155 | return -1; |
156 | ||
14957cd0 A |
157 | if (m_state != ParseError) { |
158 | compileIfNecessary(globalData); | |
159 | ||
160 | int offsetVectorSize = (m_numSubpatterns + 1) * 2; | |
ba379fdc A |
161 | int* offsetVector; |
162 | Vector<int, 32> nonReturnedOvector; | |
163 | if (ovector) { | |
164 | ovector->resize(offsetVectorSize); | |
165 | offsetVector = ovector->data(); | |
166 | } else { | |
167 | nonReturnedOvector.resize(offsetVectorSize); | |
168 | offsetVector = nonReturnedOvector.data(); | |
169 | } | |
170 | ||
171 | ASSERT(offsetVector); | |
14957cd0 A |
172 | // Initialize offsetVector with the return value (index 0) and the |
173 | // first subpattern start indicies (even index values) set to -1. | |
174 | // No need to init the subpattern end indicies. | |
175 | for (unsigned j = 0, i = 0; i < m_numSubpatterns + 1; j += 2, i++) | |
ba379fdc A |
176 | offsetVector[j] = -1; |
177 | ||
14957cd0 | 178 | int result; |
ba379fdc | 179 | #if ENABLE(YARR_JIT) |
14957cd0 A |
180 | if (m_state == JITCode) { |
181 | result = Yarr::execute(m_representation->m_regExpJITCode, s.characters(), startOffset, s.length(), offsetVector); | |
182 | #if ENABLE(YARR_JIT_DEBUG) | |
183 | matchCompareWithInterpreter(s, startOffset, offsetVector, result); | |
ba379fdc | 184 | #endif |
14957cd0 | 185 | } else |
ba379fdc | 186 | #endif |
14957cd0 A |
187 | result = Yarr::interpret(m_representation->m_regExpBytecode.get(), s.characters(), startOffset, s.length(), offsetVector); |
188 | ASSERT(result >= -1); | |
4e4e5a6f | 189 | |
14957cd0 A |
190 | #if ENABLE(REGEXP_TRACING) |
191 | if (result != -1) | |
192 | m_rtMatchFoundCount++; | |
193 | #endif | |
4e4e5a6f | 194 | |
ba379fdc A |
195 | return result; |
196 | } | |
197 | ||
198 | return -1; | |
199 | } | |
200 | ||
14957cd0 | 201 | void RegExp::invalidateCode() |
ba379fdc | 202 | { |
14957cd0 A |
203 | if (!m_representation || m_state == Compiling) |
204 | return; | |
205 | m_state = NotCompiled; | |
206 | m_representation.clear(); | |
ba379fdc A |
207 | } |
208 | ||
14957cd0 A |
209 | #if ENABLE(YARR_JIT_DEBUG) |
210 | void RegExp::matchCompareWithInterpreter(const UString& s, int startOffset, int* offsetVector, int jitResult) | |
9dae56ea | 211 | { |
14957cd0 A |
212 | int offsetVectorSize = (m_numSubpatterns + 1) * 2; |
213 | Vector<int, 32> interpreterOvector; | |
214 | interpreterOvector.resize(offsetVectorSize); | |
215 | int* interpreterOffsetVector = interpreterOvector.data(); | |
216 | int interpreterResult = 0; | |
217 | int differences = 0; | |
218 | ||
219 | // Initialize interpreterOffsetVector with the return value (index 0) and the | |
220 | // first subpattern start indicies (even index values) set to -1. | |
221 | // No need to init the subpattern end indicies. | |
222 | for (unsigned j = 0, i = 0; i < m_numSubpatterns + 1; j += 2, i++) | |
223 | interpreterOffsetVector[j] = -1; | |
224 | ||
225 | interpreterResult = Yarr::interpret(m_representation->m_regExpBytecode.get(), s.characters(), startOffset, s.length(), interpreterOffsetVector); | |
226 | ||
227 | if (jitResult != interpreterResult) | |
228 | differences++; | |
229 | ||
230 | for (unsigned j = 2, i = 0; i < m_numSubpatterns; j +=2, i++) | |
231 | if ((offsetVector[j] != interpreterOffsetVector[j]) | |
232 | || ((offsetVector[j] >= 0) && (offsetVector[j+1] != interpreterOffsetVector[j+1]))) | |
233 | differences++; | |
234 | ||
235 | if (differences) { | |
236 | fprintf(stderr, "RegExp Discrepency for /%s/\n string input ", pattern().utf8().data()); | |
237 | unsigned segmentLen = s.length() - static_cast<unsigned>(startOffset); | |
238 | ||
239 | fprintf(stderr, (segmentLen < 150) ? "\"%s\"\n" : "\"%148s...\"\n", s.utf8().data() + startOffset); | |
240 | ||
241 | if (jitResult != interpreterResult) { | |
242 | fprintf(stderr, " JIT result = %d, blah interpreted result = %d\n", jitResult, interpreterResult); | |
243 | differences--; | |
9dae56ea | 244 | } else { |
14957cd0 | 245 | fprintf(stderr, " Correct result = %d\n", jitResult); |
9dae56ea A |
246 | } |
247 | ||
14957cd0 A |
248 | if (differences) { |
249 | for (unsigned j = 2, i = 0; i < m_numSubpatterns; j +=2, i++) { | |
250 | if (offsetVector[j] != interpreterOffsetVector[j]) | |
251 | fprintf(stderr, " JIT offset[%d] = %d, interpreted offset[%d] = %d\n", j, offsetVector[j], j, interpreterOffsetVector[j]); | |
252 | if ((offsetVector[j] >= 0) && (offsetVector[j+1] != interpreterOffsetVector[j+1])) | |
253 | fprintf(stderr, " JIT offset[%d] = %d, interpreted offset[%d] = %d\n", j+1, offsetVector[j+1], j+1, interpreterOffsetVector[j+1]); | |
254 | } | |
9dae56ea | 255 | } |
9dae56ea | 256 | } |
9dae56ea | 257 | } |
14957cd0 A |
258 | #endif |
259 | ||
260 | #if ENABLE(REGEXP_TRACING) | |
261 | void RegExp::printTraceData() | |
262 | { | |
263 | char formattedPattern[41]; | |
264 | char rawPattern[41]; | |
265 | ||
266 | strncpy(rawPattern, pattern().utf8().data(), 40); | |
267 | rawPattern[40]= '\0'; | |
9dae56ea | 268 | |
14957cd0 A |
269 | int pattLen = strlen(rawPattern); |
270 | ||
271 | snprintf(formattedPattern, 41, (pattLen <= 38) ? "/%.38s/" : "/%.36s...", rawPattern); | |
272 | ||
273 | #if ENABLE(YARR_JIT) | |
274 | Yarr::YarrCodeBlock& codeBlock = m_representation->m_regExpJITCode; | |
275 | ||
276 | const size_t jitAddrSize = 20; | |
277 | char jitAddr[jitAddrSize]; | |
278 | if (m_state == JITCode) | |
279 | snprintf(jitAddr, jitAddrSize, "fallback"); | |
280 | else | |
281 | snprintf(jitAddr, jitAddrSize, "0x%014lx", reinterpret_cast<unsigned long int>(codeBlock.getAddr())); | |
282 | #else | |
283 | const char* jitAddr = "JIT Off"; | |
ba379fdc A |
284 | #endif |
285 | ||
14957cd0 A |
286 | printf("%-40.40s %16.16s %10d %10d\n", formattedPattern, jitAddr, m_rtMatchCallCount, m_rtMatchFoundCount); |
287 | } | |
288 | #endif | |
289 | ||
9dae56ea | 290 | } // namespace JSC |