]> git.saurik.com Git - apple/javascriptcore.git/blob - runtime/RegExp.cpp
4e958f49d5943127e1e9ed58f29713cf07101358
[apple/javascriptcore.git] / runtime / RegExp.cpp
1 /*
2 * Copyright (C) 1999-2001, 2004 Harri Porten (porten@kde.org)
3 * Copyright (c) 2007, 2008 Apple Inc. All rights reserved.
4 * Copyright (C) 2009 Torch Mobile, Inc.
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 */
21
22 #include "config.h"
23 #include "RegExp.h"
24 #include "Lexer.h"
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <wtf/Assertions.h>
29 #include <wtf/OwnArrayPtr.h>
30
31
32 #if ENABLE(YARR)
33
34 #include "yarr/RegexCompiler.h"
35 #if ENABLE(YARR_JIT)
36 #include "yarr/RegexJIT.h"
37 #else
38 #include "yarr/RegexInterpreter.h"
39 #endif
40
41 #else
42
43 #if ENABLE(WREC)
44 #include "JIT.h"
45 #include "WRECGenerator.h"
46 #endif
47 #include <pcre/pcre.h>
48
49 #endif
50
51 namespace JSC {
52
53 #if ENABLE(WREC)
54 using namespace WREC;
55 #endif
56
57 inline RegExp::RegExp(JSGlobalData* globalData, const UString& pattern)
58 : m_pattern(pattern)
59 , m_flagBits(0)
60 , m_constructionError(0)
61 , m_numSubpatterns(0)
62 {
63 compile(globalData);
64 }
65
66 inline RegExp::RegExp(JSGlobalData* globalData, const UString& pattern, const UString& flags)
67 : m_pattern(pattern)
68 , m_flagBits(0)
69 , m_constructionError(0)
70 , m_numSubpatterns(0)
71 {
72 // NOTE: The global flag is handled on a case-by-case basis by functions like
73 // String::match and RegExpObject::match.
74 if (flags.find('g') != -1)
75 m_flagBits |= Global;
76 if (flags.find('i') != -1)
77 m_flagBits |= IgnoreCase;
78 if (flags.find('m') != -1)
79 m_flagBits |= Multiline;
80
81 compile(globalData);
82 }
83
84 #if !ENABLE(YARR)
85 RegExp::~RegExp()
86 {
87 jsRegExpFree(m_regExp);
88 }
89 #endif
90
91 PassRefPtr<RegExp> RegExp::create(JSGlobalData* globalData, const UString& pattern)
92 {
93 return adoptRef(new RegExp(globalData, pattern));
94 }
95
96 PassRefPtr<RegExp> RegExp::create(JSGlobalData* globalData, const UString& pattern, const UString& flags)
97 {
98 return adoptRef(new RegExp(globalData, pattern, flags));
99 }
100
101 #if ENABLE(YARR)
102
103 void RegExp::compile(JSGlobalData* globalData)
104 {
105 #if ENABLE(YARR_JIT)
106 Yarr::jitCompileRegex(globalData, m_regExpJITCode, m_pattern, m_numSubpatterns, m_constructionError, ignoreCase(), multiline());
107 #else
108 UNUSED_PARAM(globalData);
109 m_regExpBytecode.set(Yarr::byteCompileRegex(m_pattern, m_numSubpatterns, m_constructionError, ignoreCase(), multiline()));
110 #endif
111 }
112
113 int RegExp::match(const UString& s, int startOffset, Vector<int, 32>* ovector)
114 {
115 if (startOffset < 0)
116 startOffset = 0;
117 if (ovector)
118 ovector->clear();
119
120 if (startOffset > s.size() || s.isNull())
121 return -1;
122
123 #if ENABLE(YARR_JIT)
124 if (!!m_regExpJITCode) {
125 #else
126 if (m_regExpBytecode) {
127 #endif
128 int offsetVectorSize = (m_numSubpatterns + 1) * 3; // FIXME: should be 2 - but adding temporary fallback to pcre.
129 int* offsetVector;
130 Vector<int, 32> nonReturnedOvector;
131 if (ovector) {
132 ovector->resize(offsetVectorSize);
133 offsetVector = ovector->data();
134 } else {
135 nonReturnedOvector.resize(offsetVectorSize);
136 offsetVector = nonReturnedOvector.data();
137 }
138
139 ASSERT(offsetVector);
140 for (int j = 0; j < offsetVectorSize; ++j)
141 offsetVector[j] = -1;
142
143
144 #if ENABLE(YARR_JIT)
145 int result = Yarr::executeRegex(m_regExpJITCode, s.data(), startOffset, s.size(), offsetVector, offsetVectorSize);
146 #else
147 int result = Yarr::interpretRegex(m_regExpBytecode.get(), s.data(), startOffset, s.size(), offsetVector);
148 #endif
149
150 if (result < 0) {
151 #ifndef NDEBUG
152 // TODO: define up a symbol, rather than magic -1
153 if (result != -1)
154 fprintf(stderr, "jsRegExpExecute failed with result %d\n", result);
155 #endif
156 if (ovector)
157 ovector->clear();
158 }
159 return result;
160 }
161
162 return -1;
163 }
164
165 #else
166
167 void RegExp::compile(JSGlobalData* globalData)
168 {
169 m_regExp = 0;
170 #if ENABLE(WREC)
171 m_wrecFunction = Generator::compileRegExp(globalData, m_pattern, &m_numSubpatterns, &m_constructionError, m_executablePool, ignoreCase(), multiline());
172 if (m_wrecFunction || m_constructionError)
173 return;
174 // Fall through to non-WREC case.
175 #else
176 UNUSED_PARAM(globalData);
177 #endif
178
179 JSRegExpIgnoreCaseOption ignoreCaseOption = ignoreCase() ? JSRegExpIgnoreCase : JSRegExpDoNotIgnoreCase;
180 JSRegExpMultilineOption multilineOption = multiline() ? JSRegExpMultiline : JSRegExpSingleLine;
181 m_regExp = jsRegExpCompile(reinterpret_cast<const UChar*>(m_pattern.data()), m_pattern.size(), ignoreCaseOption, multilineOption, &m_numSubpatterns, &m_constructionError);
182 }
183
184 int RegExp::match(const UString& s, int startOffset, Vector<int, 32>* ovector)
185 {
186 if (startOffset < 0)
187 startOffset = 0;
188 if (ovector)
189 ovector->clear();
190
191 if (startOffset > s.size() || s.isNull())
192 return -1;
193
194 #if ENABLE(WREC)
195 if (m_wrecFunction) {
196 int offsetVectorSize = (m_numSubpatterns + 1) * 2;
197 int* offsetVector;
198 Vector<int, 32> nonReturnedOvector;
199 if (ovector) {
200 ovector->resize(offsetVectorSize);
201 offsetVector = ovector->data();
202 } else {
203 nonReturnedOvector.resize(offsetVectorSize);
204 offsetVector = nonReturnedOvector.data();
205 }
206 ASSERT(offsetVector);
207 for (int j = 0; j < offsetVectorSize; ++j)
208 offsetVector[j] = -1;
209
210 int result = m_wrecFunction(s.data(), startOffset, s.size(), offsetVector);
211
212 if (result < 0) {
213 #ifndef NDEBUG
214 // TODO: define up a symbol, rather than magic -1
215 if (result != -1)
216 fprintf(stderr, "jsRegExpExecute failed with result %d\n", result);
217 #endif
218 if (ovector)
219 ovector->clear();
220 }
221 return result;
222 } else
223 #endif
224 if (m_regExp) {
225 // Set up the offset vector for the result.
226 // First 2/3 used for result, the last third used by PCRE.
227 int* offsetVector;
228 int offsetVectorSize;
229 int fixedSizeOffsetVector[3];
230 if (!ovector) {
231 offsetVectorSize = 3;
232 offsetVector = fixedSizeOffsetVector;
233 } else {
234 offsetVectorSize = (m_numSubpatterns + 1) * 3;
235 ovector->resize(offsetVectorSize);
236 offsetVector = ovector->data();
237 }
238
239 int numMatches = jsRegExpExecute(m_regExp, reinterpret_cast<const UChar*>(s.data()), s.size(), startOffset, offsetVector, offsetVectorSize);
240
241 if (numMatches < 0) {
242 #ifndef NDEBUG
243 if (numMatches != JSRegExpErrorNoMatch)
244 fprintf(stderr, "jsRegExpExecute failed with result %d\n", numMatches);
245 #endif
246 if (ovector)
247 ovector->clear();
248 return -1;
249 }
250
251 return offsetVector[0];
252 }
253
254 return -1;
255 }
256
257 #endif
258
259 } // namespace JSC