]> git.saurik.com Git - apple/javascriptcore.git/blob - runtime/RegExp.cpp
7dd4a8ffdede0b04eead102d02bffca35b2eb10f
[apple/javascriptcore.git] / runtime / RegExp.cpp
1 /*
2 * Copyright (C) 1999-2001, 2004 Harri Porten (porten@kde.org)
3 * Copyright (c) 2007, 2008 Apple Inc. All rights reserved.
4 * Copyright (C) 2009 Torch Mobile, Inc.
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 */
21
22 #include "config.h"
23 #include "RegExp.h"
24 #include "Lexer.h"
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <wtf/Assertions.h>
29 #include <wtf/OwnArrayPtr.h>
30
31
32 #if ENABLE(YARR)
33
34 #include "yarr/RegexCompiler.h"
35 #if ENABLE(YARR_JIT)
36 #include "yarr/RegexJIT.h"
37 #else
38 #include "yarr/RegexInterpreter.h"
39 #endif
40
41 #else
42
43 #if ENABLE(WREC)
44 #include "JIT.h"
45 #include "WRECGenerator.h"
46 #endif
47 #include <pcre/pcre.h>
48
49 #endif
50
51 namespace JSC {
52
53 #if ENABLE(WREC)
54 using namespace WREC;
55 #endif
56
57 inline RegExp::RegExp(JSGlobalData* globalData, const UString& pattern)
58 : m_pattern(pattern)
59 , m_flagBits(0)
60 , m_constructionError(0)
61 , m_numSubpatterns(0)
62 {
63 compile(globalData);
64 }
65
66 inline RegExp::RegExp(JSGlobalData* globalData, const UString& pattern, const UString& flags)
67 : m_pattern(pattern)
68 , m_flags(flags)
69 , m_flagBits(0)
70 , m_constructionError(0)
71 , m_numSubpatterns(0)
72 {
73 // NOTE: The global flag is handled on a case-by-case basis by functions like
74 // String::match and RegExpObject::match.
75 if (flags.find('g') != -1)
76 m_flagBits |= Global;
77 if (flags.find('i') != -1)
78 m_flagBits |= IgnoreCase;
79 if (flags.find('m') != -1)
80 m_flagBits |= Multiline;
81
82 compile(globalData);
83 }
84
85 #if !ENABLE(YARR)
86 RegExp::~RegExp()
87 {
88 jsRegExpFree(m_regExp);
89 }
90 #endif
91
92 PassRefPtr<RegExp> RegExp::create(JSGlobalData* globalData, const UString& pattern)
93 {
94 return adoptRef(new RegExp(globalData, pattern));
95 }
96
97 PassRefPtr<RegExp> RegExp::create(JSGlobalData* globalData, const UString& pattern, const UString& flags)
98 {
99 return adoptRef(new RegExp(globalData, pattern, flags));
100 }
101
102 #if ENABLE(YARR)
103
104 void RegExp::compile(JSGlobalData* globalData)
105 {
106 #if ENABLE(YARR_JIT)
107 Yarr::jitCompileRegex(globalData, m_regExpJITCode, m_pattern, m_numSubpatterns, m_constructionError, ignoreCase(), multiline());
108 #else
109 UNUSED_PARAM(globalData);
110 m_regExpBytecode.set(Yarr::byteCompileRegex(m_pattern, m_numSubpatterns, m_constructionError, ignoreCase(), multiline()));
111 #endif
112 }
113
114 int RegExp::match(const UString& s, int startOffset, Vector<int, 32>* ovector)
115 {
116 if (startOffset < 0)
117 startOffset = 0;
118 if (ovector)
119 ovector->clear();
120
121 if (startOffset > s.size() || s.isNull())
122 return -1;
123
124 #if ENABLE(YARR_JIT)
125 if (!!m_regExpJITCode) {
126 #else
127 if (m_regExpBytecode) {
128 #endif
129 int offsetVectorSize = (m_numSubpatterns + 1) * 3; // FIXME: should be 2 - but adding temporary fallback to pcre.
130 int* offsetVector;
131 Vector<int, 32> nonReturnedOvector;
132 if (ovector) {
133 ovector->resize(offsetVectorSize);
134 offsetVector = ovector->data();
135 } else {
136 nonReturnedOvector.resize(offsetVectorSize);
137 offsetVector = nonReturnedOvector.data();
138 }
139
140 ASSERT(offsetVector);
141 for (int j = 0; j < offsetVectorSize; ++j)
142 offsetVector[j] = -1;
143
144
145 #if ENABLE(YARR_JIT)
146 int result = Yarr::executeRegex(m_regExpJITCode, s.data(), startOffset, s.size(), offsetVector, offsetVectorSize);
147 #else
148 int result = Yarr::interpretRegex(m_regExpBytecode.get(), s.data(), startOffset, s.size(), offsetVector);
149 #endif
150
151 if (result < 0) {
152 #ifndef NDEBUG
153 // TODO: define up a symbol, rather than magic -1
154 if (result != -1)
155 fprintf(stderr, "jsRegExpExecute failed with result %d\n", result);
156 #endif
157 if (ovector)
158 ovector->clear();
159 }
160 return result;
161 }
162
163 return -1;
164 }
165
166 #else
167
168 void RegExp::compile(JSGlobalData* globalData)
169 {
170 m_regExp = 0;
171 #if ENABLE(WREC)
172 m_wrecFunction = Generator::compileRegExp(globalData, m_pattern, &m_numSubpatterns, &m_constructionError, m_executablePool, ignoreCase(), multiline());
173 if (m_wrecFunction || m_constructionError)
174 return;
175 // Fall through to non-WREC case.
176 #else
177 UNUSED_PARAM(globalData);
178 #endif
179
180 JSRegExpIgnoreCaseOption ignoreCaseOption = ignoreCase() ? JSRegExpIgnoreCase : JSRegExpDoNotIgnoreCase;
181 JSRegExpMultilineOption multilineOption = multiline() ? JSRegExpMultiline : JSRegExpSingleLine;
182 m_regExp = jsRegExpCompile(reinterpret_cast<const UChar*>(m_pattern.data()), m_pattern.size(), ignoreCaseOption, multilineOption, &m_numSubpatterns, &m_constructionError);
183 }
184
185 int RegExp::match(const UString& s, int startOffset, Vector<int, 32>* ovector)
186 {
187 if (startOffset < 0)
188 startOffset = 0;
189 if (ovector)
190 ovector->clear();
191
192 if (startOffset > s.size() || s.isNull())
193 return -1;
194
195 #if ENABLE(WREC)
196 if (m_wrecFunction) {
197 int offsetVectorSize = (m_numSubpatterns + 1) * 2;
198 int* offsetVector;
199 Vector<int, 32> nonReturnedOvector;
200 if (ovector) {
201 ovector->resize(offsetVectorSize);
202 offsetVector = ovector->data();
203 } else {
204 nonReturnedOvector.resize(offsetVectorSize);
205 offsetVector = nonReturnedOvector.data();
206 }
207 ASSERT(offsetVector);
208 for (int j = 0; j < offsetVectorSize; ++j)
209 offsetVector[j] = -1;
210
211 int result = m_wrecFunction(s.data(), startOffset, s.size(), offsetVector);
212
213 if (result < 0) {
214 #ifndef NDEBUG
215 // TODO: define up a symbol, rather than magic -1
216 if (result != -1)
217 fprintf(stderr, "jsRegExpExecute failed with result %d\n", result);
218 #endif
219 if (ovector)
220 ovector->clear();
221 }
222 return result;
223 } else
224 #endif
225 if (m_regExp) {
226 // Set up the offset vector for the result.
227 // First 2/3 used for result, the last third used by PCRE.
228 int* offsetVector;
229 int offsetVectorSize;
230 int fixedSizeOffsetVector[3];
231 if (!ovector) {
232 offsetVectorSize = 3;
233 offsetVector = fixedSizeOffsetVector;
234 } else {
235 offsetVectorSize = (m_numSubpatterns + 1) * 3;
236 ovector->resize(offsetVectorSize);
237 offsetVector = ovector->data();
238 }
239
240 int numMatches = jsRegExpExecute(m_regExp, reinterpret_cast<const UChar*>(s.data()), s.size(), startOffset, offsetVector, offsetVectorSize);
241
242 if (numMatches < 0) {
243 #ifndef NDEBUG
244 if (numMatches != JSRegExpErrorNoMatch)
245 fprintf(stderr, "jsRegExpExecute failed with result %d\n", numMatches);
246 #endif
247 if (ovector)
248 ovector->clear();
249 return -1;
250 }
251
252 return offsetVector[0];
253 }
254
255 return -1;
256 }
257
258 #endif
259
260 } // namespace JSC