]> git.saurik.com Git - apple/javascriptcore.git/blob - runtime/RegExp.cpp
JavaScriptCore-721.26.tar.gz
[apple/javascriptcore.git] / runtime / RegExp.cpp
1 /*
2 * Copyright (C) 1999-2001, 2004 Harri Porten (porten@kde.org)
3 * Copyright (c) 2007, 2008 Apple Inc. All rights reserved.
4 * Copyright (C) 2009 Torch Mobile, Inc.
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 */
21
22 #include "config.h"
23 #include "RegExp.h"
24 #include "Lexer.h"
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <wtf/Assertions.h>
29 #include <wtf/OwnArrayPtr.h>
30
31
32 #if ENABLE(YARR)
33
34 #include "yarr/RegexCompiler.h"
35 #if ENABLE(YARR_JIT)
36 #include "yarr/RegexJIT.h"
37 #else
38 #include "yarr/RegexInterpreter.h"
39 #endif
40
41 #else
42
43 #include <pcre/pcre.h>
44
45 #endif
46
47 namespace JSC {
48
49 inline RegExp::RegExp(JSGlobalData* globalData, const UString& pattern)
50 : m_pattern(pattern)
51 , m_flagBits(0)
52 , m_constructionError(0)
53 , m_numSubpatterns(0)
54 , m_lastMatchStart(-1)
55 {
56 compile(globalData);
57 }
58
59 inline RegExp::RegExp(JSGlobalData* globalData, const UString& pattern, const UString& flags)
60 : m_pattern(pattern)
61 , m_flagBits(0)
62 , m_constructionError(0)
63 , m_numSubpatterns(0)
64 , m_lastMatchStart(-1)
65 {
66 // NOTE: The global flag is handled on a case-by-case basis by functions like
67 // String::match and RegExpObject::match.
68 if (flags.find('g') != UString::NotFound)
69 m_flagBits |= Global;
70 if (flags.find('i') != UString::NotFound)
71 m_flagBits |= IgnoreCase;
72 if (flags.find('m') != UString::NotFound)
73 m_flagBits |= Multiline;
74
75 compile(globalData);
76 }
77
78 #if !ENABLE(YARR)
79 RegExp::~RegExp()
80 {
81 jsRegExpFree(m_regExp);
82 }
83 #endif
84
85 PassRefPtr<RegExp> RegExp::create(JSGlobalData* globalData, const UString& pattern)
86 {
87 return adoptRef(new RegExp(globalData, pattern));
88 }
89
90 PassRefPtr<RegExp> RegExp::create(JSGlobalData* globalData, const UString& pattern, const UString& flags)
91 {
92 return adoptRef(new RegExp(globalData, pattern, flags));
93 }
94
95 #if ENABLE(YARR)
96
97 void RegExp::compile(JSGlobalData* globalData)
98 {
99 #if ENABLE(YARR_JIT)
100 Yarr::jitCompileRegex(globalData, m_regExpJITCode, m_pattern, m_numSubpatterns, m_constructionError, ignoreCase(), multiline());
101 #else
102 UNUSED_PARAM(globalData);
103 m_regExpBytecode.set(Yarr::byteCompileRegex(m_pattern, m_numSubpatterns, m_constructionError, ignoreCase(), multiline()));
104 #endif
105 }
106
107 int RegExp::match(const UString& s, int startOffset, Vector<int, 32>* ovector)
108 {
109 if (startOffset < 0)
110 startOffset = 0;
111 if (ovector)
112 ovector->clear();
113
114 if (static_cast<unsigned>(startOffset) > s.size() || s.isNull()) {
115 m_lastMatchString = UString();
116 m_lastMatchStart = -1;
117 m_lastOVector.shrink(0);
118 return -1;
119 }
120
121 // Perform check to see if this match call is the same as the last match invocation
122 // and if it is return the prior result.
123 if ((startOffset == m_lastMatchStart) && (s.rep() == m_lastMatchString.rep())) {
124 if (ovector)
125 *ovector = m_lastOVector;
126
127 if (m_lastOVector.isEmpty())
128 return -1;
129
130 return m_lastOVector.at(0);
131 }
132
133 #if ENABLE(YARR_JIT)
134 if (!!m_regExpJITCode) {
135 #else
136 if (m_regExpBytecode) {
137 #endif
138 int offsetVectorSize = (m_numSubpatterns + 1) * 3; // FIXME: should be 2 - but adding temporary fallback to pcre.
139 int* offsetVector;
140 Vector<int, 32> nonReturnedOvector;
141 if (ovector) {
142 ovector->resize(offsetVectorSize);
143 offsetVector = ovector->data();
144 } else {
145 nonReturnedOvector.resize(offsetVectorSize);
146 offsetVector = nonReturnedOvector.data();
147 }
148
149 ASSERT(offsetVector);
150 for (int j = 0; j < offsetVectorSize; ++j)
151 offsetVector[j] = -1;
152
153
154 #if ENABLE(YARR_JIT)
155 int result = Yarr::executeRegex(m_regExpJITCode, s.data(), startOffset, s.size(), offsetVector, offsetVectorSize);
156 #else
157 int result = Yarr::interpretRegex(m_regExpBytecode.get(), s.data(), startOffset, s.size(), offsetVector);
158 #endif
159
160 if (result < 0) {
161 #ifndef NDEBUG
162 // TODO: define up a symbol, rather than magic -1
163 if (result != -1)
164 fprintf(stderr, "jsRegExpExecute failed with result %d\n", result);
165 #endif
166 if (ovector)
167 ovector->clear();
168 }
169
170 m_lastMatchString = s;
171 m_lastMatchStart = startOffset;
172
173 if (ovector)
174 m_lastOVector = *ovector;
175 else
176 m_lastOVector = nonReturnedOvector;
177
178 return result;
179 }
180
181 m_lastMatchString = UString();
182 m_lastMatchStart = -1;
183 m_lastOVector.shrink(0);
184
185 return -1;
186 }
187
188 #else
189
190 void RegExp::compile(JSGlobalData*)
191 {
192 m_regExp = 0;
193 JSRegExpIgnoreCaseOption ignoreCaseOption = ignoreCase() ? JSRegExpIgnoreCase : JSRegExpDoNotIgnoreCase;
194 JSRegExpMultilineOption multilineOption = multiline() ? JSRegExpMultiline : JSRegExpSingleLine;
195 m_regExp = jsRegExpCompile(reinterpret_cast<const UChar*>(m_pattern.data()), m_pattern.size(), ignoreCaseOption, multilineOption, &m_numSubpatterns, &m_constructionError);
196 }
197
198 int RegExp::match(const UString& s, int startOffset, Vector<int, 32>* ovector)
199 {
200 if (startOffset < 0)
201 startOffset = 0;
202 if (ovector)
203 ovector->clear();
204
205 if (static_cast<unsigned>(startOffset) > s.size() || s.isNull())
206 return -1;
207
208 if (m_regExp) {
209 // Set up the offset vector for the result.
210 // First 2/3 used for result, the last third used by PCRE.
211 int* offsetVector;
212 int offsetVectorSize;
213 int fixedSizeOffsetVector[3];
214 if (!ovector) {
215 offsetVectorSize = 3;
216 offsetVector = fixedSizeOffsetVector;
217 } else {
218 offsetVectorSize = (m_numSubpatterns + 1) * 3;
219 ovector->resize(offsetVectorSize);
220 offsetVector = ovector->data();
221 }
222
223 int numMatches = jsRegExpExecute(m_regExp, reinterpret_cast<const UChar*>(s.data()), s.size(), startOffset, offsetVector, offsetVectorSize);
224
225 if (numMatches < 0) {
226 #ifndef NDEBUG
227 if (numMatches != JSRegExpErrorNoMatch)
228 fprintf(stderr, "jsRegExpExecute failed with result %d\n", numMatches);
229 #endif
230 if (ovector)
231 ovector->clear();
232 return -1;
233 }
234
235 return offsetVector[0];
236 }
237
238 return -1;
239 }
240
241 #endif
242
243 } // namespace JSC