]>
Commit | Line | Data |
---|---|---|
9dae56ea A |
1 | /* |
2 | * Copyright (C) 1999-2001, 2004 Harri Porten (porten@kde.org) | |
3 | * Copyright (c) 2007, 2008 Apple Inc. All rights reserved. | |
ba379fdc | 4 | * Copyright (C) 2009 Torch Mobile, Inc. |
9dae56ea A |
5 | * |
6 | * This library is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU Lesser General Public | |
8 | * License as published by the Free Software Foundation; either | |
9 | * version 2 of the License, or (at your option) any later version. | |
10 | * | |
11 | * This library is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | * Lesser General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU Lesser General Public | |
17 | * License along with this library; if not, write to the Free Software | |
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
19 | * | |
20 | */ | |
21 | ||
22 | #include "config.h" | |
23 | #include "RegExp.h" | |
9dae56ea | 24 | #include "Lexer.h" |
9dae56ea A |
25 | #include <stdio.h> |
26 | #include <stdlib.h> | |
27 | #include <string.h> | |
28 | #include <wtf/Assertions.h> | |
29 | #include <wtf/OwnArrayPtr.h> | |
30 | ||
ba379fdc A |
31 | |
32 | #if ENABLE(YARR) | |
33 | ||
34 | #include "yarr/RegexCompiler.h" | |
35 | #if ENABLE(YARR_JIT) | |
36 | #include "yarr/RegexJIT.h" | |
37 | #else | |
38 | #include "yarr/RegexInterpreter.h" | |
39 | #endif | |
40 | ||
41 | #else | |
42 | ||
ba379fdc A |
43 | #include <pcre/pcre.h> |
44 | ||
45 | #endif | |
46 | ||
9dae56ea A |
47 | namespace JSC { |
48 | ||
9dae56ea A |
49 | inline RegExp::RegExp(JSGlobalData* globalData, const UString& pattern) |
50 | : m_pattern(pattern) | |
51 | , m_flagBits(0) | |
9dae56ea A |
52 | , m_constructionError(0) |
53 | , m_numSubpatterns(0) | |
4e4e5a6f | 54 | , m_lastMatchStart(-1) |
9dae56ea | 55 | { |
ba379fdc | 56 | compile(globalData); |
9dae56ea A |
57 | } |
58 | ||
59 | inline RegExp::RegExp(JSGlobalData* globalData, const UString& pattern, const UString& flags) | |
60 | : m_pattern(pattern) | |
9dae56ea | 61 | , m_flagBits(0) |
9dae56ea A |
62 | , m_constructionError(0) |
63 | , m_numSubpatterns(0) | |
4e4e5a6f | 64 | , m_lastMatchStart(-1) |
9dae56ea A |
65 | { |
66 | // NOTE: The global flag is handled on a case-by-case basis by functions like | |
67 | // String::match and RegExpObject::match. | |
4e4e5a6f | 68 | if (flags.find('g') != UString::NotFound) |
9dae56ea | 69 | m_flagBits |= Global; |
4e4e5a6f | 70 | if (flags.find('i') != UString::NotFound) |
9dae56ea | 71 | m_flagBits |= IgnoreCase; |
4e4e5a6f | 72 | if (flags.find('m') != UString::NotFound) |
9dae56ea | 73 | m_flagBits |= Multiline; |
9dae56ea | 74 | |
ba379fdc A |
75 | compile(globalData); |
76 | } | |
77 | ||
78 | #if !ENABLE(YARR) | |
79 | RegExp::~RegExp() | |
80 | { | |
81 | jsRegExpFree(m_regExp); | |
82 | } | |
9dae56ea | 83 | #endif |
ba379fdc A |
84 | |
85 | PassRefPtr<RegExp> RegExp::create(JSGlobalData* globalData, const UString& pattern) | |
86 | { | |
87 | return adoptRef(new RegExp(globalData, pattern)); | |
9dae56ea A |
88 | } |
89 | ||
90 | PassRefPtr<RegExp> RegExp::create(JSGlobalData* globalData, const UString& pattern, const UString& flags) | |
91 | { | |
92 | return adoptRef(new RegExp(globalData, pattern, flags)); | |
93 | } | |
94 | ||
ba379fdc A |
95 | #if ENABLE(YARR) |
96 | ||
97 | void RegExp::compile(JSGlobalData* globalData) | |
9dae56ea | 98 | { |
ba379fdc A |
99 | #if ENABLE(YARR_JIT) |
100 | Yarr::jitCompileRegex(globalData, m_regExpJITCode, m_pattern, m_numSubpatterns, m_constructionError, ignoreCase(), multiline()); | |
101 | #else | |
102 | UNUSED_PARAM(globalData); | |
103 | m_regExpBytecode.set(Yarr::byteCompileRegex(m_pattern, m_numSubpatterns, m_constructionError, ignoreCase(), multiline())); | |
104 | #endif | |
9dae56ea A |
105 | } |
106 | ||
ba379fdc A |
107 | int RegExp::match(const UString& s, int startOffset, Vector<int, 32>* ovector) |
108 | { | |
109 | if (startOffset < 0) | |
110 | startOffset = 0; | |
111 | if (ovector) | |
112 | ovector->clear(); | |
113 | ||
4e4e5a6f A |
114 | if (static_cast<unsigned>(startOffset) > s.size() || s.isNull()) { |
115 | m_lastMatchString = UString(); | |
116 | m_lastMatchStart = -1; | |
117 | m_lastOVector.shrink(0); | |
ba379fdc | 118 | return -1; |
4e4e5a6f A |
119 | } |
120 | ||
121 | // Perform check to see if this match call is the same as the last match invocation | |
122 | // and if it is return the prior result. | |
123 | if ((startOffset == m_lastMatchStart) && (s.rep() == m_lastMatchString.rep())) { | |
124 | if (ovector) | |
125 | *ovector = m_lastOVector; | |
126 | ||
127 | if (m_lastOVector.isEmpty()) | |
128 | return -1; | |
129 | ||
130 | return m_lastOVector.at(0); | |
131 | } | |
ba379fdc A |
132 | |
133 | #if ENABLE(YARR_JIT) | |
134 | if (!!m_regExpJITCode) { | |
135 | #else | |
136 | if (m_regExpBytecode) { | |
137 | #endif | |
138 | int offsetVectorSize = (m_numSubpatterns + 1) * 3; // FIXME: should be 2 - but adding temporary fallback to pcre. | |
139 | int* offsetVector; | |
140 | Vector<int, 32> nonReturnedOvector; | |
141 | if (ovector) { | |
142 | ovector->resize(offsetVectorSize); | |
143 | offsetVector = ovector->data(); | |
144 | } else { | |
145 | nonReturnedOvector.resize(offsetVectorSize); | |
146 | offsetVector = nonReturnedOvector.data(); | |
147 | } | |
148 | ||
149 | ASSERT(offsetVector); | |
150 | for (int j = 0; j < offsetVectorSize; ++j) | |
151 | offsetVector[j] = -1; | |
152 | ||
153 | ||
154 | #if ENABLE(YARR_JIT) | |
155 | int result = Yarr::executeRegex(m_regExpJITCode, s.data(), startOffset, s.size(), offsetVector, offsetVectorSize); | |
156 | #else | |
157 | int result = Yarr::interpretRegex(m_regExpBytecode.get(), s.data(), startOffset, s.size(), offsetVector); | |
158 | #endif | |
159 | ||
160 | if (result < 0) { | |
161 | #ifndef NDEBUG | |
162 | // TODO: define up a symbol, rather than magic -1 | |
163 | if (result != -1) | |
164 | fprintf(stderr, "jsRegExpExecute failed with result %d\n", result); | |
165 | #endif | |
166 | if (ovector) | |
167 | ovector->clear(); | |
168 | } | |
4e4e5a6f A |
169 | |
170 | m_lastMatchString = s; | |
171 | m_lastMatchStart = startOffset; | |
172 | ||
173 | if (ovector) | |
174 | m_lastOVector = *ovector; | |
175 | else | |
176 | m_lastOVector = nonReturnedOvector; | |
177 | ||
ba379fdc A |
178 | return result; |
179 | } | |
180 | ||
4e4e5a6f A |
181 | m_lastMatchString = UString(); |
182 | m_lastMatchStart = -1; | |
183 | m_lastOVector.shrink(0); | |
184 | ||
ba379fdc A |
185 | return -1; |
186 | } | |
187 | ||
188 | #else | |
189 | ||
4e4e5a6f | 190 | void RegExp::compile(JSGlobalData*) |
ba379fdc A |
191 | { |
192 | m_regExp = 0; | |
ba379fdc A |
193 | JSRegExpIgnoreCaseOption ignoreCaseOption = ignoreCase() ? JSRegExpIgnoreCase : JSRegExpDoNotIgnoreCase; |
194 | JSRegExpMultilineOption multilineOption = multiline() ? JSRegExpMultiline : JSRegExpSingleLine; | |
195 | m_regExp = jsRegExpCompile(reinterpret_cast<const UChar*>(m_pattern.data()), m_pattern.size(), ignoreCaseOption, multilineOption, &m_numSubpatterns, &m_constructionError); | |
196 | } | |
197 | ||
198 | int RegExp::match(const UString& s, int startOffset, Vector<int, 32>* ovector) | |
9dae56ea A |
199 | { |
200 | if (startOffset < 0) | |
201 | startOffset = 0; | |
202 | if (ovector) | |
203 | ovector->clear(); | |
204 | ||
4e4e5a6f | 205 | if (static_cast<unsigned>(startOffset) > s.size() || s.isNull()) |
9dae56ea A |
206 | return -1; |
207 | ||
9dae56ea A |
208 | if (m_regExp) { |
209 | // Set up the offset vector for the result. | |
210 | // First 2/3 used for result, the last third used by PCRE. | |
211 | int* offsetVector; | |
212 | int offsetVectorSize; | |
213 | int fixedSizeOffsetVector[3]; | |
214 | if (!ovector) { | |
215 | offsetVectorSize = 3; | |
216 | offsetVector = fixedSizeOffsetVector; | |
217 | } else { | |
218 | offsetVectorSize = (m_numSubpatterns + 1) * 3; | |
ba379fdc A |
219 | ovector->resize(offsetVectorSize); |
220 | offsetVector = ovector->data(); | |
9dae56ea A |
221 | } |
222 | ||
223 | int numMatches = jsRegExpExecute(m_regExp, reinterpret_cast<const UChar*>(s.data()), s.size(), startOffset, offsetVector, offsetVectorSize); | |
224 | ||
225 | if (numMatches < 0) { | |
226 | #ifndef NDEBUG | |
227 | if (numMatches != JSRegExpErrorNoMatch) | |
228 | fprintf(stderr, "jsRegExpExecute failed with result %d\n", numMatches); | |
229 | #endif | |
230 | if (ovector) | |
231 | ovector->clear(); | |
232 | return -1; | |
233 | } | |
234 | ||
235 | return offsetVector[0]; | |
236 | } | |
237 | ||
238 | return -1; | |
239 | } | |
240 | ||
ba379fdc A |
241 | #endif |
242 | ||
9dae56ea | 243 | } // namespace JSC |