]> git.saurik.com Git - apple/javascriptcore.git/blame - runtime/RegExp.cpp
JavaScriptCore-721.26.tar.gz
[apple/javascriptcore.git] / runtime / RegExp.cpp
CommitLineData
9dae56ea
A
1/*
2 * Copyright (C) 1999-2001, 2004 Harri Porten (porten@kde.org)
3 * Copyright (c) 2007, 2008 Apple Inc. All rights reserved.
ba379fdc 4 * Copyright (C) 2009 Torch Mobile, Inc.
9dae56ea
A
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 */
21
22#include "config.h"
23#include "RegExp.h"
9dae56ea 24#include "Lexer.h"
9dae56ea
A
25#include <stdio.h>
26#include <stdlib.h>
27#include <string.h>
28#include <wtf/Assertions.h>
29#include <wtf/OwnArrayPtr.h>
30
ba379fdc
A
31
32#if ENABLE(YARR)
33
34#include "yarr/RegexCompiler.h"
35#if ENABLE(YARR_JIT)
36#include "yarr/RegexJIT.h"
37#else
38#include "yarr/RegexInterpreter.h"
39#endif
40
41#else
42
ba379fdc
A
43#include <pcre/pcre.h>
44
45#endif
46
9dae56ea
A
47namespace JSC {
48
9dae56ea
A
49inline RegExp::RegExp(JSGlobalData* globalData, const UString& pattern)
50 : m_pattern(pattern)
51 , m_flagBits(0)
9dae56ea
A
52 , m_constructionError(0)
53 , m_numSubpatterns(0)
4e4e5a6f 54 , m_lastMatchStart(-1)
9dae56ea 55{
ba379fdc 56 compile(globalData);
9dae56ea
A
57}
58
59inline RegExp::RegExp(JSGlobalData* globalData, const UString& pattern, const UString& flags)
60 : m_pattern(pattern)
9dae56ea 61 , m_flagBits(0)
9dae56ea
A
62 , m_constructionError(0)
63 , m_numSubpatterns(0)
4e4e5a6f 64 , m_lastMatchStart(-1)
9dae56ea
A
65{
66 // NOTE: The global flag is handled on a case-by-case basis by functions like
67 // String::match and RegExpObject::match.
4e4e5a6f 68 if (flags.find('g') != UString::NotFound)
9dae56ea 69 m_flagBits |= Global;
4e4e5a6f 70 if (flags.find('i') != UString::NotFound)
9dae56ea 71 m_flagBits |= IgnoreCase;
4e4e5a6f 72 if (flags.find('m') != UString::NotFound)
9dae56ea 73 m_flagBits |= Multiline;
9dae56ea 74
ba379fdc
A
75 compile(globalData);
76}
77
78#if !ENABLE(YARR)
79RegExp::~RegExp()
80{
81 jsRegExpFree(m_regExp);
82}
9dae56ea 83#endif
ba379fdc
A
84
85PassRefPtr<RegExp> RegExp::create(JSGlobalData* globalData, const UString& pattern)
86{
87 return adoptRef(new RegExp(globalData, pattern));
9dae56ea
A
88}
89
90PassRefPtr<RegExp> RegExp::create(JSGlobalData* globalData, const UString& pattern, const UString& flags)
91{
92 return adoptRef(new RegExp(globalData, pattern, flags));
93}
94
ba379fdc
A
95#if ENABLE(YARR)
96
97void RegExp::compile(JSGlobalData* globalData)
9dae56ea 98{
ba379fdc
A
99#if ENABLE(YARR_JIT)
100 Yarr::jitCompileRegex(globalData, m_regExpJITCode, m_pattern, m_numSubpatterns, m_constructionError, ignoreCase(), multiline());
101#else
102 UNUSED_PARAM(globalData);
103 m_regExpBytecode.set(Yarr::byteCompileRegex(m_pattern, m_numSubpatterns, m_constructionError, ignoreCase(), multiline()));
104#endif
9dae56ea
A
105}
106
ba379fdc
A
107int RegExp::match(const UString& s, int startOffset, Vector<int, 32>* ovector)
108{
109 if (startOffset < 0)
110 startOffset = 0;
111 if (ovector)
112 ovector->clear();
113
4e4e5a6f
A
114 if (static_cast<unsigned>(startOffset) > s.size() || s.isNull()) {
115 m_lastMatchString = UString();
116 m_lastMatchStart = -1;
117 m_lastOVector.shrink(0);
ba379fdc 118 return -1;
4e4e5a6f
A
119 }
120
121 // Perform check to see if this match call is the same as the last match invocation
122 // and if it is return the prior result.
123 if ((startOffset == m_lastMatchStart) && (s.rep() == m_lastMatchString.rep())) {
124 if (ovector)
125 *ovector = m_lastOVector;
126
127 if (m_lastOVector.isEmpty())
128 return -1;
129
130 return m_lastOVector.at(0);
131 }
ba379fdc
A
132
133#if ENABLE(YARR_JIT)
134 if (!!m_regExpJITCode) {
135#else
136 if (m_regExpBytecode) {
137#endif
138 int offsetVectorSize = (m_numSubpatterns + 1) * 3; // FIXME: should be 2 - but adding temporary fallback to pcre.
139 int* offsetVector;
140 Vector<int, 32> nonReturnedOvector;
141 if (ovector) {
142 ovector->resize(offsetVectorSize);
143 offsetVector = ovector->data();
144 } else {
145 nonReturnedOvector.resize(offsetVectorSize);
146 offsetVector = nonReturnedOvector.data();
147 }
148
149 ASSERT(offsetVector);
150 for (int j = 0; j < offsetVectorSize; ++j)
151 offsetVector[j] = -1;
152
153
154#if ENABLE(YARR_JIT)
155 int result = Yarr::executeRegex(m_regExpJITCode, s.data(), startOffset, s.size(), offsetVector, offsetVectorSize);
156#else
157 int result = Yarr::interpretRegex(m_regExpBytecode.get(), s.data(), startOffset, s.size(), offsetVector);
158#endif
159
160 if (result < 0) {
161#ifndef NDEBUG
162 // TODO: define up a symbol, rather than magic -1
163 if (result != -1)
164 fprintf(stderr, "jsRegExpExecute failed with result %d\n", result);
165#endif
166 if (ovector)
167 ovector->clear();
168 }
4e4e5a6f
A
169
170 m_lastMatchString = s;
171 m_lastMatchStart = startOffset;
172
173 if (ovector)
174 m_lastOVector = *ovector;
175 else
176 m_lastOVector = nonReturnedOvector;
177
ba379fdc
A
178 return result;
179 }
180
4e4e5a6f
A
181 m_lastMatchString = UString();
182 m_lastMatchStart = -1;
183 m_lastOVector.shrink(0);
184
ba379fdc
A
185 return -1;
186}
187
188#else
189
4e4e5a6f 190void RegExp::compile(JSGlobalData*)
ba379fdc
A
191{
192 m_regExp = 0;
ba379fdc
A
193 JSRegExpIgnoreCaseOption ignoreCaseOption = ignoreCase() ? JSRegExpIgnoreCase : JSRegExpDoNotIgnoreCase;
194 JSRegExpMultilineOption multilineOption = multiline() ? JSRegExpMultiline : JSRegExpSingleLine;
195 m_regExp = jsRegExpCompile(reinterpret_cast<const UChar*>(m_pattern.data()), m_pattern.size(), ignoreCaseOption, multilineOption, &m_numSubpatterns, &m_constructionError);
196}
197
198int RegExp::match(const UString& s, int startOffset, Vector<int, 32>* ovector)
9dae56ea
A
199{
200 if (startOffset < 0)
201 startOffset = 0;
202 if (ovector)
203 ovector->clear();
204
4e4e5a6f 205 if (static_cast<unsigned>(startOffset) > s.size() || s.isNull())
9dae56ea
A
206 return -1;
207
9dae56ea
A
208 if (m_regExp) {
209 // Set up the offset vector for the result.
210 // First 2/3 used for result, the last third used by PCRE.
211 int* offsetVector;
212 int offsetVectorSize;
213 int fixedSizeOffsetVector[3];
214 if (!ovector) {
215 offsetVectorSize = 3;
216 offsetVector = fixedSizeOffsetVector;
217 } else {
218 offsetVectorSize = (m_numSubpatterns + 1) * 3;
ba379fdc
A
219 ovector->resize(offsetVectorSize);
220 offsetVector = ovector->data();
9dae56ea
A
221 }
222
223 int numMatches = jsRegExpExecute(m_regExp, reinterpret_cast<const UChar*>(s.data()), s.size(), startOffset, offsetVector, offsetVectorSize);
224
225 if (numMatches < 0) {
226#ifndef NDEBUG
227 if (numMatches != JSRegExpErrorNoMatch)
228 fprintf(stderr, "jsRegExpExecute failed with result %d\n", numMatches);
229#endif
230 if (ovector)
231 ovector->clear();
232 return -1;
233 }
234
235 return offsetVector[0];
236 }
237
238 return -1;
239}
240
ba379fdc
A
241#endif
242
9dae56ea 243} // namespace JSC