]> git.saurik.com Git - apple/javascriptcore.git/blob - kjs/regexp_object.cpp
JavaScriptCore-466.1.tar.gz
[apple/javascriptcore.git] / kjs / regexp_object.cpp
1 /*
2 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3 * Copyright (C) 2003, 2007, 2008 Apple Inc. All Rights Reserved.
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 */
20
21 #include "config.h"
22 #include "regexp_object.h"
23 #include "regexp_object.lut.h"
24
25 #include "array_instance.h"
26 #include "array_object.h"
27 #include "error_object.h"
28 #include "internal.h"
29 #include "object.h"
30 #include "operations.h"
31 #include "regexp.h"
32 #include "types.h"
33 #include "value.h"
34 #include "UnusedParam.h"
35
36 #include <stdio.h>
37
38 namespace KJS {
39
40 // ------------------------------ RegExpPrototype ---------------------------
41
42 static JSValue* regExpProtoFuncTest(ExecState*, JSObject*, const List&);
43 static JSValue* regExpProtoFuncExec(ExecState*, JSObject*, const List&);
44 static JSValue* regExpProtoFuncCompile(ExecState*, JSObject*, const List&);
45 static JSValue* regExpProtoFuncToString(ExecState*, JSObject*, const List&);
46
47 // ECMA 15.10.5
48
49 const ClassInfo RegExpPrototype::info = { "RegExpPrototype", 0, 0 };
50
51 RegExpPrototype::RegExpPrototype(ExecState* exec, ObjectPrototype* objectPrototype, FunctionPrototype* functionPrototype)
52 : JSObject(objectPrototype)
53 {
54 static const Identifier* compilePropertyName = new Identifier("compile");
55 static const Identifier* execPropertyName = new Identifier("exec");
56 static const Identifier* testPropertyName = new Identifier("test");
57
58 putDirectFunction(new PrototypeFunction(exec, functionPrototype, 0, *compilePropertyName, regExpProtoFuncCompile), DontEnum);
59 putDirectFunction(new PrototypeFunction(exec, functionPrototype, 0, *execPropertyName, regExpProtoFuncExec), DontEnum);
60 putDirectFunction(new PrototypeFunction(exec, functionPrototype, 0, *testPropertyName, regExpProtoFuncTest), DontEnum);
61 putDirectFunction(new PrototypeFunction(exec, functionPrototype, 0, exec->propertyNames().toString, regExpProtoFuncToString), DontEnum);
62 }
63
64 // ------------------------------ Functions ---------------------------
65
66 JSValue* regExpProtoFuncTest(ExecState* exec, JSObject* thisObj, const List& args)
67 {
68 if (!thisObj->inherits(&RegExpImp::info))
69 return throwError(exec, TypeError);
70
71 return static_cast<RegExpImp*>(thisObj)->test(exec, args);
72 }
73
74 JSValue* regExpProtoFuncExec(ExecState* exec, JSObject* thisObj, const List& args)
75 {
76 if (!thisObj->inherits(&RegExpImp::info))
77 return throwError(exec, TypeError);
78
79 return static_cast<RegExpImp*>(thisObj)->exec(exec, args);
80 }
81
82 JSValue* regExpProtoFuncCompile(ExecState* exec, JSObject* thisObj, const List& args)
83 {
84 if (!thisObj->inherits(&RegExpImp::info))
85 return throwError(exec, TypeError);
86
87 RefPtr<RegExp> regExp;
88 JSValue* arg0 = args[0];
89 JSValue* arg1 = args[1];
90
91 if (arg0->isObject(&RegExpImp::info)) {
92 if (!arg1->isUndefined())
93 return throwError(exec, TypeError, "Cannot supply flags when constructing one RegExp from another.");
94 regExp = static_cast<RegExpImp*>(arg0)->regExp();
95 } else {
96 UString pattern = args.isEmpty() ? UString("") : arg0->toString(exec);
97 UString flags = arg1->isUndefined() ? UString("") : arg1->toString(exec);
98 regExp = new RegExp(pattern, flags);
99 }
100
101 if (!regExp->isValid())
102 return throwError(exec, SyntaxError, UString("Invalid regular expression: ").append(regExp->errorMessage()));
103
104 static_cast<RegExpImp*>(thisObj)->setRegExp(regExp.release());
105 static_cast<RegExpImp*>(thisObj)->setLastIndex(0);
106 return jsUndefined();
107 }
108
109 JSValue* regExpProtoFuncToString(ExecState* exec, JSObject* thisObj, const List&)
110 {
111 if (!thisObj->inherits(&RegExpImp::info)) {
112 if (thisObj->inherits(&RegExpPrototype::info))
113 return jsString("//");
114 return throwError(exec, TypeError);
115 }
116
117 UString result = "/" + thisObj->get(exec, exec->propertyNames().source)->toString(exec) + "/";
118 if (thisObj->get(exec, exec->propertyNames().global)->toBoolean(exec))
119 result += "g";
120 if (thisObj->get(exec, exec->propertyNames().ignoreCase)->toBoolean(exec))
121 result += "i";
122 if (thisObj->get(exec, exec->propertyNames().multiline)->toBoolean(exec))
123 result += "m";
124 return jsString(result);
125 }
126
127 // ------------------------------ RegExpImp ------------------------------------
128
129 const ClassInfo RegExpImp::info = { "RegExp", 0, &RegExpImpTable };
130
131 /* Source for regexp_object.lut.h
132 @begin RegExpImpTable 5
133 global RegExpImp::Global DontDelete|ReadOnly|DontEnum
134 ignoreCase RegExpImp::IgnoreCase DontDelete|ReadOnly|DontEnum
135 multiline RegExpImp::Multiline DontDelete|ReadOnly|DontEnum
136 source RegExpImp::Source DontDelete|ReadOnly|DontEnum
137 lastIndex RegExpImp::LastIndex DontDelete|DontEnum
138 @end
139 */
140
141 RegExpImp::RegExpImp(RegExpPrototype* regexpProto, PassRefPtr<RegExp> regExp)
142 : JSObject(regexpProto)
143 , m_regExp(regExp)
144 , m_lastIndex(0)
145 {
146 }
147
148 RegExpImp::~RegExpImp()
149 {
150 }
151
152 bool RegExpImp::getOwnPropertySlot(ExecState* exec, const Identifier& propertyName, PropertySlot& slot)
153 {
154 return getStaticValueSlot<RegExpImp, JSObject>(exec, &RegExpImpTable, this, propertyName, slot);
155 }
156
157 JSValue* RegExpImp::getValueProperty(ExecState*, int token) const
158 {
159 switch (token) {
160 case Global:
161 return jsBoolean(m_regExp->global());
162 case IgnoreCase:
163 return jsBoolean(m_regExp->ignoreCase());
164 case Multiline:
165 return jsBoolean(m_regExp->multiline());
166 case Source:
167 return jsString(m_regExp->pattern());
168 case LastIndex:
169 return jsNumber(m_lastIndex);
170 }
171
172 ASSERT_NOT_REACHED();
173 return 0;
174 }
175
176 void RegExpImp::put(ExecState* exec, const Identifier& propertyName, JSValue* value, int attributes)
177 {
178 lookupPut<RegExpImp, JSObject>(exec, propertyName, value, attributes, &RegExpImpTable, this);
179 }
180
181 void RegExpImp::putValueProperty(ExecState* exec, int token, JSValue* value, int)
182 {
183 UNUSED_PARAM(token);
184 ASSERT(token == LastIndex);
185 m_lastIndex = value->toInteger(exec);
186 }
187
188 bool RegExpImp::match(ExecState* exec, const List& args)
189 {
190 RegExpObjectImp* regExpObj = exec->lexicalGlobalObject()->regExpConstructor();
191
192 UString input;
193 if (!args.isEmpty())
194 input = args[0]->toString(exec);
195 else {
196 input = regExpObj->input();
197 if (input.isNull()) {
198 throwError(exec, GeneralError, "No input.");
199 return false;
200 }
201 }
202
203 bool global = get(exec, exec->propertyNames().global)->toBoolean(exec);
204 int lastIndex = 0;
205 if (global) {
206 if (m_lastIndex < 0 || m_lastIndex > input.size()) {
207 m_lastIndex = 0;
208 return false;
209 }
210 lastIndex = static_cast<int>(m_lastIndex);
211 }
212
213 int foundIndex;
214 int foundLength;
215 regExpObj->performMatch(m_regExp.get(), input, lastIndex, foundIndex, foundLength);
216
217 if (global) {
218 lastIndex = foundIndex < 0 ? 0 : foundIndex + foundLength;
219 m_lastIndex = lastIndex;
220 }
221
222 return foundIndex >= 0;
223 }
224
225 JSValue* RegExpImp::test(ExecState* exec, const List& args)
226 {
227 return jsBoolean(match(exec, args));
228 }
229
230 JSValue* RegExpImp::exec(ExecState* exec, const List& args)
231 {
232 return match(exec, args)
233 ? exec->lexicalGlobalObject()->regExpConstructor()->arrayOfMatches(exec)
234 : jsNull();
235 }
236
237 bool RegExpImp::implementsCall() const
238 {
239 return true;
240 }
241
242 JSValue* RegExpImp::callAsFunction(ExecState* exec, JSObject*, const List& args)
243 {
244 return RegExpImp::exec(exec, args);
245 }
246
247 // ------------------------------ RegExpObjectImp ------------------------------
248
249 const ClassInfo RegExpObjectImp::info = { "Function", &InternalFunctionImp::info, &RegExpObjectImpTable };
250
251 /* Source for regexp_object.lut.h
252 @begin RegExpObjectImpTable 21
253 input RegExpObjectImp::Input None
254 $_ RegExpObjectImp::Input DontEnum
255 multiline RegExpObjectImp::Multiline None
256 $* RegExpObjectImp::Multiline DontEnum
257 lastMatch RegExpObjectImp::LastMatch DontDelete|ReadOnly
258 $& RegExpObjectImp::LastMatch DontDelete|ReadOnly|DontEnum
259 lastParen RegExpObjectImp::LastParen DontDelete|ReadOnly
260 $+ RegExpObjectImp::LastParen DontDelete|ReadOnly|DontEnum
261 leftContext RegExpObjectImp::LeftContext DontDelete|ReadOnly
262 $` RegExpObjectImp::LeftContext DontDelete|ReadOnly|DontEnum
263 rightContext RegExpObjectImp::RightContext DontDelete|ReadOnly
264 $' RegExpObjectImp::RightContext DontDelete|ReadOnly|DontEnum
265 $1 RegExpObjectImp::Dollar1 DontDelete|ReadOnly
266 $2 RegExpObjectImp::Dollar2 DontDelete|ReadOnly
267 $3 RegExpObjectImp::Dollar3 DontDelete|ReadOnly
268 $4 RegExpObjectImp::Dollar4 DontDelete|ReadOnly
269 $5 RegExpObjectImp::Dollar5 DontDelete|ReadOnly
270 $6 RegExpObjectImp::Dollar6 DontDelete|ReadOnly
271 $7 RegExpObjectImp::Dollar7 DontDelete|ReadOnly
272 $8 RegExpObjectImp::Dollar8 DontDelete|ReadOnly
273 $9 RegExpObjectImp::Dollar9 DontDelete|ReadOnly
274 @end
275 */
276
277 struct RegExpObjectImpPrivate {
278 // Global search cache / settings
279 RegExpObjectImpPrivate() : lastNumSubPatterns(0), multiline(false) { }
280 UString lastInput;
281 OwnArrayPtr<int> lastOvector;
282 unsigned lastNumSubPatterns : 31;
283 bool multiline : 1;
284 };
285
286 RegExpObjectImp::RegExpObjectImp(ExecState* exec, FunctionPrototype* funcProto, RegExpPrototype* regProto)
287 : InternalFunctionImp(funcProto, "RegExp")
288 , d(new RegExpObjectImpPrivate)
289 {
290 // ECMA 15.10.5.1 RegExp.prototype
291 putDirect(exec->propertyNames().prototype, regProto, DontEnum | DontDelete | ReadOnly);
292
293 // no. of arguments for constructor
294 putDirect(exec->propertyNames().length, jsNumber(2), ReadOnly | DontDelete | DontEnum);
295 }
296
297 /*
298 To facilitate result caching, exec(), test(), match(), search(), and replace() dipatch regular
299 expression matching through the performMatch function. We use cached results to calculate,
300 e.g., RegExp.lastMatch and RegExp.leftParen.
301 */
302 void RegExpObjectImp::performMatch(RegExp* r, const UString& s, int startOffset, int& position, int& length, int** ovector)
303 {
304 OwnArrayPtr<int> tmpOvector;
305 position = r->match(s, startOffset, &tmpOvector);
306
307 if (ovector)
308 *ovector = tmpOvector.get();
309
310 if (position != -1) {
311 ASSERT(tmpOvector);
312
313 length = tmpOvector[1] - tmpOvector[0];
314
315 d->lastInput = s;
316 d->lastOvector.set(tmpOvector.release());
317 d->lastNumSubPatterns = r->numSubpatterns();
318 }
319 }
320
321 JSObject* RegExpObjectImp::arrayOfMatches(ExecState* exec) const
322 {
323 unsigned lastNumSubpatterns = d->lastNumSubPatterns;
324 ArrayInstance* arr = new ArrayInstance(exec->lexicalGlobalObject()->arrayPrototype(), lastNumSubpatterns + 1);
325 for (unsigned i = 0; i <= lastNumSubpatterns; ++i) {
326 int start = d->lastOvector[2 * i];
327 if (start >= 0)
328 arr->put(exec, i, jsString(d->lastInput.substr(start, d->lastOvector[2 * i + 1] - start)));
329 }
330 arr->put(exec, exec->propertyNames().index, jsNumber(d->lastOvector[0]));
331 arr->put(exec, exec->propertyNames().input, jsString(d->lastInput));
332 return arr;
333 }
334
335 JSValue* RegExpObjectImp::getBackref(unsigned i) const
336 {
337 if (d->lastOvector && i <= d->lastNumSubPatterns)
338 return jsString(d->lastInput.substr(d->lastOvector[2 * i], d->lastOvector[2 * i + 1] - d->lastOvector[2 * i]));
339 return jsString("");
340 }
341
342 JSValue* RegExpObjectImp::getLastParen() const
343 {
344 unsigned i = d->lastNumSubPatterns;
345 if (i > 0) {
346 ASSERT(d->lastOvector);
347 return jsString(d->lastInput.substr(d->lastOvector[2 * i], d->lastOvector[2 * i + 1] - d->lastOvector[2 * i]));
348 }
349 return jsString("");
350 }
351
352 JSValue *RegExpObjectImp::getLeftContext() const
353 {
354 if (d->lastOvector)
355 return jsString(d->lastInput.substr(0, d->lastOvector[0]));
356 return jsString("");
357 }
358
359 JSValue *RegExpObjectImp::getRightContext() const
360 {
361 if (d->lastOvector) {
362 UString s = d->lastInput;
363 return jsString(s.substr(d->lastOvector[1], s.size() - d->lastOvector[1]));
364 }
365 return jsString("");
366 }
367
368 bool RegExpObjectImp::getOwnPropertySlot(ExecState *exec, const Identifier& propertyName, PropertySlot& slot)
369 {
370 return getStaticValueSlot<RegExpObjectImp, InternalFunctionImp>(exec, &RegExpObjectImpTable, this, propertyName, slot);
371 }
372
373 JSValue *RegExpObjectImp::getValueProperty(ExecState*, int token) const
374 {
375 switch (token) {
376 case Dollar1:
377 return getBackref(1);
378 case Dollar2:
379 return getBackref(2);
380 case Dollar3:
381 return getBackref(3);
382 case Dollar4:
383 return getBackref(4);
384 case Dollar5:
385 return getBackref(5);
386 case Dollar6:
387 return getBackref(6);
388 case Dollar7:
389 return getBackref(7);
390 case Dollar8:
391 return getBackref(8);
392 case Dollar9:
393 return getBackref(9);
394 case Input:
395 return jsString(d->lastInput);
396 case Multiline:
397 return jsBoolean(d->multiline);
398 case LastMatch:
399 return getBackref(0);
400 case LastParen:
401 return getLastParen();
402 case LeftContext:
403 return getLeftContext();
404 case RightContext:
405 return getRightContext();
406 default:
407 ASSERT(0);
408 }
409
410 return jsString("");
411 }
412
413 void RegExpObjectImp::put(ExecState *exec, const Identifier &propertyName, JSValue *value, int attr)
414 {
415 lookupPut<RegExpObjectImp, InternalFunctionImp>(exec, propertyName, value, attr, &RegExpObjectImpTable, this);
416 }
417
418 void RegExpObjectImp::putValueProperty(ExecState *exec, int token, JSValue *value, int)
419 {
420 switch (token) {
421 case Input:
422 d->lastInput = value->toString(exec);
423 break;
424 case Multiline:
425 d->multiline = value->toBoolean(exec);
426 break;
427 default:
428 ASSERT(0);
429 }
430 }
431
432 bool RegExpObjectImp::implementsConstruct() const
433 {
434 return true;
435 }
436
437 // ECMA 15.10.4
438 JSObject *RegExpObjectImp::construct(ExecState *exec, const List &args)
439 {
440 JSValue* arg0 = args[0];
441 JSValue* arg1 = args[1];
442
443 if (arg0->isObject(&RegExpImp::info)) {
444 if (!arg1->isUndefined())
445 return throwError(exec, TypeError, "Cannot supply flags when constructing one RegExp from another.");
446 return static_cast<JSObject*>(arg0);
447 }
448
449 UString pattern = arg0->isUndefined() ? UString("") : arg0->toString(exec);
450 UString flags = arg1->isUndefined() ? UString("") : arg1->toString(exec);
451
452 return createRegExpImp(exec, new RegExp(pattern, flags));
453 }
454
455 JSObject* RegExpObjectImp::createRegExpImp(ExecState* exec, PassRefPtr<RegExp> regExp)
456 {
457 return regExp->isValid()
458 ? new RegExpImp(static_cast<RegExpPrototype*>(exec->lexicalGlobalObject()->regExpPrototype()), regExp)
459 : throwError(exec, SyntaxError, UString("Invalid regular expression: ").append(regExp->errorMessage()));
460 }
461
462 // ECMA 15.10.3
463 JSValue *RegExpObjectImp::callAsFunction(ExecState *exec, JSObject * /*thisObj*/, const List &args)
464 {
465 return construct(exec, args);
466 }
467
468 const UString& RegExpObjectImp::input() const
469 {
470 // Can detect a distinct initial state that is invisible to JavaScript, by checking for null
471 // state (since jsString turns null strings to empty strings).
472 return d->lastInput;
473 }
474
475 }