]> git.saurik.com Git - apple/javascriptcore.git/blame - yarr/RegexCompiler.cpp
JavaScriptCore-621.1.tar.gz
[apple/javascriptcore.git] / yarr / RegexCompiler.cpp
CommitLineData
ba379fdc
A
1/*
2 * Copyright (C) 2009 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "config.h"
27#include "RegexCompiler.h"
28
29#include "RegexInterpreter.h"
30#include "RegexPattern.h"
31#include <wtf/Vector.h>
32
33#if ENABLE(YARR)
34
35using namespace WTF;
36
37namespace JSC { namespace Yarr {
38
4e4e5a6f
A
39#include "RegExpJitTables.h"
40
ba379fdc
A
41class CharacterClassConstructor {
42public:
43 CharacterClassConstructor(bool isCaseInsensitive = false)
44 : m_isCaseInsensitive(isCaseInsensitive)
45 {
46 }
47
48 void reset()
49 {
50 m_matches.clear();
51 m_ranges.clear();
52 m_matchesUnicode.clear();
53 m_rangesUnicode.clear();
54 }
55
56 void append(const CharacterClass* other)
57 {
58 for (size_t i = 0; i < other->m_matches.size(); ++i)
59 addSorted(m_matches, other->m_matches[i]);
60 for (size_t i = 0; i < other->m_ranges.size(); ++i)
61 addSortedRange(m_ranges, other->m_ranges[i].begin, other->m_ranges[i].end);
62 for (size_t i = 0; i < other->m_matchesUnicode.size(); ++i)
63 addSorted(m_matchesUnicode, other->m_matchesUnicode[i]);
64 for (size_t i = 0; i < other->m_rangesUnicode.size(); ++i)
65 addSortedRange(m_rangesUnicode, other->m_rangesUnicode[i].begin, other->m_rangesUnicode[i].end);
66 }
67
68 void putChar(UChar ch)
69 {
70 if (ch <= 0x7f) {
71 if (m_isCaseInsensitive && isASCIIAlpha(ch)) {
72 addSorted(m_matches, toASCIIUpper(ch));
73 addSorted(m_matches, toASCIILower(ch));
74 } else
75 addSorted(m_matches, ch);
76 } else {
77 UChar upper, lower;
78 if (m_isCaseInsensitive && ((upper = Unicode::toUpper(ch)) != (lower = Unicode::toLower(ch)))) {
79 addSorted(m_matchesUnicode, upper);
80 addSorted(m_matchesUnicode, lower);
81 } else
82 addSorted(m_matchesUnicode, ch);
83 }
84 }
85
86 // returns true if this character has another case, and 'ch' is the upper case form.
87 static inline bool isUnicodeUpper(UChar ch)
88 {
89 return ch != Unicode::toLower(ch);
90 }
91
92 // returns true if this character has another case, and 'ch' is the lower case form.
93 static inline bool isUnicodeLower(UChar ch)
94 {
95 return ch != Unicode::toUpper(ch);
96 }
97
98 void putRange(UChar lo, UChar hi)
99 {
100 if (lo <= 0x7f) {
101 char asciiLo = lo;
102 char asciiHi = std::min(hi, (UChar)0x7f);
103 addSortedRange(m_ranges, lo, asciiHi);
104
105 if (m_isCaseInsensitive) {
106 if ((asciiLo <= 'Z') && (asciiHi >= 'A'))
107 addSortedRange(m_ranges, std::max(asciiLo, 'A')+('a'-'A'), std::min(asciiHi, 'Z')+('a'-'A'));
108 if ((asciiLo <= 'z') && (asciiHi >= 'a'))
109 addSortedRange(m_ranges, std::max(asciiLo, 'a')+('A'-'a'), std::min(asciiHi, 'z')+('A'-'a'));
110 }
111 }
112 if (hi >= 0x80) {
113 uint32_t unicodeCurr = std::max(lo, (UChar)0x80);
114 addSortedRange(m_rangesUnicode, unicodeCurr, hi);
115
116 if (m_isCaseInsensitive) {
117 while (unicodeCurr <= hi) {
118 // If the upper bound of the range (hi) is 0xffff, the increments to
119 // unicodeCurr in this loop may take it to 0x10000. This is fine
120 // (if so we won't re-enter the loop, since the loop condition above
121 // will definitely fail) - but this does mean we cannot use a UChar
122 // to represent unicodeCurr, we must use a 32-bit value instead.
123 ASSERT(unicodeCurr <= 0xffff);
124
125 if (isUnicodeUpper(unicodeCurr)) {
126 UChar lowerCaseRangeBegin = Unicode::toLower(unicodeCurr);
127 UChar lowerCaseRangeEnd = lowerCaseRangeBegin;
128 while ((++unicodeCurr <= hi) && isUnicodeUpper(unicodeCurr) && (Unicode::toLower(unicodeCurr) == (lowerCaseRangeEnd + 1)))
129 lowerCaseRangeEnd++;
130 addSortedRange(m_rangesUnicode, lowerCaseRangeBegin, lowerCaseRangeEnd);
131 } else if (isUnicodeLower(unicodeCurr)) {
132 UChar upperCaseRangeBegin = Unicode::toUpper(unicodeCurr);
133 UChar upperCaseRangeEnd = upperCaseRangeBegin;
134 while ((++unicodeCurr <= hi) && isUnicodeLower(unicodeCurr) && (Unicode::toUpper(unicodeCurr) == (upperCaseRangeEnd + 1)))
135 upperCaseRangeEnd++;
136 addSortedRange(m_rangesUnicode, upperCaseRangeBegin, upperCaseRangeEnd);
137 } else
138 ++unicodeCurr;
139 }
140 }
141 }
142 }
143
144 CharacterClass* charClass()
145 {
4e4e5a6f 146 CharacterClass* characterClass = new CharacterClass(0);
ba379fdc
A
147
148 characterClass->m_matches.append(m_matches);
149 characterClass->m_ranges.append(m_ranges);
150 characterClass->m_matchesUnicode.append(m_matchesUnicode);
151 characterClass->m_rangesUnicode.append(m_rangesUnicode);
152
153 reset();
154
155 return characterClass;
156 }
157
158private:
159 void addSorted(Vector<UChar>& matches, UChar ch)
160 {
161 unsigned pos = 0;
162 unsigned range = matches.size();
163
164 // binary chop, find position to insert char.
165 while (range) {
166 unsigned index = range >> 1;
167
168 int val = matches[pos+index] - ch;
169 if (!val)
170 return;
171 else if (val > 0)
172 range = index;
173 else {
174 pos += (index+1);
175 range -= (index+1);
176 }
177 }
178
179 if (pos == matches.size())
180 matches.append(ch);
181 else
182 matches.insert(pos, ch);
183 }
184
185 void addSortedRange(Vector<CharacterRange>& ranges, UChar lo, UChar hi)
186 {
187 unsigned end = ranges.size();
188
189 // Simple linear scan - I doubt there are that many ranges anyway...
190 // feel free to fix this with something faster (eg binary chop).
191 for (unsigned i = 0; i < end; ++i) {
192 // does the new range fall before the current position in the array
193 if (hi < ranges[i].begin) {
194 // optional optimization: concatenate appending ranges? - may not be worthwhile.
195 if (hi == (ranges[i].begin - 1)) {
196 ranges[i].begin = lo;
197 return;
198 }
199 ranges.insert(i, CharacterRange(lo, hi));
200 return;
201 }
202 // Okay, since we didn't hit the last case, the end of the new range is definitely at or after the begining
203 // If the new range start at or before the end of the last range, then the overlap (if it starts one after the
204 // end of the last range they concatenate, which is just as good.
205 if (lo <= (ranges[i].end + 1)) {
206 // found an intersect! we'll replace this entry in the array.
207 ranges[i].begin = std::min(ranges[i].begin, lo);
208 ranges[i].end = std::max(ranges[i].end, hi);
209
210 // now check if the new range can subsume any subsequent ranges.
211 unsigned next = i+1;
212 // each iteration of the loop we will either remove something from the list, or break the loop.
213 while (next < ranges.size()) {
214 if (ranges[next].begin <= (ranges[i].end + 1)) {
215 // the next entry now overlaps / concatenates this one.
216 ranges[i].end = std::max(ranges[i].end, ranges[next].end);
217 ranges.remove(next);
218 } else
219 break;
220 }
221
222 return;
223 }
224 }
225
226 // CharacterRange comes after all existing ranges.
227 ranges.append(CharacterRange(lo, hi));
228 }
229
230 bool m_isCaseInsensitive;
231
232 Vector<UChar> m_matches;
233 Vector<CharacterRange> m_ranges;
234 Vector<UChar> m_matchesUnicode;
235 Vector<CharacterRange> m_rangesUnicode;
236};
237
ba379fdc
A
238class RegexPatternConstructor {
239public:
240 RegexPatternConstructor(RegexPattern& pattern)
241 : m_pattern(pattern)
242 , m_characterClassConstructor(pattern.m_ignoreCase)
243 {
244 }
245
246 ~RegexPatternConstructor()
247 {
248 }
249
250 void reset()
251 {
252 m_pattern.reset();
253 m_characterClassConstructor.reset();
254 }
255
256 void assertionBOL()
257 {
258 m_alternative->m_terms.append(PatternTerm::BOL());
259 }
260 void assertionEOL()
261 {
262 m_alternative->m_terms.append(PatternTerm::EOL());
263 }
264 void assertionWordBoundary(bool invert)
265 {
266 m_alternative->m_terms.append(PatternTerm::WordBoundary(invert));
267 }
268
269 void atomPatternCharacter(UChar ch)
270 {
271 // We handle case-insensitive checking of unicode characters which do have both
272 // cases by handling them as if they were defined using a CharacterClass.
273 if (m_pattern.m_ignoreCase && !isASCII(ch) && (Unicode::toUpper(ch) != Unicode::toLower(ch))) {
274 atomCharacterClassBegin();
275 atomCharacterClassAtom(ch);
276 atomCharacterClassEnd();
277 } else
278 m_alternative->m_terms.append(PatternTerm(ch));
279 }
280
281 void atomBuiltInCharacterClass(BuiltInCharacterClassID classID, bool invert)
282 {
283 switch (classID) {
284 case DigitClassID:
285 m_alternative->m_terms.append(PatternTerm(m_pattern.digitsCharacterClass(), invert));
286 break;
287 case SpaceClassID:
288 m_alternative->m_terms.append(PatternTerm(m_pattern.spacesCharacterClass(), invert));
289 break;
290 case WordClassID:
291 m_alternative->m_terms.append(PatternTerm(m_pattern.wordcharCharacterClass(), invert));
292 break;
293 case NewlineClassID:
294 m_alternative->m_terms.append(PatternTerm(m_pattern.newlineCharacterClass(), invert));
295 break;
296 }
297 }
298
299 void atomCharacterClassBegin(bool invert = false)
300 {
301 m_invertCharacterClass = invert;
302 }
303
304 void atomCharacterClassAtom(UChar ch)
305 {
306 m_characterClassConstructor.putChar(ch);
307 }
308
309 void atomCharacterClassRange(UChar begin, UChar end)
310 {
311 m_characterClassConstructor.putRange(begin, end);
312 }
313
314 void atomCharacterClassBuiltIn(BuiltInCharacterClassID classID, bool invert)
315 {
316 ASSERT(classID != NewlineClassID);
317
318 switch (classID) {
319 case DigitClassID:
320 m_characterClassConstructor.append(invert ? m_pattern.nondigitsCharacterClass() : m_pattern.digitsCharacterClass());
321 break;
322
323 case SpaceClassID:
324 m_characterClassConstructor.append(invert ? m_pattern.nonspacesCharacterClass() : m_pattern.spacesCharacterClass());
325 break;
326
327 case WordClassID:
328 m_characterClassConstructor.append(invert ? m_pattern.nonwordcharCharacterClass() : m_pattern.wordcharCharacterClass());
329 break;
330
331 default:
332 ASSERT_NOT_REACHED();
333 }
334 }
335
336 void atomCharacterClassEnd()
337 {
338 CharacterClass* newCharacterClass = m_characterClassConstructor.charClass();
339 m_pattern.m_userCharacterClasses.append(newCharacterClass);
340 m_alternative->m_terms.append(PatternTerm(newCharacterClass, m_invertCharacterClass));
341 }
342
343 void atomParenthesesSubpatternBegin(bool capture = true)
344 {
345 unsigned subpatternId = m_pattern.m_numSubpatterns + 1;
346 if (capture)
347 m_pattern.m_numSubpatterns++;
348
349 PatternDisjunction* parenthesesDisjunction = new PatternDisjunction(m_alternative);
350 m_pattern.m_disjunctions.append(parenthesesDisjunction);
351 m_alternative->m_terms.append(PatternTerm(PatternTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction, capture));
352 m_alternative = parenthesesDisjunction->addNewAlternative();
353 }
354
355 void atomParentheticalAssertionBegin(bool invert = false)
356 {
357 PatternDisjunction* parenthesesDisjunction = new PatternDisjunction(m_alternative);
358 m_pattern.m_disjunctions.append(parenthesesDisjunction);
359 m_alternative->m_terms.append(PatternTerm(PatternTerm::TypeParentheticalAssertion, m_pattern.m_numSubpatterns + 1, parenthesesDisjunction, invert));
360 m_alternative = parenthesesDisjunction->addNewAlternative();
361 }
362
363 void atomParenthesesEnd()
364 {
365 ASSERT(m_alternative->m_parent);
366 ASSERT(m_alternative->m_parent->m_parent);
367 m_alternative = m_alternative->m_parent->m_parent;
368
369 m_alternative->lastTerm().parentheses.lastSubpatternId = m_pattern.m_numSubpatterns;
370 }
371
372 void atomBackReference(unsigned subpatternId)
373 {
374 ASSERT(subpatternId);
4e4e5a6f 375 m_pattern.m_shouldFallBack = true;
ba379fdc
A
376 m_pattern.m_maxBackReference = std::max(m_pattern.m_maxBackReference, subpatternId);
377
378 if (subpatternId > m_pattern.m_numSubpatterns) {
379 m_alternative->m_terms.append(PatternTerm::ForwardReference());
380 return;
381 }
382
383 PatternAlternative* currentAlternative = m_alternative;
384 ASSERT(currentAlternative);
385
386 // Note to self: if we waited until the AST was baked, we could also remove forwards refs
387 while ((currentAlternative = currentAlternative->m_parent->m_parent)) {
388 PatternTerm& term = currentAlternative->lastTerm();
389 ASSERT((term.type == PatternTerm::TypeParenthesesSubpattern) || (term.type == PatternTerm::TypeParentheticalAssertion));
390
391 if ((term.type == PatternTerm::TypeParenthesesSubpattern) && term.invertOrCapture && (subpatternId == term.subpatternId)) {
392 m_alternative->m_terms.append(PatternTerm::ForwardReference());
393 return;
394 }
395 }
396
397 m_alternative->m_terms.append(PatternTerm(subpatternId));
398 }
399
400 PatternDisjunction* copyDisjunction(PatternDisjunction* disjunction)
401 {
402 PatternDisjunction* newDisjunction = new PatternDisjunction();
403
404 newDisjunction->m_parent = disjunction->m_parent;
405 for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
406 PatternAlternative* alternative = disjunction->m_alternatives[alt];
407 PatternAlternative* newAlternative = newDisjunction->addNewAlternative();
408 for (unsigned i = 0; i < alternative->m_terms.size(); ++i)
409 newAlternative->m_terms.append(copyTerm(alternative->m_terms[i]));
410 }
411
412 m_pattern.m_disjunctions.append(newDisjunction);
413 return newDisjunction;
414 }
415
416 PatternTerm copyTerm(PatternTerm& term)
417 {
418 if ((term.type != PatternTerm::TypeParenthesesSubpattern) && (term.type != PatternTerm::TypeParentheticalAssertion))
419 return PatternTerm(term);
420
421 PatternTerm termCopy = term;
422 termCopy.parentheses.disjunction = copyDisjunction(termCopy.parentheses.disjunction);
423 return termCopy;
424 }
425
426 void quantifyAtom(unsigned min, unsigned max, bool greedy)
427 {
428 ASSERT(min <= max);
429 ASSERT(m_alternative->m_terms.size());
430
431 if (!max) {
432 m_alternative->removeLastTerm();
433 return;
434 }
435
436 PatternTerm& term = m_alternative->lastTerm();
437 ASSERT(term.type > PatternTerm::TypeAssertionWordBoundary);
438 ASSERT((term.quantityCount == 1) && (term.quantityType == QuantifierFixedCount));
439
440 // For any assertion with a zero minimum, not matching is valid and has no effect,
441 // remove it. Otherwise, we need to match as least once, but there is no point
442 // matching more than once, so remove the quantifier. It is not entirely clear
443 // from the spec whether or not this behavior is correct, but I believe this
444 // matches Firefox. :-/
445 if (term.type == PatternTerm::TypeParentheticalAssertion) {
446 if (!min)
447 m_alternative->removeLastTerm();
448 return;
449 }
450
4e4e5a6f
A
451 if (max > 1 && term.type == PatternTerm::TypeParenthesesSubpattern)
452 m_pattern.m_shouldFallBack = true;
453
ba379fdc
A
454 if (min == 0)
455 term.quantify(max, greedy ? QuantifierGreedy : QuantifierNonGreedy);
456 else if (min == max)
457 term.quantify(min, QuantifierFixedCount);
458 else {
459 term.quantify(min, QuantifierFixedCount);
460 m_alternative->m_terms.append(copyTerm(term));
461 // NOTE: this term is interesting from an analysis perspective, in that it can be ignored.....
462 m_alternative->lastTerm().quantify((max == UINT_MAX) ? max : max - min, greedy ? QuantifierGreedy : QuantifierNonGreedy);
463 if (m_alternative->lastTerm().type == PatternTerm::TypeParenthesesSubpattern)
464 m_alternative->lastTerm().parentheses.isCopy = true;
465 }
466 }
467
468 void disjunction()
469 {
470 m_alternative = m_alternative->m_parent->addNewAlternative();
471 }
472
473 void regexBegin()
474 {
475 m_pattern.m_body = new PatternDisjunction();
476 m_alternative = m_pattern.m_body->addNewAlternative();
477 m_pattern.m_disjunctions.append(m_pattern.m_body);
478 }
479 void regexEnd()
480 {
481 }
482 void regexError()
483 {
484 }
485
486 unsigned setupAlternativeOffsets(PatternAlternative* alternative, unsigned currentCallFrameSize, unsigned initialInputPosition)
487 {
488 alternative->m_hasFixedSize = true;
489 unsigned currentInputPosition = initialInputPosition;
490
491 for (unsigned i = 0; i < alternative->m_terms.size(); ++i) {
492 PatternTerm& term = alternative->m_terms[i];
493
494 switch (term.type) {
495 case PatternTerm::TypeAssertionBOL:
496 case PatternTerm::TypeAssertionEOL:
497 case PatternTerm::TypeAssertionWordBoundary:
498 term.inputPosition = currentInputPosition;
499 break;
500
501 case PatternTerm::TypeBackReference:
502 term.inputPosition = currentInputPosition;
503 term.frameLocation = currentCallFrameSize;
504 currentCallFrameSize += RegexStackSpaceForBackTrackInfoBackReference;
505 alternative->m_hasFixedSize = false;
506 break;
507
508 case PatternTerm::TypeForwardReference:
509 break;
510
511 case PatternTerm::TypePatternCharacter:
512 term.inputPosition = currentInputPosition;
513 if (term.quantityType != QuantifierFixedCount) {
514 term.frameLocation = currentCallFrameSize;
515 currentCallFrameSize += RegexStackSpaceForBackTrackInfoPatternCharacter;
516 alternative->m_hasFixedSize = false;
517 } else
518 currentInputPosition += term.quantityCount;
519 break;
520
521 case PatternTerm::TypeCharacterClass:
522 term.inputPosition = currentInputPosition;
523 if (term.quantityType != QuantifierFixedCount) {
524 term.frameLocation = currentCallFrameSize;
525 currentCallFrameSize += RegexStackSpaceForBackTrackInfoCharacterClass;
526 alternative->m_hasFixedSize = false;
527 } else
528 currentInputPosition += term.quantityCount;
529 break;
530
531 case PatternTerm::TypeParenthesesSubpattern:
532 // Note: for fixed once parentheses we will ensure at least the minimum is available; others are on their own.
533 term.frameLocation = currentCallFrameSize;
534 if ((term.quantityCount == 1) && !term.parentheses.isCopy) {
535 if (term.quantityType == QuantifierFixedCount) {
536 currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize, currentInputPosition);
537 currentInputPosition += term.parentheses.disjunction->m_minimumSize;
538 } else {
539 currentCallFrameSize += RegexStackSpaceForBackTrackInfoParenthesesOnce;
540 currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize, currentInputPosition);
541 }
542 term.inputPosition = currentInputPosition;
543 } else {
544 term.inputPosition = currentInputPosition;
545 setupDisjunctionOffsets(term.parentheses.disjunction, 0, currentInputPosition);
546 currentCallFrameSize += RegexStackSpaceForBackTrackInfoParentheses;
547 }
548 // Fixed count of 1 could be accepted, if they have a fixed size *AND* if all alternatives are of the same length.
549 alternative->m_hasFixedSize = false;
550 break;
551
552 case PatternTerm::TypeParentheticalAssertion:
553 term.inputPosition = currentInputPosition;
554 term.frameLocation = currentCallFrameSize;
555 currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize + RegexStackSpaceForBackTrackInfoParentheticalAssertion, currentInputPosition);
556 break;
557 }
558 }
559
560 alternative->m_minimumSize = currentInputPosition - initialInputPosition;
561 return currentCallFrameSize;
562 }
563
564 unsigned setupDisjunctionOffsets(PatternDisjunction* disjunction, unsigned initialCallFrameSize, unsigned initialInputPosition)
565 {
566 if ((disjunction != m_pattern.m_body) && (disjunction->m_alternatives.size() > 1))
567 initialCallFrameSize += RegexStackSpaceForBackTrackInfoAlternative;
568
569 unsigned minimumInputSize = UINT_MAX;
570 unsigned maximumCallFrameSize = 0;
571 bool hasFixedSize = true;
572
573 for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
574 PatternAlternative* alternative = disjunction->m_alternatives[alt];
575 unsigned currentAlternativeCallFrameSize = setupAlternativeOffsets(alternative, initialCallFrameSize, initialInputPosition);
576 minimumInputSize = min(minimumInputSize, alternative->m_minimumSize);
577 maximumCallFrameSize = max(maximumCallFrameSize, currentAlternativeCallFrameSize);
578 hasFixedSize &= alternative->m_hasFixedSize;
579 }
580
581 ASSERT(minimumInputSize != UINT_MAX);
582 ASSERT(maximumCallFrameSize >= initialCallFrameSize);
583
584 disjunction->m_hasFixedSize = hasFixedSize;
585 disjunction->m_minimumSize = minimumInputSize;
586 disjunction->m_callFrameSize = maximumCallFrameSize;
587 return maximumCallFrameSize;
588 }
589
590 void setupOffsets()
591 {
592 setupDisjunctionOffsets(m_pattern.m_body, 0, 0);
593 }
594
595private:
596 RegexPattern& m_pattern;
597 PatternAlternative* m_alternative;
598 CharacterClassConstructor m_characterClassConstructor;
599 bool m_invertCharacterClass;
600};
601
602
603const char* compileRegex(const UString& patternString, RegexPattern& pattern)
604{
605 RegexPatternConstructor constructor(pattern);
606
607 if (const char* error = parse(constructor, patternString))
608 return error;
609
610 // If the pattern contains illegal backreferences reset & reparse.
611 // Quoting Netscape's "What's new in JavaScript 1.2",
612 // "Note: if the number of left parentheses is less than the number specified
613 // in \#, the \# is taken as an octal escape as described in the next row."
614 if (pattern.containsIllegalBackReference()) {
615 unsigned numSubpatterns = pattern.m_numSubpatterns;
616
617 constructor.reset();
f9bf01c6 618#if !ASSERT_DISABLED
ba379fdc
A
619 const char* error =
620#endif
621 parse(constructor, patternString, numSubpatterns);
622
623 ASSERT(!error);
624 ASSERT(numSubpatterns == pattern.m_numSubpatterns);
625 }
626
627 constructor.setupOffsets();
628
629 return false;
630};
631
632
633} }
634
635#endif