]> git.saurik.com Git - apple/javascriptcore.git/blame - yarr/YarrInterpreter.cpp
JavaScriptCore-7600.1.4.13.1.tar.gz
[apple/javascriptcore.git] / yarr / YarrInterpreter.cpp
CommitLineData
ba379fdc
A
1/*
2 * Copyright (C) 2009 Apple Inc. All rights reserved.
14957cd0 3 * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
ba379fdc
A
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
f9bf01c6 24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
ba379fdc
A
25 */
26
27#include "config.h"
14957cd0 28#include "YarrInterpreter.h"
ba379fdc 29
14957cd0 30#include "Yarr.h"
6fe7ccc8 31#include "YarrCanonicalizeUCS2.h"
14957cd0 32#include <wtf/BumpPointerAllocator.h>
6fe7ccc8
A
33#include <wtf/DataLog.h>
34#include <wtf/text/CString.h>
93a37866 35#include <wtf/text/WTFString.h>
ba379fdc 36
ba379fdc
A
37using namespace WTF;
38
39namespace JSC { namespace Yarr {
40
6fe7ccc8 41template<typename CharType>
ba379fdc
A
42class Interpreter {
43public:
44 struct ParenthesesDisjunctionContext;
45
46 struct BackTrackInfoPatternCharacter {
47 uintptr_t matchAmount;
48 };
49 struct BackTrackInfoCharacterClass {
50 uintptr_t matchAmount;
51 };
52 struct BackTrackInfoBackReference {
53 uintptr_t begin; // Not really needed for greedy quantifiers.
54 uintptr_t matchAmount; // Not really needed for fixed quantifiers.
55 };
56 struct BackTrackInfoAlternative {
57 uintptr_t offset;
58 };
59 struct BackTrackInfoParentheticalAssertion {
60 uintptr_t begin;
61 };
62 struct BackTrackInfoParenthesesOnce {
14957cd0
A
63 uintptr_t begin;
64 };
65 struct BackTrackInfoParenthesesTerminal {
66 uintptr_t begin;
ba379fdc
A
67 };
68 struct BackTrackInfoParentheses {
69 uintptr_t matchAmount;
70 ParenthesesDisjunctionContext* lastContext;
ba379fdc
A
71 };
72
73 static inline void appendParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack, ParenthesesDisjunctionContext* context)
74 {
75 context->next = backTrack->lastContext;
76 backTrack->lastContext = context;
77 ++backTrack->matchAmount;
78 }
79
80 static inline void popParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack)
81 {
93a37866
A
82 RELEASE_ASSERT(backTrack->matchAmount);
83 RELEASE_ASSERT(backTrack->lastContext);
ba379fdc
A
84 backTrack->lastContext = backTrack->lastContext->next;
85 --backTrack->matchAmount;
86 }
87
88 struct DisjunctionContext
89 {
90 DisjunctionContext()
91 : term(0)
92 {
93 }
f9bf01c6 94
ba379fdc
A
95 void* operator new(size_t, void* where)
96 {
97 return where;
98 }
99
100 int term;
101 unsigned matchBegin;
102 unsigned matchEnd;
103 uintptr_t frame[1];
104 };
105
106 DisjunctionContext* allocDisjunctionContext(ByteDisjunction* disjunction)
107 {
14957cd0
A
108 size_t size = sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t);
109 allocatorPool = allocatorPool->ensureCapacity(size);
93a37866 110 RELEASE_ASSERT(allocatorPool);
6fe7ccc8 111 return new (allocatorPool->alloc(size)) DisjunctionContext();
ba379fdc
A
112 }
113
114 void freeDisjunctionContext(DisjunctionContext* context)
115 {
14957cd0 116 allocatorPool = allocatorPool->dealloc(context);
ba379fdc
A
117 }
118
119 struct ParenthesesDisjunctionContext
120 {
6fe7ccc8 121 ParenthesesDisjunctionContext(unsigned* output, ByteTerm& term)
ba379fdc
A
122 : next(0)
123 {
124 unsigned firstSubpatternId = term.atom.subpatternId;
125 unsigned numNestedSubpatterns = term.atom.parenthesesDisjunction->m_numSubpatterns;
126
127 for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i) {
128 subpatternBackup[i] = output[(firstSubpatternId << 1) + i];
6fe7ccc8 129 output[(firstSubpatternId << 1) + i] = offsetNoMatch;
ba379fdc 130 }
f9bf01c6 131
6fe7ccc8 132 new (getDisjunctionContext(term)) DisjunctionContext();
ba379fdc
A
133 }
134
135 void* operator new(size_t, void* where)
136 {
137 return where;
138 }
139
6fe7ccc8 140 void restoreOutput(unsigned* output, unsigned firstSubpatternId, unsigned numNestedSubpatterns)
ba379fdc
A
141 {
142 for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i)
143 output[(firstSubpatternId << 1) + i] = subpatternBackup[i];
144 }
f9bf01c6 145
ba379fdc
A
146 DisjunctionContext* getDisjunctionContext(ByteTerm& term)
147 {
148 return reinterpret_cast<DisjunctionContext*>(&(subpatternBackup[term.atom.parenthesesDisjunction->m_numSubpatterns << 1]));
149 }
150
151 ParenthesesDisjunctionContext* next;
6fe7ccc8 152 unsigned subpatternBackup[1];
ba379fdc
A
153 };
154
6fe7ccc8 155 ParenthesesDisjunctionContext* allocParenthesesDisjunctionContext(ByteDisjunction* disjunction, unsigned* output, ByteTerm& term)
ba379fdc 156 {
6fe7ccc8 157 size_t size = sizeof(ParenthesesDisjunctionContext) - sizeof(unsigned) + (term.atom.parenthesesDisjunction->m_numSubpatterns << 1) * sizeof(unsigned) + sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t);
14957cd0 158 allocatorPool = allocatorPool->ensureCapacity(size);
93a37866 159 RELEASE_ASSERT(allocatorPool);
6fe7ccc8 160 return new (allocatorPool->alloc(size)) ParenthesesDisjunctionContext(output, term);
ba379fdc
A
161 }
162
163 void freeParenthesesDisjunctionContext(ParenthesesDisjunctionContext* context)
164 {
14957cd0 165 allocatorPool = allocatorPool->dealloc(context);
ba379fdc
A
166 }
167
168 class InputStream {
169 public:
6fe7ccc8 170 InputStream(const CharType* input, unsigned start, unsigned length)
ba379fdc
A
171 : input(input)
172 , pos(start)
173 , length(length)
174 {
175 }
176
177 void next()
178 {
179 ++pos;
180 }
181
182 void rewind(unsigned amount)
183 {
184 ASSERT(pos >= amount);
185 pos -= amount;
186 }
187
188 int read()
189 {
190 ASSERT(pos < length);
191 if (pos < length)
192 return input[pos];
193 return -1;
194 }
195
14957cd0
A
196 int readPair()
197 {
198 ASSERT(pos + 1 < length);
199 return input[pos] | input[pos + 1] << 16;
200 }
201
6fe7ccc8 202 int readChecked(unsigned negativePositionOffest)
ba379fdc 203 {
93a37866 204 RELEASE_ASSERT(pos >= negativePositionOffest);
6fe7ccc8 205 unsigned p = pos - negativePositionOffest;
ba379fdc
A
206 ASSERT(p < length);
207 return input[p];
208 }
209
210 int reread(unsigned from)
211 {
212 ASSERT(from < length);
213 return input[from];
214 }
215
216 int prev()
217 {
218 ASSERT(!(pos > length));
219 if (pos && length)
220 return input[pos - 1];
221 return -1;
222 }
f9bf01c6 223
ba379fdc
A
224 unsigned getPos()
225 {
226 return pos;
227 }
228
229 void setPos(unsigned p)
230 {
231 pos = p;
232 }
f9bf01c6 233
ba379fdc
A
234 bool atStart()
235 {
236 return pos == 0;
237 }
238
239 bool atEnd()
240 {
241 return pos == length;
242 }
243
14957cd0
A
244 unsigned end()
245 {
246 return length;
247 }
248
6fe7ccc8 249 bool checkInput(unsigned count)
ba379fdc 250 {
6fe7ccc8 251 if (((pos + count) <= length) && ((pos + count) >= pos)) {
ba379fdc
A
252 pos += count;
253 return true;
14957cd0
A
254 }
255 return false;
ba379fdc
A
256 }
257
6fe7ccc8 258 void uncheckInput(unsigned count)
ba379fdc 259 {
93a37866 260 RELEASE_ASSERT(pos >= count);
ba379fdc
A
261 pos -= count;
262 }
263
6fe7ccc8 264 bool atStart(unsigned negativePositionOffest)
ba379fdc 265 {
6fe7ccc8 266 return pos == negativePositionOffest;
ba379fdc
A
267 }
268
6fe7ccc8 269 bool atEnd(unsigned negativePositionOffest)
ba379fdc 270 {
93a37866 271 RELEASE_ASSERT(pos >= negativePositionOffest);
6fe7ccc8 272 return (pos - negativePositionOffest) == length;
ba379fdc
A
273 }
274
6fe7ccc8 275 bool isAvailableInput(unsigned offset)
14957cd0 276 {
6fe7ccc8 277 return (((pos + offset) <= length) && ((pos + offset) >= pos));
14957cd0
A
278 }
279
ba379fdc 280 private:
6fe7ccc8 281 const CharType* input;
ba379fdc
A
282 unsigned pos;
283 unsigned length;
284 };
285
286 bool testCharacterClass(CharacterClass* characterClass, int ch)
287 {
288 if (ch & 0xFF80) {
289 for (unsigned i = 0; i < characterClass->m_matchesUnicode.size(); ++i)
290 if (ch == characterClass->m_matchesUnicode[i])
291 return true;
292 for (unsigned i = 0; i < characterClass->m_rangesUnicode.size(); ++i)
293 if ((ch >= characterClass->m_rangesUnicode[i].begin) && (ch <= characterClass->m_rangesUnicode[i].end))
294 return true;
295 } else {
296 for (unsigned i = 0; i < characterClass->m_matches.size(); ++i)
297 if (ch == characterClass->m_matches[i])
298 return true;
299 for (unsigned i = 0; i < characterClass->m_ranges.size(); ++i)
300 if ((ch >= characterClass->m_ranges[i].begin) && (ch <= characterClass->m_ranges[i].end))
301 return true;
302 }
303
304 return false;
305 }
306
6fe7ccc8 307 bool checkCharacter(int testChar, unsigned negativeInputOffset)
ba379fdc 308 {
6fe7ccc8 309 return testChar == input.readChecked(negativeInputOffset);
ba379fdc
A
310 }
311
6fe7ccc8 312 bool checkCasedCharacter(int loChar, int hiChar, unsigned negativeInputOffset)
ba379fdc 313 {
6fe7ccc8 314 int ch = input.readChecked(negativeInputOffset);
ba379fdc
A
315 return (loChar == ch) || (hiChar == ch);
316 }
317
6fe7ccc8 318 bool checkCharacterClass(CharacterClass* characterClass, bool invert, unsigned negativeInputOffset)
ba379fdc 319 {
6fe7ccc8 320 bool match = testCharacterClass(characterClass, input.readChecked(negativeInputOffset));
ba379fdc
A
321 return invert ? !match : match;
322 }
323
6fe7ccc8 324 bool tryConsumeBackReference(int matchBegin, int matchEnd, unsigned negativeInputOffset)
ba379fdc 325 {
6fe7ccc8 326 unsigned matchSize = (unsigned)(matchEnd - matchBegin);
ba379fdc
A
327
328 if (!input.checkInput(matchSize))
329 return false;
330
14957cd0 331 if (pattern->m_ignoreCase) {
6fe7ccc8
A
332 for (unsigned i = 0; i < matchSize; ++i) {
333 int oldCh = input.reread(matchBegin + i);
334 int ch = input.readChecked(negativeInputOffset + matchSize - i);
335
336 if (oldCh == ch)
337 continue;
338
339 // The definition for canonicalize (see ES 5.1, 15.10.2.8) means that
340 // unicode values are never allowed to match against ascii ones.
341 if (isASCII(oldCh) || isASCII(ch)) {
342 if (toASCIIUpper(oldCh) == toASCIIUpper(ch))
343 continue;
344 } else if (areCanonicallyEquivalent(oldCh, ch))
345 continue;
346
347 input.uncheckInput(matchSize);
348 return false;
14957cd0
A
349 }
350 } else {
6fe7ccc8
A
351 for (unsigned i = 0; i < matchSize; ++i) {
352 if (!checkCharacter(input.reread(matchBegin + i), negativeInputOffset + matchSize - i)) {
14957cd0
A
353 input.uncheckInput(matchSize);
354 return false;
355 }
ba379fdc
A
356 }
357 }
f9bf01c6 358
ba379fdc
A
359 return true;
360 }
361
362 bool matchAssertionBOL(ByteTerm& term)
363 {
6fe7ccc8 364 return (input.atStart(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition + 1)));
ba379fdc
A
365 }
366
367 bool matchAssertionEOL(ByteTerm& term)
368 {
369 if (term.inputPosition)
370 return (input.atEnd(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition)));
14957cd0
A
371
372 return (input.atEnd()) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.read()));
ba379fdc
A
373 }
374
375 bool matchAssertionWordBoundary(ByteTerm& term)
376 {
6fe7ccc8 377 bool prevIsWordchar = !input.atStart(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition + 1));
ba379fdc
A
378 bool readIsWordchar;
379 if (term.inputPosition)
380 readIsWordchar = !input.atEnd(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition));
381 else
382 readIsWordchar = !input.atEnd() && testCharacterClass(pattern->wordcharCharacterClass, input.read());
383
384 bool wordBoundary = prevIsWordchar != readIsWordchar;
385 return term.invert() ? !wordBoundary : wordBoundary;
386 }
387
388 bool backtrackPatternCharacter(ByteTerm& term, DisjunctionContext* context)
389 {
390 BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
391
392 switch (term.atom.quantityType) {
393 case QuantifierFixedCount:
394 break;
395
396 case QuantifierGreedy:
397 if (backTrack->matchAmount) {
398 --backTrack->matchAmount;
399 input.uncheckInput(1);
400 return true;
401 }
402 break;
403
404 case QuantifierNonGreedy:
405 if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
406 ++backTrack->matchAmount;
6fe7ccc8 407 if (checkCharacter(term.atom.patternCharacter, term.inputPosition + 1))
ba379fdc
A
408 return true;
409 }
410 input.uncheckInput(backTrack->matchAmount);
411 break;
412 }
413
414 return false;
415 }
416
417 bool backtrackPatternCasedCharacter(ByteTerm& term, DisjunctionContext* context)
418 {
419 BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
420
421 switch (term.atom.quantityType) {
422 case QuantifierFixedCount:
423 break;
424
425 case QuantifierGreedy:
426 if (backTrack->matchAmount) {
427 --backTrack->matchAmount;
428 input.uncheckInput(1);
429 return true;
430 }
431 break;
432
433 case QuantifierNonGreedy:
434 if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
435 ++backTrack->matchAmount;
6fe7ccc8 436 if (checkCasedCharacter(term.atom.casedCharacter.lo, term.atom.casedCharacter.hi, term.inputPosition + 1))
ba379fdc
A
437 return true;
438 }
439 input.uncheckInput(backTrack->matchAmount);
440 break;
441 }
442
443 return false;
444 }
445
446 bool matchCharacterClass(ByteTerm& term, DisjunctionContext* context)
447 {
448 ASSERT(term.type == ByteTerm::TypeCharacterClass);
449 BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
450
451 switch (term.atom.quantityType) {
452 case QuantifierFixedCount: {
453 for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) {
6fe7ccc8 454 if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - matchAmount))
ba379fdc
A
455 return false;
456 }
457 return true;
458 }
459
460 case QuantifierGreedy: {
461 unsigned matchAmount = 0;
462 while ((matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
6fe7ccc8 463 if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + 1)) {
ba379fdc
A
464 input.uncheckInput(1);
465 break;
466 }
467 ++matchAmount;
468 }
469 backTrack->matchAmount = matchAmount;
470
471 return true;
472 }
473
474 case QuantifierNonGreedy:
475 backTrack->matchAmount = 0;
476 return true;
477 }
478
93a37866 479 RELEASE_ASSERT_NOT_REACHED();
ba379fdc
A
480 return false;
481 }
482
483 bool backtrackCharacterClass(ByteTerm& term, DisjunctionContext* context)
484 {
485 ASSERT(term.type == ByteTerm::TypeCharacterClass);
486 BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
487
488 switch (term.atom.quantityType) {
489 case QuantifierFixedCount:
490 break;
491
492 case QuantifierGreedy:
493 if (backTrack->matchAmount) {
494 --backTrack->matchAmount;
495 input.uncheckInput(1);
496 return true;
497 }
498 break;
499
500 case QuantifierNonGreedy:
501 if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
502 ++backTrack->matchAmount;
6fe7ccc8 503 if (checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + 1))
ba379fdc
A
504 return true;
505 }
506 input.uncheckInput(backTrack->matchAmount);
507 break;
508 }
509
510 return false;
511 }
512
513 bool matchBackReference(ByteTerm& term, DisjunctionContext* context)
514 {
515 ASSERT(term.type == ByteTerm::TypeBackReference);
516 BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
517
6fe7ccc8
A
518 unsigned matchBegin = output[(term.atom.subpatternId << 1)];
519 unsigned matchEnd = output[(term.atom.subpatternId << 1) + 1];
14957cd0
A
520
521 // If the end position of the referenced match hasn't set yet then the backreference in the same parentheses where it references to that.
522 // In this case the result of match is empty string like when it references to a parentheses with zero-width match.
523 // Eg.: /(a\1)/
6fe7ccc8 524 if (matchEnd == offsetNoMatch)
14957cd0
A
525 return true;
526
6fe7ccc8
A
527 if (matchBegin == offsetNoMatch)
528 return true;
529
530 ASSERT(matchBegin <= matchEnd);
ba379fdc
A
531
532 if (matchBegin == matchEnd)
533 return true;
534
535 switch (term.atom.quantityType) {
536 case QuantifierFixedCount: {
537 backTrack->begin = input.getPos();
538 for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) {
539 if (!tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
540 input.setPos(backTrack->begin);
541 return false;
542 }
543 }
544 return true;
545 }
546
547 case QuantifierGreedy: {
548 unsigned matchAmount = 0;
549 while ((matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition))
550 ++matchAmount;
551 backTrack->matchAmount = matchAmount;
552 return true;
553 }
554
555 case QuantifierNonGreedy:
556 backTrack->begin = input.getPos();
557 backTrack->matchAmount = 0;
558 return true;
559 }
560
93a37866 561 RELEASE_ASSERT_NOT_REACHED();
ba379fdc
A
562 return false;
563 }
564
565 bool backtrackBackReference(ByteTerm& term, DisjunctionContext* context)
566 {
567 ASSERT(term.type == ByteTerm::TypeBackReference);
568 BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
569
6fe7ccc8
A
570 unsigned matchBegin = output[(term.atom.subpatternId << 1)];
571 unsigned matchEnd = output[(term.atom.subpatternId << 1) + 1];
572
573 if (matchBegin == offsetNoMatch)
574 return false;
575
576 ASSERT(matchBegin <= matchEnd);
ba379fdc
A
577
578 if (matchBegin == matchEnd)
579 return false;
580
581 switch (term.atom.quantityType) {
582 case QuantifierFixedCount:
583 // for quantityCount == 1, could rewind.
584 input.setPos(backTrack->begin);
585 break;
586
587 case QuantifierGreedy:
588 if (backTrack->matchAmount) {
589 --backTrack->matchAmount;
590 input.rewind(matchEnd - matchBegin);
591 return true;
592 }
593 break;
594
595 case QuantifierNonGreedy:
596 if ((backTrack->matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
597 ++backTrack->matchAmount;
598 return true;
14957cd0
A
599 }
600 input.setPos(backTrack->begin);
ba379fdc
A
601 break;
602 }
603
604 return false;
605 }
606
607 void recordParenthesesMatch(ByteTerm& term, ParenthesesDisjunctionContext* context)
608 {
609 if (term.capture()) {
610 unsigned subpatternId = term.atom.subpatternId;
611 output[(subpatternId << 1)] = context->getDisjunctionContext(term)->matchBegin + term.inputPosition;
612 output[(subpatternId << 1) + 1] = context->getDisjunctionContext(term)->matchEnd + term.inputPosition;
613 }
614 }
615 void resetMatches(ByteTerm& term, ParenthesesDisjunctionContext* context)
616 {
617 unsigned firstSubpatternId = term.atom.subpatternId;
618 unsigned count = term.atom.parenthesesDisjunction->m_numSubpatterns;
619 context->restoreOutput(output, firstSubpatternId, count);
620 }
14957cd0 621 JSRegExpResult parenthesesDoBacktrack(ByteTerm& term, BackTrackInfoParentheses* backTrack)
ba379fdc
A
622 {
623 while (backTrack->matchAmount) {
624 ParenthesesDisjunctionContext* context = backTrack->lastContext;
625
14957cd0
A
626 JSRegExpResult result = matchDisjunction(term.atom.parenthesesDisjunction, context->getDisjunctionContext(term), true);
627 if (result == JSRegExpMatch)
628 return JSRegExpMatch;
f9bf01c6 629
ba379fdc 630 resetMatches(term, context);
ba379fdc 631 popParenthesesDisjunctionContext(backTrack);
f9bf01c6 632 freeParenthesesDisjunctionContext(context);
14957cd0
A
633
634 if (result != JSRegExpNoMatch)
635 return result;
ba379fdc
A
636 }
637
14957cd0 638 return JSRegExpNoMatch;
ba379fdc
A
639 }
640
641 bool matchParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
642 {
643 ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
644 ASSERT(term.atom.quantityCount == 1);
645
646 BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
647
648 switch (term.atom.quantityType) {
649 case QuantifierGreedy: {
650 // set this speculatively; if we get to the parens end this will be true.
14957cd0 651 backTrack->begin = input.getPos();
ba379fdc
A
652 break;
653 }
654 case QuantifierNonGreedy: {
14957cd0 655 backTrack->begin = notFound;
ba379fdc
A
656 context->term += term.atom.parenthesesWidth;
657 return true;
658 }
659 case QuantifierFixedCount:
660 break;
661 }
662
663 if (term.capture()) {
664 unsigned subpatternId = term.atom.subpatternId;
6fe7ccc8 665 output[(subpatternId << 1)] = input.getPos() - term.inputPosition;
ba379fdc
A
666 }
667
668 return true;
669 }
670
14957cd0 671 bool matchParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
ba379fdc
A
672 {
673 ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
674 ASSERT(term.atom.quantityCount == 1);
675
676 if (term.capture()) {
677 unsigned subpatternId = term.atom.subpatternId;
678 output[(subpatternId << 1) + 1] = input.getPos() + term.inputPosition;
679 }
14957cd0
A
680
681 if (term.atom.quantityType == QuantifierFixedCount)
682 return true;
683
684 BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
685 return backTrack->begin != input.getPos();
ba379fdc
A
686 }
687
688 bool backtrackParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
689 {
690 ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
691 ASSERT(term.atom.quantityCount == 1);
692
693 BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
694
695 if (term.capture()) {
696 unsigned subpatternId = term.atom.subpatternId;
6fe7ccc8
A
697 output[(subpatternId << 1)] = offsetNoMatch;
698 output[(subpatternId << 1) + 1] = offsetNoMatch;
ba379fdc
A
699 }
700
701 switch (term.atom.quantityType) {
702 case QuantifierGreedy:
703 // if we backtrack to this point, there is another chance - try matching nothing.
14957cd0
A
704 ASSERT(backTrack->begin != notFound);
705 backTrack->begin = notFound;
ba379fdc
A
706 context->term += term.atom.parenthesesWidth;
707 return true;
708 case QuantifierNonGreedy:
14957cd0 709 ASSERT(backTrack->begin != notFound);
81345200 710 FALLTHROUGH;
ba379fdc
A
711 case QuantifierFixedCount:
712 break;
713 }
714
715 return false;
716 }
717
718 bool backtrackParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
719 {
720 ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
721 ASSERT(term.atom.quantityCount == 1);
722
723 BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
724
725 switch (term.atom.quantityType) {
726 case QuantifierGreedy:
14957cd0 727 if (backTrack->begin == notFound) {
ba379fdc
A
728 context->term -= term.atom.parenthesesWidth;
729 return false;
730 }
81345200 731 FALLTHROUGH;
ba379fdc 732 case QuantifierNonGreedy:
14957cd0
A
733 if (backTrack->begin == notFound) {
734 backTrack->begin = input.getPos();
ba379fdc 735 if (term.capture()) {
14957cd0
A
736 // Technically this access to inputPosition should be accessing the begin term's
737 // inputPosition, but for repeats other than fixed these values should be
738 // the same anyway! (We don't pre-check for greedy or non-greedy matches.)
739 ASSERT((&term - term.atom.parenthesesWidth)->type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
740 ASSERT((&term - term.atom.parenthesesWidth)->inputPosition == term.inputPosition);
ba379fdc 741 unsigned subpatternId = term.atom.subpatternId;
14957cd0 742 output[subpatternId << 1] = input.getPos() + term.inputPosition;
ba379fdc
A
743 }
744 context->term -= term.atom.parenthesesWidth;
745 return true;
746 }
81345200 747 FALLTHROUGH;
ba379fdc
A
748 case QuantifierFixedCount:
749 break;
750 }
751
752 return false;
753 }
754
14957cd0
A
755 bool matchParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context)
756 {
757 ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
758 ASSERT(term.atom.quantityType == QuantifierGreedy);
759 ASSERT(term.atom.quantityCount == quantifyInfinite);
760 ASSERT(!term.capture());
761
762 BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
763 backTrack->begin = input.getPos();
764 return true;
765 }
766
767 bool matchParenthesesTerminalEnd(ByteTerm& term, DisjunctionContext* context)
768 {
769 ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalEnd);
770
771 BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
772 // Empty match is a failed match.
773 if (backTrack->begin == input.getPos())
774 return false;
775
776 // Successful match! Okay, what's next? - loop around and try to match moar!
777 context->term -= (term.atom.parenthesesWidth + 1);
778 return true;
779 }
780
781 bool backtrackParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context)
782 {
783 ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
784 ASSERT(term.atom.quantityType == QuantifierGreedy);
785 ASSERT(term.atom.quantityCount == quantifyInfinite);
786 ASSERT(!term.capture());
787
788 // If we backtrack to this point, we have failed to match this iteration of the parens.
789 // Since this is greedy / zero minimum a failed is also accepted as a match!
790 context->term += term.atom.parenthesesWidth;
791 return true;
792 }
793
794 bool backtrackParenthesesTerminalEnd(ByteTerm&, DisjunctionContext*)
795 {
796 // 'Terminal' parentheses are at the end of the regex, and as such a match past end
797 // should always be returned as a successful match - we should never backtrack to here.
93a37866 798 RELEASE_ASSERT_NOT_REACHED();
14957cd0
A
799 return false;
800 }
801
ba379fdc
A
802 bool matchParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
803 {
804 ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
805 ASSERT(term.atom.quantityCount == 1);
806
807 BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
808
809 backTrack->begin = input.getPos();
810 return true;
811 }
812
813 bool matchParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
814 {
815 ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
816 ASSERT(term.atom.quantityCount == 1);
817
818 BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
819
820 input.setPos(backTrack->begin);
821
822 // We've reached the end of the parens; if they are inverted, this is failure.
823 if (term.invert()) {
824 context->term -= term.atom.parenthesesWidth;
825 return false;
826 }
827
828 return true;
829 }
830
831 bool backtrackParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
832 {
833 ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
834 ASSERT(term.atom.quantityCount == 1);
835
836 // We've failed to match parens; if they are inverted, this is win!
837 if (term.invert()) {
838 context->term += term.atom.parenthesesWidth;
839 return true;
840 }
841
842 return false;
843 }
844
845 bool backtrackParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
846 {
847 ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
848 ASSERT(term.atom.quantityCount == 1);
849
850 BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
851
852 input.setPos(backTrack->begin);
853
854 context->term -= term.atom.parenthesesWidth;
855 return false;
856 }
857
14957cd0 858 JSRegExpResult matchParentheses(ByteTerm& term, DisjunctionContext* context)
ba379fdc
A
859 {
860 ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
861
862 BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
ba379fdc
A
863 ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
864
ba379fdc
A
865 backTrack->matchAmount = 0;
866 backTrack->lastContext = 0;
867
868 switch (term.atom.quantityType) {
869 case QuantifierFixedCount: {
870 // While we haven't yet reached our fixed limit,
871 while (backTrack->matchAmount < term.atom.quantityCount) {
872 // Try to do a match, and it it succeeds, add it to the list.
873 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
14957cd0
A
874 JSRegExpResult result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
875 if (result == JSRegExpMatch)
ba379fdc
A
876 appendParenthesesDisjunctionContext(backTrack, context);
877 else {
878 // The match failed; try to find an alternate point to carry on from.
879 resetMatches(term, context);
880 freeParenthesesDisjunctionContext(context);
14957cd0 881
6fe7ccc8 882 if (result != JSRegExpNoMatch)
14957cd0 883 return result;
6fe7ccc8
A
884 JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
885 if (backtrackResult != JSRegExpMatch)
886 return backtrackResult;
ba379fdc
A
887 }
888 }
889
890 ASSERT(backTrack->matchAmount == term.atom.quantityCount);
891 ParenthesesDisjunctionContext* context = backTrack->lastContext;
892 recordParenthesesMatch(term, context);
14957cd0 893 return JSRegExpMatch;
ba379fdc
A
894 }
895
896 case QuantifierGreedy: {
897 while (backTrack->matchAmount < term.atom.quantityCount) {
898 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
14957cd0
A
899 JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
900 if (result == JSRegExpMatch)
ba379fdc
A
901 appendParenthesesDisjunctionContext(backTrack, context);
902 else {
903 resetMatches(term, context);
904 freeParenthesesDisjunctionContext(context);
14957cd0
A
905
906 if (result != JSRegExpNoMatch)
907 return result;
908
ba379fdc
A
909 break;
910 }
911 }
912
913 if (backTrack->matchAmount) {
914 ParenthesesDisjunctionContext* context = backTrack->lastContext;
915 recordParenthesesMatch(term, context);
916 }
14957cd0 917 return JSRegExpMatch;
ba379fdc
A
918 }
919
920 case QuantifierNonGreedy:
14957cd0 921 return JSRegExpMatch;
ba379fdc
A
922 }
923
93a37866 924 RELEASE_ASSERT_NOT_REACHED();
14957cd0 925 return JSRegExpErrorNoMatch;
ba379fdc
A
926 }
927
928 // Rules for backtracking differ depending on whether this is greedy or non-greedy.
929 //
930 // Greedy matches never should try just adding more - you should already have done
931 // the 'more' cases. Always backtrack, at least a leetle bit. However cases where
932 // you backtrack an item off the list needs checking, since we'll never have matched
933 // the one less case. Tracking forwards, still add as much as possible.
934 //
935 // Non-greedy, we've already done the one less case, so don't match on popping.
936 // We haven't done the one more case, so always try to add that.
937 //
14957cd0 938 JSRegExpResult backtrackParentheses(ByteTerm& term, DisjunctionContext* context)
ba379fdc
A
939 {
940 ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
941
942 BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
ba379fdc
A
943 ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
944
945 switch (term.atom.quantityType) {
946 case QuantifierFixedCount: {
947 ASSERT(backTrack->matchAmount == term.atom.quantityCount);
948
949 ParenthesesDisjunctionContext* context = 0;
14957cd0 950 JSRegExpResult result = parenthesesDoBacktrack(term, backTrack);
ba379fdc 951
14957cd0
A
952 if (result != JSRegExpMatch)
953 return result;
ba379fdc
A
954
955 // While we haven't yet reached our fixed limit,
956 while (backTrack->matchAmount < term.atom.quantityCount) {
957 // Try to do a match, and it it succeeds, add it to the list.
958 context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
14957cd0
A
959 result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
960
961 if (result == JSRegExpMatch)
ba379fdc
A
962 appendParenthesesDisjunctionContext(backTrack, context);
963 else {
964 // The match failed; try to find an alternate point to carry on from.
965 resetMatches(term, context);
966 freeParenthesesDisjunctionContext(context);
14957cd0 967
6fe7ccc8 968 if (result != JSRegExpNoMatch)
14957cd0 969 return result;
6fe7ccc8
A
970 JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
971 if (backtrackResult != JSRegExpMatch)
972 return backtrackResult;
ba379fdc
A
973 }
974 }
975
976 ASSERT(backTrack->matchAmount == term.atom.quantityCount);
977 context = backTrack->lastContext;
978 recordParenthesesMatch(term, context);
14957cd0 979 return JSRegExpMatch;
ba379fdc
A
980 }
981
982 case QuantifierGreedy: {
983 if (!backTrack->matchAmount)
14957cd0 984 return JSRegExpNoMatch;
ba379fdc
A
985
986 ParenthesesDisjunctionContext* context = backTrack->lastContext;
14957cd0
A
987 JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
988 if (result == JSRegExpMatch) {
ba379fdc
A
989 while (backTrack->matchAmount < term.atom.quantityCount) {
990 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
14957cd0
A
991 JSRegExpResult parenthesesResult = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
992 if (parenthesesResult == JSRegExpMatch)
ba379fdc
A
993 appendParenthesesDisjunctionContext(backTrack, context);
994 else {
995 resetMatches(term, context);
996 freeParenthesesDisjunctionContext(context);
14957cd0
A
997
998 if (parenthesesResult != JSRegExpNoMatch)
999 return parenthesesResult;
1000
ba379fdc
A
1001 break;
1002 }
1003 }
1004 } else {
1005 resetMatches(term, context);
ba379fdc 1006 popParenthesesDisjunctionContext(backTrack);
f9bf01c6 1007 freeParenthesesDisjunctionContext(context);
14957cd0
A
1008
1009 if (result != JSRegExpNoMatch)
1010 return result;
ba379fdc
A
1011 }
1012
1013 if (backTrack->matchAmount) {
1014 ParenthesesDisjunctionContext* context = backTrack->lastContext;
1015 recordParenthesesMatch(term, context);
1016 }
14957cd0 1017 return JSRegExpMatch;
ba379fdc
A
1018 }
1019
1020 case QuantifierNonGreedy: {
1021 // If we've not reached the limit, try to add one more match.
1022 if (backTrack->matchAmount < term.atom.quantityCount) {
1023 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
14957cd0
A
1024 JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
1025 if (result == JSRegExpMatch) {
ba379fdc
A
1026 appendParenthesesDisjunctionContext(backTrack, context);
1027 recordParenthesesMatch(term, context);
14957cd0 1028 return JSRegExpMatch;
ba379fdc 1029 }
14957cd0
A
1030
1031 resetMatches(term, context);
1032 freeParenthesesDisjunctionContext(context);
1033
1034 if (result != JSRegExpNoMatch)
1035 return result;
ba379fdc
A
1036 }
1037
1038 // Nope - okay backtrack looking for an alternative.
1039 while (backTrack->matchAmount) {
1040 ParenthesesDisjunctionContext* context = backTrack->lastContext;
14957cd0
A
1041 JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
1042 if (result == JSRegExpMatch) {
ba379fdc
A
1043 // successful backtrack! we're back in the game!
1044 if (backTrack->matchAmount) {
1045 context = backTrack->lastContext;
1046 recordParenthesesMatch(term, context);
1047 }
14957cd0 1048 return JSRegExpMatch;
ba379fdc 1049 }
f9bf01c6 1050
ba379fdc
A
1051 // pop a match off the stack
1052 resetMatches(term, context);
ba379fdc 1053 popParenthesesDisjunctionContext(backTrack);
f9bf01c6 1054 freeParenthesesDisjunctionContext(context);
14957cd0 1055
6fe7ccc8
A
1056 if (result != JSRegExpNoMatch)
1057 return result;
ba379fdc
A
1058 }
1059
14957cd0 1060 return JSRegExpNoMatch;
ba379fdc
A
1061 }
1062 }
1063
93a37866 1064 RELEASE_ASSERT_NOT_REACHED();
14957cd0
A
1065 return JSRegExpErrorNoMatch;
1066 }
1067
1068 bool matchDotStarEnclosure(ByteTerm& term, DisjunctionContext* context)
1069 {
1070 UNUSED_PARAM(term);
1071 unsigned matchBegin = context->matchBegin;
1072
1073 if (matchBegin) {
1074 for (matchBegin--; true; matchBegin--) {
1075 if (testCharacterClass(pattern->newlineCharacterClass, input.reread(matchBegin))) {
1076 ++matchBegin;
1077 break;
1078 }
1079
1080 if (!matchBegin)
1081 break;
1082 }
1083 }
1084
1085 unsigned matchEnd = input.getPos();
1086
1087 for (; (matchEnd != input.end())
1088 && (!testCharacterClass(pattern->newlineCharacterClass, input.reread(matchEnd))); matchEnd++) { }
1089
1090 if (((matchBegin && term.anchors.m_bol)
1091 || ((matchEnd != input.end()) && term.anchors.m_eol))
1092 && !pattern->m_multiline)
1093 return false;
1094
1095 context->matchBegin = matchBegin;
1096 context->matchEnd = matchEnd;
1097 return true;
ba379fdc
A
1098 }
1099
1100#define MATCH_NEXT() { ++context->term; goto matchAgain; }
1101#define BACKTRACK() { --context->term; goto backtrack; }
1102#define currentTerm() (disjunction->terms[context->term])
14957cd0 1103 JSRegExpResult matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
ba379fdc 1104 {
14957cd0
A
1105 if (!--remainingMatchCount)
1106 return JSRegExpErrorHitLimit;
1107
ba379fdc
A
1108 if (btrack)
1109 BACKTRACK();
1110
1111 context->matchBegin = input.getPos();
1112 context->term = 0;
1113
1114 matchAgain:
1115 ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
1116
1117 switch (currentTerm().type) {
1118 case ByteTerm::TypeSubpatternBegin:
1119 MATCH_NEXT();
1120 case ByteTerm::TypeSubpatternEnd:
1121 context->matchEnd = input.getPos();
14957cd0 1122 return JSRegExpMatch;
ba379fdc
A
1123
1124 case ByteTerm::TypeBodyAlternativeBegin:
1125 MATCH_NEXT();
1126 case ByteTerm::TypeBodyAlternativeDisjunction:
1127 case ByteTerm::TypeBodyAlternativeEnd:
1128 context->matchEnd = input.getPos();
14957cd0 1129 return JSRegExpMatch;
ba379fdc
A
1130
1131 case ByteTerm::TypeAlternativeBegin:
1132 MATCH_NEXT();
1133 case ByteTerm::TypeAlternativeDisjunction:
1134 case ByteTerm::TypeAlternativeEnd: {
1135 int offset = currentTerm().alternative.end;
1136 BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
1137 backTrack->offset = offset;
1138 context->term += offset;
1139 MATCH_NEXT();
1140 }
1141
1142 case ByteTerm::TypeAssertionBOL:
1143 if (matchAssertionBOL(currentTerm()))
1144 MATCH_NEXT();
1145 BACKTRACK();
1146 case ByteTerm::TypeAssertionEOL:
1147 if (matchAssertionEOL(currentTerm()))
1148 MATCH_NEXT();
1149 BACKTRACK();
1150 case ByteTerm::TypeAssertionWordBoundary:
1151 if (matchAssertionWordBoundary(currentTerm()))
1152 MATCH_NEXT();
1153 BACKTRACK();
1154
1155 case ByteTerm::TypePatternCharacterOnce:
1156 case ByteTerm::TypePatternCharacterFixed: {
1157 for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
6fe7ccc8 1158 if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition - matchAmount))
ba379fdc
A
1159 BACKTRACK();
1160 }
1161 MATCH_NEXT();
1162 }
1163 case ByteTerm::TypePatternCharacterGreedy: {
1164 BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1165 unsigned matchAmount = 0;
1166 while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {
6fe7ccc8 1167 if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition + 1)) {
ba379fdc
A
1168 input.uncheckInput(1);
1169 break;
1170 }
1171 ++matchAmount;
1172 }
1173 backTrack->matchAmount = matchAmount;
1174
1175 MATCH_NEXT();
1176 }
1177 case ByteTerm::TypePatternCharacterNonGreedy: {
1178 BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1179 backTrack->matchAmount = 0;
1180 MATCH_NEXT();
1181 }
1182
1183 case ByteTerm::TypePatternCasedCharacterOnce:
1184 case ByteTerm::TypePatternCasedCharacterFixed: {
1185 for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
6fe7ccc8 1186 if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - matchAmount))
ba379fdc
A
1187 BACKTRACK();
1188 }
1189 MATCH_NEXT();
1190 }
1191 case ByteTerm::TypePatternCasedCharacterGreedy: {
1192 BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1193 unsigned matchAmount = 0;
1194 while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {
6fe7ccc8 1195 if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition + 1)) {
ba379fdc
A
1196 input.uncheckInput(1);
1197 break;
1198 }
1199 ++matchAmount;
1200 }
1201 backTrack->matchAmount = matchAmount;
1202
1203 MATCH_NEXT();
1204 }
1205 case ByteTerm::TypePatternCasedCharacterNonGreedy: {
1206 BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1207 backTrack->matchAmount = 0;
1208 MATCH_NEXT();
1209 }
1210
1211 case ByteTerm::TypeCharacterClass:
1212 if (matchCharacterClass(currentTerm(), context))
1213 MATCH_NEXT();
1214 BACKTRACK();
1215 case ByteTerm::TypeBackReference:
1216 if (matchBackReference(currentTerm(), context))
1217 MATCH_NEXT();
1218 BACKTRACK();
14957cd0
A
1219 case ByteTerm::TypeParenthesesSubpattern: {
1220 JSRegExpResult result = matchParentheses(currentTerm(), context);
1221
1222 if (result == JSRegExpMatch) {
ba379fdc 1223 MATCH_NEXT();
14957cd0
A
1224 } else if (result != JSRegExpNoMatch)
1225 return result;
1226
ba379fdc 1227 BACKTRACK();
14957cd0 1228 }
ba379fdc
A
1229 case ByteTerm::TypeParenthesesSubpatternOnceBegin:
1230 if (matchParenthesesOnceBegin(currentTerm(), context))
1231 MATCH_NEXT();
1232 BACKTRACK();
1233 case ByteTerm::TypeParenthesesSubpatternOnceEnd:
1234 if (matchParenthesesOnceEnd(currentTerm(), context))
1235 MATCH_NEXT();
1236 BACKTRACK();
14957cd0
A
1237 case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
1238 if (matchParenthesesTerminalBegin(currentTerm(), context))
1239 MATCH_NEXT();
1240 BACKTRACK();
1241 case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
1242 if (matchParenthesesTerminalEnd(currentTerm(), context))
1243 MATCH_NEXT();
1244 BACKTRACK();
ba379fdc
A
1245 case ByteTerm::TypeParentheticalAssertionBegin:
1246 if (matchParentheticalAssertionBegin(currentTerm(), context))
1247 MATCH_NEXT();
1248 BACKTRACK();
1249 case ByteTerm::TypeParentheticalAssertionEnd:
1250 if (matchParentheticalAssertionEnd(currentTerm(), context))
1251 MATCH_NEXT();
1252 BACKTRACK();
1253
1254 case ByteTerm::TypeCheckInput:
1255 if (input.checkInput(currentTerm().checkInputCount))
1256 MATCH_NEXT();
1257 BACKTRACK();
14957cd0
A
1258
1259 case ByteTerm::TypeUncheckInput:
1260 input.uncheckInput(currentTerm().checkInputCount);
1261 MATCH_NEXT();
1262
1263 case ByteTerm::TypeDotStarEnclosure:
1264 if (matchDotStarEnclosure(currentTerm(), context))
1265 return JSRegExpMatch;
1266 BACKTRACK();
ba379fdc
A
1267 }
1268
1269 // We should never fall-through to here.
93a37866 1270 RELEASE_ASSERT_NOT_REACHED();
ba379fdc
A
1271
1272 backtrack:
1273 ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
1274
1275 switch (currentTerm().type) {
1276 case ByteTerm::TypeSubpatternBegin:
14957cd0 1277 return JSRegExpNoMatch;
ba379fdc 1278 case ByteTerm::TypeSubpatternEnd:
93a37866 1279 RELEASE_ASSERT_NOT_REACHED();
ba379fdc
A
1280
1281 case ByteTerm::TypeBodyAlternativeBegin:
1282 case ByteTerm::TypeBodyAlternativeDisjunction: {
1283 int offset = currentTerm().alternative.next;
1284 context->term += offset;
1285 if (offset > 0)
1286 MATCH_NEXT();
1287
1288 if (input.atEnd())
14957cd0 1289 return JSRegExpNoMatch;
ba379fdc
A
1290
1291 input.next();
14957cd0 1292
ba379fdc 1293 context->matchBegin = input.getPos();
14957cd0
A
1294
1295 if (currentTerm().alternative.onceThrough)
1296 context->term += currentTerm().alternative.next;
1297
ba379fdc
A
1298 MATCH_NEXT();
1299 }
1300 case ByteTerm::TypeBodyAlternativeEnd:
93a37866 1301 RELEASE_ASSERT_NOT_REACHED();
ba379fdc 1302
14957cd0
A
1303 case ByteTerm::TypeAlternativeBegin:
1304 case ByteTerm::TypeAlternativeDisjunction: {
1305 int offset = currentTerm().alternative.next;
1306 context->term += offset;
1307 if (offset > 0)
1308 MATCH_NEXT();
1309 BACKTRACK();
1310 }
1311 case ByteTerm::TypeAlternativeEnd: {
1312 // We should never backtrack back into an alternative of the main body of the regex.
1313 BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
1314 unsigned offset = backTrack->offset;
1315 context->term -= offset;
1316 BACKTRACK();
1317 }
1318
1319 case ByteTerm::TypeAssertionBOL:
1320 case ByteTerm::TypeAssertionEOL:
1321 case ByteTerm::TypeAssertionWordBoundary:
1322 BACKTRACK();
1323
1324 case ByteTerm::TypePatternCharacterOnce:
1325 case ByteTerm::TypePatternCharacterFixed:
1326 case ByteTerm::TypePatternCharacterGreedy:
1327 case ByteTerm::TypePatternCharacterNonGreedy:
1328 if (backtrackPatternCharacter(currentTerm(), context))
1329 MATCH_NEXT();
1330 BACKTRACK();
1331 case ByteTerm::TypePatternCasedCharacterOnce:
1332 case ByteTerm::TypePatternCasedCharacterFixed:
1333 case ByteTerm::TypePatternCasedCharacterGreedy:
1334 case ByteTerm::TypePatternCasedCharacterNonGreedy:
1335 if (backtrackPatternCasedCharacter(currentTerm(), context))
1336 MATCH_NEXT();
1337 BACKTRACK();
1338 case ByteTerm::TypeCharacterClass:
1339 if (backtrackCharacterClass(currentTerm(), context))
1340 MATCH_NEXT();
1341 BACKTRACK();
1342 case ByteTerm::TypeBackReference:
1343 if (backtrackBackReference(currentTerm(), context))
1344 MATCH_NEXT();
1345 BACKTRACK();
1346 case ByteTerm::TypeParenthesesSubpattern: {
1347 JSRegExpResult result = backtrackParentheses(currentTerm(), context);
1348
1349 if (result == JSRegExpMatch) {
1350 MATCH_NEXT();
1351 } else if (result != JSRegExpNoMatch)
1352 return result;
1353
1354 BACKTRACK();
1355 }
1356 case ByteTerm::TypeParenthesesSubpatternOnceBegin:
1357 if (backtrackParenthesesOnceBegin(currentTerm(), context))
1358 MATCH_NEXT();
1359 BACKTRACK();
1360 case ByteTerm::TypeParenthesesSubpatternOnceEnd:
1361 if (backtrackParenthesesOnceEnd(currentTerm(), context))
1362 MATCH_NEXT();
1363 BACKTRACK();
1364 case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
1365 if (backtrackParenthesesTerminalBegin(currentTerm(), context))
1366 MATCH_NEXT();
1367 BACKTRACK();
1368 case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
1369 if (backtrackParenthesesTerminalEnd(currentTerm(), context))
1370 MATCH_NEXT();
1371 BACKTRACK();
1372 case ByteTerm::TypeParentheticalAssertionBegin:
1373 if (backtrackParentheticalAssertionBegin(currentTerm(), context))
1374 MATCH_NEXT();
1375 BACKTRACK();
1376 case ByteTerm::TypeParentheticalAssertionEnd:
1377 if (backtrackParentheticalAssertionEnd(currentTerm(), context))
1378 MATCH_NEXT();
1379 BACKTRACK();
1380
1381 case ByteTerm::TypeCheckInput:
1382 input.uncheckInput(currentTerm().checkInputCount);
1383 BACKTRACK();
ba379fdc 1384
14957cd0
A
1385 case ByteTerm::TypeUncheckInput:
1386 input.checkInput(currentTerm().checkInputCount);
1387 BACKTRACK();
1388
1389 case ByteTerm::TypeDotStarEnclosure:
93a37866 1390 RELEASE_ASSERT_NOT_REACHED();
ba379fdc
A
1391 }
1392
93a37866 1393 RELEASE_ASSERT_NOT_REACHED();
14957cd0 1394 return JSRegExpErrorNoMatch;
ba379fdc
A
1395 }
1396
14957cd0 1397 JSRegExpResult matchNonZeroDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
ba379fdc 1398 {
14957cd0
A
1399 JSRegExpResult result = matchDisjunction(disjunction, context, btrack);
1400
1401 if (result == JSRegExpMatch) {
ba379fdc 1402 while (context->matchBegin == context->matchEnd) {
14957cd0
A
1403 result = matchDisjunction(disjunction, context, true);
1404 if (result != JSRegExpMatch)
1405 return result;
ba379fdc 1406 }
14957cd0 1407 return JSRegExpMatch;
ba379fdc
A
1408 }
1409
14957cd0 1410 return result;
ba379fdc
A
1411 }
1412
6fe7ccc8 1413 unsigned interpret()
ba379fdc 1414 {
6fe7ccc8
A
1415 if (!input.isAvailableInput(0))
1416 return offsetNoMatch;
1417
1418 for (unsigned i = 0; i < pattern->m_body->m_numSubpatterns + 1; ++i)
1419 output[i << 1] = offsetNoMatch;
1420
14957cd0 1421 allocatorPool = pattern->m_allocator->startAllocator();
93a37866 1422 RELEASE_ASSERT(allocatorPool);
14957cd0 1423
ba379fdc
A
1424 DisjunctionContext* context = allocDisjunctionContext(pattern->m_body.get());
1425
14957cd0
A
1426 JSRegExpResult result = matchDisjunction(pattern->m_body.get(), context, false);
1427 if (result == JSRegExpMatch) {
ba379fdc
A
1428 output[0] = context->matchBegin;
1429 output[1] = context->matchEnd;
1430 }
1431
1432 freeDisjunctionContext(context);
1433
14957cd0
A
1434 pattern->m_allocator->stopAllocator();
1435
6fe7ccc8 1436 ASSERT((result == JSRegExpMatch) == (output[0] != offsetNoMatch));
ba379fdc
A
1437 return output[0];
1438 }
1439
6fe7ccc8 1440 Interpreter(BytecodePattern* pattern, unsigned* output, const CharType* input, unsigned length, unsigned start)
ba379fdc
A
1441 : pattern(pattern)
1442 , output(output)
6fe7ccc8 1443 , input(input, start, length)
14957cd0
A
1444 , allocatorPool(0)
1445 , remainingMatchCount(matchLimit)
ba379fdc
A
1446 {
1447 }
1448
1449private:
14957cd0 1450 BytecodePattern* pattern;
6fe7ccc8 1451 unsigned* output;
ba379fdc 1452 InputStream input;
14957cd0
A
1453 BumpPointerPool* allocatorPool;
1454 unsigned remainingMatchCount;
ba379fdc
A
1455};
1456
ba379fdc
A
1457class ByteCompiler {
1458 struct ParenthesesStackEntry {
1459 unsigned beginTerm;
1460 unsigned savedAlternativeIndex;
1461 ParenthesesStackEntry(unsigned beginTerm, unsigned savedAlternativeIndex/*, unsigned subpatternId, bool capture = false*/)
1462 : beginTerm(beginTerm)
1463 , savedAlternativeIndex(savedAlternativeIndex)
1464 {
1465 }
1466 };
1467
1468public:
14957cd0 1469 ByteCompiler(YarrPattern& pattern)
ba379fdc
A
1470 : m_pattern(pattern)
1471 {
f9bf01c6 1472 m_currentAlternativeIndex = 0;
ba379fdc 1473 }
f9bf01c6 1474
14957cd0 1475 PassOwnPtr<BytecodePattern> compile(BumpPointerAllocator* allocator)
ba379fdc 1476 {
14957cd0 1477 regexBegin(m_pattern.m_numSubpatterns, m_pattern.m_body->m_callFrameSize, m_pattern.m_body->m_alternatives[0]->onceThrough());
ba379fdc
A
1478 emitDisjunction(m_pattern.m_body);
1479 regexEnd();
1480
14957cd0 1481 return adoptPtr(new BytecodePattern(m_bodyDisjunction.release(), m_allParenthesesInfo, m_pattern, allocator));
ba379fdc 1482 }
f9bf01c6 1483
ba379fdc
A
1484 void checkInput(unsigned count)
1485 {
f9bf01c6 1486 m_bodyDisjunction->terms.append(ByteTerm::CheckInput(count));
ba379fdc
A
1487 }
1488
14957cd0
A
1489 void uncheckInput(unsigned count)
1490 {
1491 m_bodyDisjunction->terms.append(ByteTerm::UncheckInput(count));
1492 }
1493
6fe7ccc8 1494 void assertionBOL(unsigned inputPosition)
ba379fdc 1495 {
f9bf01c6 1496 m_bodyDisjunction->terms.append(ByteTerm::BOL(inputPosition));
ba379fdc
A
1497 }
1498
6fe7ccc8 1499 void assertionEOL(unsigned inputPosition)
ba379fdc 1500 {
f9bf01c6 1501 m_bodyDisjunction->terms.append(ByteTerm::EOL(inputPosition));
ba379fdc
A
1502 }
1503
6fe7ccc8 1504 void assertionWordBoundary(bool invert, unsigned inputPosition)
ba379fdc 1505 {
f9bf01c6 1506 m_bodyDisjunction->terms.append(ByteTerm::WordBoundary(invert, inputPosition));
ba379fdc
A
1507 }
1508
6fe7ccc8 1509 void atomPatternCharacter(UChar ch, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
ba379fdc
A
1510 {
1511 if (m_pattern.m_ignoreCase) {
81345200
A
1512 ASSERT(u_tolower(ch) <= 0xFFFF);
1513 ASSERT(u_toupper(ch) <= 0xFFFF);
1514
1515 UChar lo = u_tolower(ch);
1516 UChar hi = u_toupper(ch);
f9bf01c6 1517
ba379fdc 1518 if (lo != hi) {
f9bf01c6 1519 m_bodyDisjunction->terms.append(ByteTerm(lo, hi, inputPosition, frameLocation, quantityCount, quantityType));
ba379fdc
A
1520 return;
1521 }
1522 }
1523
f9bf01c6 1524 m_bodyDisjunction->terms.append(ByteTerm(ch, inputPosition, frameLocation, quantityCount, quantityType));
ba379fdc 1525 }
f9bf01c6 1526
6fe7ccc8 1527 void atomCharacterClass(CharacterClass* characterClass, bool invert, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
ba379fdc 1528 {
f9bf01c6 1529 m_bodyDisjunction->terms.append(ByteTerm(characterClass, invert, inputPosition));
ba379fdc 1530
1df5f87f 1531 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount.unsafeGet();
f9bf01c6
A
1532 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
1533 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
ba379fdc
A
1534 }
1535
6fe7ccc8 1536 void atomBackReference(unsigned subpatternId, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
ba379fdc
A
1537 {
1538 ASSERT(subpatternId);
1539
f9bf01c6 1540 m_bodyDisjunction->terms.append(ByteTerm::BackReference(subpatternId, inputPosition));
ba379fdc 1541
1df5f87f 1542 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount.unsafeGet();
f9bf01c6
A
1543 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
1544 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
ba379fdc
A
1545 }
1546
6fe7ccc8 1547 void atomParenthesesOnceBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
14957cd0
A
1548 {
1549 int beginTerm = m_bodyDisjunction->terms.size();
1550
1551 m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
1552 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1553 m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1554 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
1555
1556 m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1557 m_currentAlternativeIndex = beginTerm + 1;
1558 }
1559
6fe7ccc8 1560 void atomParenthesesTerminalBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
14957cd0
A
1561 {
1562 int beginTerm = m_bodyDisjunction->terms.size();
1563
1564 m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalBegin, subpatternId, capture, false, inputPosition));
1565 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1566 m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1567 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
1568
1569 m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1570 m_currentAlternativeIndex = beginTerm + 1;
1571 }
1572
6fe7ccc8 1573 void atomParenthesesSubpatternBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
ba379fdc 1574 {
14957cd0
A
1575 // Errrk! - this is a little crazy, we initially generate as a TypeParenthesesSubpatternOnceBegin,
1576 // then fix this up at the end! - simplifying this should make it much clearer.
1577 // https://bugs.webkit.org/show_bug.cgi?id=50136
1578
f9bf01c6 1579 int beginTerm = m_bodyDisjunction->terms.size();
ba379fdc 1580
14957cd0 1581 m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
f9bf01c6
A
1582 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1583 m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1584 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
ba379fdc 1585
f9bf01c6
A
1586 m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1587 m_currentAlternativeIndex = beginTerm + 1;
ba379fdc
A
1588 }
1589
1590 void atomParentheticalAssertionBegin(unsigned subpatternId, bool invert, unsigned frameLocation, unsigned alternativeFrameLocation)
1591 {
f9bf01c6 1592 int beginTerm = m_bodyDisjunction->terms.size();
ba379fdc 1593
14957cd0 1594 m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionBegin, subpatternId, false, invert, 0));
f9bf01c6
A
1595 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1596 m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1597 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
ba379fdc 1598
f9bf01c6
A
1599 m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1600 m_currentAlternativeIndex = beginTerm + 1;
ba379fdc
A
1601 }
1602
6fe7ccc8 1603 void atomParentheticalAssertionEnd(unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
14957cd0
A
1604 {
1605 unsigned beginTerm = popParenthesesStack();
1606 closeAlternative(beginTerm + 1);
1607 unsigned endTerm = m_bodyDisjunction->terms.size();
1608
1609 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParentheticalAssertionBegin);
1610
1611 bool invert = m_bodyDisjunction->terms[beginTerm].invert();
1612 unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
1613
1614 m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionEnd, subpatternId, false, invert, inputPosition));
1615 m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1616 m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1617 m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
1618
1df5f87f 1619 m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
14957cd0 1620 m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1df5f87f 1621 m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet();
14957cd0
A
1622 m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
1623 }
1624
1625 void assertionDotStarEnclosure(bool bolAnchored, bool eolAnchored)
1626 {
1627 m_bodyDisjunction->terms.append(ByteTerm::DotStarEnclosure(bolAnchored, eolAnchored));
1628 }
1629
ba379fdc
A
1630 unsigned popParenthesesStack()
1631 {
1632 ASSERT(m_parenthesesStack.size());
1633 int stackEnd = m_parenthesesStack.size() - 1;
1634 unsigned beginTerm = m_parenthesesStack[stackEnd].beginTerm;
f9bf01c6 1635 m_currentAlternativeIndex = m_parenthesesStack[stackEnd].savedAlternativeIndex;
ba379fdc
A
1636 m_parenthesesStack.shrink(stackEnd);
1637
f9bf01c6
A
1638 ASSERT(beginTerm < m_bodyDisjunction->terms.size());
1639 ASSERT(m_currentAlternativeIndex < m_bodyDisjunction->terms.size());
1640
ba379fdc
A
1641 return beginTerm;
1642 }
1643
1644#ifndef NDEBUG
1645 void dumpDisjunction(ByteDisjunction* disjunction)
1646 {
93a37866 1647 dataLogF("ByteDisjunction(%p):\n\t", disjunction);
ba379fdc 1648 for (unsigned i = 0; i < disjunction->terms.size(); ++i)
93a37866
A
1649 dataLogF("{ %d } ", disjunction->terms[i].type);
1650 dataLogF("\n");
ba379fdc
A
1651 }
1652#endif
1653
1654 void closeAlternative(int beginTerm)
1655 {
1656 int origBeginTerm = beginTerm;
f9bf01c6
A
1657 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeBegin);
1658 int endIndex = m_bodyDisjunction->terms.size();
ba379fdc 1659
f9bf01c6 1660 unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation;
ba379fdc 1661
f9bf01c6
A
1662 if (!m_bodyDisjunction->terms[beginTerm].alternative.next)
1663 m_bodyDisjunction->terms.remove(beginTerm);
ba379fdc 1664 else {
f9bf01c6
A
1665 while (m_bodyDisjunction->terms[beginTerm].alternative.next) {
1666 beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next;
1667 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeDisjunction);
1668 m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;
1669 m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
ba379fdc 1670 }
ba379fdc 1671
f9bf01c6
A
1672 m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;
1673
1674 m_bodyDisjunction->terms.append(ByteTerm::AlternativeEnd());
1675 m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
ba379fdc
A
1676 }
1677 }
1678
1679 void closeBodyAlternative()
1680 {
1681 int beginTerm = 0;
1682 int origBeginTerm = 0;
f9bf01c6
A
1683 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeBegin);
1684 int endIndex = m_bodyDisjunction->terms.size();
ba379fdc 1685
f9bf01c6 1686 unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation;
ba379fdc 1687
f9bf01c6
A
1688 while (m_bodyDisjunction->terms[beginTerm].alternative.next) {
1689 beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next;
1690 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeDisjunction);
1691 m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;
1692 m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
ba379fdc 1693 }
ba379fdc 1694
f9bf01c6
A
1695 m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;
1696
1697 m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeEnd());
1698 m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
ba379fdc
A
1699 }
1700
1df5f87f 1701 void atomParenthesesSubpatternEnd(unsigned lastSubpatternId, int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType, unsigned callFrameSize = 0)
14957cd0
A
1702 {
1703 unsigned beginTerm = popParenthesesStack();
1704 closeAlternative(beginTerm + 1);
1705 unsigned endTerm = m_bodyDisjunction->terms.size();
1706
1707 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
1708
1709 ByteTerm& parenthesesBegin = m_bodyDisjunction->terms[beginTerm];
1710
1711 bool capture = parenthesesBegin.capture();
1712 unsigned subpatternId = parenthesesBegin.atom.subpatternId;
1713
1714 unsigned numSubpatterns = lastSubpatternId - subpatternId + 1;
93a37866
A
1715 OwnPtr<ByteDisjunction> parenthesesDisjunction = adoptPtr(new ByteDisjunction(numSubpatterns, callFrameSize));
1716
1717 unsigned firstTermInParentheses = beginTerm + 1;
1718 parenthesesDisjunction->terms.reserveInitialCapacity(endTerm - firstTermInParentheses + 2);
14957cd0
A
1719
1720 parenthesesDisjunction->terms.append(ByteTerm::SubpatternBegin());
93a37866 1721 for (unsigned termInParentheses = firstTermInParentheses; termInParentheses < endTerm; ++termInParentheses)
14957cd0
A
1722 parenthesesDisjunction->terms.append(m_bodyDisjunction->terms[termInParentheses]);
1723 parenthesesDisjunction->terms.append(ByteTerm::SubpatternEnd());
1724
1725 m_bodyDisjunction->terms.shrink(beginTerm);
1726
93a37866
A
1727 m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction.get(), capture, inputPosition));
1728 m_allParenthesesInfo.append(parenthesesDisjunction.release());
14957cd0 1729
1df5f87f 1730 m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
14957cd0
A
1731 m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1732 m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
1733 }
1734
1df5f87f 1735 void atomParenthesesOnceEnd(int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
ba379fdc
A
1736 {
1737 unsigned beginTerm = popParenthesesStack();
1738 closeAlternative(beginTerm + 1);
f9bf01c6 1739 unsigned endTerm = m_bodyDisjunction->terms.size();
ba379fdc 1740
14957cd0
A
1741 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
1742
1743 bool capture = m_bodyDisjunction->terms[beginTerm].capture();
f9bf01c6 1744 unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
ba379fdc 1745
14957cd0 1746 m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceEnd, subpatternId, capture, false, inputPosition));
f9bf01c6
A
1747 m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1748 m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1749 m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
ba379fdc 1750
1df5f87f 1751 m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
14957cd0 1752 m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1df5f87f 1753 m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet();
14957cd0
A
1754 m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
1755 }
ba379fdc 1756
1df5f87f 1757 void atomParenthesesTerminalEnd(int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
14957cd0
A
1758 {
1759 unsigned beginTerm = popParenthesesStack();
1760 closeAlternative(beginTerm + 1);
1761 unsigned endTerm = m_bodyDisjunction->terms.size();
ba379fdc 1762
14957cd0 1763 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
ba379fdc 1764
14957cd0
A
1765 bool capture = m_bodyDisjunction->terms[beginTerm].capture();
1766 unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
ba379fdc 1767
14957cd0
A
1768 m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalEnd, subpatternId, capture, false, inputPosition));
1769 m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1770 m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1771 m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
ba379fdc 1772
1df5f87f 1773 m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
14957cd0 1774 m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1df5f87f 1775 m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet();
14957cd0 1776 m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
ba379fdc
A
1777 }
1778
14957cd0 1779 void regexBegin(unsigned numSubpatterns, unsigned callFrameSize, bool onceThrough)
ba379fdc 1780 {
14957cd0
A
1781 m_bodyDisjunction = adoptPtr(new ByteDisjunction(numSubpatterns, callFrameSize));
1782 m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeBegin(onceThrough));
f9bf01c6
A
1783 m_bodyDisjunction->terms[0].frameLocation = 0;
1784 m_currentAlternativeIndex = 0;
ba379fdc
A
1785 }
1786
1787 void regexEnd()
1788 {
1789 closeBodyAlternative();
1790 }
1791
14957cd0 1792 void alternativeBodyDisjunction(bool onceThrough)
ba379fdc 1793 {
f9bf01c6
A
1794 int newAlternativeIndex = m_bodyDisjunction->terms.size();
1795 m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
14957cd0 1796 m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeDisjunction(onceThrough));
ba379fdc 1797
f9bf01c6 1798 m_currentAlternativeIndex = newAlternativeIndex;
ba379fdc
A
1799 }
1800
f9bf01c6 1801 void alternativeDisjunction()
ba379fdc 1802 {
f9bf01c6
A
1803 int newAlternativeIndex = m_bodyDisjunction->terms.size();
1804 m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
1805 m_bodyDisjunction->terms.append(ByteTerm::AlternativeDisjunction());
ba379fdc 1806
f9bf01c6 1807 m_currentAlternativeIndex = newAlternativeIndex;
ba379fdc
A
1808 }
1809
1810 void emitDisjunction(PatternDisjunction* disjunction, unsigned inputCountAlreadyChecked = 0, unsigned parenthesesInputCountAlreadyChecked = 0)
1811 {
1812 for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
1813 unsigned currentCountAlreadyChecked = inputCountAlreadyChecked;
f9bf01c6 1814
93a37866 1815 PatternAlternative* alternative = disjunction->m_alternatives[alt].get();
14957cd0 1816
ba379fdc
A
1817 if (alt) {
1818 if (disjunction == m_pattern.m_body)
14957cd0 1819 alternativeBodyDisjunction(alternative->onceThrough());
ba379fdc 1820 else
f9bf01c6 1821 alternativeDisjunction();
ba379fdc
A
1822 }
1823
ba379fdc 1824 unsigned minimumSize = alternative->m_minimumSize;
ba379fdc
A
1825 ASSERT(minimumSize >= parenthesesInputCountAlreadyChecked);
1826 unsigned countToCheck = minimumSize - parenthesesInputCountAlreadyChecked;
14957cd0
A
1827
1828 if (countToCheck) {
ba379fdc 1829 checkInput(countToCheck);
14957cd0
A
1830 currentCountAlreadyChecked += countToCheck;
1831 }
ba379fdc
A
1832
1833 for (unsigned i = 0; i < alternative->m_terms.size(); ++i) {
1834 PatternTerm& term = alternative->m_terms[i];
1835
1836 switch (term.type) {
1837 case PatternTerm::TypeAssertionBOL:
6fe7ccc8 1838 assertionBOL(currentCountAlreadyChecked - term.inputPosition);
ba379fdc
A
1839 break;
1840
1841 case PatternTerm::TypeAssertionEOL:
6fe7ccc8 1842 assertionEOL(currentCountAlreadyChecked - term.inputPosition);
ba379fdc
A
1843 break;
1844
1845 case PatternTerm::TypeAssertionWordBoundary:
6fe7ccc8 1846 assertionWordBoundary(term.invert(), currentCountAlreadyChecked - term.inputPosition);
ba379fdc
A
1847 break;
1848
1849 case PatternTerm::TypePatternCharacter:
6fe7ccc8 1850 atomPatternCharacter(term.patternCharacter, currentCountAlreadyChecked - term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType);
ba379fdc
A
1851 break;
1852
1853 case PatternTerm::TypeCharacterClass:
6fe7ccc8 1854 atomCharacterClass(term.characterClass, term.invert(), currentCountAlreadyChecked- term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType);
ba379fdc
A
1855 break;
1856
1857 case PatternTerm::TypeBackReference:
6fe7ccc8 1858 atomBackReference(term.backReferenceSubpatternId, currentCountAlreadyChecked - term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType);
ba379fdc
A
1859 break;
1860
1861 case PatternTerm::TypeForwardReference:
1862 break;
1863
1864 case PatternTerm::TypeParenthesesSubpattern: {
1865 unsigned disjunctionAlreadyCheckedCount = 0;
14957cd0
A
1866 if (term.quantityCount == 1 && !term.parentheses.isCopy) {
1867 unsigned alternativeFrameLocation = term.frameLocation;
1868 // For QuantifierFixedCount we pre-check the minimum size; for greedy/non-greedy we reserve a slot in the frame.
1869 if (term.quantityType == QuantifierFixedCount)
ba379fdc 1870 disjunctionAlreadyCheckedCount = term.parentheses.disjunction->m_minimumSize;
14957cd0
A
1871 else
1872 alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
1873 unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
6fe7ccc8 1874 atomParenthesesOnceBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, alternativeFrameLocation);
14957cd0
A
1875 emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount);
1876 atomParenthesesOnceEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType);
1877 } else if (term.parentheses.isTerminal) {
1878 unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
6fe7ccc8 1879 atomParenthesesTerminalBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, term.frameLocation + YarrStackSpaceForBackTrackInfoParenthesesOnce);
14957cd0
A
1880 emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount);
1881 atomParenthesesTerminalEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType);
ba379fdc
A
1882 } else {
1883 unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
6fe7ccc8 1884 atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, 0);
ba379fdc 1885 emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0);
14957cd0 1886 atomParenthesesSubpatternEnd(term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
ba379fdc
A
1887 }
1888 break;
1889 }
1890
1891 case PatternTerm::TypeParentheticalAssertion: {
14957cd0
A
1892 unsigned alternativeFrameLocation = term.frameLocation + YarrStackSpaceForBackTrackInfoParentheticalAssertion;
1893
1894 ASSERT(currentCountAlreadyChecked >= static_cast<unsigned>(term.inputPosition));
1895 unsigned positiveInputOffset = currentCountAlreadyChecked - static_cast<unsigned>(term.inputPosition);
1896 unsigned uncheckAmount = 0;
1897 if (positiveInputOffset > term.parentheses.disjunction->m_minimumSize) {
1898 uncheckAmount = positiveInputOffset - term.parentheses.disjunction->m_minimumSize;
1899 uncheckInput(uncheckAmount);
1900 currentCountAlreadyChecked -= uncheckAmount;
1901 }
f9bf01c6 1902
14957cd0
A
1903 atomParentheticalAssertionBegin(term.parentheses.subpatternId, term.invert(), term.frameLocation, alternativeFrameLocation);
1904 emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, positiveInputOffset - uncheckAmount);
1905 atomParentheticalAssertionEnd(0, term.frameLocation, term.quantityCount, term.quantityType);
1906 if (uncheckAmount) {
1907 checkInput(uncheckAmount);
1908 currentCountAlreadyChecked += uncheckAmount;
1909 }
ba379fdc
A
1910 break;
1911 }
14957cd0
A
1912
1913 case PatternTerm::TypeDotStarEnclosure:
1914 assertionDotStarEnclosure(term.anchors.bolAnchor, term.anchors.eolAnchor);
1915 break;
ba379fdc
A
1916 }
1917 }
1918 }
1919 }
1920
1921private:
14957cd0
A
1922 YarrPattern& m_pattern;
1923 OwnPtr<ByteDisjunction> m_bodyDisjunction;
f9bf01c6 1924 unsigned m_currentAlternativeIndex;
ba379fdc 1925 Vector<ParenthesesStackEntry> m_parenthesesStack;
81345200 1926 Vector<OwnPtr<ByteDisjunction>> m_allParenthesesInfo;
ba379fdc
A
1927};
1928
14957cd0 1929PassOwnPtr<BytecodePattern> byteCompile(YarrPattern& pattern, BumpPointerAllocator* allocator)
ba379fdc 1930{
14957cd0 1931 return ByteCompiler(pattern).compile(allocator);
ba379fdc
A
1932}
1933
93a37866 1934unsigned interpret(BytecodePattern* bytecode, const String& input, unsigned start, unsigned* output)
6fe7ccc8
A
1935{
1936 if (input.is8Bit())
1937 return Interpreter<LChar>(bytecode, output, input.characters8(), input.length(), start).interpret();
1938 return Interpreter<UChar>(bytecode, output, input.characters16(), input.length(), start).interpret();
1939}
1940
1941unsigned interpret(BytecodePattern* bytecode, const LChar* input, unsigned length, unsigned start, unsigned* output)
1942{
1943 return Interpreter<LChar>(bytecode, output, input, length, start).interpret();
1944}
1945
1946unsigned interpret(BytecodePattern* bytecode, const UChar* input, unsigned length, unsigned start, unsigned* output)
ba379fdc 1947{
6fe7ccc8 1948 return Interpreter<UChar>(bytecode, output, input, length, start).interpret();
ba379fdc
A
1949}
1950
6fe7ccc8
A
1951// These should be the same for both UChar & LChar.
1952COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoPatternCharacter) == (YarrStackSpaceForBackTrackInfoPatternCharacter * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoPatternCharacter);
1953COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoCharacterClass) == (YarrStackSpaceForBackTrackInfoCharacterClass * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoCharacterClass);
1954COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoBackReference) == (YarrStackSpaceForBackTrackInfoBackReference * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoBackReference);
1955COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoAlternative) == (YarrStackSpaceForBackTrackInfoAlternative * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoAlternative);
1956COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParentheticalAssertion) == (YarrStackSpaceForBackTrackInfoParentheticalAssertion * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheticalAssertion);
1957COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParenthesesOnce) == (YarrStackSpaceForBackTrackInfoParenthesesOnce * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParenthesesOnce);
1958COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParentheses) == (YarrStackSpaceForBackTrackInfoParentheses * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheses);
ba379fdc
A
1959
1960
1961} }