]> git.saurik.com Git - apple/javascriptcore.git/blob - yarr/RegexParser.h
JavaScriptCore-554.1.tar.gz
[apple/javascriptcore.git] / yarr / RegexParser.h
1 /*
2 * Copyright (C) 2009 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26 #ifndef RegexParser_h
27 #define RegexParser_h
28
29 #include <wtf/Platform.h>
30
31 #if ENABLE(YARR)
32
33 #include <UString.h>
34 #include <wtf/ASCIICType.h>
35 #include <wtf/unicode/Unicode.h>
36 #include <limits.h>
37
38 namespace JSC { namespace Yarr {
39
// Identifies the predefined character classes that the parser reports via
// atomBuiltInCharacterClass() / atomCharacterClassBuiltIn(). Each class may be
// reported either as-is or inverted.
enum BuiltInCharacterClassID {
    DigitClassID,   // \d (inverted: \D)
    SpaceClassID,   // \s (inverted: \S)
    WordClassID,    // \w (inverted: \W)
    NewlineClassID, // reported inverted for the '.' atom
};
46
47 // The Parser class should not be used directly - only via the Yarr::parse() method.
48 template<class Delegate>
49 class Parser {
50 private:
51 template<class FriendDelegate>
52 friend const char* parse(FriendDelegate& delegate, const UString& pattern, unsigned backReferenceLimit);
53
// Parse failure codes. NoError is zero, so m_err can be tested as a boolean.
// The enumerators index the errorMessages table in parse(); the two must be
// kept in the same order, with NumberOfErrorCodes last.
enum ErrorCode {
    NoError,
    PatternTooLarge,          // "regular expression too large"
    QuantifierOutOfOrder,     // "numbers out of order in {} quantifier"
    QuantifierWithoutAtom,    // "nothing to repeat"
    MissingParentheses,       // "missing )"
    ParenthesesUnmatched,     // "unmatched parentheses"
    ParenthesesTypeInvalid,   // "unrecognized character after (?"
    CharacterClassUnmatched,  // "missing terminating ] for character class"
    CharacterClassOutOfOrder, // "range out of order in character class"
    EscapeUnterminated,       // "\ at end of pattern"
    NumberOfErrorCodes
};
67
68 /*
69 * CharacterClassParserDelegate:
70 *
71 * The class CharacterClassParserDelegate is used in the parsing of character
72 * classes. This class handles detection of character ranges. This class
73 * implements enough of the delegate interface such that it can be passed to
74 * parseEscape() as an EscapeDelegate. This allows parseEscape() to be reused
75 * to perform the parsing of escape characters in character sets.
76 */
77 class CharacterClassParserDelegate {
78 public:
79 CharacterClassParserDelegate(Delegate& delegate, ErrorCode& err)
80 : m_delegate(delegate)
81 , m_err(err)
82 , m_state(empty)
83 {
84 }
85
86 /*
87 * begin():
88 *
89 * Called at beginning of construction.
90 */
91 void begin(bool invert)
92 {
93 m_delegate.atomCharacterClassBegin(invert);
94 }
95
96 /*
97 * atomPatternCharacterUnescaped():
98 *
99 * This method is called directly from parseCharacterClass(), to report a new
100 * pattern character token. This method differs from atomPatternCharacter(),
101 * which will be called from parseEscape(), since a hypen provided via this
102 * method may be indicating a character range, but a hyphen parsed by
103 * parseEscape() cannot be interpreted as doing so.
104 */
105 void atomPatternCharacterUnescaped(UChar ch)
106 {
107 switch (m_state) {
108 case empty:
109 m_character = ch;
110 m_state = cachedCharacter;
111 break;
112
113 case cachedCharacter:
114 if (ch == '-')
115 m_state = cachedCharacterHyphen;
116 else {
117 m_delegate.atomCharacterClassAtom(m_character);
118 m_character = ch;
119 }
120 break;
121
122 case cachedCharacterHyphen:
123 if (ch >= m_character)
124 m_delegate.atomCharacterClassRange(m_character, ch);
125 else
126 m_err = CharacterClassOutOfOrder;
127 m_state = empty;
128 }
129 }
130
131 /*
132 * atomPatternCharacter():
133 *
134 * Adds a pattern character, called by parseEscape(), as such will not
135 * interpret a hyphen as indicating a character range.
136 */
137 void atomPatternCharacter(UChar ch)
138 {
139 // Flush if a character is already pending to prevent the
140 // hyphen from begin interpreted as indicating a range.
141 if((ch == '-') && (m_state == cachedCharacter))
142 flush();
143
144 atomPatternCharacterUnescaped(ch);
145 }
146
147 /*
148 * atomBuiltInCharacterClass():
149 *
150 * Adds a built-in character class, called by parseEscape().
151 */
152 void atomBuiltInCharacterClass(BuiltInCharacterClassID classID, bool invert)
153 {
154 flush();
155 m_delegate.atomCharacterClassBuiltIn(classID, invert);
156 }
157
158 /*
159 * end():
160 *
161 * Called at end of construction.
162 */
163 void end()
164 {
165 flush();
166 m_delegate.atomCharacterClassEnd();
167 }
168
169 // parseEscape() should never call these delegate methods when
170 // invoked with inCharacterClass set.
171 void assertionWordBoundary(bool) { ASSERT_NOT_REACHED(); }
172 void atomBackReference(unsigned) { ASSERT_NOT_REACHED(); }
173
174 private:
175 void flush()
176 {
177 if (m_state != empty) // either cachedCharacter or cachedCharacterHyphen
178 m_delegate.atomCharacterClassAtom(m_character);
179 if (m_state == cachedCharacterHyphen)
180 m_delegate.atomCharacterClassAtom('-');
181 m_state = empty;
182 }
183
184 Delegate& m_delegate;
185 ErrorCode& m_err;
186 enum CharacterClassConstructionState {
187 empty,
188 cachedCharacter,
189 cachedCharacterHyphen,
190 } m_state;
191 UChar m_character;
192 };
193
194 Parser(Delegate& delegate, const UString& pattern, unsigned backReferenceLimit)
195 : m_delegate(delegate)
196 , m_backReferenceLimit(backReferenceLimit)
197 , m_err(NoError)
198 , m_data(pattern.data())
199 , m_size(pattern.size())
200 , m_index(0)
201 , m_parenthesesNestingDepth(0)
202 {
203 }
204
205 /*
206 * parseEscape():
207 *
208 * Helper for parseTokens() AND parseCharacterClass().
209 * Unlike the other parser methods, this function does not report tokens
210 * directly to the member delegate (m_delegate), instead tokens are
211 * emitted to the delegate provided as an argument. In the case of atom
212 * escapes, parseTokens() will call parseEscape() passing m_delegate as
213 * an argument, and as such the escape will be reported to the delegate.
214 *
215 * However this method may also be used by parseCharacterClass(), in which
216 * case a CharacterClassParserDelegate will be passed as the delegate that
217 * tokens should be added to. A boolean flag is also provided to indicate
218 * whether that an escape in a CharacterClass is being parsed (some parsing
219 * rules change in this context).
220 *
221 * The boolean value returned by this method indicates whether the token
222 * parsed was an atom (outside of a characted class \b and \B will be
223 * interpreted as assertions).
224 */
225 template<bool inCharacterClass, class EscapeDelegate>
226 bool parseEscape(EscapeDelegate& delegate)
227 {
228 ASSERT(!m_err);
229 ASSERT(peek() == '\\');
230 consume();
231
232 if (atEndOfPattern()) {
233 m_err = EscapeUnterminated;
234 return false;
235 }
236
237 switch (peek()) {
238 // Assertions
239 case 'b':
240 consume();
241 if (inCharacterClass)
242 delegate.atomPatternCharacter('\b');
243 else {
244 delegate.assertionWordBoundary(false);
245 return false;
246 }
247 break;
248 case 'B':
249 consume();
250 if (inCharacterClass)
251 delegate.atomPatternCharacter('B');
252 else {
253 delegate.assertionWordBoundary(true);
254 return false;
255 }
256 break;
257
258 // CharacterClassEscape
259 case 'd':
260 consume();
261 delegate.atomBuiltInCharacterClass(DigitClassID, false);
262 break;
263 case 's':
264 consume();
265 delegate.atomBuiltInCharacterClass(SpaceClassID, false);
266 break;
267 case 'w':
268 consume();
269 delegate.atomBuiltInCharacterClass(WordClassID, false);
270 break;
271 case 'D':
272 consume();
273 delegate.atomBuiltInCharacterClass(DigitClassID, true);
274 break;
275 case 'S':
276 consume();
277 delegate.atomBuiltInCharacterClass(SpaceClassID, true);
278 break;
279 case 'W':
280 consume();
281 delegate.atomBuiltInCharacterClass(WordClassID, true);
282 break;
283
284 // DecimalEscape
285 case '1':
286 case '2':
287 case '3':
288 case '4':
289 case '5':
290 case '6':
291 case '7':
292 case '8':
293 case '9': {
294 // To match Firefox, we parse an invalid backreference in the range [1-7] as an octal escape.
295 // First, try to parse this as backreference.
296 if (!inCharacterClass) {
297 ParseState state = saveState();
298
299 unsigned backReference = consumeNumber();
300 if (backReference <= m_backReferenceLimit) {
301 delegate.atomBackReference(backReference);
302 break;
303 }
304
305 restoreState(state);
306 }
307
308 // Not a backreference, and not octal.
309 if (peek() >= '8') {
310 delegate.atomPatternCharacter('\\');
311 break;
312 }
313
314 // Fall-through to handle this as an octal escape.
315 }
316
317 // Octal escape
318 case '0':
319 delegate.atomPatternCharacter(consumeOctal());
320 break;
321
322 // ControlEscape
323 case 'f':
324 consume();
325 delegate.atomPatternCharacter('\f');
326 break;
327 case 'n':
328 consume();
329 delegate.atomPatternCharacter('\n');
330 break;
331 case 'r':
332 consume();
333 delegate.atomPatternCharacter('\r');
334 break;
335 case 't':
336 consume();
337 delegate.atomPatternCharacter('\t');
338 break;
339 case 'v':
340 consume();
341 delegate.atomPatternCharacter('\v');
342 break;
343
344 // ControlLetter
345 case 'c': {
346 ParseState state = saveState();
347 consume();
348 if (!atEndOfPattern()) {
349 int control = consume();
350
351 // To match Firefox, inside a character class, we also accept numbers and '_' as control characters.
352 if (inCharacterClass ? WTF::isASCIIAlphanumeric(control) || (control == '_') : WTF::isASCIIAlpha(control)) {
353 delegate.atomPatternCharacter(control & 0x1f);
354 break;
355 }
356 }
357 restoreState(state);
358 delegate.atomPatternCharacter('\\');
359 break;
360 }
361
362 // HexEscape
363 case 'x': {
364 consume();
365 int x = tryConsumeHex(2);
366 if (x == -1)
367 delegate.atomPatternCharacter('x');
368 else
369 delegate.atomPatternCharacter(x);
370 break;
371 }
372
373 // UnicodeEscape
374 case 'u': {
375 consume();
376 int u = tryConsumeHex(4);
377 if (u == -1)
378 delegate.atomPatternCharacter('u');
379 else
380 delegate.atomPatternCharacter(u);
381 break;
382 }
383
384 // IdentityEscape
385 default:
386 delegate.atomPatternCharacter(consume());
387 }
388
389 return true;
390 }
391
392 /*
393 * parseAtomEscape(), parseCharacterClassEscape():
394 *
395 * These methods alias to parseEscape().
396 */
397 bool parseAtomEscape()
398 {
399 return parseEscape<false>(m_delegate);
400 }
401 void parseCharacterClassEscape(CharacterClassParserDelegate& delegate)
402 {
403 parseEscape<true>(delegate);
404 }
405
406 /*
407 * parseCharacterClass():
408 *
409 * Helper for parseTokens(); calls dirctly and indirectly (via parseCharacterClassEscape)
410 * to an instance of CharacterClassParserDelegate, to describe the character class to the
411 * delegate.
412 */
413 void parseCharacterClass()
414 {
415 ASSERT(!m_err);
416 ASSERT(peek() == '[');
417 consume();
418
419 CharacterClassParserDelegate characterClassConstructor(m_delegate, m_err);
420
421 characterClassConstructor.begin(tryConsume('^'));
422
423 while (!atEndOfPattern()) {
424 switch (peek()) {
425 case ']':
426 consume();
427 characterClassConstructor.end();
428 return;
429
430 case '\\':
431 parseCharacterClassEscape(characterClassConstructor);
432 break;
433
434 default:
435 characterClassConstructor.atomPatternCharacterUnescaped(consume());
436 }
437
438 if (m_err)
439 return;
440 }
441
442 m_err = CharacterClassUnmatched;
443 }
444
445 /*
446 * parseParenthesesBegin():
447 *
448 * Helper for parseTokens(); checks for parentheses types other than regular capturing subpatterns.
449 */
450 void parseParenthesesBegin()
451 {
452 ASSERT(!m_err);
453 ASSERT(peek() == '(');
454 consume();
455
456 if (tryConsume('?')) {
457 if (atEndOfPattern()) {
458 m_err = ParenthesesTypeInvalid;
459 return;
460 }
461
462 switch (consume()) {
463 case ':':
464 m_delegate.atomParenthesesSubpatternBegin(false);
465 break;
466
467 case '=':
468 m_delegate.atomParentheticalAssertionBegin();
469 break;
470
471 case '!':
472 m_delegate.atomParentheticalAssertionBegin(true);
473 break;
474
475 default:
476 m_err = ParenthesesTypeInvalid;
477 }
478 } else
479 m_delegate.atomParenthesesSubpatternBegin();
480
481 ++m_parenthesesNestingDepth;
482 }
483
484 /*
485 * parseParenthesesEnd():
486 *
487 * Helper for parseTokens(); checks for parse errors (due to unmatched parentheses).
488 */
489 void parseParenthesesEnd()
490 {
491 ASSERT(!m_err);
492 ASSERT(peek() == ')');
493 consume();
494
495 if (m_parenthesesNestingDepth > 0)
496 m_delegate.atomParenthesesEnd();
497 else
498 m_err = ParenthesesUnmatched;
499
500 --m_parenthesesNestingDepth;
501 }
502
503 /*
504 * parseQuantifier():
505 *
506 * Helper for parseTokens(); checks for parse errors and non-greedy quantifiers.
507 */
508 void parseQuantifier(bool lastTokenWasAnAtom, unsigned min, unsigned max)
509 {
510 ASSERT(!m_err);
511 ASSERT(min <= max);
512
513 if (lastTokenWasAnAtom)
514 m_delegate.quantifyAtom(min, max, !tryConsume('?'));
515 else
516 m_err = QuantifierWithoutAtom;
517 }
518
519 /*
520 * parseTokens():
521 *
522 * This method loops over the input pattern reporting tokens to the delegate.
523 * The method returns when a parse error is detected, or the end of the pattern
524 * is reached. One piece of state is tracked around the loop, which is whether
525 * the last token passed to the delegate was an atom (this is necessary to detect
526 * a parse error when a quantifier provided without an atom to quantify).
527 */
528 void parseTokens()
529 {
530 bool lastTokenWasAnAtom = false;
531
532 while (!atEndOfPattern()) {
533 switch (peek()) {
534 case '|':
535 consume();
536 m_delegate.disjunction();
537 lastTokenWasAnAtom = false;
538 break;
539
540 case '(':
541 parseParenthesesBegin();
542 lastTokenWasAnAtom = false;
543 break;
544
545 case ')':
546 parseParenthesesEnd();
547 lastTokenWasAnAtom = true;
548 break;
549
550 case '^':
551 consume();
552 m_delegate.assertionBOL();
553 lastTokenWasAnAtom = false;
554 break;
555
556 case '$':
557 consume();
558 m_delegate.assertionEOL();
559 lastTokenWasAnAtom = false;
560 break;
561
562 case '.':
563 consume();
564 m_delegate.atomBuiltInCharacterClass(NewlineClassID, true);
565 lastTokenWasAnAtom = true;
566 break;
567
568 case '[':
569 parseCharacterClass();
570 lastTokenWasAnAtom = true;
571 break;
572
573 case '\\':
574 lastTokenWasAnAtom = parseAtomEscape();
575 break;
576
577 case '*':
578 consume();
579 parseQuantifier(lastTokenWasAnAtom, 0, UINT_MAX);
580 lastTokenWasAnAtom = false;
581 break;
582
583 case '+':
584 consume();
585 parseQuantifier(lastTokenWasAnAtom, 1, UINT_MAX);
586 lastTokenWasAnAtom = false;
587 break;
588
589 case '?':
590 consume();
591 parseQuantifier(lastTokenWasAnAtom, 0, 1);
592 lastTokenWasAnAtom = false;
593 break;
594
595 case '{': {
596 ParseState state = saveState();
597
598 consume();
599 if (peekIsDigit()) {
600 unsigned min = consumeNumber();
601 unsigned max = min;
602
603 if (tryConsume(','))
604 max = peekIsDigit() ? consumeNumber() : UINT_MAX;
605
606 if (tryConsume('}')) {
607 if (min <= max)
608 parseQuantifier(lastTokenWasAnAtom, min, max);
609 else
610 m_err = QuantifierOutOfOrder;
611 lastTokenWasAnAtom = false;
612 break;
613 }
614 }
615
616 restoreState(state);
617 } // if we did not find a complete quantifer, fall through to the default case.
618
619 default:
620 m_delegate.atomPatternCharacter(consume());
621 lastTokenWasAnAtom = true;
622 }
623
624 if (m_err)
625 return;
626 }
627
628 if (m_parenthesesNestingDepth > 0)
629 m_err = MissingParentheses;
630 }
631
632 /*
633 * parse():
634 *
635 * This method calls regexBegin(), calls parseTokens() to parse over the input
636 * patterns, calls regexEnd() or regexError() as appropriate, and converts any
637 * error code to a const char* for a result.
638 */
639 const char* parse()
640 {
641 m_delegate.regexBegin();
642
643 if (m_size > MAX_PATTERN_SIZE)
644 m_err = PatternTooLarge;
645 else
646 parseTokens();
647 ASSERT(atEndOfPattern() || m_err);
648
649 if (m_err)
650 m_delegate.regexError();
651 else
652 m_delegate.regexEnd();
653
654 // The order of this array must match the ErrorCode enum.
655 static const char* errorMessages[NumberOfErrorCodes] = {
656 0, // NoError
657 "regular expression too large",
658 "numbers out of order in {} quantifier",
659 "nothing to repeat",
660 "missing )",
661 "unmatched parentheses",
662 "unrecognized character after (?",
663 "missing terminating ] for character class",
664 "range out of order in character class",
665 "\\ at end of pattern"
666 };
667
668 return errorMessages[m_err];
669 }
670
671
672 // Misc helper functions:
673
674 typedef unsigned ParseState;
675
676 ParseState saveState()
677 {
678 return m_index;
679 }
680
681 void restoreState(ParseState state)
682 {
683 m_index = state;
684 }
685
686 bool atEndOfPattern()
687 {
688 ASSERT(m_index <= m_size);
689 return m_index == m_size;
690 }
691
692 int peek()
693 {
694 ASSERT(m_index < m_size);
695 return m_data[m_index];
696 }
697
698 bool peekIsDigit()
699 {
700 return !atEndOfPattern() && WTF::isASCIIDigit(peek());
701 }
702
703 unsigned peekDigit()
704 {
705 ASSERT(peekIsDigit());
706 return peek() - '0';
707 }
708
709 int consume()
710 {
711 ASSERT(m_index < m_size);
712 return m_data[m_index++];
713 }
714
715 unsigned consumeDigit()
716 {
717 ASSERT(peekIsDigit());
718 return consume() - '0';
719 }
720
721 unsigned consumeNumber()
722 {
723 unsigned n = consumeDigit();
724 // check for overflow.
725 for (unsigned newValue; peekIsDigit() && ((newValue = n * 10 + peekDigit()) >= n); ) {
726 n = newValue;
727 consume();
728 }
729 return n;
730 }
731
732 unsigned consumeOctal()
733 {
734 ASSERT(WTF::isASCIIOctalDigit(peek()));
735
736 unsigned n = consumeDigit();
737 while (n < 32 && !atEndOfPattern() && WTF::isASCIIOctalDigit(peek()))
738 n = n * 8 + consumeDigit();
739 return n;
740 }
741
742 bool tryConsume(UChar ch)
743 {
744 if (atEndOfPattern() || (m_data[m_index] != ch))
745 return false;
746 ++m_index;
747 return true;
748 }
749
750 int tryConsumeHex(int count)
751 {
752 ParseState state = saveState();
753
754 int n = 0;
755 while (count--) {
756 if (atEndOfPattern() || !WTF::isASCIIHexDigit(peek())) {
757 restoreState(state);
758 return -1;
759 }
760 n = (n << 4) | WTF::toASCIIHexValue(consume());
761 }
762 return n;
763 }
764
765 Delegate& m_delegate;
766 unsigned m_backReferenceLimit;
767 ErrorCode m_err;
768 const UChar* m_data;
769 unsigned m_size;
770 unsigned m_index;
771 unsigned m_parenthesesNestingDepth;
772
773 // Derived by empirical testing of compile time in PCRE and WREC.
774 static const unsigned MAX_PATTERN_SIZE = 1024 * 1024;
775 };
776
777 /*
778 * Yarr::parse():
779 *
780 * The parse method is passed a pattern to be parsed and a delegate upon which
781 * callbacks will be made to record the parsed tokens forming the regex.
782 * Yarr::parse() returns null on success, or a const C string providing an error
783 * message where a parse error occurs.
784 *
785 * The Delegate must implement the following interface:
786 *
787 * void assertionBOL();
788 * void assertionEOL();
789 * void assertionWordBoundary(bool invert);
790 *
791 * void atomPatternCharacter(UChar ch);
792 * void atomBuiltInCharacterClass(BuiltInCharacterClassID classID, bool invert);
793 * void atomCharacterClassBegin(bool invert)
794 * void atomCharacterClassAtom(UChar ch)
795 * void atomCharacterClassRange(UChar begin, UChar end)
796 * void atomCharacterClassBuiltIn(BuiltInCharacterClassID classID, bool invert)
797 * void atomCharacterClassEnd()
798 * void atomParenthesesSubpatternBegin(bool capture = true);
799 * void atomParentheticalAssertionBegin(bool invert = false);
800 * void atomParenthesesEnd();
801 * void atomBackReference(unsigned subpatternId);
802 *
803 * void quantifyAtom(unsigned min, unsigned max, bool greedy);
804 *
805 * void disjunction();
806 *
807 * void regexBegin();
808 * void regexEnd();
809 * void regexError();
810 *
811 * Before any call recording tokens are made, regexBegin() will be called on the
812 * delegate once. Once parsing is complete either regexEnd() or regexError() will
813 * be called, as appropriate.
814 *
815 * The regular expression is described by a sequence of assertion*() and atom*()
816 * callbacks to the delegate, describing the terms in the regular expression.
817 * Following an atom a quantifyAtom() call may occur to indicate that the previous
818 * atom should be quantified. In the case of atoms described across multiple
819 * calls (parentheses and character classes) the call to quantifyAtom() will come
820 * after the call to the atom*End() method, never after atom*Begin().
821 *
822 * Character classes may either be described by a single call to
823 * atomBuiltInCharacterClass(), or by a sequence of atomCharacterClass*() calls.
824 * In the latter case, ...Begin() will be called, followed by a sequence of
825 * calls to ...Atom(), ...Range(), and ...BuiltIn(), followed by a call to ...End().
826 *
827 * Sequences of atoms and assertions are broken into alternatives via calls to
828 * disjunction(). Assertions, atoms, and disjunctions emitted between calls to
829 * atomParenthesesBegin() and atomParenthesesEnd() form the body of a subpattern.
830 * atomParenthesesBegin() is passed a subpatternId. In the case of a regular
831 * capturing subpattern, this will be the subpatternId associated with these
832 * parentheses, and will also by definition be the lowest subpatternId of these
833 * parentheses and of any nested paretheses. The atomParenthesesEnd() method
834 * is passed the subpatternId of the last capturing subexpression nested within
835 * these paretheses. In the case of a capturing subpattern with no nested
836 * capturing subpatterns, the same subpatternId will be passed to the begin and
837 * end functions. In the case of non-capturing subpatterns the subpatternId
838 * passed to the begin method is also the first possible subpatternId that might
839 * be nested within these paretheses. If a set of non-capturing parentheses does
840 * not contain any capturing subpatterns, then the subpatternId passed to begin
841 * will be greater than the subpatternId passed to end.
842 */
843
844 template<class Delegate>
845 const char* parse(Delegate& delegate, const UString& pattern, unsigned backReferenceLimit = UINT_MAX)
846 {
847 return Parser<Delegate>(delegate, pattern, backReferenceLimit).parse();
848 }
849
850 } } // namespace JSC::Yarr
851
852 #endif
853
854 #endif // RegexParser_h