]> git.saurik.com Git - apple/javascriptcore.git/blob - yarr/RegexParser.h
c946c2e8e48b2310eae3d323ee2954bc0a76f44c
[apple/javascriptcore.git] / yarr / RegexParser.h
1 /*
2 * Copyright (C) 2009 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26 #ifndef RegexParser_h
27 #define RegexParser_h
28
29 #if ENABLE(YARR)
30
31 #include <UString.h>
32 #include <limits.h>
33 #include <wtf/ASCIICType.h>
34 #include <wtf/unicode/Unicode.h>
35
36 namespace JSC { namespace Yarr {
37
// Identifies one of the predefined character classes that the parser can
// report to a delegate (optionally inverted by the accompanying flag).
enum BuiltInCharacterClassID {
    DigitClassID,   // reported for \d and \D
    SpaceClassID,   // reported for \s and \S
    WordClassID,    // reported for \w and \W
    NewlineClassID  // reported, inverted, for '.'
};
44
45 // The Parser class should not be used directly - only via the Yarr::parse() method.
46 template<class Delegate>
47 class Parser {
48 private:
49 template<class FriendDelegate>
50 friend const char* parse(FriendDelegate& delegate, const UString& pattern, unsigned backReferenceLimit);
51
// Parse failure reasons. The numeric values index the message table in
// Parser::parse(), so the two must be kept in the same order.
enum ErrorCode {
    NoError,                  // parsing succeeded
    PatternTooLarge,          // pattern is longer than MAX_PATTERN_SIZE
    QuantifierOutOfOrder,     // {min,max} with min > max
    QuantifierWithoutAtom,    // quantifier with nothing before it to repeat
    MissingParentheses,       // pattern ended with a group still open
    ParenthesesUnmatched,     // ')' with no group open
    ParenthesesTypeInvalid,   // "(?" not followed by ':', '=' or '!'
    CharacterClassUnmatched,  // '[' without a terminating ']'
    CharacterClassOutOfOrder, // range whose end is below its start
    EscapeUnterminated,       // '\' as the final character
    NumberOfErrorCodes        // count of the codes above; not an error itself
};
65
66 /*
67 * CharacterClassParserDelegate:
68 *
69 * The class CharacterClassParserDelegate is used in the parsing of character
70 * classes. This class handles detection of character ranges. This class
71 * implements enough of the delegate interface such that it can be passed to
72 * parseEscape() as an EscapeDelegate. This allows parseEscape() to be reused
73 * to perform the parsing of escape characters in character sets.
74 */
75 class CharacterClassParserDelegate {
76 public:
77 CharacterClassParserDelegate(Delegate& delegate, ErrorCode& err)
78 : m_delegate(delegate)
79 , m_err(err)
80 , m_state(empty)
81 {
82 }
83
84 /*
85 * begin():
86 *
87 * Called at beginning of construction.
88 */
89 void begin(bool invert)
90 {
91 m_delegate.atomCharacterClassBegin(invert);
92 }
93
94 /*
95 * atomPatternCharacterUnescaped():
96 *
97 * This method is called directly from parseCharacterClass(), to report a new
98 * pattern character token. This method differs from atomPatternCharacter(),
99 * which will be called from parseEscape(), since a hypen provided via this
100 * method may be indicating a character range, but a hyphen parsed by
101 * parseEscape() cannot be interpreted as doing so.
102 */
103 void atomPatternCharacterUnescaped(UChar ch)
104 {
105 switch (m_state) {
106 case empty:
107 m_character = ch;
108 m_state = cachedCharacter;
109 break;
110
111 case cachedCharacter:
112 if (ch == '-')
113 m_state = cachedCharacterHyphen;
114 else {
115 m_delegate.atomCharacterClassAtom(m_character);
116 m_character = ch;
117 }
118 break;
119
120 case cachedCharacterHyphen:
121 if (ch >= m_character)
122 m_delegate.atomCharacterClassRange(m_character, ch);
123 else
124 m_err = CharacterClassOutOfOrder;
125 m_state = empty;
126 }
127 }
128
129 /*
130 * atomPatternCharacter():
131 *
132 * Adds a pattern character, called by parseEscape(), as such will not
133 * interpret a hyphen as indicating a character range.
134 */
135 void atomPatternCharacter(UChar ch)
136 {
137 // Flush if a character is already pending to prevent the
138 // hyphen from begin interpreted as indicating a range.
139 if((ch == '-') && (m_state == cachedCharacter))
140 flush();
141
142 atomPatternCharacterUnescaped(ch);
143 }
144
145 /*
146 * atomBuiltInCharacterClass():
147 *
148 * Adds a built-in character class, called by parseEscape().
149 */
150 void atomBuiltInCharacterClass(BuiltInCharacterClassID classID, bool invert)
151 {
152 flush();
153 m_delegate.atomCharacterClassBuiltIn(classID, invert);
154 }
155
156 /*
157 * end():
158 *
159 * Called at end of construction.
160 */
161 void end()
162 {
163 flush();
164 m_delegate.atomCharacterClassEnd();
165 }
166
167 // parseEscape() should never call these delegate methods when
168 // invoked with inCharacterClass set.
169 void assertionWordBoundary(bool) { ASSERT_NOT_REACHED(); }
170 void atomBackReference(unsigned) { ASSERT_NOT_REACHED(); }
171
172 private:
173 void flush()
174 {
175 if (m_state != empty) // either cachedCharacter or cachedCharacterHyphen
176 m_delegate.atomCharacterClassAtom(m_character);
177 if (m_state == cachedCharacterHyphen)
178 m_delegate.atomCharacterClassAtom('-');
179 m_state = empty;
180 }
181
182 Delegate& m_delegate;
183 ErrorCode& m_err;
184 enum CharacterClassConstructionState {
185 empty,
186 cachedCharacter,
187 cachedCharacterHyphen,
188 } m_state;
189 UChar m_character;
190 };
191
192 Parser(Delegate& delegate, const UString& pattern, unsigned backReferenceLimit)
193 : m_delegate(delegate)
194 , m_backReferenceLimit(backReferenceLimit)
195 , m_err(NoError)
196 , m_data(pattern.data())
197 , m_size(pattern.size())
198 , m_index(0)
199 , m_parenthesesNestingDepth(0)
200 {
201 }
202
203 /*
204 * parseEscape():
205 *
206 * Helper for parseTokens() AND parseCharacterClass().
207 * Unlike the other parser methods, this function does not report tokens
208 * directly to the member delegate (m_delegate), instead tokens are
209 * emitted to the delegate provided as an argument. In the case of atom
210 * escapes, parseTokens() will call parseEscape() passing m_delegate as
211 * an argument, and as such the escape will be reported to the delegate.
212 *
213 * However this method may also be used by parseCharacterClass(), in which
214 * case a CharacterClassParserDelegate will be passed as the delegate that
215 * tokens should be added to. A boolean flag is also provided to indicate
216 * whether that an escape in a CharacterClass is being parsed (some parsing
217 * rules change in this context).
218 *
219 * The boolean value returned by this method indicates whether the token
220 * parsed was an atom (outside of a characted class \b and \B will be
221 * interpreted as assertions).
222 */
223 template<bool inCharacterClass, class EscapeDelegate>
224 bool parseEscape(EscapeDelegate& delegate)
225 {
226 ASSERT(!m_err);
227 ASSERT(peek() == '\\');
228 consume();
229
230 if (atEndOfPattern()) {
231 m_err = EscapeUnterminated;
232 return false;
233 }
234
235 switch (peek()) {
236 // Assertions
237 case 'b':
238 consume();
239 if (inCharacterClass)
240 delegate.atomPatternCharacter('\b');
241 else {
242 delegate.assertionWordBoundary(false);
243 return false;
244 }
245 break;
246 case 'B':
247 consume();
248 if (inCharacterClass)
249 delegate.atomPatternCharacter('B');
250 else {
251 delegate.assertionWordBoundary(true);
252 return false;
253 }
254 break;
255
256 // CharacterClassEscape
257 case 'd':
258 consume();
259 delegate.atomBuiltInCharacterClass(DigitClassID, false);
260 break;
261 case 's':
262 consume();
263 delegate.atomBuiltInCharacterClass(SpaceClassID, false);
264 break;
265 case 'w':
266 consume();
267 delegate.atomBuiltInCharacterClass(WordClassID, false);
268 break;
269 case 'D':
270 consume();
271 delegate.atomBuiltInCharacterClass(DigitClassID, true);
272 break;
273 case 'S':
274 consume();
275 delegate.atomBuiltInCharacterClass(SpaceClassID, true);
276 break;
277 case 'W':
278 consume();
279 delegate.atomBuiltInCharacterClass(WordClassID, true);
280 break;
281
282 // DecimalEscape
283 case '1':
284 case '2':
285 case '3':
286 case '4':
287 case '5':
288 case '6':
289 case '7':
290 case '8':
291 case '9': {
292 // To match Firefox, we parse an invalid backreference in the range [1-7] as an octal escape.
293 // First, try to parse this as backreference.
294 if (!inCharacterClass) {
295 ParseState state = saveState();
296
297 unsigned backReference = consumeNumber();
298 if (backReference <= m_backReferenceLimit) {
299 delegate.atomBackReference(backReference);
300 break;
301 }
302
303 restoreState(state);
304 }
305
306 // Not a backreference, and not octal.
307 if (peek() >= '8') {
308 delegate.atomPatternCharacter('\\');
309 break;
310 }
311
312 // Fall-through to handle this as an octal escape.
313 }
314
315 // Octal escape
316 case '0':
317 delegate.atomPatternCharacter(consumeOctal());
318 break;
319
320 // ControlEscape
321 case 'f':
322 consume();
323 delegate.atomPatternCharacter('\f');
324 break;
325 case 'n':
326 consume();
327 delegate.atomPatternCharacter('\n');
328 break;
329 case 'r':
330 consume();
331 delegate.atomPatternCharacter('\r');
332 break;
333 case 't':
334 consume();
335 delegate.atomPatternCharacter('\t');
336 break;
337 case 'v':
338 consume();
339 delegate.atomPatternCharacter('\v');
340 break;
341
342 // ControlLetter
343 case 'c': {
344 ParseState state = saveState();
345 consume();
346 if (!atEndOfPattern()) {
347 int control = consume();
348
349 // To match Firefox, inside a character class, we also accept numbers and '_' as control characters.
350 if (inCharacterClass ? WTF::isASCIIAlphanumeric(control) || (control == '_') : WTF::isASCIIAlpha(control)) {
351 delegate.atomPatternCharacter(control & 0x1f);
352 break;
353 }
354 }
355 restoreState(state);
356 delegate.atomPatternCharacter('\\');
357 break;
358 }
359
360 // HexEscape
361 case 'x': {
362 consume();
363 int x = tryConsumeHex(2);
364 if (x == -1)
365 delegate.atomPatternCharacter('x');
366 else
367 delegate.atomPatternCharacter(x);
368 break;
369 }
370
371 // UnicodeEscape
372 case 'u': {
373 consume();
374 int u = tryConsumeHex(4);
375 if (u == -1)
376 delegate.atomPatternCharacter('u');
377 else
378 delegate.atomPatternCharacter(u);
379 break;
380 }
381
382 // IdentityEscape
383 default:
384 delegate.atomPatternCharacter(consume());
385 }
386
387 return true;
388 }
389
390 /*
391 * parseAtomEscape(), parseCharacterClassEscape():
392 *
393 * These methods alias to parseEscape().
394 */
395 bool parseAtomEscape()
396 {
397 return parseEscape<false>(m_delegate);
398 }
399 void parseCharacterClassEscape(CharacterClassParserDelegate& delegate)
400 {
401 parseEscape<true>(delegate);
402 }
403
404 /*
405 * parseCharacterClass():
406 *
407 * Helper for parseTokens(); calls dirctly and indirectly (via parseCharacterClassEscape)
408 * to an instance of CharacterClassParserDelegate, to describe the character class to the
409 * delegate.
410 */
411 void parseCharacterClass()
412 {
413 ASSERT(!m_err);
414 ASSERT(peek() == '[');
415 consume();
416
417 CharacterClassParserDelegate characterClassConstructor(m_delegate, m_err);
418
419 characterClassConstructor.begin(tryConsume('^'));
420
421 while (!atEndOfPattern()) {
422 switch (peek()) {
423 case ']':
424 consume();
425 characterClassConstructor.end();
426 return;
427
428 case '\\':
429 parseCharacterClassEscape(characterClassConstructor);
430 break;
431
432 default:
433 characterClassConstructor.atomPatternCharacterUnescaped(consume());
434 }
435
436 if (m_err)
437 return;
438 }
439
440 m_err = CharacterClassUnmatched;
441 }
442
443 /*
444 * parseParenthesesBegin():
445 *
446 * Helper for parseTokens(); checks for parentheses types other than regular capturing subpatterns.
447 */
448 void parseParenthesesBegin()
449 {
450 ASSERT(!m_err);
451 ASSERT(peek() == '(');
452 consume();
453
454 if (tryConsume('?')) {
455 if (atEndOfPattern()) {
456 m_err = ParenthesesTypeInvalid;
457 return;
458 }
459
460 switch (consume()) {
461 case ':':
462 m_delegate.atomParenthesesSubpatternBegin(false);
463 break;
464
465 case '=':
466 m_delegate.atomParentheticalAssertionBegin();
467 break;
468
469 case '!':
470 m_delegate.atomParentheticalAssertionBegin(true);
471 break;
472
473 default:
474 m_err = ParenthesesTypeInvalid;
475 }
476 } else
477 m_delegate.atomParenthesesSubpatternBegin();
478
479 ++m_parenthesesNestingDepth;
480 }
481
482 /*
483 * parseParenthesesEnd():
484 *
485 * Helper for parseTokens(); checks for parse errors (due to unmatched parentheses).
486 */
487 void parseParenthesesEnd()
488 {
489 ASSERT(!m_err);
490 ASSERT(peek() == ')');
491 consume();
492
493 if (m_parenthesesNestingDepth > 0)
494 m_delegate.atomParenthesesEnd();
495 else
496 m_err = ParenthesesUnmatched;
497
498 --m_parenthesesNestingDepth;
499 }
500
501 /*
502 * parseQuantifier():
503 *
504 * Helper for parseTokens(); checks for parse errors and non-greedy quantifiers.
505 */
506 void parseQuantifier(bool lastTokenWasAnAtom, unsigned min, unsigned max)
507 {
508 ASSERT(!m_err);
509 ASSERT(min <= max);
510
511 if (lastTokenWasAnAtom)
512 m_delegate.quantifyAtom(min, max, !tryConsume('?'));
513 else
514 m_err = QuantifierWithoutAtom;
515 }
516
517 /*
518 * parseTokens():
519 *
520 * This method loops over the input pattern reporting tokens to the delegate.
521 * The method returns when a parse error is detected, or the end of the pattern
522 * is reached. One piece of state is tracked around the loop, which is whether
523 * the last token passed to the delegate was an atom (this is necessary to detect
524 * a parse error when a quantifier provided without an atom to quantify).
525 */
526 void parseTokens()
527 {
528 bool lastTokenWasAnAtom = false;
529
530 while (!atEndOfPattern()) {
531 switch (peek()) {
532 case '|':
533 consume();
534 m_delegate.disjunction();
535 lastTokenWasAnAtom = false;
536 break;
537
538 case '(':
539 parseParenthesesBegin();
540 lastTokenWasAnAtom = false;
541 break;
542
543 case ')':
544 parseParenthesesEnd();
545 lastTokenWasAnAtom = true;
546 break;
547
548 case '^':
549 consume();
550 m_delegate.assertionBOL();
551 lastTokenWasAnAtom = false;
552 break;
553
554 case '$':
555 consume();
556 m_delegate.assertionEOL();
557 lastTokenWasAnAtom = false;
558 break;
559
560 case '.':
561 consume();
562 m_delegate.atomBuiltInCharacterClass(NewlineClassID, true);
563 lastTokenWasAnAtom = true;
564 break;
565
566 case '[':
567 parseCharacterClass();
568 lastTokenWasAnAtom = true;
569 break;
570
571 case '\\':
572 lastTokenWasAnAtom = parseAtomEscape();
573 break;
574
575 case '*':
576 consume();
577 parseQuantifier(lastTokenWasAnAtom, 0, UINT_MAX);
578 lastTokenWasAnAtom = false;
579 break;
580
581 case '+':
582 consume();
583 parseQuantifier(lastTokenWasAnAtom, 1, UINT_MAX);
584 lastTokenWasAnAtom = false;
585 break;
586
587 case '?':
588 consume();
589 parseQuantifier(lastTokenWasAnAtom, 0, 1);
590 lastTokenWasAnAtom = false;
591 break;
592
593 case '{': {
594 ParseState state = saveState();
595
596 consume();
597 if (peekIsDigit()) {
598 unsigned min = consumeNumber();
599 unsigned max = min;
600
601 if (tryConsume(','))
602 max = peekIsDigit() ? consumeNumber() : UINT_MAX;
603
604 if (tryConsume('}')) {
605 if (min <= max)
606 parseQuantifier(lastTokenWasAnAtom, min, max);
607 else
608 m_err = QuantifierOutOfOrder;
609 lastTokenWasAnAtom = false;
610 break;
611 }
612 }
613
614 restoreState(state);
615 } // if we did not find a complete quantifer, fall through to the default case.
616
617 default:
618 m_delegate.atomPatternCharacter(consume());
619 lastTokenWasAnAtom = true;
620 }
621
622 if (m_err)
623 return;
624 }
625
626 if (m_parenthesesNestingDepth > 0)
627 m_err = MissingParentheses;
628 }
629
630 /*
631 * parse():
632 *
633 * This method calls regexBegin(), calls parseTokens() to parse over the input
634 * patterns, calls regexEnd() or regexError() as appropriate, and converts any
635 * error code to a const char* for a result.
636 */
637 const char* parse()
638 {
639 m_delegate.regexBegin();
640
641 if (m_size > MAX_PATTERN_SIZE)
642 m_err = PatternTooLarge;
643 else
644 parseTokens();
645 ASSERT(atEndOfPattern() || m_err);
646
647 if (m_err)
648 m_delegate.regexError();
649 else
650 m_delegate.regexEnd();
651
652 // The order of this array must match the ErrorCode enum.
653 static const char* errorMessages[NumberOfErrorCodes] = {
654 0, // NoError
655 "regular expression too large",
656 "numbers out of order in {} quantifier",
657 "nothing to repeat",
658 "missing )",
659 "unmatched parentheses",
660 "unrecognized character after (?",
661 "missing terminating ] for character class",
662 "range out of order in character class",
663 "\\ at end of pattern"
664 };
665
666 return errorMessages[m_err];
667 }
668
669
670 // Misc helper functions:
671
672 typedef unsigned ParseState;
673
674 ParseState saveState()
675 {
676 return m_index;
677 }
678
679 void restoreState(ParseState state)
680 {
681 m_index = state;
682 }
683
684 bool atEndOfPattern()
685 {
686 ASSERT(m_index <= m_size);
687 return m_index == m_size;
688 }
689
690 int peek()
691 {
692 ASSERT(m_index < m_size);
693 return m_data[m_index];
694 }
695
696 bool peekIsDigit()
697 {
698 return !atEndOfPattern() && WTF::isASCIIDigit(peek());
699 }
700
701 unsigned peekDigit()
702 {
703 ASSERT(peekIsDigit());
704 return peek() - '0';
705 }
706
707 int consume()
708 {
709 ASSERT(m_index < m_size);
710 return m_data[m_index++];
711 }
712
713 unsigned consumeDigit()
714 {
715 ASSERT(peekIsDigit());
716 return consume() - '0';
717 }
718
719 unsigned consumeNumber()
720 {
721 unsigned n = consumeDigit();
722 // check for overflow.
723 for (unsigned newValue; peekIsDigit() && ((newValue = n * 10 + peekDigit()) >= n); ) {
724 n = newValue;
725 consume();
726 }
727 return n;
728 }
729
730 unsigned consumeOctal()
731 {
732 ASSERT(WTF::isASCIIOctalDigit(peek()));
733
734 unsigned n = consumeDigit();
735 while (n < 32 && !atEndOfPattern() && WTF::isASCIIOctalDigit(peek()))
736 n = n * 8 + consumeDigit();
737 return n;
738 }
739
740 bool tryConsume(UChar ch)
741 {
742 if (atEndOfPattern() || (m_data[m_index] != ch))
743 return false;
744 ++m_index;
745 return true;
746 }
747
748 int tryConsumeHex(int count)
749 {
750 ParseState state = saveState();
751
752 int n = 0;
753 while (count--) {
754 if (atEndOfPattern() || !WTF::isASCIIHexDigit(peek())) {
755 restoreState(state);
756 return -1;
757 }
758 n = (n << 4) | WTF::toASCIIHexValue(consume());
759 }
760 return n;
761 }
762
763 Delegate& m_delegate;
764 unsigned m_backReferenceLimit;
765 ErrorCode m_err;
766 const UChar* m_data;
767 unsigned m_size;
768 unsigned m_index;
769 unsigned m_parenthesesNestingDepth;
770
771 // Derived by empirical testing of compile time in PCRE and WREC.
772 static const unsigned MAX_PATTERN_SIZE = 1024 * 1024;
773 };
774
775 /*
776 * Yarr::parse():
777 *
778 * The parse method is passed a pattern to be parsed and a delegate upon which
779 * callbacks will be made to record the parsed tokens forming the regex.
780 * Yarr::parse() returns null on success, or a const C string providing an error
781 * message where a parse error occurs.
782 *
783 * The Delegate must implement the following interface:
784 *
785 * void assertionBOL();
786 * void assertionEOL();
787 * void assertionWordBoundary(bool invert);
788 *
789 * void atomPatternCharacter(UChar ch);
790 * void atomBuiltInCharacterClass(BuiltInCharacterClassID classID, bool invert);
791 * void atomCharacterClassBegin(bool invert)
792 * void atomCharacterClassAtom(UChar ch)
793 * void atomCharacterClassRange(UChar begin, UChar end)
794 * void atomCharacterClassBuiltIn(BuiltInCharacterClassID classID, bool invert)
795 * void atomCharacterClassEnd()
796 * void atomParenthesesSubpatternBegin(bool capture = true);
797 * void atomParentheticalAssertionBegin(bool invert = false);
798 * void atomParenthesesEnd();
799 * void atomBackReference(unsigned subpatternId);
800 *
801 * void quantifyAtom(unsigned min, unsigned max, bool greedy);
802 *
803 * void disjunction();
804 *
805 * void regexBegin();
806 * void regexEnd();
807 * void regexError();
808 *
809 * Before any call recording tokens are made, regexBegin() will be called on the
810 * delegate once. Once parsing is complete either regexEnd() or regexError() will
811 * be called, as appropriate.
812 *
813 * The regular expression is described by a sequence of assertion*() and atom*()
814 * callbacks to the delegate, describing the terms in the regular expression.
815 * Following an atom a quantifyAtom() call may occur to indicate that the previous
816 * atom should be quantified. In the case of atoms described across multiple
817 * calls (parentheses and character classes) the call to quantifyAtom() will come
818 * after the call to the atom*End() method, never after atom*Begin().
819 *
820 * Character classes may either be described by a single call to
821 * atomBuiltInCharacterClass(), or by a sequence of atomCharacterClass*() calls.
822 * In the latter case, ...Begin() will be called, followed by a sequence of
823 * calls to ...Atom(), ...Range(), and ...BuiltIn(), followed by a call to ...End().
824 *
825 * Sequences of atoms and assertions are broken into alternatives via calls to
826 * disjunction(). Assertions, atoms, and disjunctions emitted between calls to
827 * atomParenthesesBegin() and atomParenthesesEnd() form the body of a subpattern.
828 * atomParenthesesBegin() is passed a subpatternId. In the case of a regular
829 * capturing subpattern, this will be the subpatternId associated with these
830 * parentheses, and will also by definition be the lowest subpatternId of these
831 * parentheses and of any nested paretheses. The atomParenthesesEnd() method
832 * is passed the subpatternId of the last capturing subexpression nested within
833 * these paretheses. In the case of a capturing subpattern with no nested
834 * capturing subpatterns, the same subpatternId will be passed to the begin and
835 * end functions. In the case of non-capturing subpatterns the subpatternId
836 * passed to the begin method is also the first possible subpatternId that might
837 * be nested within these paretheses. If a set of non-capturing parentheses does
838 * not contain any capturing subpatterns, then the subpatternId passed to begin
839 * will be greater than the subpatternId passed to end.
840 */
841
842 template<class Delegate>
843 const char* parse(Delegate& delegate, const UString& pattern, unsigned backReferenceLimit = UINT_MAX)
844 {
845 return Parser<Delegate>(delegate, pattern, backReferenceLimit).parse();
846 }
847
848 } } // namespace JSC::Yarr
849
850 #endif
851
852 #endif // RegexParser_h