2 * Copyright (C) 2008 Apple Inc. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 #include <wtf/Platform.h>
34 #include "Quantifier.h"
36 #include "WRECGenerator.h"
37 #include <wtf/ASCIICType.h>
39 namespace JSC
{ namespace WREC
{
// Forward declaration only; CharacterClass is used below purely by const
// reference, so its full definition is not needed here.
struct CharacterClass;
44 typedef Generator::JumpList JumpList
;
45 typedef Generator::ParenthesesType ParenthesesType
;
47 friend class SavedState
;
// Constructs a parser over `pattern`: caches the pattern's character data
// and length, and records the ignoreCase / multiline flags.
// NOTE(review): the first entry of the initializer list and the constructor
// body are not visible in this excerpt.
50 Parser(const UString
& pattern
, bool ignoreCase
, bool multiline
)
52 , m_data(pattern
.data())
53 , m_size(pattern
.size())
54 , m_ignoreCase(ignoreCase
)
55 , m_multiline(multiline
)
60 Generator
& generator() { return m_generator
; }
62 bool ignoreCase() const { return m_ignoreCase
; }
63 bool multiline() const { return m_multiline
; }
65 void recordSubpattern() { ++m_numSubpatterns
; }
66 unsigned numSubpatterns() const { return m_numSubpatterns
; }
68 const char* error() const { return m_error
; }
69 const char* syntaxError() const { return m_error
== ParenthesesNotSupported
? 0 : m_error
; }
// Top-level parse entry point: parses a disjunction, passing `failures`
// through, then checks that the whole pattern was consumed. If input remains,
// ParenthesesUnmatched is recorded via setError().
// NOTE(review): some lines of this function's body are not visible in this
// excerpt.
71 void parsePattern(JumpList
& failures
)
75 parseDisjunction(failures
);
77 if (peek() != EndOfPattern
)
78 setError(ParenthesesUnmatched
); // Parsing the pattern should fully consume it.
81 void parseDisjunction(JumpList
& failures
);
82 void parseAlternative(JumpList
& failures
);
83 bool parseTerm(JumpList
& failures
);
84 bool parseNonCharacterEscape(JumpList
& failures
, const Escape
&);
85 bool parseParentheses(JumpList
& failures
);
86 bool parseCharacterClass(JumpList
& failures
);
87 bool parseCharacterClassQuantifier(JumpList
& failures
, const CharacterClass
& charClass
, bool invert
);
88 bool parseBackreferenceQuantifier(JumpList
& failures
, unsigned subpatternId
);
// SavedState: snapshots the parser's read position (parser.m_index) at
// construction time.
// NOTE(review): the class header, the first initializer and the remaining
// members are not visible in this excerpt.
93 SavedState(Parser
& parser
)
95 , m_index(parser
.m_index
)
// Writes the saved index back into the parser, rewinding its read position
// to where it was when this SavedState was constructed. (The enclosing
// member function's header is not visible here.)
101 m_parser
.m_index
= m_index
;
// Clears the subpattern counter. (Fragment: the enclosing function is not
// visible in this excerpt.)
112 m_numSubpatterns
= 0;
// Records a parse error given as a C string.
// NOTE(review): the body is not visible in this excerpt; presumably it stores
// `error` into m_error -- confirm.
116 void setError(const char* error
)
// Bounds check on the read position. The statement taken when the index is
// at or past the end is not visible in this excerpt (presumably it returns
// EndOfPattern -- confirm).
125 if (m_index
>= m_size
)
// In range: yield the character at the current position without advancing.
127 return m_data
[m_index
];
// Bounds check on the read position. The statement taken when the index is
// at or past the end is not visible in this excerpt (presumably it returns
// EndOfPattern -- confirm).
132 if (m_index
>= m_size
)
// In range: yield the character at the current position, then advance
// m_index by one (post-increment).
134 return m_data
[m_index
++];
// True when the next unconsumed character (as reported by peek()) is an
// ASCII digit. (Fragment: the function header is not visible here.)
139 return WTF::isASCIIDigit(peek());
// Precondition check: the next character must be a digit. (Fragment: the
// enclosing function and the rest of its body are not visible here.)
144 ASSERT(peekIsDigit());
// Consumes the next character, which must be a digit (asserted), and returns
// its numeric value by subtracting '0'.
148 unsigned consumeDigit()
150 ASSERT(peekIsDigit());
151 return consume() - '0';
// Consumes a run of digits: the first digit seeds the accumulator, then the
// loop continues for as long as the next character is a digit.
// NOTE(review): the loop body and the return statement are not visible in
// this excerpt. The accumulator is a signed int while the function returns
// unsigned -- confirm how long digit runs (overflow) are meant to behave.
154 unsigned consumeNumber()
156 int n
= consumeDigit();
157 while (peekIsDigit()) {
// Consumes hexadecimal digits into an integer.
// NOTE(review): the loop header, the use of `count`, the declaration of `n`
// and what happens on a non-hex character are not visible in this excerpt.
164 int consumeHex(int count
)
// Guard: the next character is not a hex digit (the statement it controls is
// not visible here).
168 if (!WTF::isASCIIHexDigit(peek()))
// Shift the accumulated value left by one hex digit (4 bits) and OR in the
// value of the digit just consumed.
170 n
= (n
<< 4) | WTF::toASCIIHexValue(consume());
// Consumes octal digits, accumulating them base 8. The loop stops once the
// accumulated value reaches 32 or more, or when the next character is not an
// octal digit.
// NOTE(review): the declaration of `n` and the return statement are not
// visible in this excerpt.
175 unsigned consumeOctal()
178 while (n
< 32 && WTF::isASCIIOctalDigit(peek()))
179 n
= n
* 8 + consumeDigit();
183 ALWAYS_INLINE Quantifier
consumeGreedyQuantifier();
184 Quantifier
consumeQuantifier();
185 Escape
consumeEscape(bool inCharacterClass
);
186 ParenthesesType
consumeParenthesesType();
// Sentinel value that peek() results are compared against to detect that the
// whole pattern has been consumed.
static const int EndOfPattern = -1;
// Error-message constants, declared here and defined elsewhere. They are
// passed to setError() and compared by pointer (see syntaxError()).
static const char* QuantifierOutOfOrder;
static const char* QuantifierWithoutAtom;
static const char* ParenthesesUnmatched;
static const char* ParenthesesTypeInvalid;
static const char* ParenthesesNotSupported;
static const char* CharacterClassUnmatched;
static const char* CharacterClassOutOfOrder;
static const char* EscapeUnterminated;
200 Generator m_generator
;
206 unsigned m_numSubpatterns
;
210 } } // namespace JSC::WREC
212 #endif // ENABLE(WREC)