2 * Copyright (C) 2009 Apple Inc. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 #include "LiteralParser.h"
32 #include <wtf/ASCIICType.h>
// Lexer::lex: scans a token from the input range delimited by m_ptr and m_end
// and records the result in `token`.
// NOTE(review): this listing omits lines of the original function (braces,
// case labels, cursor increments and most return statements are not visible),
// so the comments below describe only the statements that are shown.
37 LiteralParser::TokenType
LiteralParser::Lexer::lex(LiteralParserToken
& token
)
// Loop while the cursor is inside the input and points at ASCII whitespace
// (the loop body is not visible in this listing).
39 while (m_ptr
< m_end
&& isASCIISpace(*m_ptr
))
// Sanity check: the cursor must not have moved past the end of the input.
42 ASSERT(m_ptr
<= m_end
);
// Both token bounds are set to the current cursor position. The condition
// guarding this statement, if any, is not visible here.
45 token
.start
= token
.end
= m_ptr
;
// token.type is set to TokError at this point.
48 token
.type
= TokError
;
// The following visible assignments store one specific punctuation token type
// each. Presumably every one sits under a case label for the matching
// character; TODO confirm against the full source.
52 token
.type
= TokLBracket
;
56 token
.type
= TokRBracket
;
60 token
.type
= TokLParen
;
64 token
.type
= TokRParen
;
68 token
.type
= TokLBrace
;
72 token
.type
= TokRBrace
;
76 token
.type
= TokComma
;
80 token
.type
= TokColon
;
// Delegates to lexString, choosing the template instantiation that matches
// the lexer's m_mode.
84 if (m_mode
== StrictJSON
)
85 return lexString
<StrictJSON
>(token
);
86 return lexString
<NonStrictJSON
>(token
);
// Keyword checks: each one first verifies that enough input remains and then
// compares the characters after the first one ("rue", "alse", "ull"). The test
// of the first character itself is not visible in this listing.
88 if (m_end
- m_ptr
>= 4 && m_ptr
[1] == 'r' && m_ptr
[2] == 'u' && m_ptr
[3] == 'e') {
96 if (m_end
- m_ptr
>= 5 && m_ptr
[1] == 'a' && m_ptr
[2] == 'l' && m_ptr
[3] == 's' && m_ptr
[4] == 'e') {
98 token
.type
= TokFalse
;
104 if (m_end
- m_ptr
>= 4 && m_ptr
[1] == 'u' && m_ptr
[2] == 'l' && m_ptr
[3] == 'l') {
106 token
.type
= TokNull
;
// Last visible path: the token is handed to lexNumber.
122 return lexNumber(token
);
127 template <LiteralParser::ParserMode mode
> static inline bool isSafeStringCharacter(UChar c
)
129 return (c
>= ' ' && (mode
== LiteralParser::StrictJSON
|| c
<= 0xff) && c
!= '\\' && c
!= '"') || c
== '\t';
// Lexer::lexString: lexes a string token and accumulates its contents in
// token.stringToken. Templated on the parser mode, which selects the
// safe-character test and whether backslash escapes are processed.
// NOTE(review): this listing omits lines of the original function (the `do`
// keyword, the escape case labels, cursor updates and error returns are not
// visible), so the comments below describe only the statements that are shown.
132 template <LiteralParser::ParserMode mode
> LiteralParser::TokenType
LiteralParser::Lexer::lexString(LiteralParserToken
& token
)
// runStart is declared without an initializer; its assignment is not visible
// in this listing.
135 const UChar
* runStart
;
// Start from an empty result string.
136 token
.stringToken
= UString();
// Loop while the cursor is inside the input and points at a character that
// isSafeStringCharacter<mode> accepts (loop body not visible here).
139 while (m_ptr
< m_end
&& isSafeStringCharacter
<mode
>(*m_ptr
))
// Append the run of characters between runStart and the cursor, if non-empty.
141 if (runStart
< m_ptr
)
142 token
.stringToken
.append(runStart
, m_ptr
- runStart
);
// Escape processing is entered only in StrictJSON mode, when the cursor is on
// a backslash.
143 if ((mode
== StrictJSON
) && m_ptr
< m_end
&& *m_ptr
== '\\') {
// Each append below adds one decoded character. Presumably each sits under
// the case label of its escape letter; TODO confirm against the full source.
149 token
.stringToken
.append('"');
153 token
.stringToken
.append('\\');
157 token
.stringToken
.append('/');
161 token
.stringToken
.append('\b');
165 token
.stringToken
.append('\f');
169 token
.stringToken
.append('\n');
173 token
.stringToken
.append('\r');
177 token
.stringToken
.append('\t');
// Unicode escape: checks that at least five characters remain, tests
// m_ptr[1] through m_ptr[4] with isASCIIHexDigit, then appends the result of
// convertUnicode on those four characters. The statements taken when a check
// fails are not visible here.
182 if ((m_end
- m_ptr
) < 5) // uNNNN == 5 characters
184 for (int i
= 1; i
< 5; i
++) {
185 if (!isASCIIHexDigit(m_ptr
[i
]))
188 token
.stringToken
.append(JSC::Lexer::convertUnicode(m_ptr
[1], m_ptr
[2], m_ptr
[3], m_ptr
[4]));
// The loop repeats only in StrictJSON mode, and only while the cursor differs
// from runStart, is still inside the input, and is not on a double quote.
196 } while ((mode
== StrictJSON
) && m_ptr
!= runStart
&& (m_ptr
< m_end
) && *m_ptr
!= '"');
// Detects input that ended, or a cursor that is not on a double quote; the
// statement executed in that case is not visible here.
198 if (m_ptr
>= m_end
|| *m_ptr
!= '"')
201 token
.type
= TokString
;
// Lexer::lexNumber: recognises a number following the grammar quoted in the
// comments below, then converts the token text with WTF::strtod and stores the
// result in token.numberToken.
// NOTE(review): this listing omits lines of the original function (cursor
// increments, error returns and the declarations of `i` and `end` are not
// visible), so the added comments describe only the statements that are shown.
206 LiteralParser::TokenType
LiteralParser::Lexer::lexNumber(LiteralParserToken
& token
)
208 // ES5 and json.org define numbers as
215 // -? digit1-9 digits?
220 // -?(0 | [1-9][0-9]*) ('.' [0-9]+)? ([eE][+-]? [0-9]+)?
222 if (m_ptr
< m_end
&& *m_ptr
== '-') // -?
226 if (m_ptr
< m_end
&& *m_ptr
== '0') // 0
228 else if (m_ptr
< m_end
&& *m_ptr
>= '1' && *m_ptr
<= '9') { // [1-9]
231 while (m_ptr
< m_end
&& isASCIIDigit(*m_ptr
))
// Fraction part: after a '.', the next check detects a missing digit (the
// statement executed in that case is not visible), then further digits are
// scanned.
237 if (m_ptr
< m_end
&& *m_ptr
== '.') {
240 if (m_ptr
>= m_end
|| !isASCIIDigit(*m_ptr
))
244 while (m_ptr
< m_end
&& isASCIIDigit(*m_ptr
))
248 // ([eE][+-]? [0-9]+)?
249 if (m_ptr
< m_end
&& (*m_ptr
== 'e' || *m_ptr
== 'E')) { // [eE]
253 if (m_ptr
< m_end
&& (*m_ptr
== '-' || *m_ptr
== '+'))
257 if (m_ptr
>= m_end
|| !isASCIIDigit(*m_ptr
))
261 while (m_ptr
< m_end
&& isASCIIDigit(*m_ptr
))
265 token
.type
= TokNumber
;
// Buffer of chars sized to the token length plus one. The extra element is
// presumably for a terminating NUL; the store is not visible in this listing.
267 Vector
<char, 64> buffer(token
.end
- token
.start
+ 1);
// Copy the token text into the buffer one character at a time, asserting
// that each character survives the narrowing cast to char unchanged.
269 for (i
= 0; i
< token
.end
- token
.start
; i
++) {
270 ASSERT(static_cast<char>(token
.start
[i
]) == token
.start
[i
]);
271 buffer
[i
] = static_cast<char>(token
.start
[i
]);
// Convert the buffer and assert that the end pointer reported by strtod lies
// exactly one token length past the start of the buffer.
275 token
.numberToken
= WTF::strtod(buffer
.data(), &end
);
276 ASSERT(buffer
.data() + (token
.end
- token
.start
) == end
);
// LiteralParser::parse: builds a JSValue from the tokens produced by m_lexer,
// starting in `initialState`. The visible code keeps three explicit stacks:
//   - objectStack: the arrays/objects currently under construction,
//   - stateStack: the state to resume once a nested expression has been parsed,
//   - identifierStack: property names waiting for their value.
// lastValue carries the most recently completed value between states.
// NOTE(review): this listing omits lines of the original function (presumably
// the enclosing loop and switch, the startParseArray/startParseObject labels,
// the declaration of lastValue, error returns and the end of the function are
// not visible), so the comments below describe only the statements shown.
280 JSValue
LiteralParser::parse(ParserState initialState
)
282 ParserState state
= initialState
;
283 MarkedArgumentBuffer objectStack
;
285 Vector
<ParserState
, 16> stateStack
;
286 Vector
<Identifier
, 16> identifierStack
;
// Array start: create an empty array and push it on objectStack.
290 case StartParseArray
: {
291 JSArray
* array
= constructEmptyArray(m_exec
);
292 objectStack
.append(array
);
// Array element start: when the next token is ']', the container on top of
// objectStack becomes lastValue and is popped. Otherwise
// DoParseArrayEndExpression is queued on stateStack and control jumps to the
// expression parser.
295 doParseArrayStartExpression
:
296 case DoParseArrayStartExpression
: {
297 if (m_lexer
.next() == TokRBracket
) {
299 lastValue
= objectStack
.last();
300 objectStack
.removeLast();
304 stateStack
.append(DoParseArrayEndExpression
);
305 goto startParseExpression
;
// Array element end: push lastValue onto the array on top of objectStack.
// A comma loops back for another element. The statement executed when the
// token is not ']' is not visible here.
307 case DoParseArrayEndExpression
: {
308 asArray(objectStack
.last())->push(m_exec
, lastValue
);
310 if (m_lexer
.currentToken().type
== TokComma
)
311 goto doParseArrayStartExpression
;
313 if (m_lexer
.currentToken().type
!= TokRBracket
)
317 lastValue
= objectStack
.last();
318 objectStack
.removeLast();
// Object start: create an empty object and push it on objectStack. If the
// next token is a string it is kept as the property name; after the ':' check
// the name is pushed on identifierStack, DoParseObjectEndExpression is queued
// and control jumps to the expression parser.
322 case StartParseObject
: {
323 JSObject
* object
= constructEmptyObject(m_exec
);
324 objectStack
.append(object
);
326 TokenType type
= m_lexer
.next();
327 if (type
== TokString
) {
328 Lexer::LiteralParserToken identifierToken
= m_lexer
.currentToken();
331 if (m_lexer
.next() != TokColon
)
335 identifierStack
.append(Identifier(m_exec
, identifierToken
.stringToken
));
336 stateStack
.append(DoParseObjectEndExpression
);
337 goto startParseExpression
;
338 } else if (type
!= TokRBrace
)
341 lastValue
= objectStack
.last();
342 objectStack
.removeLast();
// Subsequent property: same name/colon handling as above, entered after a
// comma. The statements executed when a check fails are not visible here.
345 doParseObjectStartExpression
:
346 case DoParseObjectStartExpression
: {
347 TokenType type
= m_lexer
.next();
348 if (type
!= TokString
)
350 Lexer::LiteralParserToken identifierToken
= m_lexer
.currentToken();
353 if (m_lexer
.next() != TokColon
)
357 identifierStack
.append(Identifier(m_exec
, identifierToken
.stringToken
));
358 stateStack
.append(DoParseObjectEndExpression
);
359 goto startParseExpression
;
// Property value complete: store lastValue on the object on top of
// objectStack under the name on top of identifierStack, then pop that name.
// A comma loops back for another property.
361 case DoParseObjectEndExpression
:
363 asObject(objectStack
.last())->putDirect(identifierStack
.last(), lastValue
);
364 identifierStack
.removeLast();
365 if (m_lexer
.currentToken().type
== TokComma
)
366 goto doParseObjectStartExpression
;
367 if (m_lexer
.currentToken().type
!= TokRBrace
)
370 lastValue
= objectStack
.last();
371 objectStack
.removeLast();
// Expression: dispatch on the current token type. Nested containers jump to
// their start labels; the other visible branches set lastValue from a string
// token, a number token, jsNull() or a boolean. The case labels are not
// visible in this listing.
374 startParseExpression
:
375 case StartParseExpression
: {
376 switch (m_lexer
.currentToken().type
) {
378 goto startParseArray
;
380 goto startParseObject
;
382 Lexer::LiteralParserToken stringToken
= m_lexer
.currentToken();
384 lastValue
= jsString(m_exec
, stringToken
.stringToken
);
388 Lexer::LiteralParserToken numberToken
= m_lexer
.currentToken();
390 lastValue
= jsNumber(m_exec
, numberToken
.numberToken
);
395 lastValue
= jsNull();
400 lastValue
= jsBoolean(true);
405 lastValue
= jsBoolean(false);
// Statement: dispatch on the current token type. One visible branch jumps
// straight to the expression parser; another first queues
// StartParseStatementEndStatement. The case labels are not visible here.
414 case StartParseStatement
: {
415 switch (m_lexer
.currentToken().type
) {
419 goto startParseExpression
;
423 stateStack
.append(StartParseStatementEndStatement
);
424 goto startParseExpression
;
// Statement end: asserts that no states are pending, then tests for ')' and
// for TokEnd as the next token. The statements those tests guard are not
// visible in this listing.
430 case StartParseStatementEndStatement
: {
431 ASSERT(stateStack
.isEmpty());
432 if (m_lexer
.currentToken().type
!= TokRParen
)
434 if (m_lexer
.next() == TokEnd
)
439 ASSERT_NOT_REACHED();
// Tests for an empty stateStack (the guarded statement is not visible), then
// resumes the state on top of stateStack and pops it.
441 if (stateStack
.isEmpty())
443 state
= stateStack
.last();
444 stateStack
.removeLast();