2 * Copyright (C) 2009 Apple Inc. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 #include "LiteralParser.h"
32 #include "StringBuilder.h"
33 #include <wtf/ASCIICType.h>
// Lexer::lex: classifies the lexeme at m_ptr and records it in |token|.
// NOTE(review): this listing is missing lines (braces, case labels, returns,
// cursor increments); the comments below cover only the visible statements.
38 LiteralParser::TokenType
LiteralParser::Lexer::lex(LiteralParserToken
& token
)
// Loop while input remains and the current character is ASCII whitespace
// (the loop body is not visible in this listing).
40 while (m_ptr
< m_end
&& isASCIISpace(*m_ptr
))
// The cursor must never have moved past the end of the input.
43 ASSERT(m_ptr
<= m_end
);
// Collapse the token's range onto the cursor: start == end == m_ptr.
46 token
.start
= token
.end
= m_ptr
;
// Tag the token as an error; presumably a default that the assignments below
// override once a lexeme is recognised -- confirm against the full source.
49 token
.type
= TokError
;
// Structural tokens: each assignment below tags the token with one bracket,
// parenthesis, brace, comma or colon type. The case labels that select them
// are not visible here; presumably '[', ']', '(', ')', '{', '}', ',' and ':'.
53 token
.type
= TokLBracket
;
57 token
.type
= TokRBracket
;
61 token
.type
= TokLParen
;
65 token
.type
= TokRParen
;
69 token
.type
= TokLBrace
;
73 token
.type
= TokRBrace
;
77 token
.type
= TokComma
;
81 token
.type
= TokColon
;
// String literal: dispatch to the lexString instantiation that matches the
// lexer's mode, so the mode is a compile-time constant inside lexString.
85 if (m_mode
== StrictJSON
)
86 return lexString
<StrictJSON
>(token
);
87 return lexString
<NonStrictJSON
>(token
);
// Keyword probe: at least 4 characters remain and the three after the current
// one spell "rue" (the check of the current character is not visible here).
89 if (m_end
- m_ptr
>= 4 && m_ptr
[1] == 'r' && m_ptr
[2] == 'u' && m_ptr
[3] == 'e') {
// Keyword probe: at least 5 characters remain and the four after the current
// one spell "alse".
97 if (m_end
- m_ptr
>= 5 && m_ptr
[1] == 'a' && m_ptr
[2] == 'l' && m_ptr
[3] == 's' && m_ptr
[4] == 'e') {
99 token
.type
= TokFalse
;
// Keyword probe: at least 4 characters remain and the three after the current
// one spell "ull".
105 if (m_end
- m_ptr
>= 4 && m_ptr
[1] == 'u' && m_ptr
[2] == 'l' && m_ptr
[3] == 'l') {
107 token
.type
= TokNull
;
// Numeric literal: lexNumber produces the token and its type is returned
// directly (the selecting case labels are not visible in this listing).
123 return lexNumber(token
);
128 template <LiteralParser::ParserMode mode
> static inline bool isSafeStringCharacter(UChar c
)
130 return (c
>= ' ' && (mode
== LiteralParser::StrictJSON
|| c
<= 0xff) && c
!= '\\' && c
!= '"') || c
== '\t';
// Lexer::lexString<mode>: lexes a string literal, accumulating its characters
// in a StringBuilder and publishing the result in token.stringToken.
// |mode| decides which characters isSafeStringCharacter<mode> accepts as plain
// text, and backslash escapes are only decoded when mode == StrictJSON.
// NOTE(review): this listing is missing lines (the "do" opener, the runStart
// assignment, escape case labels, returns, cursor increments); the comments
// below cover only the visible statements.
133 // "inline" is required here to help WINSCW compiler resolve specialized argument in templated functions.
134 template <LiteralParser::ParserMode mode
> inline LiteralParser::TokenType
LiteralParser::Lexer::lexString(LiteralParserToken
& token
)
// Start of the current run of plain characters (declared uninitialised; its
// assignment is not visible in this listing).
137 const UChar
* runStart
;
138 StringBuilder builder
;
// Scan across characters that need no escape handling, stopping at m_end.
141 while (m_ptr
< m_end
&& isSafeStringCharacter
<mode
>(*m_ptr
))
// Flush a non-empty run [runStart, m_ptr) into the builder with one append.
143 if (runStart
< m_ptr
)
144 builder
.append(runStart
, m_ptr
- runStart
);
// Escape sequences are decoded only in StrictJSON mode, and only when the
// character that stopped the run is a backslash.
145 if ((mode
== StrictJSON
) && m_ptr
< m_end
&& *m_ptr
== '\\') {
// Each append below adds the single character one escape stands for; the case
// labels selecting them are not visible in this listing.
155 builder
.append('\\');
163 builder
.append('\b');
167 builder
.append('\f');
171 builder
.append('\n');
175 builder
.append('\r');
179 builder
.append('\t');
// Unicode escape: the 'u' and its four digits must all lie inside the input.
184 if ((m_end
- m_ptr
) < 5) // uNNNN == 5 characters
// The four characters following the 'u' must each be an ASCII hex digit.
186 for (int i
= 1; i
< 5; i
++) {
187 if (!isASCIIHexDigit(m_ptr
[i
]))
// Append what convertUnicode yields for the four digits (presumably the single
// code unit they encode -- confirm against JSC::Lexer::convertUnicode).
190 builder
.append(JSC::Lexer::convertUnicode(m_ptr
[1], m_ptr
[2], m_ptr
[3], m_ptr
[4]));
// Repeat only in StrictJSON mode, and only while the cursor has moved past
// runStart, input remains, and the closing quote has not been reached.
198 } while ((mode
== StrictJSON
) && m_ptr
!= runStart
&& (m_ptr
< m_end
) && *m_ptr
!= '"');
// Checks for a missing closing quote: input exhausted, or the current
// character is not '"' (the consequence is not visible; presumably an error).
200 if (m_ptr
>= m_end
|| *m_ptr
!= '"')
// Hand the accumulated text to the token and tag it as a string.
203 token
.stringToken
= builder
.release();
204 token
.type
= TokString
;
// Lexer::lexNumber: matches a number against the grammar quoted below, tags
// |token| as TokNumber and stores the converted double in token.numberToken.
// NOTE(review): this listing is missing lines (braces, cursor increments,
// error returns, local declarations); the comments below cover only the
// visible statements.
209 LiteralParser::TokenType
LiteralParser::Lexer::lexNumber(LiteralParserToken
& token
)
211 // ES5 and json.org define numbers as
218 // -? digit1-9 digits?
223 // -?(0 | [1-9][0-9]*) ('.' [0-9]+)? ([eE][+-]? [0-9]+)?
// Optional leading minus sign.
225 if (m_ptr
< m_end
&& *m_ptr
== '-') // -?
// Integer part: either a lone '0' or a non-zero digit followed by more digits.
229 if (m_ptr
< m_end
&& *m_ptr
== '0') // 0
231 else if (m_ptr
< m_end
&& *m_ptr
>= '1' && *m_ptr
<= '9') { // [1-9]
234 while (m_ptr
< m_end
&& isASCIIDigit(*m_ptr
))
// Optional fraction: a '.' must be followed by at least one digit.
240 if (m_ptr
< m_end
&& *m_ptr
== '.') {
// Detects a '.' with no digit after it (input ended or a non-digit follows).
243 if (m_ptr
>= m_end
|| !isASCIIDigit(*m_ptr
))
247 while (m_ptr
< m_end
&& isASCIIDigit(*m_ptr
))
251 // ([eE][+-]? [0-9]+)?
252 if (m_ptr
< m_end
&& (*m_ptr
== 'e' || *m_ptr
== 'E')) { // [eE]
// Optional exponent sign.
256 if (m_ptr
< m_end
&& (*m_ptr
== '-' || *m_ptr
== '+'))
// Detects an exponent marker with no digit after it.
260 if (m_ptr
>= m_end
|| !isASCIIDigit(*m_ptr
))
264 while (m_ptr
< m_end
&& isASCIIDigit(*m_ptr
))
268 token
.type
= TokNumber
;
// Narrow buffer sized one element larger than the token text (presumably room
// for a terminating NUL written on a line not shown), inline capacity 64.
270 Vector
<char, 64> buffer(token
.end
- token
.start
+ 1);
// Copy the token text into the buffer one character at a time.
272 for (i
= 0; i
< token
.end
- token
.start
; i
++) {
// Every character of the matched text must survive narrowing to char.
273 ASSERT(static_cast<char>(token
.start
[i
]) == token
.start
[i
]);
274 buffer
[i
] = static_cast<char>(token
.start
[i
]);
// Convert the narrow copy to a double; |end| receives where strtod stopped.
278 token
.numberToken
= WTF::strtod(buffer
.data(), &end
);
// strtod must have consumed exactly the characters the lexer matched.
279 ASSERT(buffer
.data() + (token
.end
- token
.start
) == end
);
// LiteralParser::parse: drives the parser as an explicit state machine that
// starts in |initialState|. Nesting is tracked with explicit stacks instead of
// recursion: objectStack holds the arrays/objects being filled, stateStack the
// states to resume after a nested expression, and identifierStack the property
// names awaiting their values. lastValue carries the most recently completed
// value between states.
// NOTE(review): this listing is missing lines (the enclosing loop/switch,
// braces, several labels and case labels, returns); the comments below cover
// only the visible statements.
283 JSValue
LiteralParser::parse(ParserState initialState
)
285 ParserState state
= initialState
;
286 MarkedArgumentBuffer objectStack
;
288 Vector
<ParserState
, 16> stateStack
;
289 Vector
<Identifier
, 16> identifierStack
;
// Array start: create an empty array and push it as the container to fill.
293 case StartParseArray
: {
294 JSArray
* array
= constructEmptyArray(m_exec
);
295 objectStack
.append(array
);
// Before each array element. Also entered by goto after a comma.
298 doParseArrayStartExpression
:
299 case DoParseArrayStartExpression
: {
// Capture the previous token's type before advancing the lexer, so that a ']'
// arriving directly after a ',' can be told apart.
300 TokenType lastToken
= m_lexer
.currentToken().type
;
301 if (m_lexer
.next() == TokRBracket
) {
// ']' straight after ',' (the consequence is not visible; presumably an error).
302 if (lastToken
== TokComma
)
// The array is finished: pop it and make it the current value.
305 lastValue
= objectStack
.last();
306 objectStack
.removeLast();
// Otherwise parse one element and resume at DoParseArrayEndExpression.
310 stateStack
.append(DoParseArrayEndExpression
);
311 goto startParseExpression
;
// After an array element: append the parsed value to the array on top of the
// object stack.
313 case DoParseArrayEndExpression
: {
314 asArray(objectStack
.last())->push(m_exec
, lastValue
);
// A comma means another element follows.
316 if (m_lexer
.currentToken().type
== TokComma
)
317 goto doParseArrayStartExpression
;
// Anything other than ']' here (the consequence is not visible; presumably an
// error).
319 if (m_lexer
.currentToken().type
!= TokRBracket
)
// The array is finished: pop it and make it the current value.
323 lastValue
= objectStack
.last();
324 objectStack
.removeLast();
// Object start: create an empty object and push it as the container to fill.
328 case StartParseObject
: {
329 JSObject
* object
= constructEmptyObject(m_exec
);
330 objectStack
.append(object
);
332 TokenType type
= m_lexer
.next();
// First property: a string key ...
333 if (type
== TokString
) {
// Copy the key token before the lexer advances past it.
334 Lexer::LiteralParserToken identifierToken
= m_lexer
.currentToken();
// ... which must be followed by ':' (the consequence of a mismatch is not
// visible; presumably an error).
337 if (m_lexer
.next() != TokColon
)
// Remember the property name, then parse its value and resume at
// DoParseObjectEndExpression.
341 identifierStack
.append(Identifier(m_exec
, identifierToken
.stringToken
));
342 stateStack
.append(DoParseObjectEndExpression
);
343 goto startParseExpression
;
// Neither a key nor '}' (the consequence is not visible; presumably an error).
344 } else if (type
!= TokRBrace
)
// Empty object: pop it and make it the current value.
347 lastValue
= objectStack
.last();
348 objectStack
.removeLast();
// Before each subsequent property. Entered by goto after a comma.
351 doParseObjectStartExpression
:
352 case DoParseObjectStartExpression
: {
353 TokenType type
= m_lexer
.next();
// The next token has to be a string key (the consequence of a mismatch is not
// visible; presumably an error).
354 if (type
!= TokString
)
// Copy the key token before the lexer advances past it.
356 Lexer::LiteralParserToken identifierToken
= m_lexer
.currentToken();
// The key must be followed by ':'.
359 if (m_lexer
.next() != TokColon
)
// Remember the property name, then parse its value and resume at
// DoParseObjectEndExpression.
363 identifierStack
.append(Identifier(m_exec
, identifierToken
.stringToken
));
364 stateStack
.append(DoParseObjectEndExpression
);
365 goto startParseExpression
;
// After a property value: store it on the object on top of the object stack
// under the most recently pushed name, then drop that name.
367 case DoParseObjectEndExpression
:
369 asObject(objectStack
.last())->putDirect(identifierStack
.last(), lastValue
);
370 identifierStack
.removeLast();
// A comma means another property follows.
371 if (m_lexer
.currentToken().type
== TokComma
)
372 goto doParseObjectStartExpression
;
// Anything other than '}' here (the consequence is not visible; presumably an
// error).
373 if (m_lexer
.currentToken().type
!= TokRBrace
)
// The object is finished: pop it and make it the current value.
376 lastValue
= objectStack
.last();
377 objectStack
.removeLast();
// Expression: dispatch on the current token's type. The case labels are not
// visible in this listing.
380 startParseExpression
:
381 case StartParseExpression
: {
382 switch (m_lexer
.currentToken().type
) {
// Nested containers jump to their start states.
384 goto startParseArray
;
386 goto startParseObject
;
// String token: wrap its text as a JS string value.
388 Lexer::LiteralParserToken stringToken
= m_lexer
.currentToken();
390 lastValue
= jsString(m_exec
, stringToken
.stringToken
);
// Number token: wrap its double as a JS number value.
394 Lexer::LiteralParserToken numberToken
= m_lexer
.currentToken();
396 lastValue
= jsNumber(m_exec
, numberToken
.numberToken
);
// The remaining literals map to fixed values: null, true and false.
401 lastValue
= jsNull();
406 lastValue
= jsBoolean(true);
411 lastValue
= jsBoolean(false);
// Statement: dispatch on the current token's type. The case labels are not
// visible in this listing.
420 case StartParseStatement
: {
421 switch (m_lexer
.currentToken().type
) {
// Some token types are parsed directly as an expression.
425 goto startParseExpression
;
// Others first schedule StartParseStatementEndStatement as the state to resume
// afterwards (presumably for a parenthesised expression -- confirm against the
// full source).
429 stateStack
.append(StartParseStatementEndStatement
);
430 goto startParseExpression
;
// End of such a statement: no resume states may be left over.
436 case StartParseStatementEndStatement
: {
437 ASSERT(stateStack
.isEmpty());
// The current token has to be ')' (the consequence of a mismatch is not
// visible; presumably an error).
438 if (m_lexer
.currentToken().type
!= TokRParen
)
// Checks whether the token after ')' is the end of input.
440 if (m_lexer
.next() == TokEnd
)
// Reaching this assertion means |state| held a value no case handles.
445 ASSERT_NOT_REACHED();
// With no states left to resume the parse is presumably complete (the action
// is not visible); otherwise pop the next state to run.
447 if (stateStack
.isEmpty())
449 state
= stateStack
.last();
450 stateStack
.removeLast();