2 * Copyright (C) 2009 Apple Inc. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 #include "LiteralParser.h"
32 #include "UStringBuilder.h"
33 #include <wtf/ASCIICType.h>
38 static inline bool isJSONWhiteSpace(const UChar
& c
)
40 // The JSON RFC 4627 defines a list of allowed characters to be considered
41 // insignificant white space: http://www.ietf.org/rfc/rfc4627.txt (2. JSON Grammar).
42 return c
== ' ' || c
== 0x9 || c
== 0xA || c
== 0xD;
// Tries to read the input as JSONP. Each pass of the loop that is closed at
// the bottom (`} while (... == TokIdentifier)`) reads an identifier-led path,
// parses a value with parse(StartParseExpression), and appends a JSONPData
// holding that value and path to `results`. The function's final result is
// whether the lexer's current token is TokEnd.
// NOTE(review): the embedded line numbers skip, so several original lines
// (including the statements guarded by the checks below) are not visible in
// this listing; the comments describe only what is shown.
45 bool LiteralParser::tryJSONPParse(Vector
<JSONPData
>& results
, bool needsFullSourceInfo
)
// The very first token is checked for being an identifier.
47 if (m_lexer
.next() != TokIdentifier
)
50 Vector
<JSONPPathEntry
> path
;
51 // Unguarded next to start off the lexer
// The identifier text is taken straight from the current token's
// [start, end) range.
52 Identifier name
= Identifier(m_exec
, m_lexer
.currentToken().start
, m_lexer
.currentToken().end
- m_lexer
.currentToken().start
);
// Leading `var` keyword: the identifier that follows is recorded as a
// Declare path entry.
54 if (name
== m_exec
->globalData().propertyNames
->varKeyword
) {
55 if (m_lexer
.next() != TokIdentifier
)
57 entry
.m_type
= JSONPPathEntryTypeDeclare
;
58 entry
.m_pathEntryName
= Identifier(m_exec
, m_lexer
.currentToken().start
, m_lexer
.currentToken().end
- m_lexer
.currentToken().start
);
// NOTE(review): presumably the else-branch of the `var` check (the `else`
// line is not visible): the current identifier is recorded as a Dot entry.
61 entry
.m_type
= JSONPPathEntryTypeDot
;
62 entry
.m_pathEntryName
= Identifier(m_exec
, m_lexer
.currentToken().start
, m_lexer
.currentToken().end
- m_lexer
.currentToken().start
);
// The entry name is tested against the global lexer's keyword list.
65 if (m_exec
->globalData().lexer
->isKeyword(entry
.m_pathEntryName
))
// Walk the remaining path components until the assignment token is reached.
67 TokenType tokenType
= m_lexer
.next();
68 while (tokenType
!= TokAssign
) {
// Index lookup component: the token after it is checked for being a number,
// and that number for being a non-negative integer that survives the
// double -> int round trip.
71 entry
.m_type
= JSONPPathEntryTypeLookup
;
72 if (m_lexer
.next() != TokNumber
)
74 double doubleIndex
= m_lexer
.currentToken().numberToken
;
75 int index
= (int)doubleIndex
;
76 if (index
!= doubleIndex
|| index
< 0)
78 entry
.m_pathIndex
= index
;
79 if (m_lexer
.next() != TokRBracket
)
// Dot component: the following token is checked for being an identifier,
// which becomes the entry name.
84 entry
.m_type
= JSONPPathEntryTypeDot
;
85 if (m_lexer
.next() != TokIdentifier
)
87 entry
.m_pathEntryName
= Identifier(m_exec
, m_lexer
.currentToken().start
, m_lexer
.currentToken().end
- m_lexer
.currentToken().start
);
// Call component: tests that the last path entry is a Dot entry and that
// needsFullSourceInfo is false, then retags that last entry as a Call.
91 if (path
.last().m_type
!= JSONPPathEntryTypeDot
|| needsFullSourceInfo
)
93 path
.last().m_type
= JSONPPathEntryTypeCall
;
101 tokenType
= m_lexer
.next();
// Parse the value as an expression and store it in a fresh JSONPData record.
105 results
.append(JSONPData());
106 results
.last().m_value
.set(m_exec
->globalData(), parse(StartParseExpression
));
107 if (!results
.last().m_value
)
// Hand the accumulated path over to the result record (swap, not copy).
109 results
.last().m_path
.swap(path
);
// For a Call entry the token after the value is checked against TokRParen.
110 if (entry
.m_type
== JSONPPathEntryTypeCall
) {
111 if (m_lexer
.currentToken().type
!= TokRParen
)
// The current token is compared with TokSemi before the loop condition.
115 if (m_lexer
.currentToken().type
!= TokSemi
)
// Another pass runs only while the current token is an identifier.
118 } while (m_lexer
.currentToken().type
== TokIdentifier
);
119 return m_lexer
.currentToken().type
== TokEnd
;
122 ALWAYS_INLINE
const Identifier
LiteralParser::makeIdentifier(const UChar
* characters
, size_t length
)
125 return m_exec
->globalData().propertyNames
->emptyIdentifier
;
126 if (characters
[0] >= MaximumCachableCharacter
)
127 return Identifier(&m_exec
->globalData(), characters
, length
);
130 if (!m_shortIdentifiers
[characters
[0]].isNull())
131 return m_shortIdentifiers
[characters
[0]];
132 m_shortIdentifiers
[characters
[0]] = Identifier(&m_exec
->globalData(), characters
, length
);
133 return m_shortIdentifiers
[characters
[0]];
135 if (!m_recentIdentifiers
[characters
[0]].isNull() && Identifier::equal(m_recentIdentifiers
[characters
[0]].impl(), characters
, length
))
136 return m_recentIdentifiers
[characters
[0]];
137 m_recentIdentifiers
[characters
[0]] = Identifier(&m_exec
->globalData(), characters
, length
);
138 return m_recentIdentifiers
[characters
[0]];
// Lexes one token from the range [m_ptr, m_end) into |token|. The template
// parameter |mode| is forwarded to lexString and decides whether a
// single-quoted string is handled (see the StrictJSON test near the end).
// NOTE(review): the embedded line numbers skip; the switch/case skeleton,
// the pointer advances and most return statements are not visible in this
// listing, so the comments describe only what is shown.
141 template <LiteralParser::ParserMode mode
> LiteralParser::TokenType
LiteralParser::Lexer::lex(LiteralParserToken
& token
)
// Skip leading JSON white space.
143 while (m_ptr
< m_end
&& isJSONWhiteSpace(*m_ptr
))
146 ASSERT(m_ptr
<= m_end
);
// End of input: the token's start and end both point at m_ptr.
147 if (m_ptr
>= m_end
) {
149 token
.start
= token
.end
= m_ptr
;
// The token type is set to TokError here; the punctuation assignments that
// follow overwrite it with the specific token kind.
152 token
.type
= TokError
;
156 token
.type
= TokLBracket
;
160 token
.type
= TokRBracket
;
164 token
.type
= TokLParen
;
168 token
.type
= TokRParen
;
172 token
.type
= TokLBrace
;
176 token
.type
= TokRBrace
;
180 token
.type
= TokComma
;
184 token
.type
= TokColon
;
// Double-quoted string.
188 return lexString
<mode
, '"'>(token
);
// Keyword `true`: needs at least 4 remaining characters; only the characters
// after the first are compared here.
190 if (m_end
- m_ptr
>= 4 && m_ptr
[1] == 'r' && m_ptr
[2] == 'u' && m_ptr
[3] == 'e') {
192 token
.type
= TokTrue
;
// Keyword `false`: needs at least 5 remaining characters.
198 if (m_end
- m_ptr
>= 5 && m_ptr
[1] == 'a' && m_ptr
[2] == 'l' && m_ptr
[3] == 's' && m_ptr
[4] == 'e') {
200 token
.type
= TokFalse
;
// Keyword `null`: needs at least 4 remaining characters.
206 if (m_end
- m_ptr
>= 4 && m_ptr
[1] == 'u' && m_ptr
[2] == 'l' && m_ptr
[3] == 'l') {
208 token
.type
= TokNull
;
// Numeric literal.
224 return lexNumber(token
);
233 token
.type
= TokAssign
;
238 token
.type
= TokSemi
;
// Identifier: an ASCII letter, '_' or '$', followed by any run of ASCII
// alphanumerics, '_' or '$'. The token's string fields point directly into
// the source buffer.
242 if (isASCIIAlpha(*m_ptr
) || *m_ptr
== '_' || *m_ptr
== '$') {
243 while (m_ptr
< m_end
&& (isASCIIAlphanumeric(*m_ptr
) || *m_ptr
== '_' || *m_ptr
== '$'))
245 token
.stringToken
= token
.start
;
246 token
.stringLength
= m_ptr
- token
.start
;
247 token
.type
= TokIdentifier
;
249 return TokIdentifier
;
// Single-quoted string: StrictJSON mode is tested first (the statement it
// guards is not visible); otherwise the string is lexed with a '\''
// terminator.
251 if (*m_ptr
== '\'') {
252 if (mode
== StrictJSON
)
254 return lexString
<mode
, '\''>(token
);
260 LiteralParser::TokenType
LiteralParser::Lexer::next()
262 if (m_mode
== NonStrictJSON
)
263 return lex
<NonStrictJSON
>(m_currentToken
);
265 return lex
<JSONP
>(m_currentToken
);
266 return lex
<StrictJSON
>(m_currentToken
);
269 template <LiteralParser::ParserMode mode
, UChar terminator
> static inline bool isSafeStringCharacter(UChar c
)
271 return (c
>= ' ' && (mode
== LiteralParser::StrictJSON
|| c
<= 0xff) && c
!= '\\' && c
!= terminator
) || c
== '\t';
274 // "inline" is required here to help WINSCW compiler resolve specialized argument in templated functions.
// Lexes a string literal that ends at |terminator|. Runs of characters
// accepted by isSafeStringCharacter are scanned in bulk; backslash escapes
// are decoded into a UStringBuilder. When nothing was appended to the
// builder, the token points straight at the source run; otherwise the token
// owns the decoded text through token.stringBuffer.
// NOTE(review): the embedded line numbers skip; the escape switch skeleton,
// the pointer advances and the failure returns are not visible in this
// listing, so the comments describe only what is shown.
275 template <LiteralParser::ParserMode mode
, UChar terminator
> inline LiteralParser::TokenType
LiteralParser::Lexer::lexString(LiteralParserToken
& token
)
278 const UChar
* runStart
= m_ptr
;
279 UStringBuilder builder
;
// Scan forward over characters that need no decoding.
282 while (m_ptr
< m_end
&& isSafeStringCharacter
<mode
, terminator
>(*m_ptr
))
// Once the builder already holds text, each further plain run is appended.
284 if (builder
.length())
285 builder
.append(runStart
, m_ptr
- runStart
);
// Escape handling is compiled in only for modes other than NonStrictJSON.
286 if ((mode
!= NonStrictJSON
) && m_ptr
< m_end
&& *m_ptr
== '\\') {
// First escape seen: flush the plain run that preceded it into the builder.
287 if (builder
.isEmpty() && runStart
< m_ptr
)
288 builder
.append(runStart
, m_ptr
- runStart
);
// Decoded values appended for the individual escape sequences.
298 builder
.append('\\');
306 builder
.append('\b');
310 builder
.append('\f');
314 builder
.append('\n');
318 builder
.append('\r');
322 builder
.append('\t');
// Unicode escape: checks that 5 characters remain and that the four after
// the 'u' are hex digits before converting them.
327 if ((m_end
- m_ptr
) < 5) // uNNNN == 5 characters
329 for (int i
= 1; i
< 5; i
++) {
330 if (!isASCIIHexDigit(m_ptr
[i
]))
333 builder
.append(JSC::Lexer::convertUnicode(m_ptr
[1], m_ptr
[2], m_ptr
[3], m_ptr
[4]));
// An escaped single quote is handled only outside StrictJSON mode.
338 if (*m_ptr
== '\'' && mode
!= StrictJSON
) {
339 builder
.append('\'');
// Keep looping (never in NonStrictJSON mode) while progress was made, input
// remains and the terminator has not been reached.
346 } while ((mode
!= NonStrictJSON
) && m_ptr
!= runStart
&& (m_ptr
< m_end
) && *m_ptr
!= terminator
);
// Test for a string that is not closed by the terminator.
348 if (m_ptr
>= m_end
|| *m_ptr
!= terminator
)
// No escapes were decoded: reference the source characters directly.
351 if (builder
.isEmpty()) {
352 token
.stringBuffer
= UString();
353 token
.stringToken
= runStart
;
354 token
.stringLength
= m_ptr
- runStart
;
// Otherwise the token refers to the decoded copy held in stringBuffer.
356 token
.stringBuffer
= builder
.toUString();
357 token
.stringToken
= token
.stringBuffer
.characters();
358 token
.stringLength
= token
.stringBuffer
.length();
360 token
.type
= TokString
;
// Lexes a JSON number into |token|. The visible code has two conversion
// paths: short integer literals are accumulated digit by digit, and
// everything else is copied into a char buffer and converted with
// WTF::strtod.
// NOTE(review): the embedded line numbers skip; the pointer advances, the
// failure returns and some declarations are not visible in this listing, so
// the comments describe only what is shown.
365 LiteralParser::TokenType
LiteralParser::Lexer::lexNumber(LiteralParserToken
& token
)
367 // ES5 and json.org define numbers as
374 // -? digit1-9 digits?
379 // -?(0 | [1-9][0-9]*) ('.' [0-9]+)? ([eE][+-]? [0-9]+)?
// Optional leading minus sign.
381 if (m_ptr
< m_end
&& *m_ptr
== '-') // -?
// Integer part: either a single '0' or a non-zero digit followed by digits.
385 if (m_ptr
< m_end
&& *m_ptr
== '0') // 0
387 else if (m_ptr
< m_end
&& *m_ptr
>= '1' && *m_ptr
<= '9') { // [1-9]
390 while (m_ptr
< m_end
&& isASCIIDigit(*m_ptr
))
// Optional fraction: tests for a digit after the '.', then consumes digits.
396 if (m_ptr
< m_end
&& *m_ptr
== '.') {
399 if (m_ptr
>= m_end
|| !isASCIIDigit(*m_ptr
))
403 while (m_ptr
< m_end
&& isASCIIDigit(*m_ptr
))
// Integer path: taken when there is no fraction, input remains, no exponent
// follows, and fewer than 10 characters have been consumed. Because of the
// `m_ptr < m_end` test, a number that ends exactly at the end of the input
// does not take this path.
405 } else if (m_ptr
< m_end
&& (*m_ptr
!= 'e' && *m_ptr
!= 'E') && (m_ptr
- token
.start
) < 10) {
407 token
.type
= TokNumber
;
409 const UChar
* digit
= token
.start
;
// Accumulate the decimal value one digit at a time.
416 while (digit
< m_ptr
)
417 result
= result
* 10 + (*digit
++) - '0';
419 token
.numberToken
= result
;
423 // ([eE][+-]? [0-9]+)?
424 if (m_ptr
< m_end
&& (*m_ptr
== 'e' || *m_ptr
== 'E')) { // [eE]
// Optional exponent sign.
428 if (m_ptr
< m_end
&& (*m_ptr
== '-' || *m_ptr
== '+'))
// Tests for a digit after the exponent marker, then consumes digits.
432 if (m_ptr
>= m_end
|| !isASCIIDigit(*m_ptr
))
436 while (m_ptr
< m_end
&& isASCIIDigit(*m_ptr
))
440 token
.type
= TokNumber
;
// General path: narrow the token's characters into a char buffer sized for
// the token plus one extra element, then convert with WTF::strtod.
442 Vector
<char, 64> buffer(token
.end
- token
.start
+ 1);
444 for (i
= 0; i
< token
.end
- token
.start
; i
++) {
// Every character of the token is asserted to survive the cast to char.
445 ASSERT(static_cast<char>(token
.start
[i
]) == token
.start
[i
]);
446 buffer
[i
] = static_cast<char>(token
.start
[i
]);
450 token
.numberToken
= WTF::strtod(buffer
.data(), &end
);
// strtod is asserted to have consumed exactly the token's characters.
451 ASSERT(buffer
.data() + (token
.end
- token
.start
) == end
);
// Parses a value starting in |initialState|. The parser is driven by a state
// variable plus goto labels, with three explicit stacks:
//  - objectStack: arrays and objects currently being filled;
//  - stateStack: the state to resume once a nested value has been parsed;
//  - identifierStack: property names waiting for their values.
// lastValue carries the most recently completed value between states.
// NOTE(review): the embedded line numbers skip; the loop/switch skeleton,
// the failure returns and several lexer advances are not visible in this
// listing, so the comments describe only what is shown.
455 JSValue
LiteralParser::parse(ParserState initialState
)
457 ParserState state
= initialState
;
458 MarkedArgumentBuffer objectStack
;
460 Vector
<ParserState
, 16> stateStack
;
461 Vector
<Identifier
, 16> identifierStack
;
// Array start: create an empty array and push it as the current container.
465 case StartParseArray
: {
466 JSArray
* array
= constructEmptyArray(m_exec
);
467 objectStack
.append(array
);
470 doParseArrayStartExpression
:
// Before each array element: remember the previous token type, then advance.
471 case DoParseArrayStartExpression
: {
472 TokenType lastToken
= m_lexer
.currentToken().type
;
// On ']' the previous token is tested for being a comma (the guarded
// statement is not visible), and the finished array is popped into lastValue.
473 if (m_lexer
.next() == TokRBracket
) {
474 if (lastToken
== TokComma
)
477 lastValue
= objectStack
.last();
478 objectStack
.removeLast();
// Otherwise parse the element and resume in DoParseArrayEndExpression.
482 stateStack
.append(DoParseArrayEndExpression
);
483 goto startParseExpression
;
// After an array element: push it onto the array on top of objectStack.
485 case DoParseArrayEndExpression
: {
486 asArray(objectStack
.last())->push(m_exec
, lastValue
);
// A comma leads to the next element.
488 if (m_lexer
.currentToken().type
== TokComma
)
489 goto doParseArrayStartExpression
;
// The current token is compared with ']' before the array is popped into
// lastValue.
491 if (m_lexer
.currentToken().type
!= TokRBracket
)
495 lastValue
= objectStack
.last();
496 objectStack
.removeLast();
// Object start: create an empty object and push it as the current container.
500 case StartParseObject
: {
501 JSObject
* object
= constructEmptyObject(m_exec
);
502 objectStack
.append(object
);
// A property name is a string token, or an identifier token when the mode is
// not StrictJSON.
504 TokenType type
= m_lexer
.next();
505 if (type
== TokString
|| (m_mode
!= StrictJSON
&& type
== TokIdentifier
)) {
506 Lexer::LiteralParserToken identifierToken
= m_lexer
.currentToken();
// The token after the property name is compared with ':'.
509 if (m_lexer
.next() != TokColon
)
// Save the property name, then parse its value and resume in
// DoParseObjectEndExpression.
513 identifierStack
.append(makeIdentifier(identifierToken
.stringToken
, identifierToken
.stringLength
));
514 stateStack
.append(DoParseObjectEndExpression
);
515 goto startParseExpression
;
// No property name: the token is compared with '}' and the object is popped
// into lastValue.
517 if (type
!= TokRBrace
)
520 lastValue
= objectStack
.last();
521 objectStack
.removeLast();
524 doParseObjectStartExpression
:
// Reached via goto after a comma inside an object: the next token is tested
// with the same property-name rule as above.
525 case DoParseObjectStartExpression
: {
526 TokenType type
= m_lexer
.next();
527 if (type
!= TokString
&& (m_mode
== StrictJSON
|| type
!= TokIdentifier
))
529 Lexer::LiteralParserToken identifierToken
= m_lexer
.currentToken();
532 if (m_lexer
.next() != TokColon
)
536 identifierStack
.append(makeIdentifier(identifierToken
.stringToken
, identifierToken
.stringLength
));
537 stateStack
.append(DoParseObjectEndExpression
);
538 goto startParseExpression
;
// After a property value: store it on the object on top of objectStack under
// the saved name, then drop that name.
540 case DoParseObjectEndExpression
:
542 asObject(objectStack
.last())->putDirect(m_exec
->globalData(), identifierStack
.last(), lastValue
);
543 identifierStack
.removeLast();
// A comma leads to the next property.
544 if (m_lexer
.currentToken().type
== TokComma
)
545 goto doParseObjectStartExpression
;
// The current token is compared with '}' before the object is popped into
// lastValue.
546 if (m_lexer
.currentToken().type
!= TokRBrace
)
549 lastValue
= objectStack
.last();
550 objectStack
.removeLast();
553 startParseExpression
:
// Expression: branch on the current token type.
554 case StartParseExpression
: {
555 switch (m_lexer
.currentToken().type
) {
557 goto startParseArray
;
559 goto startParseObject
;
// String value: the characters go through makeIdentifier, so string values
// use the same identifier caches as property names.
561 Lexer::LiteralParserToken stringToken
= m_lexer
.currentToken();
563 lastValue
= jsString(m_exec
, makeIdentifier(stringToken
.stringToken
, stringToken
.stringLength
).ustring());
// Number value.
567 Lexer::LiteralParserToken numberToken
= m_lexer
.currentToken();
569 lastValue
= jsNumber(numberToken
.numberToken
);
// null, true and false literals.
574 lastValue
= jsNull();
579 lastValue
= jsBoolean(true);
584 lastValue
= jsBoolean(false);
// Statement: branch on the current token type; one visible branch parses an
// expression directly, another first schedules
// StartParseStatementEndStatement.
593 case StartParseStatement
: {
594 switch (m_lexer
.currentToken().type
) {
598 goto startParseExpression
;
602 stateStack
.append(StartParseStatementEndStatement
);
603 goto startParseExpression
;
// End of a statement: no states may be pending; the current token is
// compared with ')' and the token after it with TokEnd.
609 case StartParseStatementEndStatement
: {
610 ASSERT(stateStack
.isEmpty());
611 if (m_lexer
.currentToken().type
!= TokRParen
)
613 if (m_lexer
.next() == TokEnd
)
618 ASSERT_NOT_REACHED();
// After a value is complete: stateStack is tested for emptiness (the guarded
// statement is not visible); otherwise the most recently pushed state is
// popped and becomes the current state.
620 if (stateStack
.isEmpty())
622 state
= stateStack
.last();
623 stateStack
.removeLast();