2 * Copyright (C) 2009 Apple Inc. All rights reserved.
3 * Copyright (C) 2012 Mathias Bynens (mathias@qiwi.be)
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 #include "LiteralParser.h"
33 #include "StrongInlines.h"
34 #include "UStringBuilder.h"
35 #include <wtf/ASCIICType.h>
// Reports whether c is one of the four code points compared below:
// space, 0x9 (tab), 0xA (line feed) or 0xD (carriage return).
40 template <typename CharType
>
41 static inline bool isJSONWhiteSpace(const CharType
& c
)
43 // The JSON RFC 4627 defines a list of allowed characters to be considered
44 // insignificant white space: http://www.ietf.org/rfc/rfc4627.txt (2. JSON Grammar).
45 return c
== ' ' || c
== 0x9 || c
== 0xA || c
== 0xD;
// Attempts to read the input as a sequence of JSONP statements. For each
// statement an access path is collected (optionally introduced by the "var"
// keyword, followed by dot, bracket-index and call entries), the value is
// parsed with parse(StartParseExpression), and a JSONPData holding the value
// and the path is appended to results.
// The final result is whether the lexer's current token is TokEnd.
// NOTE(review): the statements executed when the individual checks fail, as
// well as the else/case lines, are not visible in this excerpt.
48 template <typename CharType
>
49 bool LiteralParser
<CharType
>::tryJSONPParse(Vector
<JSONPData
>& results
, bool needsFullSourceInfo
)
51 if (m_lexer
.next() != TokIdentifier
)
54 Vector
<JSONPPathEntry
> path
;
55 // Unguarded next to start off the lexer
56 Identifier name
= Identifier(&m_exec
->globalData(), m_lexer
.currentToken().start
, m_lexer
.currentToken().end
- m_lexer
.currentToken().start
);
// When the first identifier is the "var" keyword, the identifier that follows
// it is recorded as a declaration entry.
58 if (name
== m_exec
->globalData().propertyNames
->varKeyword
) {
59 if (m_lexer
.next() != TokIdentifier
)
61 entry
.m_type
= JSONPPathEntryTypeDeclare
;
62 entry
.m_pathEntryName
= Identifier(&m_exec
->globalData(), m_lexer
.currentToken().start
, m_lexer
.currentToken().end
- m_lexer
.currentToken().start
);
// NOTE(review): presumably the non-"var" branch; the current identifier is
// recorded as a dot entry. Confirm against the full file.
65 entry
.m_type
= JSONPPathEntryTypeDot
;
66 entry
.m_pathEntryName
= Identifier(&m_exec
->globalData(), m_lexer
.currentToken().start
, m_lexer
.currentToken().end
- m_lexer
.currentToken().start
);
// The entry name is looked up in the keyword table.
69 if (m_exec
->globalData().keywords
->isKeyword(entry
.m_pathEntryName
))
71 TokenType tokenType
= m_lexer
.next();
// Path components are consumed for as long as the token is not TokAssign.
72 while (tokenType
!= TokAssign
) {
75 entry
.m_type
= JSONPPathEntryTypeLookup
;
76 if (m_lexer
.next() != TokNumber
)
// The numeric index is tested for converting to int without loss and for
// being non-negative before it is stored in the entry.
78 double doubleIndex
= m_lexer
.currentToken().numberToken
;
79 int index
= (int)doubleIndex
;
80 if (index
!= doubleIndex
|| index
< 0)
82 entry
.m_pathIndex
= index
;
83 if (m_lexer
.next() != TokRBracket
)
88 entry
.m_type
= JSONPPathEntryTypeDot
;
89 if (m_lexer
.next() != TokIdentifier
)
91 entry
.m_pathEntryName
= Identifier(&m_exec
->globalData(), m_lexer
.currentToken().start
, m_lexer
.currentToken().end
- m_lexer
.currentToken().start
);
// The last path entry is re-tagged as a call entry; the check below looks at
// its current type and at needsFullSourceInfo first.
95 if (path
.last().m_type
!= JSONPPathEntryTypeDot
|| needsFullSourceInfo
)
97 path
.last().m_type
= JSONPPathEntryTypeCall
;
105 tokenType
= m_lexer
.next();
// Parse the value and store it, together with the collected path, as a new
// entry at the end of results.
109 results
.append(JSONPData());
110 results
.last().m_value
.set(m_exec
->globalData(), parse(StartParseExpression
));
111 if (!results
.last().m_value
)
113 results
.last().m_path
.swap(path
);
// For a call entry the current token is compared with TokRParen; afterwards
// the current token is compared with TokSemi.
114 if (entry
.m_type
== JSONPPathEntryTypeCall
) {
115 if (m_lexer
.currentToken().type
!= TokRParen
)
119 if (m_lexer
.currentToken().type
!= TokSemi
)
// Another statement is processed while the current token is an identifier.
122 } while (m_lexer
.currentToken().type
== TokIdentifier
);
123 return m_lexer
.currentToken().type
== TokEnd
;
// Builds an Identifier from `length` 8-bit characters, using two caches that
// are indexed by the first character: m_shortIdentifiers and
// m_recentIdentifiers.
// NOTE(review): the conditions guarding the empty-identifier return and the
// m_shortIdentifiers path are not visible in this excerpt (presumably
// length == 0 and length == 1) - confirm against the full file.
126 template <typename CharType
>
127 ALWAYS_INLINE
const Identifier LiteralParser
<CharType
>::makeIdentifier(const LChar
* characters
, size_t length
)
130 return m_exec
->globalData().propertyNames
->emptyIdentifier
;
// A first character at or above MaximumCachableCharacter skips both caches
// and always yields a freshly constructed Identifier.
131 if (characters
[0] >= MaximumCachableCharacter
)
132 return Identifier(&m_exec
->globalData(), characters
, length
);
// m_shortIdentifiers: a non-null slot is returned as is; a null slot is
// filled first and then returned.
135 if (!m_shortIdentifiers
[characters
[0]].isNull())
136 return m_shortIdentifiers
[characters
[0]];
137 m_shortIdentifiers
[characters
[0]] = Identifier(&m_exec
->globalData(), characters
, length
);
138 return m_shortIdentifiers
[characters
[0]];
// m_recentIdentifiers: the slot is reused only when its contents compare
// equal to the requested characters; otherwise the slot is overwritten.
140 if (!m_recentIdentifiers
[characters
[0]].isNull() && Identifier::equal(m_recentIdentifiers
[characters
[0]].impl(), characters
, length
))
141 return m_recentIdentifiers
[characters
[0]];
142 m_recentIdentifiers
[characters
[0]] = Identifier(&m_exec
->globalData(), characters
, length
);
143 return m_recentIdentifiers
[characters
[0]];
// UChar overload of makeIdentifier: builds an Identifier from `length` 16-bit
// characters, using the same two first-character-indexed caches
// (m_shortIdentifiers and m_recentIdentifiers) as the LChar overload.
// NOTE(review): the conditions guarding the empty-identifier return and the
// m_shortIdentifiers path are not visible in this excerpt (presumably
// length == 0 and length == 1) - confirm against the full file.
146 template <typename CharType
>
147 ALWAYS_INLINE
const Identifier LiteralParser
<CharType
>::makeIdentifier(const UChar
* characters
, size_t length
)
150 return m_exec
->globalData().propertyNames
->emptyIdentifier
;
// A first character at or above MaximumCachableCharacter skips both caches
// and always yields a freshly constructed Identifier.
151 if (characters
[0] >= MaximumCachableCharacter
)
152 return Identifier(&m_exec
->globalData(), characters
, length
);
// m_shortIdentifiers: a non-null slot is returned as is; a null slot is
// filled first and then returned.
155 if (!m_shortIdentifiers
[characters
[0]].isNull())
156 return m_shortIdentifiers
[characters
[0]];
157 m_shortIdentifiers
[characters
[0]] = Identifier(&m_exec
->globalData(), characters
, length
);
158 return m_shortIdentifiers
[characters
[0]];
// m_recentIdentifiers: the slot is reused only when its contents compare
// equal to the requested characters; otherwise the slot is overwritten.
160 if (!m_recentIdentifiers
[characters
[0]].isNull() && Identifier::equal(m_recentIdentifiers
[characters
[0]].impl(), characters
, length
))
161 return m_recentIdentifiers
[characters
[0]];
162 m_recentIdentifiers
[characters
[0]] = Identifier(&m_exec
->globalData(), characters
, length
);
163 return m_recentIdentifiers
[characters
[0]];
// Produces the next token from the range [m_ptr, m_end) into `token`, after
// stepping over characters accepted by isJSONWhiteSpace.
// NOTE(review): the switch/case labels, pointer increments and return
// statements are not visible in this excerpt, so the mapping from input
// character to token type is only partly shown here.
166 template <typename CharType
>
167 template <ParserMode mode
> TokenType LiteralParser
<CharType
>::Lexer::lex(LiteralParserToken
<CharType
>& token
)
169 while (m_ptr
< m_end
&& isJSONWhiteSpace(*m_ptr
))
172 ASSERT(m_ptr
<= m_end
);
// At the end of the input the token gets an empty range located at m_ptr.
173 if (m_ptr
>= m_end
) {
175 token
.start
= token
.end
= m_ptr
;
// The token type is set to TokError here; the assignments that follow
// overwrite it with the type of the punctuation token that was recognised.
178 token
.type
= TokError
;
182 token
.type
= TokLBracket
;
186 token
.type
= TokRBracket
;
190 token
.type
= TokLParen
;
194 token
.type
= TokRParen
;
198 token
.type
= TokLBrace
;
202 token
.type
= TokRBrace
;
206 token
.type
= TokComma
;
210 token
.type
= TokColon
;
// Double-quoted strings are delegated to lexString.
214 return lexString
<mode
, '"'>(token
);
// The literals are matched by comparing the characters after the first one;
// the length test keeps the m_ptr[1..] reads inside the buffer.
216 if (m_end
- m_ptr
>= 4 && m_ptr
[1] == 'r' && m_ptr
[2] == 'u' && m_ptr
[3] == 'e') {
218 token
.type
= TokTrue
;
224 if (m_end
- m_ptr
>= 5 && m_ptr
[1] == 'a' && m_ptr
[2] == 'l' && m_ptr
[3] == 's' && m_ptr
[4] == 'e') {
226 token
.type
= TokFalse
;
232 if (m_end
- m_ptr
>= 4 && m_ptr
[1] == 'u' && m_ptr
[2] == 'l' && m_ptr
[3] == 'l') {
234 token
.type
= TokNull
;
250 return lexNumber(token
);
259 token
.type
= TokAssign
;
264 token
.type
= TokSemi
;
// An identifier starts with an ASCII letter, an underscore or a dollar sign.
268 if (isASCIIAlpha(*m_ptr
) || *m_ptr
== '_' || *m_ptr
== '$')
269 return lexIdentifier(token
);
// Single-quoted strings: StrictJSON mode records an error message, the other
// path lexes the string with the single quote as terminator.
270 if (*m_ptr
== '\'') {
271 if (mode
== StrictJSON
) {
272 m_lexErrorMessage
= "Single quotes (\') are not allowed in JSON";
275 return lexString
<mode
, '\''>(token
);
// Anything else is reported through m_lexErrorMessage.
278 m_lexErrorMessage
= String::format("Unrecognized token '%c'", *m_ptr
).impl();
// LChar version of lexIdentifier. The loop condition accepts ASCII
// alphanumerics, underscore and dollar sign; the token is then described as
// an 8-bit string that starts at token.start and ends at m_ptr, with type
// TokIdentifier.
// NOTE(review): the loop body is not visible in this excerpt (presumably it
// advances m_ptr).
283 ALWAYS_INLINE TokenType LiteralParser
<LChar
>::Lexer::lexIdentifier(LiteralParserToken
<LChar
>& token
)
285 while (m_ptr
< m_end
&& (isASCIIAlphanumeric(*m_ptr
) || *m_ptr
== '_' || *m_ptr
== '$'))
287 token
.stringIs8Bit
= 1;
288 token
.stringToken8
= token
.start
;
289 token
.stringLength
= m_ptr
- token
.start
;
290 token
.type
= TokIdentifier
;
292 return TokIdentifier
;
// UChar version of lexIdentifier. In addition to ASCII alphanumerics,
// underscore and dollar sign, the loop condition accepts the code points
// 0x200C and 0x200D (U+200C ZERO WIDTH NON-JOINER, U+200D ZERO WIDTH JOINER).
// The token is then described as a 16-bit string that starts at token.start
// and ends at m_ptr, with type TokIdentifier.
// NOTE(review): the loop body is not visible in this excerpt (presumably it
// advances m_ptr).
296 ALWAYS_INLINE TokenType LiteralParser
<UChar
>::Lexer::lexIdentifier(LiteralParserToken
<UChar
>& token
)
298 while (m_ptr
< m_end
&& (isASCIIAlphanumeric(*m_ptr
) || *m_ptr
== '_' || *m_ptr
== '$' || *m_ptr
== 0x200C || *m_ptr
== 0x200D))
300 token
.stringIs8Bit
= 0;
301 token
.stringToken16
= token
.start
;
302 token
.stringLength
= m_ptr
- token
.start
;
303 token
.type
= TokIdentifier
;
305 return TokIdentifier
;
// Lexes the next token into m_currentToken by calling the lex<> instantiation
// chosen from m_mode: NonStrictJSON, JSONP, or StrictJSON as the last return.
// NOTE(review): the condition guarding the JSONP return is not visible in
// this excerpt.
308 template <typename CharType
>
309 TokenType LiteralParser
<CharType
>::Lexer::next()
311 if (m_mode
== NonStrictJSON
)
312 return lex
<NonStrictJSON
>(m_currentToken
);
314 return lex
<JSONP
>(m_currentToken
);
315 return lex
<StrictJSON
>(m_currentToken
);
// LChar version: flags the token's string as 8-bit and points stringToken8 at
// `string`. The string length is not set here.
319 ALWAYS_INLINE
void setParserTokenString
<LChar
>(LiteralParserToken
<LChar
>& token
, const LChar
* string
)
321 token
.stringIs8Bit
= 1;
322 token
.stringToken8
= string
;
// UChar version: flags the token's string as not 8-bit and points
// stringToken16 at `string`. The string length is not set here.
326 ALWAYS_INLINE
void setParserTokenString
<UChar
>(LiteralParserToken
<UChar
>& token
, const UChar
* string
)
328 token
.stringIs8Bit
= 0;
329 token
.stringToken16
= string
;
// LChar version: true when c is at least ' ' and is neither a backslash nor
// the terminator. A tab is additionally accepted when mode is not StrictJSON.
332 template <ParserMode mode
, typename CharType
, LChar terminator
> static inline bool isSafeStringCharacter(LChar c
)
334 return (c
>= ' ' && c
!= '\\' && c
!= terminator
) || (c
== '\t' && mode
!= StrictJSON
);
// UChar version: true when c is at least ' ' and is neither a backslash nor
// the terminator; outside StrictJSON mode c must also be at most 0xff.
// A tab is additionally accepted when mode is not StrictJSON.
337 template <ParserMode mode
, typename CharType
, UChar terminator
> static inline bool isSafeStringCharacter(UChar c
)
339 return (c
>= ' ' && (mode
== StrictJSON
|| c
<= 0xff) && c
!= '\\' && c
!= terminator
) || (c
== '\t' && mode
!= StrictJSON
);
// Lexes a string delimited by `terminator` into `token`. Runs of characters
// accepted by isSafeStringCharacter are scanned in place; escape sequences
// are decoded into a UStringBuilder. When the builder stayed empty the token
// refers directly to the scanned run (runStart .. m_ptr); otherwise it refers
// to token.stringBuffer, which receives the builder's contents.
// Errors are reported through m_lexErrorMessage.
// NOTE(review): the case labels, pointer increments and return statements are
// not visible in this excerpt.
342 template <typename CharType
>
343 template <ParserMode mode
, char terminator
> ALWAYS_INLINE TokenType LiteralParser
<CharType
>::Lexer::lexString(LiteralParserToken
<CharType
>& token
)
346 const CharType
* runStart
= m_ptr
;
347 UStringBuilder builder
;
350 while (m_ptr
< m_end
&& isSafeStringCharacter
<mode
, CharType
, terminator
>(*m_ptr
))
// Once the builder holds data, every scanned run is appended to it.
352 if (builder
.length())
353 builder
.append(runStart
, m_ptr
- runStart
);
// Escape handling is compiled in only when mode is not NonStrictJSON.
354 if ((mode
!= NonStrictJSON
) && m_ptr
< m_end
&& *m_ptr
== '\\') {
// First use of the builder: copy the run scanned so far into it.
355 if (builder
.isEmpty() && runStart
< m_ptr
)
356 builder
.append(runStart
, m_ptr
- runStart
);
358 if (m_ptr
>= m_end
) {
359 m_lexErrorMessage
= "Unterminated string";
368 builder
.append('\\');
376 builder
.append('\b');
380 builder
.append('\f');
384 builder
.append('\n');
388 builder
.append('\r');
392 builder
.append('\t');
// A unicode escape needs five characters to be available, see the existing
// comment on the closing brace below.
397 if ((m_end
- m_ptr
) < 5) {
398 m_lexErrorMessage
= "\\u must be followed by 4 hex digits";
400 } // uNNNN == 5 characters
401 for (int i
= 1; i
< 5; i
++) {
402 if (!isASCIIHexDigit(m_ptr
[i
])) {
403 m_lexErrorMessage
= String::format("\"\\%s\" is not a valid unicode escape", UString(m_ptr
, 5).ascii().data()).impl();
407 builder
.append(JSC::Lexer
<CharType
>::convertUnicode(m_ptr
[1], m_ptr
[2], m_ptr
[3], m_ptr
[4]));
// An escaped single quote is accepted when mode is not StrictJSON.
412 if (*m_ptr
== '\'' && mode
!= StrictJSON
) {
413 builder
.append('\'');
417 m_lexErrorMessage
= String::format("Invalid escape character %c", *m_ptr
).impl();
421 } while ((mode
!= NonStrictJSON
) && m_ptr
!= runStart
&& (m_ptr
< m_end
) && *m_ptr
!= terminator
);
// The string must end at the terminator character.
423 if (m_ptr
>= m_end
|| *m_ptr
!= terminator
) {
424 m_lexErrorMessage
= "Unterminated string";
// Empty builder: the token points at the scanned run itself.
428 if (builder
.isEmpty()) {
429 token
.stringBuffer
= UString();
430 setParserTokenString
<CharType
>(token
, runStart
);
431 token
.stringLength
= m_ptr
- runStart
;
// Non-empty builder: the token points into token.stringBuffer, as 8-bit or
// 16-bit data depending on what the buffer reports.
433 token
.stringBuffer
= builder
.toUString();
434 if (token
.stringBuffer
.is8Bit()) {
435 token
.stringIs8Bit
= 1;
436 token
.stringToken8
= token
.stringBuffer
.characters8();
438 token
.stringIs8Bit
= 0;
439 token
.stringToken16
= token
.stringBuffer
.characters16();
441 token
.stringLength
= token
.stringBuffer
.length();
443 token
.type
= TokString
;
// Lexes a number token following the grammar quoted in the comments below and
// stores its value in token.numberToken with type TokNumber. Errors are
// reported through m_lexErrorMessage.
// NOTE(review): the pointer increments and return statements are not visible
// in this excerpt.
448 template <typename CharType
>
449 TokenType LiteralParser
<CharType
>::Lexer::lexNumber(LiteralParserToken
<CharType
>& token
)
451 // ES5 and json.org define numbers as
458 // -? digit1-9 digits?
463 // -?(0 | [1-9][0-9]*) ('.' [0-9]+)? ([eE][+-]? [0-9]+)?
465 if (m_ptr
< m_end
&& *m_ptr
== '-') // -?
469 if (m_ptr
< m_end
&& *m_ptr
== '0') // 0
471 else if (m_ptr
< m_end
&& *m_ptr
>= '1' && *m_ptr
<= '9') { // [1-9]
474 while (m_ptr
< m_end
&& isASCIIDigit(*m_ptr
))
477 m_lexErrorMessage
= "Invalid number";
// Optional fraction: at least one digit has to follow the decimal point.
482 if (m_ptr
< m_end
&& *m_ptr
== '.') {
485 if (m_ptr
>= m_end
|| !isASCIIDigit(*m_ptr
)) {
486 m_lexErrorMessage
= "Invalid digits after decimal point";
491 while (m_ptr
< m_end
&& isASCIIDigit(*m_ptr
))
// No fraction, next character is not an exponent marker and fewer than 10
// characters were consumed: the value is accumulated digit by digit below.
// NOTE(review): presumably a fast path that avoids parseDouble - confirm.
493 } else if (m_ptr
< m_end
&& (*m_ptr
!= 'e' && *m_ptr
!= 'E') && (m_ptr
- token
.start
) < 10) {
495 token
.type
= TokNumber
;
497 const CharType
* digit
= token
.start
;
504 while (digit
< m_ptr
)
505 result
= result
* 10 + (*digit
++) - '0';
507 token
.numberToken
= result
;
511 // ([eE][+-]? [0-9]+)?
512 if (m_ptr
< m_end
&& (*m_ptr
== 'e' || *m_ptr
== 'E')) { // [eE]
516 if (m_ptr
< m_end
&& (*m_ptr
== '-' || *m_ptr
== '+'))
520 if (m_ptr
>= m_end
|| !isASCIIDigit(*m_ptr
)) {
521 m_lexErrorMessage
= "Exponent symbols should be followed by an optional '+' or '-' and then by at least one number";
526 while (m_ptr
< m_end
&& isASCIIDigit(*m_ptr
))
// General case: the text between token.start and token.end is converted with
// parseDouble.
530 token
.type
= TokNumber
;
533 token
.numberToken
= parseDouble(token
.start
, token
.end
- token
.start
, parsedLength
);
// Parses a value starting in initialState and driven by `state`. Nesting is
// tracked on explicit stacks: objectStack holds the arrays/objects under
// construction, stateStack the state to resume once a nested expression is
// done, and identifierStack the property names waiting for their value.
// Errors are reported through m_parseErrorMessage.
// NOTE(review): many case labels, the enclosing loop/switch lines and the
// return statements are not visible in this excerpt.
537 template <typename CharType
>
538 JSValue LiteralParser
<CharType
>::parse(ParserState initialState
)
540 ParserState state
= initialState
;
541 MarkedArgumentBuffer objectStack
;
543 Vector
<ParserState
, 16> stateStack
;
544 Vector
<Identifier
, 16> identifierStack
;
// Array start: a new empty array becomes the top of objectStack.
548 case StartParseArray
: {
549 JSArray
* array
= constructEmptyArray(m_exec
);
550 objectStack
.append(array
);
553 doParseArrayStartExpression
:
554 case DoParseArrayStartExpression
: {
// A closing bracket directly after a comma is reported as an error; otherwise
// the array on top of objectStack becomes lastValue and is popped.
555 TokenType lastToken
= m_lexer
.currentToken().type
;
556 if (m_lexer
.next() == TokRBracket
) {
557 if (lastToken
== TokComma
) {
558 m_parseErrorMessage
= "Unexpected comma at the end of array expression";
562 lastValue
= objectStack
.last();
563 objectStack
.removeLast();
// Parse the element, then resume in DoParseArrayEndExpression.
567 stateStack
.append(DoParseArrayEndExpression
);
568 goto startParseExpression
;
570 case DoParseArrayEndExpression
: {
// The finished element is pushed onto the array on top of objectStack.
571 asArray(objectStack
.last())->push(m_exec
, lastValue
);
573 if (m_lexer
.currentToken().type
== TokComma
)
574 goto doParseArrayStartExpression
;
576 if (m_lexer
.currentToken().type
!= TokRBracket
) {
577 m_parseErrorMessage
= "Expected ']'";
582 lastValue
= objectStack
.last();
583 objectStack
.removeLast();
// Object start: a new empty object becomes the top of objectStack.
587 case StartParseObject
: {
588 JSObject
* object
= constructEmptyObject(m_exec
);
589 objectStack
.append(object
);
// First property name: a string token, or an identifier token when m_mode is
// not StrictJSON.
591 TokenType type
= m_lexer
.next();
592 if (type
== TokString
|| (m_mode
!= StrictJSON
&& type
== TokIdentifier
)) {
593 LiteralParserToken
<CharType
> identifierToken
= m_lexer
.currentToken();
596 if (m_lexer
.next() != TokColon
) {
597 m_parseErrorMessage
= "Expected ':' before value in object property definition";
// The property name is pushed as an Identifier built from the 8-bit or the
// 16-bit token data, then the value expression is parsed.
602 if (identifierToken
.stringIs8Bit
)
603 identifierStack
.append(makeIdentifier(identifierToken
.stringToken8
, identifierToken
.stringLength
));
605 identifierStack
.append(makeIdentifier(identifierToken
.stringToken16
, identifierToken
.stringLength
));
606 stateStack
.append(DoParseObjectEndExpression
);
607 goto startParseExpression
;
609 if (type
!= TokRBrace
) {
610 m_parseErrorMessage
= "Expected '}'";
614 lastValue
= objectStack
.last();
615 objectStack
.removeLast();
618 doParseObjectStartExpression
:
619 case DoParseObjectStartExpression
: {
// Subsequent property names follow the same string/identifier rule as the
// first one.
620 TokenType type
= m_lexer
.next();
621 if (type
!= TokString
&& (m_mode
== StrictJSON
|| type
!= TokIdentifier
)) {
622 m_parseErrorMessage
= "Property name must be a string literal";
625 LiteralParserToken
<CharType
> identifierToken
= m_lexer
.currentToken();
628 if (m_lexer
.next() != TokColon
) {
629 m_parseErrorMessage
= "Expected ':'";
634 if (identifierToken
.stringIs8Bit
)
635 identifierStack
.append(makeIdentifier(identifierToken
.stringToken8
, identifierToken
.stringLength
));
637 identifierStack
.append(makeIdentifier(identifierToken
.stringToken16
, identifierToken
.stringLength
));
638 stateStack
.append(DoParseObjectEndExpression
);
639 goto startParseExpression
;
641 case DoParseObjectEndExpression
:
// Store the finished value under the pending property name and pop the name.
643 asObject(objectStack
.last())->putDirect(m_exec
->globalData(), identifierStack
.last(), lastValue
);
644 identifierStack
.removeLast();
645 if (m_lexer
.currentToken().type
== TokComma
)
646 goto doParseObjectStartExpression
;
647 if (m_lexer
.currentToken().type
!= TokRBrace
) {
648 m_parseErrorMessage
= "Expected '}'";
652 lastValue
= objectStack
.last();
653 objectStack
.removeLast();
656 startParseExpression
:
657 case StartParseExpression
: {
// Dispatch on the current token type: containers jump to their start state,
// scalar tokens set lastValue, and the remaining token types set an error
// message.
658 switch (m_lexer
.currentToken().type
) {
660 goto startParseArray
;
662 goto startParseObject
;
664 LiteralParserToken
<CharType
> stringToken
= m_lexer
.currentToken();
666 if (stringToken
.stringIs8Bit
)
667 lastValue
= jsString(m_exec
, makeIdentifier(stringToken
.stringToken8
, stringToken
.stringLength
).ustring());
669 lastValue
= jsString(m_exec
, makeIdentifier(stringToken
.stringToken16
, stringToken
.stringLength
).ustring());
673 LiteralParserToken
<CharType
> numberToken
= m_lexer
.currentToken();
675 lastValue
= jsNumber(numberToken
.numberToken
);
680 lastValue
= jsNull();
685 lastValue
= jsBoolean(true);
690 lastValue
= jsBoolean(false);
693 m_parseErrorMessage
= "Unexpected token ']'";
696 m_parseErrorMessage
= "Unexpected token '}'";
698 case TokIdentifier
: {
699 const LiteralParserToken
<CharType
>& token
= m_lexer
.currentToken();
700 if (token
.stringIs8Bit
)
701 m_parseErrorMessage
= String::format("Unexpected identifier \"%s\"", UString(m_lexer
.currentToken().stringToken8
, m_lexer
.currentToken().stringLength
).ascii().data()).impl();
703 m_parseErrorMessage
= String::format("Unexpected identifier \"%s\"", UString(m_lexer
.currentToken().stringToken16
, m_lexer
.currentToken().stringLength
).ascii().data()).impl();
707 m_parseErrorMessage
= "Unexpected token ':'";
710 m_parseErrorMessage
= "Unexpected token '('";
713 m_parseErrorMessage
= "Unexpected token ')'";
716 m_parseErrorMessage
= "Unexpected token ','";
719 m_parseErrorMessage
= "Unexpected token '.'";
722 m_parseErrorMessage
= "Unexpected token '='";
725 m_parseErrorMessage
= "Unexpected token ';'";
728 m_parseErrorMessage
= "Unexpected EOF";
733 m_parseErrorMessage
= "Could not parse value expression";
738 case StartParseStatement
: {
// Statement level: some token types continue into expression parsing (one
// path first pushes StartParseStatementEndStatement), the others set an error
// message.
739 switch (m_lexer
.currentToken().type
) {
743 goto startParseExpression
;
747 stateStack
.append(StartParseStatementEndStatement
);
748 goto startParseExpression
;
751 m_parseErrorMessage
= "Unexpected token ']'";
754 m_parseErrorMessage
= "Unexpected token '{'";
757 m_parseErrorMessage
= "Unexpected token '}'";
760 m_parseErrorMessage
= "Unexpected identifier";
763 m_parseErrorMessage
= "Unexpected token ':'";
766 m_parseErrorMessage
= "Unexpected token ')'";
769 m_parseErrorMessage
= "Unexpected token ','";
772 m_parseErrorMessage
= "Unexpected token 'true'";
775 m_parseErrorMessage
= "Unexpected token 'false'";
778 m_parseErrorMessage
= "Unexpected token 'null'";
781 m_parseErrorMessage
= "Unexpected EOF";
784 m_parseErrorMessage
= "Unexpected token '.'";
787 m_parseErrorMessage
= "Unexpected token '='";
790 m_parseErrorMessage
= "Unexpected token ';'";
794 m_parseErrorMessage
= "Could not parse statement";
798 case StartParseStatementEndStatement
: {
// This state is only reached with an empty stateStack (asserted). The current
// token is compared with TokRParen and the token after it with TokEnd.
799 ASSERT(stateStack
.isEmpty());
800 if (m_lexer
.currentToken().type
!= TokRParen
)
802 if (m_lexer
.next() == TokEnd
)
804 m_parseErrorMessage
= "Unexpected content at end of JSON literal";
808 ASSERT_NOT_REACHED();
// stateStack is tested for emptiness (the statement taken in that case is not
// visible here); otherwise the most recently pushed state is popped and
// becomes the current state.
810 if (stateStack
.isEmpty())
812 state
= stateStack
.last();
813 stateStack
.removeLast();
// Explicit instantiation definitions of LiteralParser for the LChar and UChar
// character types.
818 // Instantiate the two flavors of LiteralParser we need instead of putting most of this file in LiteralParser.h
819 template class LiteralParser
<LChar
>;
820 template class LiteralParser
<UChar
>;