X-Git-Url: https://git.saurik.com/apple/javascriptcore.git/blobdiff_plain/f9bf01c6616d5ddcf65b13b33cedf9e387ff7a63..14957cd040308e3eeec43d26bae5d76da13fcd85:/runtime/LiteralParser.cpp?ds=inline diff --git a/runtime/LiteralParser.cpp b/runtime/LiteralParser.cpp index aa1e5ed..ed42d0d 100644 --- a/runtime/LiteralParser.cpp +++ b/runtime/LiteralParser.cpp @@ -29,15 +29,118 @@ #include "JSArray.h" #include "JSString.h" #include "Lexer.h" -#include "StringBuilder.h" +#include "UStringBuilder.h" #include #include namespace JSC { -LiteralParser::TokenType LiteralParser::Lexer::lex(LiteralParserToken& token) +static inline bool isJSONWhiteSpace(const UChar& c) { - while (m_ptr < m_end && isASCIISpace(*m_ptr)) + // The JSON RFC 4627 defines a list of allowed characters to be considered + // insignificant white space: http://www.ietf.org/rfc/rfc4627.txt (2. JSON Grammar). + return c == ' ' || c == 0x9 || c == 0xA || c == 0xD; +} + +bool LiteralParser::tryJSONPParse(Vector& results, bool needsFullSourceInfo) +{ + if (m_lexer.next() != TokIdentifier) + return false; + do { + Vector path; + // Unguarded next to start off the lexer + Identifier name = Identifier(m_exec, m_lexer.currentToken().start, m_lexer.currentToken().end - m_lexer.currentToken().start); + JSONPPathEntry entry; + if (name == m_exec->globalData().propertyNames->varKeyword) { + if (m_lexer.next() != TokIdentifier) + return false; + entry.m_type = JSONPPathEntryTypeDeclare; + entry.m_pathEntryName = Identifier(m_exec, m_lexer.currentToken().start, m_lexer.currentToken().end - m_lexer.currentToken().start); + path.append(entry); + } else { + entry.m_type = JSONPPathEntryTypeDot; + entry.m_pathEntryName = Identifier(m_exec, m_lexer.currentToken().start, m_lexer.currentToken().end - m_lexer.currentToken().start); + path.append(entry); + } + if (m_exec->globalData().lexer->isKeyword(entry.m_pathEntryName)) + return false; + TokenType tokenType = m_lexer.next(); + while (tokenType != TokAssign) { + switch 
(tokenType) { + case TokLBracket: { + entry.m_type = JSONPPathEntryTypeLookup; + if (m_lexer.next() != TokNumber) + return false; + double doubleIndex = m_lexer.currentToken().numberToken; + int index = (int)doubleIndex; + if (index != doubleIndex || index < 0) + return false; + entry.m_pathIndex = index; + if (m_lexer.next() != TokRBracket) + return false; + break; + } + case TokDot: { + entry.m_type = JSONPPathEntryTypeDot; + if (m_lexer.next() != TokIdentifier) + return false; + entry.m_pathEntryName = Identifier(m_exec, m_lexer.currentToken().start, m_lexer.currentToken().end - m_lexer.currentToken().start); + break; + } + case TokLParen: { + if (path.last().m_type != JSONPPathEntryTypeDot || needsFullSourceInfo) + return false; + path.last().m_type = JSONPPathEntryTypeCall; + entry = path.last(); + goto startJSON; + } + default: + return false; + } + path.append(entry); + tokenType = m_lexer.next(); + } + startJSON: + m_lexer.next(); + results.append(JSONPData()); + results.last().m_value.set(m_exec->globalData(), parse(StartParseExpression)); + if (!results.last().m_value) + return false; + results.last().m_path.swap(path); + if (entry.m_type == JSONPPathEntryTypeCall) { + if (m_lexer.currentToken().type != TokRParen) + return false; + m_lexer.next(); + } + if (m_lexer.currentToken().type != TokSemi) + break; + m_lexer.next(); + } while (m_lexer.currentToken().type == TokIdentifier); + return m_lexer.currentToken().type == TokEnd; +} + +ALWAYS_INLINE const Identifier LiteralParser::makeIdentifier(const UChar* characters, size_t length) +{ + if (!length) + return m_exec->globalData().propertyNames->emptyIdentifier; + if (characters[0] >= MaximumCachableCharacter) + return Identifier(&m_exec->globalData(), characters, length); + + if (length == 1) { + if (!m_shortIdentifiers[characters[0]].isNull()) + return m_shortIdentifiers[characters[0]]; + m_shortIdentifiers[characters[0]] = Identifier(&m_exec->globalData(), characters, length); + return 
m_shortIdentifiers[characters[0]]; + } + if (!m_recentIdentifiers[characters[0]].isNull() && Identifier::equal(m_recentIdentifiers[characters[0]].impl(), characters, length)) + return m_recentIdentifiers[characters[0]]; + m_recentIdentifiers[characters[0]] = Identifier(&m_exec->globalData(), characters, length); + return m_recentIdentifiers[characters[0]]; +} + +template LiteralParser::TokenType LiteralParser::Lexer::lex(LiteralParserToken& token) +{ + while (m_ptr < m_end && isJSONWhiteSpace(*m_ptr)) ++m_ptr; ASSERT(m_ptr <= m_end); @@ -60,11 +163,11 @@ LiteralParser::TokenType LiteralParser::Lexer::lex(LiteralParserToken& token) case '(': token.type = TokLParen; token.end = ++m_ptr; - return TokLBracket; + return TokLParen; case ')': token.type = TokRParen; token.end = ++m_ptr; - return TokRBracket; + return TokRParen; case '{': token.type = TokLBrace; token.end = ++m_ptr; @@ -82,9 +185,7 @@ LiteralParser::TokenType LiteralParser::Lexer::lex(LiteralParserToken& token) token.end = ++m_ptr; return TokColon; case '"': - if (m_mode == StrictJSON) - return lexString(token); - return lexString(token); + return lexString(token); case 't': if (m_end - m_ptr >= 4 && m_ptr[1] == 'r' && m_ptr[2] == 'u' && m_ptr[3] == 'e') { m_ptr += 4; @@ -108,7 +209,7 @@ LiteralParser::TokenType LiteralParser::Lexer::lex(LiteralParserToken& token) token.end = m_ptr; return TokNull; } - break; + break; case '-': case '0': case '1': @@ -122,27 +223,69 @@ LiteralParser::TokenType LiteralParser::Lexer::lex(LiteralParserToken& token) case '9': return lexNumber(token); } + if (m_ptr < m_end) { + if (*m_ptr == '.') { + token.type = TokDot; + token.end = ++m_ptr; + return TokDot; + } + if (*m_ptr == '=') { + token.type = TokAssign; + token.end = ++m_ptr; + return TokAssign; + } + if (*m_ptr == ';') { + token.type = TokSemi; + token.end = ++m_ptr; + return TokSemi; + } + if (isASCIIAlpha(*m_ptr) || *m_ptr == '_' || *m_ptr == '$') { + while (m_ptr < m_end && (isASCIIAlphanumeric(*m_ptr) || *m_ptr 
== '_' || *m_ptr == '$')) + m_ptr++; + token.stringToken = token.start; + token.stringLength = m_ptr - token.start; + token.type = TokIdentifier; + token.end = m_ptr; + return TokIdentifier; + } + if (*m_ptr == '\'') { + if (mode == StrictJSON) + return TokError; + return lexString(token); + } + } return TokError; } -template static inline bool isSafeStringCharacter(UChar c) +LiteralParser::TokenType LiteralParser::Lexer::next() +{ + if (m_mode == NonStrictJSON) + return lex(m_currentToken); + if (m_mode == JSONP) + return lex(m_currentToken); + return lex(m_currentToken); +} + +template static inline bool isSafeStringCharacter(UChar c) { - return (c >= ' ' && (mode == LiteralParser::StrictJSON || c <= 0xff) && c != '\\' && c != '"') || c == '\t'; + return (c >= ' ' && (mode == LiteralParser::StrictJSON || c <= 0xff) && c != '\\' && c != terminator) || c == '\t'; } // "inline" is required here to help WINSCW compiler resolve specialized argument in templated functions. -template inline LiteralParser::TokenType LiteralParser::Lexer::lexString(LiteralParserToken& token) +template inline LiteralParser::TokenType LiteralParser::Lexer::lexString(LiteralParserToken& token) { ++m_ptr; - const UChar* runStart; - StringBuilder builder; + const UChar* runStart = m_ptr; + UStringBuilder builder; do { runStart = m_ptr; - while (m_ptr < m_end && isSafeStringCharacter(*m_ptr)) + while (m_ptr < m_end && isSafeStringCharacter(*m_ptr)) ++m_ptr; - if (runStart < m_ptr) + if (builder.length()) builder.append(runStart, m_ptr - runStart); - if ((mode == StrictJSON) && m_ptr < m_end && *m_ptr == '\\') { + if ((mode != NonStrictJSON) && m_ptr < m_end && *m_ptr == '\\') { + if (builder.isEmpty() && runStart < m_ptr) + builder.append(runStart, m_ptr - runStart); ++m_ptr; if (m_ptr >= m_end) return TokError; @@ -192,15 +335,28 @@ template inline LiteralParser::TokenType Litera break; default: + if (*m_ptr == '\'' && mode != StrictJSON) { + builder.append('\''); + m_ptr++; + break; + } 
return TokError; } } - } while ((mode == StrictJSON) && m_ptr != runStart && (m_ptr < m_end) && *m_ptr != '"'); + } while ((mode != NonStrictJSON) && m_ptr != runStart && (m_ptr < m_end) && *m_ptr != terminator); - if (m_ptr >= m_end || *m_ptr != '"') + if (m_ptr >= m_end || *m_ptr != terminator) return TokError; - token.stringToken = builder.release(); + if (builder.isEmpty()) { + token.stringBuffer = UString(); + token.stringToken = runStart; + token.stringLength = m_ptr - runStart; + } else { + token.stringBuffer = builder.toUString(); + token.stringToken = token.stringBuffer.characters(); + token.stringLength = token.stringBuffer.length(); + } token.type = TokString; token.end = ++m_ptr; return TokString; @@ -246,6 +402,22 @@ LiteralParser::TokenType LiteralParser::Lexer::lexNumber(LiteralParserToken& tok ++m_ptr; while (m_ptr < m_end && isASCIIDigit(*m_ptr)) ++m_ptr; + } else if (m_ptr < m_end && (*m_ptr != 'e' && *m_ptr != 'E') && (m_ptr - token.start) < 10) { + int result = 0; + token.type = TokNumber; + token.end = m_ptr; + const UChar* digit = token.start; + int negative = 1; + if (*digit == '-') { + negative = -1; + digit++; + } + + while (digit < m_ptr) + result = result * 10 + (*digit++) - '0'; + result *= negative; + token.numberToken = result; + return TokNumber; } // ([eE][+-]? [0-9]+)? 
@@ -330,7 +502,7 @@ JSValue LiteralParser::parse(ParserState initialState) objectStack.append(object); TokenType type = m_lexer.next(); - if (type == TokString) { + if (type == TokString || (m_mode != StrictJSON && type == TokIdentifier)) { Lexer::LiteralParserToken identifierToken = m_lexer.currentToken(); // Check for colon @@ -338,10 +510,11 @@ JSValue LiteralParser::parse(ParserState initialState) return JSValue(); m_lexer.next(); - identifierStack.append(Identifier(m_exec, identifierToken.stringToken)); + identifierStack.append(makeIdentifier(identifierToken.stringToken, identifierToken.stringLength)); stateStack.append(DoParseObjectEndExpression); goto startParseExpression; - } else if (type != TokRBrace) + } + if (type != TokRBrace) return JSValue(); m_lexer.next(); lastValue = objectStack.last(); @@ -351,7 +524,7 @@ JSValue LiteralParser::parse(ParserState initialState) doParseObjectStartExpression: case DoParseObjectStartExpression: { TokenType type = m_lexer.next(); - if (type != TokString) + if (type != TokString && (m_mode == StrictJSON || type != TokIdentifier)) return JSValue(); Lexer::LiteralParserToken identifierToken = m_lexer.currentToken(); @@ -360,13 +533,13 @@ JSValue LiteralParser::parse(ParserState initialState) return JSValue(); m_lexer.next(); - identifierStack.append(Identifier(m_exec, identifierToken.stringToken)); + identifierStack.append(makeIdentifier(identifierToken.stringToken, identifierToken.stringLength)); stateStack.append(DoParseObjectEndExpression); goto startParseExpression; } case DoParseObjectEndExpression: { - asObject(objectStack.last())->putDirect(identifierStack.last(), lastValue); + asObject(objectStack.last())->putDirect(m_exec->globalData(), identifierStack.last(), lastValue); identifierStack.removeLast(); if (m_lexer.currentToken().type == TokComma) goto doParseObjectStartExpression; @@ -387,13 +560,13 @@ JSValue LiteralParser::parse(ParserState initialState) case TokString: { Lexer::LiteralParserToken stringToken = 
m_lexer.currentToken(); m_lexer.next(); - lastValue = jsString(m_exec, stringToken.stringToken); + lastValue = jsString(m_exec, makeIdentifier(stringToken.stringToken, stringToken.stringLength).ustring()); break; } case TokNumber: { Lexer::LiteralParserToken numberToken = m_lexer.currentToken(); m_lexer.next(); - lastValue = jsNumber(m_exec, numberToken.numberToken); + lastValue = jsNumber(numberToken.numberToken); break; } case TokNull: