X-Git-Url: https://git.saurik.com/apple/javascriptcore.git/blobdiff_plain/ba379fdc102753d6be2c4d937058fe40257329fe..14957cd040308e3eeec43d26bae5d76da13fcd85:/runtime/LiteralParser.cpp diff --git a/runtime/LiteralParser.cpp b/runtime/LiteralParser.cpp index 798013a..ed42d0d 100644 --- a/runtime/LiteralParser.cpp +++ b/runtime/LiteralParser.cpp @@ -29,14 +29,118 @@ #include "JSArray.h" #include "JSString.h" #include "Lexer.h" +#include "UStringBuilder.h" #include #include namespace JSC { -LiteralParser::TokenType LiteralParser::Lexer::lex(LiteralParserToken& token) +static inline bool isJSONWhiteSpace(const UChar& c) { - while (m_ptr < m_end && isASCIISpace(*m_ptr)) + // The JSON RFC 4627 defines a list of allowed characters to be considered + // insignificant white space: http://www.ietf.org/rfc/rfc4627.txt (2. JSON Grammar). + return c == ' ' || c == 0x9 || c == 0xA || c == 0xD; +} + +bool LiteralParser::tryJSONPParse(Vector& results, bool needsFullSourceInfo) /* Tries to parse the input as a sequence of JSONP statements of the form "[var] name(.name | [index])* = value" or "name(.name | [index])*(value)", separated by ';'. One JSONPData (parsed value plus access path) is appended to results per statement. The call form is rejected when needsFullSourceInfo is set. Returns true only if every statement parses and the lexer then stands on TokEnd; results may already hold entries when false is returned. */ +{ + if (m_lexer.next() != TokIdentifier) + return false; + do { + Vector path; + // Unguarded next to start off the lexer + Identifier name = Identifier(m_exec, m_lexer.currentToken().start, m_lexer.currentToken().end - m_lexer.currentToken().start); + JSONPPathEntry entry; + if (name == m_exec->globalData().propertyNames->varKeyword) { + if (m_lexer.next() != TokIdentifier) + return false; + entry.m_type = JSONPPathEntryTypeDeclare; + entry.m_pathEntryName = Identifier(m_exec, m_lexer.currentToken().start, m_lexer.currentToken().end - m_lexer.currentToken().start); + path.append(entry); + } else { + entry.m_type = JSONPPathEntryTypeDot; + entry.m_pathEntryName = Identifier(m_exec, m_lexer.currentToken().start, m_lexer.currentToken().end - m_lexer.currentToken().start); + path.append(entry); + } + if (m_exec->globalData().lexer->isKeyword(entry.m_pathEntryName)) + return false; /* the base name of a path must not be a keyword */ + TokenType tokenType = m_lexer.next(); + while (tokenType != TokAssign) { /* collect path components until the '=' that introduces the value */ + switch (tokenType) { + case TokLBracket: { + 
entry.m_type = JSONPPathEntryTypeLookup; + if (m_lexer.next() != TokNumber) + return false; + double doubleIndex = m_lexer.currentToken().numberToken; + int index = (int)doubleIndex; /* NOTE(review): the cast happens before the range check below; confirm that a numberToken outside the range of int cannot reach this point */ + if (index != doubleIndex || index < 0) + return false; + entry.m_pathIndex = index; + if (m_lexer.next() != TokRBracket) + return false; + break; + } + case TokDot: { + entry.m_type = JSONPPathEntryTypeDot; + if (m_lexer.next() != TokIdentifier) + return false; + entry.m_pathEntryName = Identifier(m_exec, m_lexer.currentToken().start, m_lexer.currentToken().end - m_lexer.currentToken().start); + break; + } + case TokLParen: { + if (path.last().m_type != JSONPPathEntryTypeDot || needsFullSourceInfo) + return false; + path.last().m_type = JSONPPathEntryTypeCall; /* the last dotted name becomes the function to call */ + entry = path.last(); + goto startJSON; + } + default: + return false; + } + path.append(entry); + tokenType = m_lexer.next(); + } + startJSON: + m_lexer.next(); + results.append(JSONPData()); + results.last().m_value.set(m_exec->globalData(), parse(StartParseExpression)); + if (!results.last().m_value) + return false; + results.last().m_path.swap(path); + if (entry.m_type == JSONPPathEntryTypeCall) { /* the call form must be closed by ')' */ + if (m_lexer.currentToken().type != TokRParen) + return false; + m_lexer.next(); + } + if (m_lexer.currentToken().type != TokSemi) + break; + m_lexer.next(); + } while (m_lexer.currentToken().type == TokIdentifier); + return m_lexer.currentToken().type == TokEnd; +} + +ALWAYS_INLINE const Identifier LiteralParser::makeIdentifier(const UChar* characters, size_t length) /* Returns an Identifier for the given characters, reusing cached ones where possible: m_shortIdentifiers holds single-character names and m_recentIdentifiers holds the most recent longer name, both indexed by first character. Names whose first character is >= MaximumCachableCharacter bypass the caches; a zero length yields the empty identifier. */ +{ + if (!length) + return m_exec->globalData().propertyNames->emptyIdentifier; + if (characters[0] >= MaximumCachableCharacter) + return Identifier(&m_exec->globalData(), characters, length); + + if (length == 1) { + if (!m_shortIdentifiers[characters[0]].isNull()) + return m_shortIdentifiers[characters[0]]; + m_shortIdentifiers[characters[0]] = Identifier(&m_exec->globalData(), characters, length); + return m_shortIdentifiers[characters[0]]; + } + if 
(!m_recentIdentifiers[characters[0]].isNull() && Identifier::equal(m_recentIdentifiers[characters[0]].impl(), characters, length)) + return m_recentIdentifiers[characters[0]]; + m_recentIdentifiers[characters[0]] = Identifier(&m_exec->globalData(), characters, length); + return m_recentIdentifiers[characters[0]]; +} + +template LiteralParser::TokenType LiteralParser::Lexer::lex(LiteralParserToken& token) /* Skips JSON white space, then scans one token starting at m_ptr into token and returns its type. The returned type must always equal token.type, because callers use either one. Returns TokError for input that matches no token. */ +{ + while (m_ptr < m_end && isJSONWhiteSpace(*m_ptr)) ++m_ptr; ASSERT(m_ptr <= m_end); @@ -59,11 +163,11 @@ LiteralParser::TokenType LiteralParser::Lexer::lex(LiteralParserToken& token) case '(': token.type = TokLParen; token.end = ++m_ptr; - return TokLBracket; + return TokLParen; case ')': token.type = TokRParen; token.end = ++m_ptr; - return TokRBracket; + return TokRParen; case '{': token.type = TokLBrace; token.end = ++m_ptr; @@ -81,9 +185,7 @@ LiteralParser::TokenType LiteralParser::Lexer::lex(LiteralParserToken& token) token.end = ++m_ptr; return TokColon; case '"': - if (m_mode == StrictJSON) - return lexString(token); - return lexString(token); + return lexString(token); case 't': if (m_end - m_ptr >= 4 && m_ptr[1] == 'r' && m_ptr[2] == 'u' && m_ptr[3] == 'e') { m_ptr += 4; @@ -107,7 +209,7 @@ LiteralParser::TokenType LiteralParser::Lexer::lex(LiteralParserToken& token) token.end = m_ptr; return TokNull; } - break; + break; case '-': case '0': case '1': @@ -121,60 +223,103 @@ LiteralParser::TokenType LiteralParser::Lexer::lex(LiteralParserToken& token) case '9': return lexNumber(token); } + if (m_ptr < m_end) { /* tokens that only occur in JSONP / non-strict input */ + if (*m_ptr == '.') { + token.type = TokDot; + token.end = ++m_ptr; + return TokDot; + } + if (*m_ptr == '=') { + token.type = TokAssign; + token.end = ++m_ptr; + return TokAssign; + } + if (*m_ptr == ';') { + token.type = TokSemi; + token.end = ++m_ptr; + return TokSemi; /* must agree with token.type: tryJSONPParse loops on the value returned by next() until it sees TokAssign, so returning TokAssign here made ';' behave like '=' */ + } + if (isASCIIAlpha(*m_ptr) || *m_ptr == '_' || *m_ptr == '$') { /* identifier: [A-Za-z_$][A-Za-z0-9_$]* */ + while (m_ptr < m_end && (isASCIIAlphanumeric(*m_ptr) || *m_ptr == '_' || *m_ptr == '$')) + m_ptr++; + 
token.stringToken = token.start; /* identifier text is referenced in place, not copied */ + token.stringLength = m_ptr - token.start; + token.type = TokIdentifier; + token.end = m_ptr; + return TokIdentifier; + } + if (*m_ptr == '\'') { /* single-quoted strings are rejected in StrictJSON mode */ + if (mode == StrictJSON) + return TokError; + return lexString(token); + } + } return TokError; } -template static inline bool isSafeStringCharacter(UChar c) +LiteralParser::TokenType LiteralParser::Lexer::next() /* Lexes the next token into m_currentToken and returns its type; the three branches select on m_mode. */ +{ + if (m_mode == NonStrictJSON) + return lex(m_currentToken); + if (m_mode == JSONP) + return lex(m_currentToken); + return lex(m_currentToken); +} + +template static inline bool isSafeStringCharacter(UChar c) { - return (c >= ' ' && (mode == LiteralParser::StrictJSON || c <= 0xff) && c != '\\' && c != '"') || c == '\t'; + return (c >= ' ' && (mode == LiteralParser::StrictJSON || c <= 0xff) && c != '\\' && c != terminator) || c == '\t'; } -template LiteralParser::TokenType LiteralParser::Lexer::lexString(LiteralParserToken& token) +// "inline" is required here to help WINSCW compiler resolve specialized argument in templated functions. 
+template inline LiteralParser::TokenType LiteralParser::Lexer::lexString(LiteralParserToken& token) /* Scans a string literal that ends at terminator. Runs of safe characters are copied into the builder only once an escape has been seen, so a string without escapes is returned as a pointer/length pair into the source buffer and token.stringBuffer stays empty. Returns TokError on a bad escape or a missing terminator. */ { ++m_ptr; - const UChar* runStart; - token.stringToken = UString(); + const UChar* runStart = m_ptr; + UStringBuilder builder; do { runStart = m_ptr; - while (m_ptr < m_end && isSafeStringCharacter(*m_ptr)) + while (m_ptr < m_end && isSafeStringCharacter(*m_ptr)) ++m_ptr; - if (runStart < m_ptr) - token.stringToken.append(runStart, m_ptr - runStart); - if ((mode == StrictJSON) && m_ptr < m_end && *m_ptr == '\\') { + if (builder.length()) + builder.append(runStart, m_ptr - runStart); + if ((mode != NonStrictJSON) && m_ptr < m_end && *m_ptr == '\\') { + if (builder.isEmpty() && runStart < m_ptr) + builder.append(runStart, m_ptr - runStart); /* first escape seen: flush the run that was skipped above while the builder was still empty */ ++m_ptr; if (m_ptr >= m_end) return TokError; switch (*m_ptr) { case '"': - token.stringToken.append('"'); + builder.append('"'); m_ptr++; break; case '\\': - token.stringToken.append('\\'); + builder.append('\\'); m_ptr++; break; case '/': - token.stringToken.append('/'); + builder.append('/'); m_ptr++; break; case 'b': - token.stringToken.append('\b'); + builder.append('\b'); m_ptr++; break; case 'f': - token.stringToken.append('\f'); + builder.append('\f'); m_ptr++; break; case 'n': - token.stringToken.append('\n'); + builder.append('\n'); m_ptr++; break; case 'r': - token.stringToken.append('\r'); + builder.append('\r'); m_ptr++; break; case 't': - token.stringToken.append('\t'); + builder.append('\t'); m_ptr++; break; @@ -185,19 +330,33 @@ template LiteralParser::TokenType LiteralParser if (!isASCIIHexDigit(m_ptr[i])) return TokError; } - token.stringToken.append(JSC::Lexer::convertUnicode(m_ptr[1], m_ptr[2], m_ptr[3], m_ptr[4])); + builder.append(JSC::Lexer::convertUnicode(m_ptr[1], m_ptr[2], m_ptr[3], m_ptr[4])); m_ptr += 5; break; default: + if (*m_ptr == '\'' && mode != StrictJSON) { + builder.append('\''); + m_ptr++; + break; + } return TokError; } } - } while ((mode == StrictJSON) && m_ptr != runStart && (m_ptr < m_end) 
&& *m_ptr != '"'); + } while ((mode != NonStrictJSON) && m_ptr != runStart && (m_ptr < m_end) && *m_ptr != terminator); - if (m_ptr >= m_end || *m_ptr != '"') + if (m_ptr >= m_end || *m_ptr != terminator) return TokError; + if (builder.isEmpty()) { + token.stringBuffer = UString(); + token.stringToken = runStart; + token.stringLength = m_ptr - runStart; + } else { + token.stringBuffer = builder.toUString(); + token.stringToken = token.stringBuffer.characters(); + token.stringLength = token.stringBuffer.length(); + } token.type = TokString; token.end = ++m_ptr; return TokString; @@ -243,6 +402,22 @@ LiteralParser::TokenType LiteralParser::Lexer::lexNumber(LiteralParserToken& tok ++m_ptr; while (m_ptr < m_end && isASCIIDigit(*m_ptr)) ++m_ptr; + } else if (m_ptr < m_end && (*m_ptr != 'e' && *m_ptr != 'E') && (m_ptr - token.start) < 10) { + int result = 0; /* fast path: a literal of fewer than 10 characters with no exponent following is converted directly; at most 9 digits cannot overflow int */ + token.type = TokNumber; + token.end = m_ptr; + const UChar* digit = token.start; + int negative = 1; + if (*digit == '-') { + negative = -1; + digit++; + } + + while (digit < m_ptr) + result = result * 10 + (*digit++) - '0'; + result *= negative; + token.numberToken = (!result && negative < 0) ? -0.0 : result; /* "-0" must produce negative zero, which the int accumulator cannot represent */ + return TokNumber; } // ([eE][+-]? [0-9]+)? 
@@ -294,7 +469,10 @@ JSValue LiteralParser::parse(ParserState initialState) } doParseArrayStartExpression: case DoParseArrayStartExpression: { + TokenType lastToken = m_lexer.currentToken().type; /* remembered so that a ']' directly after a ',' can be rejected below */ if (m_lexer.next() == TokRBracket) { + if (lastToken == TokComma) + return JSValue(); /* ',' immediately followed by ']' fails the parse */ m_lexer.next(); lastValue = objectStack.last(); objectStack.removeLast(); @@ -324,7 +502,7 @@ JSValue LiteralParser::parse(ParserState initialState) objectStack.append(object); TokenType type = m_lexer.next(); - if (type == TokString) { + if (type == TokString || (m_mode != StrictJSON && type == TokIdentifier)) { /* outside StrictJSON mode an unquoted identifier is accepted as a property name */ Lexer::LiteralParserToken identifierToken = m_lexer.currentToken(); // Check for colon @@ -332,10 +510,11 @@ JSValue LiteralParser::parse(ParserState initialState) return JSValue(); m_lexer.next(); - identifierStack.append(Identifier(m_exec, identifierToken.stringToken)); + identifierStack.append(makeIdentifier(identifierToken.stringToken, identifierToken.stringLength)); stateStack.append(DoParseObjectEndExpression); goto startParseExpression; - } else if (type != TokRBrace) + } + if (type != TokRBrace) return JSValue(); m_lexer.next(); lastValue = objectStack.last(); @@ -345,7 +524,7 @@ JSValue LiteralParser::parse(ParserState initialState) doParseObjectStartExpression: case DoParseObjectStartExpression: { TokenType type = m_lexer.next(); - if (type != TokString) + if (type != TokString && (m_mode == StrictJSON || type != TokIdentifier)) /* same rule as above: identifiers are valid property names only outside StrictJSON mode */ return JSValue(); Lexer::LiteralParserToken identifierToken = m_lexer.currentToken(); @@ -354,13 +533,13 @@ JSValue LiteralParser::parse(ParserState initialState) return JSValue(); m_lexer.next(); - identifierStack.append(Identifier(m_exec, identifierToken.stringToken)); + identifierStack.append(makeIdentifier(identifierToken.stringToken, identifierToken.stringLength)); stateStack.append(DoParseObjectEndExpression); goto startParseExpression; } case DoParseObjectEndExpression: { - asObject(objectStack.last())->putDirect(identifierStack.last(), 
lastValue); + asObject(objectStack.last())->putDirect(m_exec->globalData(), identifierStack.last(), lastValue); identifierStack.removeLast(); if (m_lexer.currentToken().type == TokComma) goto doParseObjectStartExpression; @@ -381,13 +560,13 @@ JSValue LiteralParser::parse(ParserState initialState) case TokString: { Lexer::LiteralParserToken stringToken = m_lexer.currentToken(); m_lexer.next(); - lastValue = jsString(m_exec, stringToken.stringToken); + lastValue = jsString(m_exec, makeIdentifier(stringToken.stringToken, stringToken.stringLength).ustring()); /* string values are built via makeIdentifier, i.e. through its identifier caches */ break; } case TokNumber: { Lexer::LiteralParserToken numberToken = m_lexer.currentToken(); m_lexer.next(); - lastValue = jsNumber(m_exec, numberToken.numberToken); + lastValue = jsNumber(numberToken.numberToken); break; } case TokNull: