/*
 * Copyright (C) 2009 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"
#include "LiteralParser.h"

#include "JSArray.h"
#include "JSString.h"
#include "Lexer.h"
#include "UStringBuilder.h"
#include <wtf/ASCIICType.h>
#include <wtf/dtoa.h>

namespace JSC {

static inline bool isJSONWhiteSpace(const UChar& c)
{
    // The JSON RFC 4627 defines a list of allowed characters to be considered
    // insignificant white space: http://www.ietf.org/rfc/rfc4627.txt (2. JSON Grammar).
    return c == ' ' || c == 0x9 || c == 0xA || c == 0xD;
}

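// tryJSONPParse recognizes the restricted JSONP shape this parser accepts: a
// sequence of statements such as
//
//     var data = {"a": 1};
//     window.results[0] = [1, 2];
//     callback({"ok": true});
//
// For each statement it records the assignment path (declare / dot / bracket
// lookup / call entries) and the parsed JSON value into `results`. Call-style
// entries are rejected when needsFullSourceInfo is set, and the function
// returns false as soon as the input deviates from this shape, presumably so
// the caller can fall back to the full JavaScript parser.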
bool LiteralParser::tryJSONPParse(Vector<JSONPData>& results, bool needsFullSourceInfo)
{
    if (m_lexer.next() != TokIdentifier)
        return false;
    do {
        Vector<JSONPPathEntry> path;
        // Unguarded next to start off the lexer
        Identifier name = Identifier(m_exec, m_lexer.currentToken().start, m_lexer.currentToken().end - m_lexer.currentToken().start);
        JSONPPathEntry entry;
        if (name == m_exec->globalData().propertyNames->varKeyword) {
            if (m_lexer.next() != TokIdentifier)
                return false;
            entry.m_type = JSONPPathEntryTypeDeclare;
            entry.m_pathEntryName = Identifier(m_exec, m_lexer.currentToken().start, m_lexer.currentToken().end - m_lexer.currentToken().start);
            path.append(entry);
        } else {
            entry.m_type = JSONPPathEntryTypeDot;
            entry.m_pathEntryName = Identifier(m_exec, m_lexer.currentToken().start, m_lexer.currentToken().end - m_lexer.currentToken().start);
            path.append(entry);
        }
        if (m_exec->globalData().lexer->isKeyword(entry.m_pathEntryName))
            return false;
        TokenType tokenType = m_lexer.next();
        while (tokenType != TokAssign) {
            switch (tokenType) {
            case TokLBracket: {
                entry.m_type = JSONPPathEntryTypeLookup;
                if (m_lexer.next() != TokNumber)
                    return false;
                double doubleIndex = m_lexer.currentToken().numberToken;
                int index = (int)doubleIndex;
                if (index != doubleIndex || index < 0)
                    return false;
                entry.m_pathIndex = index;
                if (m_lexer.next() != TokRBracket)
                    return false;
                break;
            }
            case TokDot: {
                entry.m_type = JSONPPathEntryTypeDot;
                if (m_lexer.next() != TokIdentifier)
                    return false;
                entry.m_pathEntryName = Identifier(m_exec, m_lexer.currentToken().start, m_lexer.currentToken().end - m_lexer.currentToken().start);
                break;
            }
            case TokLParen: {
                if (path.last().m_type != JSONPPathEntryTypeDot || needsFullSourceInfo)
                    return false;
                path.last().m_type = JSONPPathEntryTypeCall;
                entry = path.last();
                goto startJSON;
            }
            default:
                return false;
            }
            path.append(entry);
            tokenType = m_lexer.next();
        }
    startJSON:
        m_lexer.next();
        results.append(JSONPData());
        results.last().m_value.set(m_exec->globalData(), parse(StartParseExpression));
        if (!results.last().m_value)
            return false;
        results.last().m_path.swap(path);
        if (entry.m_type == JSONPPathEntryTypeCall) {
            if (m_lexer.currentToken().type != TokRParen)
                return false;
            m_lexer.next();
        }
        if (m_lexer.currentToken().type != TokSemi)
            break;
        m_lexer.next();
    } while (m_lexer.currentToken().type == TokIdentifier);
    return m_lexer.currentToken().type == TokEnd;
}

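// makeIdentifier keeps two small caches keyed by a name's first character
// (only for characters below MaximumCachableCharacter): m_shortIdentifiers
// for single-character names and m_recentIdentifiers for the most recently
// seen longer name starting with that character. JSON payloads tend to repeat
// the same keys for every element of an array, so this avoids redoing the
// Identifier hash lookup for each repetition.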
ALWAYS_INLINE const Identifier LiteralParser::makeIdentifier(const UChar* characters, size_t length)
{
    if (!length)
        return m_exec->globalData().propertyNames->emptyIdentifier;
    if (characters[0] >= MaximumCachableCharacter)
        return Identifier(&m_exec->globalData(), characters, length);

    if (length == 1) {
        if (!m_shortIdentifiers[characters[0]].isNull())
            return m_shortIdentifiers[characters[0]];
        m_shortIdentifiers[characters[0]] = Identifier(&m_exec->globalData(), characters, length);
        return m_shortIdentifiers[characters[0]];
    }
    if (!m_recentIdentifiers[characters[0]].isNull() && Identifier::equal(m_recentIdentifiers[characters[0]].impl(), characters, length))
        return m_recentIdentifiers[characters[0]];
    m_recentIdentifiers[characters[0]] = Identifier(&m_exec->globalData(), characters, length);
    return m_recentIdentifiers[characters[0]];
}

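// The lexer produces one token at a time, dispatching on the first
// non-whitespace character. It is templated on ParserMode: the mode decides
// whether single-quoted strings are allowed and how escapes are handled (see
// lexString), while tokens that strict JSON has no use for ('.', '=', ';',
// identifiers, parentheses) are still lexed here and left for the grammar in
// parse() or tryJSONPParse to reject.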
template <LiteralParser::ParserMode mode> LiteralParser::TokenType LiteralParser::Lexer::lex(LiteralParserToken& token)
{
    while (m_ptr < m_end && isJSONWhiteSpace(*m_ptr))
        ++m_ptr;

    ASSERT(m_ptr <= m_end);
    if (m_ptr >= m_end) {
        token.type = TokEnd;
        token.start = token.end = m_ptr;
        return TokEnd;
    }
    token.type = TokError;
    token.start = m_ptr;
    switch (*m_ptr) {
    case '[':
        token.type = TokLBracket;
        token.end = ++m_ptr;
        return TokLBracket;
    case ']':
        token.type = TokRBracket;
        token.end = ++m_ptr;
        return TokRBracket;
    case '(':
        token.type = TokLParen;
        token.end = ++m_ptr;
        return TokLParen;
    case ')':
        token.type = TokRParen;
        token.end = ++m_ptr;
        return TokRParen;
    case '{':
        token.type = TokLBrace;
        token.end = ++m_ptr;
        return TokLBrace;
    case '}':
        token.type = TokRBrace;
        token.end = ++m_ptr;
        return TokRBrace;
    case ',':
        token.type = TokComma;
        token.end = ++m_ptr;
        return TokComma;
    case ':':
        token.type = TokColon;
        token.end = ++m_ptr;
        return TokColon;
    case '"':
        return lexString<mode, '"'>(token);
    case 't':
        if (m_end - m_ptr >= 4 && m_ptr[1] == 'r' && m_ptr[2] == 'u' && m_ptr[3] == 'e') {
            m_ptr += 4;
            token.type = TokTrue;
            token.end = m_ptr;
            return TokTrue;
        }
        break;
    case 'f':
        if (m_end - m_ptr >= 5 && m_ptr[1] == 'a' && m_ptr[2] == 'l' && m_ptr[3] == 's' && m_ptr[4] == 'e') {
            m_ptr += 5;
            token.type = TokFalse;
            token.end = m_ptr;
            return TokFalse;
        }
        break;
    case 'n':
        if (m_end - m_ptr >= 4 && m_ptr[1] == 'u' && m_ptr[2] == 'l' && m_ptr[3] == 'l') {
            m_ptr += 4;
            token.type = TokNull;
            token.end = m_ptr;
            return TokNull;
        }
        break;
    case '-':
    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9':
        return lexNumber(token);
    }
    if (m_ptr < m_end) {
        if (*m_ptr == '.') {
            token.type = TokDot;
            token.end = ++m_ptr;
            return TokDot;
        }
        if (*m_ptr == '=') {
            token.type = TokAssign;
            token.end = ++m_ptr;
            return TokAssign;
        }
        if (*m_ptr == ';') {
            token.type = TokSemi;
            token.end = ++m_ptr;
            return TokSemi;
        }
        if (isASCIIAlpha(*m_ptr) || *m_ptr == '_' || *m_ptr == '$') {
            while (m_ptr < m_end && (isASCIIAlphanumeric(*m_ptr) || *m_ptr == '_' || *m_ptr == '$'))
                m_ptr++;
            token.stringToken = token.start;
            token.stringLength = m_ptr - token.start;
            token.type = TokIdentifier;
            token.end = m_ptr;
            return TokIdentifier;
        }
        if (*m_ptr == '\'') {
            if (mode == StrictJSON)
                return TokError;
            return lexString<mode, '\''>(token);
        }
    }
    return TokError;
}

LiteralParser::TokenType LiteralParser::Lexer::next()
{
    if (m_mode == NonStrictJSON)
        return lex<NonStrictJSON>(m_currentToken);
    if (m_mode == JSONP)
        return lex<JSONP>(m_currentToken);
    return lex<StrictJSON>(m_currentToken);
}

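// A "safe" string character can be copied through verbatim: anything at or
// above the space character that is neither a backslash nor the terminating
// quote (restricted to the Latin-1 range outside of strict JSON), plus tab.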
template <LiteralParser::ParserMode mode, UChar terminator> static inline bool isSafeStringCharacter(UChar c)
{
    return (c >= ' ' && (mode == LiteralParser::StrictJSON || c <= 0xff) && c != '\\' && c != terminator) || c == '\t';
}

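// lexString scans runs of safe characters and, when the string contains no
// escape sequences, leaves the token pointing straight into the source buffer
// (token.stringToken / token.stringLength) without copying. Only once an
// escape is seen does it accumulate the decoded text in a UStringBuilder and
// hand the token a freshly built stringBuffer instead.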
// "inline" is required here to help WINSCW compiler resolve specialized argument in templated functions.
template <LiteralParser::ParserMode mode, UChar terminator> inline LiteralParser::TokenType LiteralParser::Lexer::lexString(LiteralParserToken& token)
{
    ++m_ptr;
    const UChar* runStart = m_ptr;
    UStringBuilder builder;
    do {
        runStart = m_ptr;
        while (m_ptr < m_end && isSafeStringCharacter<mode, terminator>(*m_ptr))
            ++m_ptr;
        if (builder.length())
            builder.append(runStart, m_ptr - runStart);
        if ((mode != NonStrictJSON) && m_ptr < m_end && *m_ptr == '\\') {
            if (builder.isEmpty() && runStart < m_ptr)
                builder.append(runStart, m_ptr - runStart);
            ++m_ptr;
            if (m_ptr >= m_end)
                return TokError;
            switch (*m_ptr) {
            case '"':
                builder.append('"');
                m_ptr++;
                break;
            case '\\':
                builder.append('\\');
                m_ptr++;
                break;
            case '/':
                builder.append('/');
                m_ptr++;
                break;
            case 'b':
                builder.append('\b');
                m_ptr++;
                break;
            case 'f':
                builder.append('\f');
                m_ptr++;
                break;
            case 'n':
                builder.append('\n');
                m_ptr++;
                break;
            case 'r':
                builder.append('\r');
                m_ptr++;
                break;
            case 't':
                builder.append('\t');
                m_ptr++;
                break;

            case 'u':
                if ((m_end - m_ptr) < 5) // uNNNN == 5 characters
                    return TokError;
                for (int i = 1; i < 5; i++) {
                    if (!isASCIIHexDigit(m_ptr[i]))
                        return TokError;
                }
                builder.append(JSC::Lexer::convertUnicode(m_ptr[1], m_ptr[2], m_ptr[3], m_ptr[4]));
                m_ptr += 5;
                break;

            default:
                if (*m_ptr == '\'' && mode != StrictJSON) {
                    builder.append('\'');
                    m_ptr++;
                    break;
                }
                return TokError;
            }
        }
    } while ((mode != NonStrictJSON) && m_ptr != runStart && (m_ptr < m_end) && *m_ptr != terminator);

    if (m_ptr >= m_end || *m_ptr != terminator)
        return TokError;

    if (builder.isEmpty()) {
        token.stringBuffer = UString();
        token.stringToken = runStart;
        token.stringLength = m_ptr - runStart;
    } else {
        token.stringBuffer = builder.toUString();
        token.stringToken = token.stringBuffer.characters();
        token.stringLength = token.stringBuffer.length();
    }
    token.type = TokString;
    token.end = ++m_ptr;
    return TokString;
}

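// lexNumber validates the token against the JSON number grammar (spelled out
// below) and then converts it. Short integers (fewer than ten characters
// including any leading sign, no fraction, no exponent) are folded into an
// int inline; everything else is copied into a NUL-terminated char buffer and
// handed to WTF::strtod.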
LiteralParser::TokenType LiteralParser::Lexer::lexNumber(LiteralParserToken& token)
{
    // ES5 and json.org define numbers as
    // number
    //     int
    //     int frac? exp?
    //
    // int
    //     -? 0
    //     -? digit1-9 digits?
    //
    // digits
    //     digit digits?
    //
    // -?(0 | [1-9][0-9]*) ('.' [0-9]+)? ([eE][+-]? [0-9]+)?

    if (m_ptr < m_end && *m_ptr == '-') // -?
        ++m_ptr;

    // (0 | [1-9][0-9]*)
    if (m_ptr < m_end && *m_ptr == '0') // 0
        ++m_ptr;
    else if (m_ptr < m_end && *m_ptr >= '1' && *m_ptr <= '9') { // [1-9]
        ++m_ptr;
        // [0-9]*
        while (m_ptr < m_end && isASCIIDigit(*m_ptr))
            ++m_ptr;
    } else
        return TokError;

    // ('.' [0-9]+)?
    if (m_ptr < m_end && *m_ptr == '.') {
        ++m_ptr;
        // [0-9]+
        if (m_ptr >= m_end || !isASCIIDigit(*m_ptr))
            return TokError;

        ++m_ptr;
        while (m_ptr < m_end && isASCIIDigit(*m_ptr))
            ++m_ptr;
    } else if (m_ptr < m_end && (*m_ptr != 'e' && *m_ptr != 'E') && (m_ptr - token.start) < 10) {
        int result = 0;
        token.type = TokNumber;
        token.end = m_ptr;
        const UChar* digit = token.start;
        int negative = 1;
        if (*digit == '-') {
            negative = -1;
            digit++;
        }

        while (digit < m_ptr)
            result = result * 10 + (*digit++) - '0';
        result *= negative;
        token.numberToken = result;
        return TokNumber;
    }

    // ([eE][+-]? [0-9]+)?
    if (m_ptr < m_end && (*m_ptr == 'e' || *m_ptr == 'E')) { // [eE]
        ++m_ptr;

        // [-+]?
        if (m_ptr < m_end && (*m_ptr == '-' || *m_ptr == '+'))
            ++m_ptr;

        // [0-9]+
        if (m_ptr >= m_end || !isASCIIDigit(*m_ptr))
            return TokError;

        ++m_ptr;
        while (m_ptr < m_end && isASCIIDigit(*m_ptr))
            ++m_ptr;
    }

    token.type = TokNumber;
    token.end = m_ptr;
    Vector<char, 64> buffer(token.end - token.start + 1);
    int i;
    for (i = 0; i < token.end - token.start; i++) {
        ASSERT(static_cast<char>(token.start[i]) == token.start[i]);
        buffer[i] = static_cast<char>(token.start[i]);
    }
    buffer[i] = 0;
    char* end;
    token.numberToken = WTF::strtod(buffer.data(), &end);
    ASSERT(buffer.data() + (token.end - token.start) == end);
    return TokNumber;
}

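// parse() is written as an explicit state machine rather than a recursive
// descent parser, so deeply nested input cannot overflow the native stack.
// objectStack holds the arrays and objects currently under construction,
// identifierStack holds property names waiting for their values, and
// stateStack holds the state to resume once the current sub-expression
// (whose result lands in lastValue) has been parsed.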
JSValue LiteralParser::parse(ParserState initialState)
{
    ParserState state = initialState;
    MarkedArgumentBuffer objectStack;
    JSValue lastValue;
    Vector<ParserState, 16> stateStack;
    Vector<Identifier, 16> identifierStack;
    while (1) {
        switch (state) {
        startParseArray:
        case StartParseArray: {
            JSArray* array = constructEmptyArray(m_exec);
            objectStack.append(array);
            // fallthrough
        }
        doParseArrayStartExpression:
        case DoParseArrayStartExpression: {
            TokenType lastToken = m_lexer.currentToken().type;
            if (m_lexer.next() == TokRBracket) {
                if (lastToken == TokComma)
                    return JSValue();
                m_lexer.next();
                lastValue = objectStack.last();
                objectStack.removeLast();
                break;
            }

            stateStack.append(DoParseArrayEndExpression);
            goto startParseExpression;
        }
        case DoParseArrayEndExpression: {
            asArray(objectStack.last())->push(m_exec, lastValue);

            if (m_lexer.currentToken().type == TokComma)
                goto doParseArrayStartExpression;

            if (m_lexer.currentToken().type != TokRBracket)
                return JSValue();

            m_lexer.next();
            lastValue = objectStack.last();
            objectStack.removeLast();
            break;
        }
        startParseObject:
        case StartParseObject: {
            JSObject* object = constructEmptyObject(m_exec);
            objectStack.append(object);

            TokenType type = m_lexer.next();
            if (type == TokString || (m_mode != StrictJSON && type == TokIdentifier)) {
                Lexer::LiteralParserToken identifierToken = m_lexer.currentToken();

                // Check for colon
                if (m_lexer.next() != TokColon)
                    return JSValue();

                m_lexer.next();
                identifierStack.append(makeIdentifier(identifierToken.stringToken, identifierToken.stringLength));
                stateStack.append(DoParseObjectEndExpression);
                goto startParseExpression;
            }
            if (type != TokRBrace)
                return JSValue();
            m_lexer.next();
            lastValue = objectStack.last();
            objectStack.removeLast();
            break;
        }
        doParseObjectStartExpression:
        case DoParseObjectStartExpression: {
            TokenType type = m_lexer.next();
            if (type != TokString && (m_mode == StrictJSON || type != TokIdentifier))
                return JSValue();
            Lexer::LiteralParserToken identifierToken = m_lexer.currentToken();

            // Check for colon
            if (m_lexer.next() != TokColon)
                return JSValue();

            m_lexer.next();
            identifierStack.append(makeIdentifier(identifierToken.stringToken, identifierToken.stringLength));
            stateStack.append(DoParseObjectEndExpression);
            goto startParseExpression;
        }
        case DoParseObjectEndExpression: {
            asObject(objectStack.last())->putDirect(m_exec->globalData(), identifierStack.last(), lastValue);
            identifierStack.removeLast();
            if (m_lexer.currentToken().type == TokComma)
                goto doParseObjectStartExpression;
            if (m_lexer.currentToken().type != TokRBrace)
                return JSValue();
            m_lexer.next();
            lastValue = objectStack.last();
            objectStack.removeLast();
            break;
        }
        startParseExpression:
        case StartParseExpression: {
            switch (m_lexer.currentToken().type) {
            case TokLBracket:
                goto startParseArray;
            case TokLBrace:
                goto startParseObject;
            case TokString: {
                Lexer::LiteralParserToken stringToken = m_lexer.currentToken();
                m_lexer.next();
                lastValue = jsString(m_exec, makeIdentifier(stringToken.stringToken, stringToken.stringLength).ustring());
                break;
            }
            case TokNumber: {
                Lexer::LiteralParserToken numberToken = m_lexer.currentToken();
                m_lexer.next();
                lastValue = jsNumber(numberToken.numberToken);
                break;
            }
            case TokNull:
                m_lexer.next();
                lastValue = jsNull();
                break;

            case TokTrue:
                m_lexer.next();
                lastValue = jsBoolean(true);
                break;

            case TokFalse:
                m_lexer.next();
                lastValue = jsBoolean(false);
                break;

            default:
                // Error
                return JSValue();
            }
            break;
        }
        case StartParseStatement: {
            switch (m_lexer.currentToken().type) {
            case TokLBracket:
            case TokNumber:
            case TokString:
                goto startParseExpression;

            case TokLParen: {
                m_lexer.next();
                stateStack.append(StartParseStatementEndStatement);
                goto startParseExpression;
            }
            default:
                return JSValue();
            }
        }
        case StartParseStatementEndStatement: {
            ASSERT(stateStack.isEmpty());
            if (m_lexer.currentToken().type != TokRParen)
                return JSValue();
            if (m_lexer.next() == TokEnd)
                return lastValue;
            return JSValue();
        }
        default:
            ASSERT_NOT_REACHED();
        }
        if (stateStack.isEmpty())
            return lastValue;
        state = stateStack.last();
        stateStack.removeLast();
        continue;
    }
}

}
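
// A minimal sketch of how this parser is typically driven, for illustration
// only; the constructor and tryLiteralParse used below are declared in
// LiteralParser.h, and their exact signatures should be checked there:
//
//     // Attempt a fast, strict-JSON parse of `source` (a UString of JSON text).
//     LiteralParser parser(exec, source.characters(), source.length(), LiteralParser::StrictJSON);
//     JSValue result = parser.tryLiteralParse();
//     if (!result) {
//         // Not valid JSON for this parser; a caller would fall back to the
//         // full JavaScript parser (or raise a SyntaxError for JSON.parse).
//     }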