]> git.saurik.com Git - apple/javascriptcore.git/blob - runtime/LiteralParser.cpp
aa1e5ed9f63e63a76731b9cc068905470d54179e
[apple/javascriptcore.git] / runtime / LiteralParser.cpp
1 /*
2 * Copyright (C) 2009 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26 #include "config.h"
27 #include "LiteralParser.h"
28
29 #include "JSArray.h"
30 #include "JSString.h"
31 #include "Lexer.h"
32 #include "StringBuilder.h"
33 #include <wtf/ASCIICType.h>
34 #include <wtf/dtoa.h>
35
36 namespace JSC {
37
38 LiteralParser::TokenType LiteralParser::Lexer::lex(LiteralParserToken& token)
39 {
40 while (m_ptr < m_end && isASCIISpace(*m_ptr))
41 ++m_ptr;
42
43 ASSERT(m_ptr <= m_end);
44 if (m_ptr >= m_end) {
45 token.type = TokEnd;
46 token.start = token.end = m_ptr;
47 return TokEnd;
48 }
49 token.type = TokError;
50 token.start = m_ptr;
51 switch (*m_ptr) {
52 case '[':
53 token.type = TokLBracket;
54 token.end = ++m_ptr;
55 return TokLBracket;
56 case ']':
57 token.type = TokRBracket;
58 token.end = ++m_ptr;
59 return TokRBracket;
60 case '(':
61 token.type = TokLParen;
62 token.end = ++m_ptr;
63 return TokLBracket;
64 case ')':
65 token.type = TokRParen;
66 token.end = ++m_ptr;
67 return TokRBracket;
68 case '{':
69 token.type = TokLBrace;
70 token.end = ++m_ptr;
71 return TokLBrace;
72 case '}':
73 token.type = TokRBrace;
74 token.end = ++m_ptr;
75 return TokRBrace;
76 case ',':
77 token.type = TokComma;
78 token.end = ++m_ptr;
79 return TokComma;
80 case ':':
81 token.type = TokColon;
82 token.end = ++m_ptr;
83 return TokColon;
84 case '"':
85 if (m_mode == StrictJSON)
86 return lexString<StrictJSON>(token);
87 return lexString<NonStrictJSON>(token);
88 case 't':
89 if (m_end - m_ptr >= 4 && m_ptr[1] == 'r' && m_ptr[2] == 'u' && m_ptr[3] == 'e') {
90 m_ptr += 4;
91 token.type = TokTrue;
92 token.end = m_ptr;
93 return TokTrue;
94 }
95 break;
96 case 'f':
97 if (m_end - m_ptr >= 5 && m_ptr[1] == 'a' && m_ptr[2] == 'l' && m_ptr[3] == 's' && m_ptr[4] == 'e') {
98 m_ptr += 5;
99 token.type = TokFalse;
100 token.end = m_ptr;
101 return TokFalse;
102 }
103 break;
104 case 'n':
105 if (m_end - m_ptr >= 4 && m_ptr[1] == 'u' && m_ptr[2] == 'l' && m_ptr[3] == 'l') {
106 m_ptr += 4;
107 token.type = TokNull;
108 token.end = m_ptr;
109 return TokNull;
110 }
111 break;
112 case '-':
113 case '0':
114 case '1':
115 case '2':
116 case '3':
117 case '4':
118 case '5':
119 case '6':
120 case '7':
121 case '8':
122 case '9':
123 return lexNumber(token);
124 }
125 return TokError;
126 }
127
128 template <LiteralParser::ParserMode mode> static inline bool isSafeStringCharacter(UChar c)
129 {
130 return (c >= ' ' && (mode == LiteralParser::StrictJSON || c <= 0xff) && c != '\\' && c != '"') || c == '\t';
131 }
132
133 // "inline" is required here to help WINSCW compiler resolve specialized argument in templated functions.
134 template <LiteralParser::ParserMode mode> inline LiteralParser::TokenType LiteralParser::Lexer::lexString(LiteralParserToken& token)
135 {
136 ++m_ptr;
137 const UChar* runStart;
138 StringBuilder builder;
139 do {
140 runStart = m_ptr;
141 while (m_ptr < m_end && isSafeStringCharacter<mode>(*m_ptr))
142 ++m_ptr;
143 if (runStart < m_ptr)
144 builder.append(runStart, m_ptr - runStart);
145 if ((mode == StrictJSON) && m_ptr < m_end && *m_ptr == '\\') {
146 ++m_ptr;
147 if (m_ptr >= m_end)
148 return TokError;
149 switch (*m_ptr) {
150 case '"':
151 builder.append('"');
152 m_ptr++;
153 break;
154 case '\\':
155 builder.append('\\');
156 m_ptr++;
157 break;
158 case '/':
159 builder.append('/');
160 m_ptr++;
161 break;
162 case 'b':
163 builder.append('\b');
164 m_ptr++;
165 break;
166 case 'f':
167 builder.append('\f');
168 m_ptr++;
169 break;
170 case 'n':
171 builder.append('\n');
172 m_ptr++;
173 break;
174 case 'r':
175 builder.append('\r');
176 m_ptr++;
177 break;
178 case 't':
179 builder.append('\t');
180 m_ptr++;
181 break;
182
183 case 'u':
184 if ((m_end - m_ptr) < 5) // uNNNN == 5 characters
185 return TokError;
186 for (int i = 1; i < 5; i++) {
187 if (!isASCIIHexDigit(m_ptr[i]))
188 return TokError;
189 }
190 builder.append(JSC::Lexer::convertUnicode(m_ptr[1], m_ptr[2], m_ptr[3], m_ptr[4]));
191 m_ptr += 5;
192 break;
193
194 default:
195 return TokError;
196 }
197 }
198 } while ((mode == StrictJSON) && m_ptr != runStart && (m_ptr < m_end) && *m_ptr != '"');
199
200 if (m_ptr >= m_end || *m_ptr != '"')
201 return TokError;
202
203 token.stringToken = builder.release();
204 token.type = TokString;
205 token.end = ++m_ptr;
206 return TokString;
207 }
208
209 LiteralParser::TokenType LiteralParser::Lexer::lexNumber(LiteralParserToken& token)
210 {
211 // ES5 and json.org define numbers as
212 // number
213 // int
214 // int frac? exp?
215 //
216 // int
217 // -? 0
218 // -? digit1-9 digits?
219 //
220 // digits
221 // digit digits?
222 //
223 // -?(0 | [1-9][0-9]*) ('.' [0-9]+)? ([eE][+-]? [0-9]+)?
224
225 if (m_ptr < m_end && *m_ptr == '-') // -?
226 ++m_ptr;
227
228 // (0 | [1-9][0-9]*)
229 if (m_ptr < m_end && *m_ptr == '0') // 0
230 ++m_ptr;
231 else if (m_ptr < m_end && *m_ptr >= '1' && *m_ptr <= '9') { // [1-9]
232 ++m_ptr;
233 // [0-9]*
234 while (m_ptr < m_end && isASCIIDigit(*m_ptr))
235 ++m_ptr;
236 } else
237 return TokError;
238
239 // ('.' [0-9]+)?
240 if (m_ptr < m_end && *m_ptr == '.') {
241 ++m_ptr;
242 // [0-9]+
243 if (m_ptr >= m_end || !isASCIIDigit(*m_ptr))
244 return TokError;
245
246 ++m_ptr;
247 while (m_ptr < m_end && isASCIIDigit(*m_ptr))
248 ++m_ptr;
249 }
250
251 // ([eE][+-]? [0-9]+)?
252 if (m_ptr < m_end && (*m_ptr == 'e' || *m_ptr == 'E')) { // [eE]
253 ++m_ptr;
254
255 // [-+]?
256 if (m_ptr < m_end && (*m_ptr == '-' || *m_ptr == '+'))
257 ++m_ptr;
258
259 // [0-9]+
260 if (m_ptr >= m_end || !isASCIIDigit(*m_ptr))
261 return TokError;
262
263 ++m_ptr;
264 while (m_ptr < m_end && isASCIIDigit(*m_ptr))
265 ++m_ptr;
266 }
267
268 token.type = TokNumber;
269 token.end = m_ptr;
270 Vector<char, 64> buffer(token.end - token.start + 1);
271 int i;
272 for (i = 0; i < token.end - token.start; i++) {
273 ASSERT(static_cast<char>(token.start[i]) == token.start[i]);
274 buffer[i] = static_cast<char>(token.start[i]);
275 }
276 buffer[i] = 0;
277 char* end;
278 token.numberToken = WTF::strtod(buffer.data(), &end);
279 ASSERT(buffer.data() + (token.end - token.start) == end);
280 return TokNumber;
281 }
282
283 JSValue LiteralParser::parse(ParserState initialState)
284 {
285 ParserState state = initialState;
286 MarkedArgumentBuffer objectStack;
287 JSValue lastValue;
288 Vector<ParserState, 16> stateStack;
289 Vector<Identifier, 16> identifierStack;
290 while (1) {
291 switch(state) {
292 startParseArray:
293 case StartParseArray: {
294 JSArray* array = constructEmptyArray(m_exec);
295 objectStack.append(array);
296 // fallthrough
297 }
298 doParseArrayStartExpression:
299 case DoParseArrayStartExpression: {
300 TokenType lastToken = m_lexer.currentToken().type;
301 if (m_lexer.next() == TokRBracket) {
302 if (lastToken == TokComma)
303 return JSValue();
304 m_lexer.next();
305 lastValue = objectStack.last();
306 objectStack.removeLast();
307 break;
308 }
309
310 stateStack.append(DoParseArrayEndExpression);
311 goto startParseExpression;
312 }
313 case DoParseArrayEndExpression: {
314 asArray(objectStack.last())->push(m_exec, lastValue);
315
316 if (m_lexer.currentToken().type == TokComma)
317 goto doParseArrayStartExpression;
318
319 if (m_lexer.currentToken().type != TokRBracket)
320 return JSValue();
321
322 m_lexer.next();
323 lastValue = objectStack.last();
324 objectStack.removeLast();
325 break;
326 }
327 startParseObject:
328 case StartParseObject: {
329 JSObject* object = constructEmptyObject(m_exec);
330 objectStack.append(object);
331
332 TokenType type = m_lexer.next();
333 if (type == TokString) {
334 Lexer::LiteralParserToken identifierToken = m_lexer.currentToken();
335
336 // Check for colon
337 if (m_lexer.next() != TokColon)
338 return JSValue();
339
340 m_lexer.next();
341 identifierStack.append(Identifier(m_exec, identifierToken.stringToken));
342 stateStack.append(DoParseObjectEndExpression);
343 goto startParseExpression;
344 } else if (type != TokRBrace)
345 return JSValue();
346 m_lexer.next();
347 lastValue = objectStack.last();
348 objectStack.removeLast();
349 break;
350 }
351 doParseObjectStartExpression:
352 case DoParseObjectStartExpression: {
353 TokenType type = m_lexer.next();
354 if (type != TokString)
355 return JSValue();
356 Lexer::LiteralParserToken identifierToken = m_lexer.currentToken();
357
358 // Check for colon
359 if (m_lexer.next() != TokColon)
360 return JSValue();
361
362 m_lexer.next();
363 identifierStack.append(Identifier(m_exec, identifierToken.stringToken));
364 stateStack.append(DoParseObjectEndExpression);
365 goto startParseExpression;
366 }
367 case DoParseObjectEndExpression:
368 {
369 asObject(objectStack.last())->putDirect(identifierStack.last(), lastValue);
370 identifierStack.removeLast();
371 if (m_lexer.currentToken().type == TokComma)
372 goto doParseObjectStartExpression;
373 if (m_lexer.currentToken().type != TokRBrace)
374 return JSValue();
375 m_lexer.next();
376 lastValue = objectStack.last();
377 objectStack.removeLast();
378 break;
379 }
380 startParseExpression:
381 case StartParseExpression: {
382 switch (m_lexer.currentToken().type) {
383 case TokLBracket:
384 goto startParseArray;
385 case TokLBrace:
386 goto startParseObject;
387 case TokString: {
388 Lexer::LiteralParserToken stringToken = m_lexer.currentToken();
389 m_lexer.next();
390 lastValue = jsString(m_exec, stringToken.stringToken);
391 break;
392 }
393 case TokNumber: {
394 Lexer::LiteralParserToken numberToken = m_lexer.currentToken();
395 m_lexer.next();
396 lastValue = jsNumber(m_exec, numberToken.numberToken);
397 break;
398 }
399 case TokNull:
400 m_lexer.next();
401 lastValue = jsNull();
402 break;
403
404 case TokTrue:
405 m_lexer.next();
406 lastValue = jsBoolean(true);
407 break;
408
409 case TokFalse:
410 m_lexer.next();
411 lastValue = jsBoolean(false);
412 break;
413
414 default:
415 // Error
416 return JSValue();
417 }
418 break;
419 }
420 case StartParseStatement: {
421 switch (m_lexer.currentToken().type) {
422 case TokLBracket:
423 case TokNumber:
424 case TokString:
425 goto startParseExpression;
426
427 case TokLParen: {
428 m_lexer.next();
429 stateStack.append(StartParseStatementEndStatement);
430 goto startParseExpression;
431 }
432 default:
433 return JSValue();
434 }
435 }
436 case StartParseStatementEndStatement: {
437 ASSERT(stateStack.isEmpty());
438 if (m_lexer.currentToken().type != TokRParen)
439 return JSValue();
440 if (m_lexer.next() == TokEnd)
441 return lastValue;
442 return JSValue();
443 }
444 default:
445 ASSERT_NOT_REACHED();
446 }
447 if (stateStack.isEmpty())
448 return lastValue;
449 state = stateStack.last();
450 stateStack.removeLast();
451 continue;
452 }
453 }
454
455 }