1 #define VERSION "1.0.3"
3 /* CJSON - JSON support for Lua
5 * Copyright (c) 2010-2011 Mark Pulford <mark@kyne.com.au>
7 * Permission is hereby granted, free of charge, to any person obtaining
8 * a copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sublicense, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
15 * The above copyright notice and this permission notice shall be
16 * included in all copies or substantial portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
22 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 * - JSON "null" values are represented as lightuserdata since Lua
29 * tables cannot contain "nil". Compare with cjson.null.
30 * - Invalid UTF-8 characters are not detected and will be passed
31 * untouched. If required, UTF-8 error checking should be done
32 * outside this library.
33 * - Javascript comments are not part of the JSON spec, and are not
34 * currently supported.
36 * Note: Decoding is slower than encoding. Lua spends significant
37 * time (30%) managing tables when parsing JSON since it is
38 * difficult to know object/array sizes ahead of time.
50 #define isinf(x) (!isnan(x) && isnan((x) - (x)))
53 #define DEFAULT_SPARSE_CONVERT 0
54 #define DEFAULT_SPARSE_RATIO 2
55 #define DEFAULT_SPARSE_SAFE 10
56 #define DEFAULT_MAX_DEPTH 20
57 #define DEFAULT_ENCODE_REFUSE_BADNUM 1
58 #define DEFAULT_DECODE_REFUSE_BADNUM 0
59 #define DEFAULT_ENCODE_KEEP_BUFFER 1
78 static const char *json_token_type_name
[] = {
97 json_token_type_t ch2token
[256];
98 char escape2char
[256]; /* Decoding */
100 char escapes
[35][8]; /* Pre-generated escape string buffer */
101 char *char2escape
[256]; /* Encoding */
104 char number_fmt
[8]; /* "%.XXg\0" */
107 int encode_sparse_convert
;
108 int encode_sparse_ratio
;
109 int encode_sparse_safe
;
110 int encode_max_depth
;
111 int encode_refuse_badnum
;
112 int decode_refuse_badnum
;
113 int encode_keep_buffer
;
114 int encode_number_precision
;
120 strbuf_t
*tmp
; /* Temporary storage for strings */
125 json_token_type_t type
;
135 static const char *char2escape
[256] = {
136 "\\u0000", "\\u0001", "\\u0002", "\\u0003",
137 "\\u0004", "\\u0005", "\\u0006", "\\u0007",
138 "\\b", "\\t", "\\n", "\\u000b",
139 "\\f", "\\r", "\\u000e", "\\u000f",
140 "\\u0010", "\\u0011", "\\u0012", "\\u0013",
141 "\\u0014", "\\u0015", "\\u0016", "\\u0017",
142 "\\u0018", "\\u0019", "\\u001a", "\\u001b",
143 "\\u001c", "\\u001d", "\\u001e", "\\u001f",
144 NULL
, NULL
, "\\\"", NULL
, NULL
, NULL
, NULL
, NULL
,
145 NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, "\\/",
146 NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
,
147 NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
,
148 NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
,
149 NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
,
150 NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
,
151 NULL
, NULL
, NULL
, NULL
, "\\\\", NULL
, NULL
, NULL
,
152 NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
,
153 NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
,
154 NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
,
155 NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, "\\u007f",
156 NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
,
157 NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
,
158 NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
,
159 NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
,
160 NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
,
161 NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
,
162 NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
,
163 NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
,
164 NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
,
165 NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
,
166 NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
,
167 NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
,
168 NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
,
169 NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
,
170 NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
,
171 NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
,
174 static int json_config_key
;
176 /* ===== CONFIGURATION ===== */
178 static json_config_t
*json_fetch_config(lua_State
*l
)
182 lua_pushlightuserdata(l
, &json_config_key
);
183 lua_gettable(l
, LUA_REGISTRYINDEX
);
184 cfg
= lua_touserdata(l
, -1);
186 luaL_error(l
, "BUG: Unable to fetch CJSON configuration");
193 static void json_verify_arg_count(lua_State
*l
, int args
)
195 luaL_argcheck(l
, lua_gettop(l
) <= args
, args
+ 1,
196 "found too many arguments");
199 /* Configures handling of extremely sparse arrays:
200 * convert: Convert extremely sparse arrays into objects? Otherwise error.
201 * ratio: 0: always allow sparse; 1: never allow sparse; >1: use ratio
202 * safe: Always use an array when the max index <= safe */
203 static int json_cfg_encode_sparse_array(lua_State
*l
)
208 json_verify_arg_count(l
, 3);
209 cfg
= json_fetch_config(l
);
211 switch (lua_gettop(l
)) {
213 val
= luaL_checkinteger(l
, 3);
214 luaL_argcheck(l
, val
>= 0, 3, "expected integer >= 0");
215 cfg
->encode_sparse_safe
= val
;
217 val
= luaL_checkinteger(l
, 2);
218 luaL_argcheck(l
, val
>= 0, 2, "expected integer >= 0");
219 cfg
->encode_sparse_ratio
= val
;
221 luaL_argcheck(l
, lua_isboolean(l
, 1), 1, "expected boolean");
222 cfg
->encode_sparse_convert
= lua_toboolean(l
, 1);
225 lua_pushboolean(l
, cfg
->encode_sparse_convert
);
226 lua_pushinteger(l
, cfg
->encode_sparse_ratio
);
227 lua_pushinteger(l
, cfg
->encode_sparse_safe
);
232 /* Configures the maximum number of nested arrays/objects allowed when
234 static int json_cfg_encode_max_depth(lua_State
*l
)
239 json_verify_arg_count(l
, 1);
240 cfg
= json_fetch_config(l
);
243 depth
= luaL_checkinteger(l
, 1);
244 luaL_argcheck(l
, depth
> 0, 1, "expected positive integer");
245 cfg
->encode_max_depth
= depth
;
248 lua_pushinteger(l
, cfg
->encode_max_depth
);
253 static void json_set_number_precision(json_config_t
*cfg
, int prec
)
255 cfg
->encode_number_precision
= prec
;
256 sprintf(cfg
->number_fmt
, "%%.%dg", prec
);
259 /* Configures number precision when converting doubles to text */
260 static int json_cfg_encode_number_precision(lua_State
*l
)
265 json_verify_arg_count(l
, 1);
266 cfg
= json_fetch_config(l
);
269 precision
= luaL_checkinteger(l
, 1);
270 luaL_argcheck(l
, 1 <= precision
&& precision
<= 14, 1,
271 "expected integer between 1 and 14");
272 json_set_number_precision(cfg
, precision
);
275 lua_pushinteger(l
, cfg
->encode_number_precision
);
280 /* Configures JSON encoding buffer persistence */
281 static int json_cfg_encode_keep_buffer(lua_State
*l
)
285 json_verify_arg_count(l
, 1);
286 cfg
= json_fetch_config(l
);
289 luaL_checktype(l
, 1, LUA_TBOOLEAN
);
290 cfg
->encode_keep_buffer
= lua_toboolean(l
, 1);
293 lua_pushboolean(l
, cfg
->encode_keep_buffer
);
298 /* On argument: decode enum and set config variables
299 * **options must point to a NULL terminated array of 4 enums
300 * Returns: current enum value */
301 static void json_enum_option(lua_State
*l
, const char **options
,
302 int *opt1
, int *opt2
)
307 if (lua_isboolean(l
, 1))
308 setting
= lua_toboolean(l
, 1) * 3;
310 setting
= luaL_checkoption(l
, 1, NULL
, options
);
312 *opt1
= setting
& 1 ? 1 : 0;
313 *opt2
= setting
& 2 ? 1 : 0;
315 setting
= *opt1
| (*opt2
<< 1);
319 lua_pushstring(l
, options
[setting
]);
321 lua_pushboolean(l
, 0);
325 /* When enabled, rejects: NaN, Infinity, hexadecimal numbers */
326 static int json_cfg_refuse_invalid_numbers(lua_State
*l
)
328 static const char *options_enc_dec
[] = { "none", "encode", "decode",
332 json_verify_arg_count(l
, 1);
333 cfg
= json_fetch_config(l
);
335 json_enum_option(l
, options_enc_dec
,
336 &cfg
->encode_refuse_badnum
,
337 &cfg
->decode_refuse_badnum
);
342 static int json_destroy_config(lua_State
*l
)
346 cfg
= lua_touserdata(l
, 1);
348 strbuf_free(&cfg
->encode_buf
);
354 static void json_create_config(lua_State
*l
)
359 cfg
= lua_newuserdata(l
, sizeof(*cfg
));
361 /* Create GC method to clean up strbuf */
363 lua_pushcfunction(l
, json_destroy_config
);
364 lua_setfield(l
, -2, "__gc");
365 lua_setmetatable(l
, -2);
367 strbuf_init(&cfg
->encode_buf
, 0);
369 cfg
->encode_sparse_convert
= DEFAULT_SPARSE_CONVERT
;
370 cfg
->encode_sparse_ratio
= DEFAULT_SPARSE_RATIO
;
371 cfg
->encode_sparse_safe
= DEFAULT_SPARSE_SAFE
;
372 cfg
->encode_max_depth
= DEFAULT_MAX_DEPTH
;
373 cfg
->encode_refuse_badnum
= DEFAULT_ENCODE_REFUSE_BADNUM
;
374 cfg
->decode_refuse_badnum
= DEFAULT_DECODE_REFUSE_BADNUM
;
375 cfg
->encode_keep_buffer
= DEFAULT_ENCODE_KEEP_BUFFER
;
376 json_set_number_precision(cfg
, 14);
380 /* Tag all characters as an error */
381 for (i
= 0; i
< 256; i
++)
382 cfg
->ch2token
[i
] = T_ERROR
;
384 /* Set tokens that require no further processing */
385 cfg
->ch2token
['{'] = T_OBJ_BEGIN
;
386 cfg
->ch2token
['}'] = T_OBJ_END
;
387 cfg
->ch2token
['['] = T_ARR_BEGIN
;
388 cfg
->ch2token
[']'] = T_ARR_END
;
389 cfg
->ch2token
[','] = T_COMMA
;
390 cfg
->ch2token
[':'] = T_COLON
;
391 cfg
->ch2token
['\0'] = T_END
;
392 cfg
->ch2token
[' '] = T_WHITESPACE
;
393 cfg
->ch2token
['\t'] = T_WHITESPACE
;
394 cfg
->ch2token
['\n'] = T_WHITESPACE
;
395 cfg
->ch2token
['\r'] = T_WHITESPACE
;
397 /* Update characters that require further processing */
398 cfg
->ch2token
['f'] = T_UNKNOWN
; /* false? */
399 cfg
->ch2token
['i'] = T_UNKNOWN
; /* inf, ininity? */
400 cfg
->ch2token
['I'] = T_UNKNOWN
;
401 cfg
->ch2token
['n'] = T_UNKNOWN
; /* null, nan? */
402 cfg
->ch2token
['N'] = T_UNKNOWN
;
403 cfg
->ch2token
['t'] = T_UNKNOWN
; /* true? */
404 cfg
->ch2token
['"'] = T_UNKNOWN
; /* string? */
405 cfg
->ch2token
['+'] = T_UNKNOWN
; /* number? */
406 cfg
->ch2token
['-'] = T_UNKNOWN
;
407 for (i
= 0; i
< 10; i
++)
408 cfg
->ch2token
['0' + i
] = T_UNKNOWN
;
410 /* Lookup table for parsing escape characters */
411 for (i
= 0; i
< 256; i
++)
412 cfg
->escape2char
[i
] = 0; /* String error */
413 cfg
->escape2char
['"'] = '"';
414 cfg
->escape2char
['\\'] = '\\';
415 cfg
->escape2char
['/'] = '/';
416 cfg
->escape2char
['b'] = '\b';
417 cfg
->escape2char
['t'] = '\t';
418 cfg
->escape2char
['n'] = '\n';
419 cfg
->escape2char
['f'] = '\f';
420 cfg
->escape2char
['r'] = '\r';
421 cfg
->escape2char
['u'] = 'u'; /* Unicode parsing required */
425 /* Initialise separate storage for pre-generated escape codes.
426 * Escapes 0-31 map directly, 34, 92, 127 follow afterwards to
428 for (i
= 0 ; i
< 32; i
++)
429 sprintf(cfg
->escapes
[i
], "\\u%04x", i
);
430 strcpy(cfg
->escapes
[8], "\b"); /* Override simpler escapes */
431 strcpy(cfg
->escapes
[9], "\t");
432 strcpy(cfg
->escapes
[10], "\n");
433 strcpy(cfg
->escapes
[12], "\f");
434 strcpy(cfg
->escapes
[13], "\r");
435 strcpy(cfg
->escapes
[32], "\\\""); /* chr(34) */
436 strcpy(cfg
->escapes
[33], "\\\\"); /* chr(92) */
437 sprintf(cfg
->escapes
[34], "\\u%04x", 127); /* char(127) */
439 /* Initialise encoding escape lookup table */
440 for (i
= 0; i
< 32; i
++)
441 cfg
->char2escape
[i
] = cfg
->escapes
[i
];
442 for (i
= 32; i
< 256; i
++)
443 cfg
->char2escape
[i
] = NULL
;
444 cfg
->char2escape
[34] = cfg
->escapes
[32];
445 cfg
->char2escape
[92] = cfg
->escapes
[33];
446 cfg
->char2escape
[127] = cfg
->escapes
[34];
450 /* ===== ENCODING ===== */
452 static void json_encode_exception(lua_State
*l
, json_config_t
*cfg
, int lindex
,
455 if (!cfg
->encode_keep_buffer
)
456 strbuf_free(&cfg
->encode_buf
);
457 luaL_error(l
, "Cannot serialise %s: %s",
458 lua_typename(l
, lua_type(l
, lindex
)), reason
);
461 /* json_append_string args:
464 * - String (Lua stack index)
466 * Returns nothing. Doesn't remove string from Lua stack */
467 static void json_append_string(lua_State
*l
, strbuf_t
*json
, int lindex
)
474 str
= lua_tolstring(l
, lindex
, &len
);
476 /* Worst case is len * 6 (all unicode escapes).
477 * This buffer is reused constantly for small strings
478 * If there are any excess pages, they won't be hit anyway.
479 * This gains ~5% speedup. */
480 strbuf_ensure_empty_length(json
, len
* 6 + 2);
482 strbuf_append_char_unsafe(json
, '\"');
483 for (i
= 0; i
< len
; i
++) {
484 escstr
= char2escape
[(unsigned char)str
[i
]];
486 strbuf_append_string(json
, escstr
);
488 strbuf_append_char_unsafe(json
, str
[i
]);
490 strbuf_append_char_unsafe(json
, '\"');
493 /* Find the size of the array on the top of the Lua stack
494 * -1 object (not a pure array)
495 * >=0 elements in array
497 static int lua_array_length(lua_State
*l
, json_config_t
*cfg
)
507 /* table, startkey */
508 while (lua_next(l
, -2) != 0) {
509 /* table, key, value */
510 if (lua_type(l
, -2) == LUA_TNUMBER
&&
511 (k
= lua_tonumber(l
, -2))) {
513 if (floor(k
) == k
&& k
>= 1) {
522 /* Must not be an array (non integer key) */
527 /* Encode excessively sparse arrays as objects (if enabled) */
528 if (cfg
->encode_sparse_ratio
> 0 &&
529 max
> items
* cfg
->encode_sparse_ratio
&&
530 max
> cfg
->encode_sparse_safe
) {
531 if (!cfg
->encode_sparse_convert
)
532 json_encode_exception(l
, cfg
, -1, "excessively sparse array");
540 static void json_encode_descend(lua_State
*l
, json_config_t
*cfg
)
542 cfg
->current_depth
++;
544 if (cfg
->current_depth
> cfg
->encode_max_depth
) {
545 if (!cfg
->encode_keep_buffer
)
546 strbuf_free(&cfg
->encode_buf
);
547 luaL_error(l
, "Cannot serialise, excessive nesting (%d)",
552 static void json_append_data(lua_State
*l
, json_config_t
*cfg
, strbuf_t
*json
);
554 /* json_append_array args:
557 * - Size of passed Lua array (top of stack) */
558 static void json_append_array(lua_State
*l
, json_config_t
*cfg
, strbuf_t
*json
,
563 json_encode_descend(l
, cfg
);
565 strbuf_append_char(json
, '[');
568 for (i
= 1; i
<= array_length
; i
++) {
570 strbuf_append_char(json
, ',');
574 lua_rawgeti(l
, -1, i
);
575 json_append_data(l
, cfg
, json
);
579 strbuf_append_char(json
, ']');
581 cfg
->current_depth
--;
584 static void json_append_number(lua_State
*l
, strbuf_t
*json
, int index
,
587 double num
= lua_tonumber(l
, index
);
589 if (cfg
->encode_refuse_badnum
&& (isinf(num
) || isnan(num
)))
590 json_encode_exception(l
, cfg
, index
, "must not be NaN or Inf");
592 /* Lowest double printed with %.14g is 21 characters long:
593 * -1.7976931348623e+308
595 * Use 32 to include the \0, and a few extra just in case..
597 strbuf_append_fmt(json
, 32, cfg
->number_fmt
, num
);
600 static void json_append_object(lua_State
*l
, json_config_t
*cfg
,
605 json_encode_descend(l
, cfg
);
608 strbuf_append_char(json
, '{');
611 /* table, startkey */
613 while (lua_next(l
, -2) != 0) {
615 strbuf_append_char(json
, ',');
619 /* table, key, value */
620 keytype
= lua_type(l
, -2);
621 if (keytype
== LUA_TNUMBER
) {
622 strbuf_append_char(json
, '"');
623 json_append_number(l
, json
, -2, cfg
);
624 strbuf_append_mem(json
, "\":", 2);
625 } else if (keytype
== LUA_TSTRING
) {
626 json_append_string(l
, json
, -2);
627 strbuf_append_char(json
, ':');
629 json_encode_exception(l
, cfg
, -2,
630 "table key must be a number or string");
634 /* table, key, value */
635 json_append_data(l
, cfg
, json
);
640 strbuf_append_char(json
, '}');
642 cfg
->current_depth
--;
645 /* Serialise Lua data into JSON string. */
646 static void json_append_data(lua_State
*l
, json_config_t
*cfg
, strbuf_t
*json
)
650 switch (lua_type(l
, -1)) {
652 json_append_string(l
, json
, -1);
655 json_append_number(l
, json
, -1, cfg
);
658 if (lua_toboolean(l
, -1))
659 strbuf_append_mem(json
, "true", 4);
661 strbuf_append_mem(json
, "false", 5);
664 len
= lua_array_length(l
, cfg
);
666 json_append_array(l
, cfg
, json
, len
);
668 json_append_object(l
, cfg
, json
);
671 strbuf_append_mem(json
, "null", 4);
673 case LUA_TLIGHTUSERDATA
:
674 if (lua_touserdata(l
, -1) == NULL
) {
675 strbuf_append_mem(json
, "null", 4);
679 /* Remaining types (LUA_TFUNCTION, LUA_TUSERDATA, LUA_TTHREAD,
680 * and LUA_TLIGHTUSERDATA) cannot be serialised */
681 json_encode_exception(l
, cfg
, -1, "type not supported");
686 static int json_encode(lua_State
*l
)
692 /* Can't use json_verify_arg_count() since we need to ensure
693 * there is only 1 argument */
694 luaL_argcheck(l
, lua_gettop(l
) == 1, 1, "expected 1 argument");
696 cfg
= json_fetch_config(l
);
697 cfg
->current_depth
= 0;
699 /* Reset the persistent buffer if it exists.
700 * Otherwise allocate a new buffer. */
701 if (strbuf_allocated(&cfg
->encode_buf
))
702 strbuf_reset(&cfg
->encode_buf
);
704 strbuf_init(&cfg
->encode_buf
, 0);
706 json_append_data(l
, cfg
, &cfg
->encode_buf
);
707 json
= strbuf_string(&cfg
->encode_buf
, &len
);
709 lua_pushlstring(l
, json
, len
);
711 if (!cfg
->encode_keep_buffer
)
712 strbuf_free(&cfg
->encode_buf
);
717 /* ===== DECODING ===== */
719 static void json_process_value(lua_State
*l
, json_parse_t
*json
,
720 json_token_t
*token
);
722 static int hexdigit2int(char hex
)
724 if ('0' <= hex
&& hex
<= '9')
727 /* Force lowercase */
729 if ('a' <= hex
&& hex
<= 'f')
730 return 10 + hex
- 'a';
735 static int decode_hex4(const char *hex
)
740 /* Convert ASCII hex digit to numeric digit
741 * Note: this returns an error for invalid hex digits, including
743 for (i
= 0; i
< 4; i
++) {
744 digit
[i
] = hexdigit2int(hex
[i
]);
750 return (digit
[0] << 12) +
756 /* Converts a Unicode codepoint to UTF-8.
757 * Returns UTF-8 string length, and up to 4 bytes in *utf8 */
758 static int codepoint_to_utf8(char *utf8
, int codepoint
)
761 if (codepoint
<= 0x7F) {
766 /* 110xxxxx 10xxxxxx */
767 if (codepoint
<= 0x7FF) {
768 utf8
[0] = (codepoint
>> 6) | 0xC0;
769 utf8
[1] = (codepoint
& 0x3F) | 0x80;
773 /* 1110xxxx 10xxxxxx 10xxxxxx */
774 if (codepoint
<= 0xFFFF) {
775 utf8
[0] = (codepoint
>> 12) | 0xE0;
776 utf8
[1] = ((codepoint
>> 6) & 0x3F) | 0x80;
777 utf8
[2] = (codepoint
& 0x3F) | 0x80;
781 /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
782 if (codepoint
<= 0x1FFFFF) {
783 utf8
[0] = (codepoint
>> 18) | 0xF0;
784 utf8
[1] = ((codepoint
>> 12) & 0x3F) | 0x80;
785 utf8
[2] = ((codepoint
>> 6) & 0x3F) | 0x80;
786 utf8
[3] = (codepoint
& 0x3F) | 0x80;
794 /* Called when index pointing to beginning of UTF-16 code escape: \uXXXX
795 * \u is guaranteed to exist, but the remaining hex characters may be
797 * Translate to UTF-8 and append to temporary token string.
798 * Must advance index to the next character to be processed.
802 static int json_append_unicode_escape(json_parse_t
*json
)
804 char utf8
[4]; /* Surrogate pairs require 4 UTF-8 bytes */
810 /* Fetch UTF-16 code unit */
811 codepoint
= decode_hex4(&json
->data
[json
->index
+ 2]);
815 /* UTF-16 surrogate pairs take the following 2 byte form:
817 * When x = 0: y is the high 10 bits of the codepoint
818 * x = 1: y is the low 10 bits of the codepoint
820 * Check for a surrogate pair (high or low) */
821 if ((codepoint
& 0xF800) == 0xD800) {
822 /* Error if the 1st surrogate is not high */
823 if (codepoint
& 0x400)
826 /* Ensure the next code is a unicode escape */
827 if (json
->data
[json
->index
+ escape_len
] != '\\' ||
828 json
->data
[json
->index
+ escape_len
+ 1] != 'u') {
832 /* Fetch the next codepoint */
833 surrogate_low
= decode_hex4(&json
->data
[json
->index
+ 2 + escape_len
]);
834 if (surrogate_low
< 0)
837 /* Error if the 2nd code is not a low surrogate */
838 if ((surrogate_low
& 0xFC00) != 0xDC00)
841 /* Calculate Unicode codepoint */
842 codepoint
= (codepoint
& 0x3FF) << 10;
843 surrogate_low
&= 0x3FF;
844 codepoint
= (codepoint
| surrogate_low
) + 0x10000;
848 /* Convert codepoint to UTF-8 */
849 len
= codepoint_to_utf8(utf8
, codepoint
);
853 /* Append bytes and advance parse index */
854 strbuf_append_mem_unsafe(json
->tmp
, utf8
, len
);
855 json
->index
+= escape_len
;
860 static void json_set_token_error(json_token_t
*token
, json_parse_t
*json
,
863 token
->type
= T_ERROR
;
864 token
->index
= json
->index
;
865 token
->value
.string
= errtype
;
868 static void json_next_string_token(json_parse_t
*json
, json_token_t
*token
)
870 char *escape2char
= json
->cfg
->escape2char
;
873 /* Caller must ensure a string is next */
874 assert(json
->data
[json
->index
] == '"');
879 /* json->tmp is the temporary strbuf used to accumulate the
880 * decoded string value. */
881 strbuf_reset(json
->tmp
);
882 while ((ch
= json
->data
[json
->index
]) != '"') {
884 /* Premature end of the string */
885 json_set_token_error(token
, json
, "unexpected end of string");
891 /* Fetch escape character */
892 ch
= json
->data
[json
->index
+ 1];
894 /* Translate escape code and append to tmp string */
895 ch
= escape2char
[(unsigned char)ch
];
897 if (json_append_unicode_escape(json
) == 0)
900 json_set_token_error(token
, json
,
901 "invalid unicode escape code");
905 json_set_token_error(token
, json
, "invalid escape code");
912 /* Append normal character or translated single character
913 * Unicode escapes are handled above */
914 strbuf_append_char_unsafe(json
->tmp
, ch
);
917 json
->index
++; /* Eat final quote (") */
919 strbuf_ensure_null(json
->tmp
);
921 token
->type
= T_STRING
;
922 token
->value
.string
= strbuf_string(json
->tmp
, &token
->string_len
);
925 /* JSON numbers should take the following form:
926 * -?(0|[1-9]|[1-9][0-9]+)(.[0-9]+)?([eE][-+]?[0-9]+)?
928 * json_next_number_token() uses strtod() which allows other forms:
929 * - numbers starting with '+'
930 * - NaN, -NaN, infinity, -infinity
931 * - hexadecimal numbers
932 * - numbers with leading zeros
934 * json_is_invalid_number() detects "numbers" which may pass strtod()'s
935 * error checking, but should not be allowed with strict JSON.
937 * json_is_invalid_number() may pass numbers which cause strtod()
938 * to generate an error.
940 static int json_is_invalid_number(json_parse_t
*json
)
944 /* Reject numbers starting with + */
945 if (json
->data
[i
] == '+')
948 /* Skip minus sign if it exists */
949 if (json
->data
[i
] == '-')
952 /* Reject numbers starting with 0x, or leading zeros */
953 if (json
->data
[i
] == '0') {
954 int ch2
= json
->data
[i
+ 1];
956 if ((ch2
| 0x20) == 'x' || /* Hex */
957 ('0' <= ch2
&& ch2
<= '9')) /* Leading zero */
961 } else if (json
->data
[i
] <= '9') {
962 return 0; /* Ordinary number */
967 if (!strncasecmp(&json
->data
[i
], "inf", 3))
969 if (!strncasecmp(&json
->data
[i
], "nan", 3))
972 /* Pass all other numbers which may still be invalid, but
973 * strtod() will catch them. */
977 static void json_next_number_token(json_parse_t
*json
, json_token_t
*token
)
979 const char *startptr
;
982 token
->type
= T_NUMBER
;
983 startptr
= &json
->data
[json
->index
];
984 token
->value
.number
= strtod(&json
->data
[json
->index
], &endptr
);
985 if (startptr
== endptr
)
986 json_set_token_error(token
, json
, "invalid number");
988 json
->index
+= endptr
- startptr
; /* Skip the processed number */
993 /* Fills in the token struct.
994 * T_STRING will return a pointer to the json_parse_t temporary string
995 * T_ERROR will leave the json->index pointer at the error.
997 static void json_next_token(json_parse_t
*json
, json_token_t
*token
)
999 json_token_type_t
*ch2token
= json
->cfg
->ch2token
;
1002 /* Eat whitespace. FIXME: UGLY */
1003 token
->type
= ch2token
[(unsigned char)json
->data
[json
->index
]];
1004 while (token
->type
== T_WHITESPACE
)
1005 token
->type
= ch2token
[(unsigned char)json
->data
[++json
->index
]];
1007 token
->index
= json
->index
;
1009 /* Don't advance the pointer for an error or the end */
1010 if (token
->type
== T_ERROR
) {
1011 json_set_token_error(token
, json
, "invalid token");
1015 if (token
->type
== T_END
) {
1019 /* Found a known single character token, advance index and return */
1020 if (token
->type
!= T_UNKNOWN
) {
1025 /* Process characters which triggered T_UNKNOWN */
1026 ch
= json
->data
[json
->index
];
1028 /* Must use strncmp() to match the front of the JSON string.
1029 * JSON identifier must be lowercase.
1030 * When strict_numbers is disabled, either case is allowed for
1031 * Infinity/NaN (since we are no longer following the spec..) */
1033 json_next_string_token(json
, token
);
1035 } else if (ch
== '-' || ('0' <= ch
&& ch
<= '9')) {
1036 if (json
->cfg
->decode_refuse_badnum
&& json_is_invalid_number(json
)) {
1037 json_set_token_error(token
, json
, "invalid number");
1040 json_next_number_token(json
, token
);
1042 } else if (!strncmp(&json
->data
[json
->index
], "true", 4)) {
1043 token
->type
= T_BOOLEAN
;
1044 token
->value
.boolean
= 1;
1047 } else if (!strncmp(&json
->data
[json
->index
], "false", 5)) {
1048 token
->type
= T_BOOLEAN
;
1049 token
->value
.boolean
= 0;
1052 } else if (!strncmp(&json
->data
[json
->index
], "null", 4)) {
1053 token
->type
= T_NULL
;
1056 } else if (!json
->cfg
->decode_refuse_badnum
&&
1057 json_is_invalid_number(json
)) {
1058 /* When refuse_badnum is disabled, only attempt to process
1059 * numbers we know are invalid JSON (Inf, NaN, hex)
1060 * This is required to generate an appropriate token error,
1061 * otherwise all bad tokens will register as "invalid number"
1063 json_next_number_token(json
, token
);
1067 /* Token starts with t/f/n but isn't recognised above. */
1068 json_set_token_error(token
, json
, "invalid token");
1071 /* This function does not return.
1072 * DO NOT CALL WITH DYNAMIC MEMORY ALLOCATED.
1073 * The only supported exception is the temporary parser string
1075 * json and token should exist on the stack somewhere.
1076 * luaL_error() will long_jmp and release the stack */
1077 static void json_throw_parse_error(lua_State
*l
, json_parse_t
*json
,
1078 const char *exp
, json_token_t
*token
)
1082 strbuf_free(json
->tmp
);
1084 if (token
->type
== T_ERROR
)
1085 found
= token
->value
.string
;
1087 found
= json_token_type_name
[token
->type
];
1089 /* Note: token->index is 0 based, display starting from 1 */
1090 luaL_error(l
, "Expected %s but found %s at character %d",
1091 exp
, found
, token
->index
+ 1);
1094 static void json_decode_checkstack(lua_State
*l
, json_parse_t
*json
, int n
)
1096 if (lua_checkstack(l
, n
))
1099 strbuf_free(json
->tmp
);
1100 luaL_error(l
, "Too many nested data structures");
1103 static void json_parse_object_context(lua_State
*l
, json_parse_t
*json
)
1107 /* 3 slots required:
1108 * .., table, key, value */
1109 json_decode_checkstack(l
, json
, 3);
1113 json_next_token(json
, &token
);
1115 /* Handle empty objects */
1116 if (token
.type
== T_OBJ_END
) {
1121 if (token
.type
!= T_STRING
)
1122 json_throw_parse_error(l
, json
, "object key string", &token
);
1125 lua_pushlstring(l
, token
.value
.string
, token
.string_len
);
1127 json_next_token(json
, &token
);
1128 if (token
.type
!= T_COLON
)
1129 json_throw_parse_error(l
, json
, "colon", &token
);
1132 json_next_token(json
, &token
);
1133 json_process_value(l
, json
, &token
);
1135 /* Set key = value */
1138 json_next_token(json
, &token
);
1140 if (token
.type
== T_OBJ_END
)
1143 if (token
.type
!= T_COMMA
)
1144 json_throw_parse_error(l
, json
, "comma or object end", &token
);
1146 json_next_token(json
, &token
);
1150 /* Handle the array context */
1151 static void json_parse_array_context(lua_State
*l
, json_parse_t
*json
)
1156 /* 2 slots required:
1157 * .., table, value */
1158 json_decode_checkstack(l
, json
, 2);
1162 json_next_token(json
, &token
);
1164 /* Handle empty arrays */
1165 if (token
.type
== T_ARR_END
)
1168 for (i
= 1; ; i
++) {
1169 json_process_value(l
, json
, &token
);
1170 lua_rawseti(l
, -2, i
); /* arr[i] = value */
1172 json_next_token(json
, &token
);
1174 if (token
.type
== T_ARR_END
)
1177 if (token
.type
!= T_COMMA
)
1178 json_throw_parse_error(l
, json
, "comma or array end", &token
);
1180 json_next_token(json
, &token
);
1184 /* Handle the "value" context */
1185 static void json_process_value(lua_State
*l
, json_parse_t
*json
,
1186 json_token_t
*token
)
1188 switch (token
->type
) {
1190 lua_pushlstring(l
, token
->value
.string
, token
->string_len
);
1193 lua_pushnumber(l
, token
->value
.number
);
1196 lua_pushboolean(l
, token
->value
.boolean
);
1199 json_parse_object_context(l
, json
);
1202 json_parse_array_context(l
, json
);
1205 /* In Lua, setting "t[k] = nil" will delete k from the table.
1206 * Hence a NULL pointer lightuserdata object is used instead */
1207 lua_pushlightuserdata(l
, NULL
);
1210 json_throw_parse_error(l
, json
, "value", token
);
1214 /* json_text must be null terminated string */
1215 static void lua_json_decode(lua_State
*l
, const char *json_text
, int json_len
)
1220 json
.cfg
= json_fetch_config(l
);
1221 json
.data
= json_text
;
1224 /* Ensure the temporary buffer can hold the entire string.
1225 * This means we no longer need to do length checks since the decoded
1226 * string must be smaller than the entire json string */
1227 json
.tmp
= strbuf_new(json_len
);
1229 json_next_token(&json
, &token
);
1230 json_process_value(l
, &json
, &token
);
1232 /* Ensure there is no more input left */
1233 json_next_token(&json
, &token
);
1235 if (token
.type
!= T_END
)
1236 json_throw_parse_error(l
, &json
, "the end", &token
);
1238 strbuf_free(json
.tmp
);
1241 static int json_decode(lua_State
*l
)
1246 json_verify_arg_count(l
, 1);
1248 json
= luaL_checklstring(l
, 1, &len
);
1250 /* Detect Unicode other than UTF-8 (see RFC 4627, Sec 3)
1252 * CJSON can support any simple data type, hence only the first
1253 * character is guaranteed to be ASCII (at worst: '"'). This is
1254 * still enough to detect whether the wrong encoding is in use. */
1255 if (len
>= 2 && (!json
[0] || !json
[1]))
1256 luaL_error(l
, "JSON parser does not support UTF-16 or UTF-32");
1258 lua_json_decode(l
, json
, len
);
1263 /* ===== INITIALISATION ===== */
1265 int luaopen_cjson(lua_State
*l
)
1268 { "encode", json_encode
},
1269 { "decode", json_decode
},
1270 { "encode_sparse_array", json_cfg_encode_sparse_array
},
1271 { "encode_max_depth", json_cfg_encode_max_depth
},
1272 { "encode_number_precision", json_cfg_encode_number_precision
},
1273 { "encode_keep_buffer", json_cfg_encode_keep_buffer
},
1274 { "refuse_invalid_numbers", json_cfg_refuse_invalid_numbers
},
1278 /* Use the address of json_config_key as the registry key.
1279 * It's faster than using a config string, and more unique */
1280 lua_pushlightuserdata(l
, &json_config_key
);
1281 json_create_config(l
);
1282 lua_settable(l
, LUA_REGISTRYINDEX
);
1284 luaL_register(l
, "cjson", reg
);
1286 /* Set cjson.null */
1287 lua_pushlightuserdata(l
, NULL
);
1288 lua_setfield(l
, -2, "null");
1290 /* Set cjson.version */
1291 lua_pushliteral(l
, VERSION
);
1292 lua_setfield(l
, -2, "version");
1294 /* Return cjson table */
1298 /* vi:ai et sw=4 ts=4: