]> git.saurik.com Git - redis.git/blame - deps/lua/src/lua_cjson.c
lua_cmsgpack.c added
[redis.git] / deps / lua / src / lua_cjson.c
CommitLineData
15108778 1#define VERSION "1.0.3"
2
3/* CJSON - JSON support for Lua
4 *
5 * Copyright (c) 2010-2011 Mark Pulford <mark@kyne.com.au>
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining
8 * a copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sublicense, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be
16 * included in all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
22 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27/* Caveats:
28 * - JSON "null" values are represented as lightuserdata since Lua
29 * tables cannot contain "nil". Compare with cjson.null.
30 * - Invalid UTF-8 characters are not detected and will be passed
31 * untouched. If required, UTF-8 error checking should be done
32 * outside this library.
33 * - Javascript comments are not part of the JSON spec, and are not
34 * currently supported.
35 *
36 * Note: Decoding is slower than encoding. Lua spends significant
37 * time (30%) managing tables when parsing JSON since it is
38 * difficult to know object/array sizes ahead of time.
39 */
40
#include <assert.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <string.h>

#include "lua.h"
#include "lauxlib.h"

#include "strbuf.h"
48
/* Fallback for platforms lacking isinf(): a value is infinite when it is
 * not NaN itself but subtracting it from itself yields NaN. */
#if defined(MISSING_ISINF)
#define isinf(x) (!isnan(x) && isnan((x) - (x)))
#endif
52
/* Initial values loaded into json_config_t by json_create_config(). */
#define DEFAULT_SPARSE_CONVERT       0   /* encode_sparse_convert */
#define DEFAULT_SPARSE_RATIO         2   /* encode_sparse_ratio */
#define DEFAULT_SPARSE_SAFE          10  /* encode_sparse_safe */
#define DEFAULT_MAX_DEPTH            20  /* encode_max_depth */
#define DEFAULT_ENCODE_REFUSE_BADNUM 1   /* encode_refuse_badnum */
#define DEFAULT_DECODE_REFUSE_BADNUM 0   /* decode_refuse_badnum */
#define DEFAULT_ENCODE_KEEP_BUFFER   1   /* encode_keep_buffer */
60
/* Token classes produced by the decoder's tokeniser.
 * The numeric order matters: values index json_token_type_name[]. */
typedef enum {
    T_OBJ_BEGIN,    /* '{' */
    T_OBJ_END,      /* '}' */
    T_ARR_BEGIN,    /* '[' */
    T_ARR_END,      /* ']' */
    T_STRING,
    T_NUMBER,
    T_BOOLEAN,
    T_NULL,
    T_COLON,        /* ':' */
    T_COMMA,        /* ',' */
    T_END,          /* terminating '\0' */
    T_WHITESPACE,
    T_ERROR,
    T_UNKNOWN       /* first character needs further inspection */
} json_token_type_t;
77
78static const char *json_token_type_name[] = {
79 "T_OBJ_BEGIN",
80 "T_OBJ_END",
81 "T_ARR_BEGIN",
82 "T_ARR_END",
83 "T_STRING",
84 "T_NUMBER",
85 "T_BOOLEAN",
86 "T_NULL",
87 "T_COLON",
88 "T_COMMA",
89 "T_END",
90 "T_WHITESPACE",
91 "T_ERROR",
92 "T_UNKNOWN",
93 NULL
94};
95
96typedef struct {
97 json_token_type_t ch2token[256];
98 char escape2char[256]; /* Decoding */
99#if 0
100 char escapes[35][8]; /* Pre-generated escape string buffer */
101 char *char2escape[256]; /* Encoding */
102#endif
103 strbuf_t encode_buf;
104 char number_fmt[8]; /* "%.XXg\0" */
105 int current_depth;
106
107 int encode_sparse_convert;
108 int encode_sparse_ratio;
109 int encode_sparse_safe;
110 int encode_max_depth;
111 int encode_refuse_badnum;
112 int decode_refuse_badnum;
113 int encode_keep_buffer;
114 int encode_number_precision;
115} json_config_t;
116
117typedef struct {
118 const char *data;
119 int index;
120 strbuf_t *tmp; /* Temporary storage for strings */
121 json_config_t *cfg;
122} json_parse_t;
123
124typedef struct {
125 json_token_type_t type;
126 int index;
127 union {
128 const char *string;
129 double number;
130 int boolean;
131 } value;
132 int string_len;
133} json_token_t;
134
/* Encoding table: maps a byte to the JSON escape sequence that must be
 * emitted for it. Bytes not listed are NULL and are copied verbatim. */
static const char *char2escape[256] = {
    [0x00] = "\\u0000", [0x01] = "\\u0001", [0x02] = "\\u0002", [0x03] = "\\u0003",
    [0x04] = "\\u0004", [0x05] = "\\u0005", [0x06] = "\\u0006", [0x07] = "\\u0007",
    [0x08] = "\\b",     [0x09] = "\\t",     [0x0a] = "\\n",     [0x0b] = "\\u000b",
    [0x0c] = "\\f",     [0x0d] = "\\r",     [0x0e] = "\\u000e", [0x0f] = "\\u000f",
    [0x10] = "\\u0010", [0x11] = "\\u0011", [0x12] = "\\u0012", [0x13] = "\\u0013",
    [0x14] = "\\u0014", [0x15] = "\\u0015", [0x16] = "\\u0016", [0x17] = "\\u0017",
    [0x18] = "\\u0018", [0x19] = "\\u0019", [0x1a] = "\\u001a", [0x1b] = "\\u001b",
    [0x1c] = "\\u001c", [0x1d] = "\\u001d", [0x1e] = "\\u001e", [0x1f] = "\\u001f",
    ['"']  = "\\\"",
    ['/']  = "\\/",
    ['\\'] = "\\\\",
    [0x7f] = "\\u007f",
};
173
/* Only the address of this variable is used: json_fetch_config() pushes it
 * as a light userdata key to look up the configuration in the registry. */
static int json_config_key;
175
176/* ===== CONFIGURATION ===== */
177
178static json_config_t *json_fetch_config(lua_State *l)
179{
180 json_config_t *cfg;
181
182 lua_pushlightuserdata(l, &json_config_key);
183 lua_gettable(l, LUA_REGISTRYINDEX);
184 cfg = lua_touserdata(l, -1);
185 if (!cfg)
186 luaL_error(l, "BUG: Unable to fetch CJSON configuration");
187
188 lua_pop(l, 1);
189
190 return cfg;
191}
192
193static void json_verify_arg_count(lua_State *l, int args)
194{
195 luaL_argcheck(l, lua_gettop(l) <= args, args + 1,
196 "found too many arguments");
197}
198
199/* Configures handling of extremely sparse arrays:
200 * convert: Convert extremely sparse arrays into objects? Otherwise error.
201 * ratio: 0: always allow sparse; 1: never allow sparse; >1: use ratio
202 * safe: Always use an array when the max index <= safe */
203static int json_cfg_encode_sparse_array(lua_State *l)
204{
205 json_config_t *cfg;
206 int val;
207
208 json_verify_arg_count(l, 3);
209 cfg = json_fetch_config(l);
210
211 switch (lua_gettop(l)) {
212 case 3:
213 val = luaL_checkinteger(l, 3);
214 luaL_argcheck(l, val >= 0, 3, "expected integer >= 0");
215 cfg->encode_sparse_safe = val;
216 case 2:
217 val = luaL_checkinteger(l, 2);
218 luaL_argcheck(l, val >= 0, 2, "expected integer >= 0");
219 cfg->encode_sparse_ratio = val;
220 case 1:
221 luaL_argcheck(l, lua_isboolean(l, 1), 1, "expected boolean");
222 cfg->encode_sparse_convert = lua_toboolean(l, 1);
223 }
224
225 lua_pushboolean(l, cfg->encode_sparse_convert);
226 lua_pushinteger(l, cfg->encode_sparse_ratio);
227 lua_pushinteger(l, cfg->encode_sparse_safe);
228
229 return 3;
230}
231
232/* Configures the maximum number of nested arrays/objects allowed when
233 * encoding */
234static int json_cfg_encode_max_depth(lua_State *l)
235{
236 json_config_t *cfg;
237 int depth;
238
239 json_verify_arg_count(l, 1);
240 cfg = json_fetch_config(l);
241
242 if (lua_gettop(l)) {
243 depth = luaL_checkinteger(l, 1);
244 luaL_argcheck(l, depth > 0, 1, "expected positive integer");
245 cfg->encode_max_depth = depth;
246 }
247
248 lua_pushinteger(l, cfg->encode_max_depth);
249
250 return 1;
251}
252
253static void json_set_number_precision(json_config_t *cfg, int prec)
254{
255 cfg->encode_number_precision = prec;
256 sprintf(cfg->number_fmt, "%%.%dg", prec);
257}
258
259/* Configures number precision when converting doubles to text */
260static int json_cfg_encode_number_precision(lua_State *l)
261{
262 json_config_t *cfg;
263 int precision;
264
265 json_verify_arg_count(l, 1);
266 cfg = json_fetch_config(l);
267
268 if (lua_gettop(l)) {
269 precision = luaL_checkinteger(l, 1);
270 luaL_argcheck(l, 1 <= precision && precision <= 14, 1,
271 "expected integer between 1 and 14");
272 json_set_number_precision(cfg, precision);
273 }
274
275 lua_pushinteger(l, cfg->encode_number_precision);
276
277 return 1;
278}
279
280/* Configures JSON encoding buffer persistence */
281static int json_cfg_encode_keep_buffer(lua_State *l)
282{
283 json_config_t *cfg;
284
285 json_verify_arg_count(l, 1);
286 cfg = json_fetch_config(l);
287
288 if (lua_gettop(l)) {
289 luaL_checktype(l, 1, LUA_TBOOLEAN);
290 cfg->encode_keep_buffer = lua_toboolean(l, 1);
291 }
292
293 lua_pushboolean(l, cfg->encode_keep_buffer);
294
295 return 1;
296}
297
298/* On argument: decode enum and set config variables
299 * **options must point to a NULL terminated array of 4 enums
300 * Returns: current enum value */
301static void json_enum_option(lua_State *l, const char **options,
302 int *opt1, int *opt2)
303{
304 int setting;
305
306 if (lua_gettop(l)) {
307 if (lua_isboolean(l, 1))
308 setting = lua_toboolean(l, 1) * 3;
309 else
310 setting = luaL_checkoption(l, 1, NULL, options);
311
312 *opt1 = setting & 1 ? 1 : 0;
313 *opt2 = setting & 2 ? 1 : 0;
314 } else {
315 setting = *opt1 | (*opt2 << 1);
316 }
317
318 if (setting)
319 lua_pushstring(l, options[setting]);
320 else
321 lua_pushboolean(l, 0);
322}
323
324
325/* When enabled, rejects: NaN, Infinity, hexidecimal numbers */
326static int json_cfg_refuse_invalid_numbers(lua_State *l)
327{
328 static const char *options_enc_dec[] = { "none", "encode", "decode",
329 "both", NULL };
330 json_config_t *cfg;
331
332 json_verify_arg_count(l, 1);
333 cfg = json_fetch_config(l);
334
335 json_enum_option(l, options_enc_dec,
336 &cfg->encode_refuse_badnum,
337 &cfg->decode_refuse_badnum);
338
339 return 1;
340}
341
342static int json_destroy_config(lua_State *l)
343{
344 json_config_t *cfg;
345
346 cfg = lua_touserdata(l, 1);
347 if (cfg)
348 strbuf_free(&cfg->encode_buf);
349 cfg = NULL;
350
351 return 0;
352}
353
354static void json_create_config(lua_State *l)
355{
356 json_config_t *cfg;
357 int i;
358
359 cfg = lua_newuserdata(l, sizeof(*cfg));
360
361 /* Create GC method to clean up strbuf */
362 lua_newtable(l);
363 lua_pushcfunction(l, json_destroy_config);
364 lua_setfield(l, -2, "__gc");
365 lua_setmetatable(l, -2);
366
367 strbuf_init(&cfg->encode_buf, 0);
368
369 cfg->encode_sparse_convert = DEFAULT_SPARSE_CONVERT;
370 cfg->encode_sparse_ratio = DEFAULT_SPARSE_RATIO;
371 cfg->encode_sparse_safe = DEFAULT_SPARSE_SAFE;
372 cfg->encode_max_depth = DEFAULT_MAX_DEPTH;
373 cfg->encode_refuse_badnum = DEFAULT_ENCODE_REFUSE_BADNUM;
374 cfg->decode_refuse_badnum = DEFAULT_DECODE_REFUSE_BADNUM;
375 cfg->encode_keep_buffer = DEFAULT_ENCODE_KEEP_BUFFER;
376 json_set_number_precision(cfg, 14);
377
378 /* Decoding init */
379
380 /* Tag all characters as an error */
381 for (i = 0; i < 256; i++)
382 cfg->ch2token[i] = T_ERROR;
383
384 /* Set tokens that require no further processing */
385 cfg->ch2token['{'] = T_OBJ_BEGIN;
386 cfg->ch2token['}'] = T_OBJ_END;
387 cfg->ch2token['['] = T_ARR_BEGIN;
388 cfg->ch2token[']'] = T_ARR_END;
389 cfg->ch2token[','] = T_COMMA;
390 cfg->ch2token[':'] = T_COLON;
391 cfg->ch2token['\0'] = T_END;
392 cfg->ch2token[' '] = T_WHITESPACE;
393 cfg->ch2token['\t'] = T_WHITESPACE;
394 cfg->ch2token['\n'] = T_WHITESPACE;
395 cfg->ch2token['\r'] = T_WHITESPACE;
396
397 /* Update characters that require further processing */
398 cfg->ch2token['f'] = T_UNKNOWN; /* false? */
399 cfg->ch2token['i'] = T_UNKNOWN; /* inf, ininity? */
400 cfg->ch2token['I'] = T_UNKNOWN;
401 cfg->ch2token['n'] = T_UNKNOWN; /* null, nan? */
402 cfg->ch2token['N'] = T_UNKNOWN;
403 cfg->ch2token['t'] = T_UNKNOWN; /* true? */
404 cfg->ch2token['"'] = T_UNKNOWN; /* string? */
405 cfg->ch2token['+'] = T_UNKNOWN; /* number? */
406 cfg->ch2token['-'] = T_UNKNOWN;
407 for (i = 0; i < 10; i++)
408 cfg->ch2token['0' + i] = T_UNKNOWN;
409
410 /* Lookup table for parsing escape characters */
411 for (i = 0; i < 256; i++)
412 cfg->escape2char[i] = 0; /* String error */
413 cfg->escape2char['"'] = '"';
414 cfg->escape2char['\\'] = '\\';
415 cfg->escape2char['/'] = '/';
416 cfg->escape2char['b'] = '\b';
417 cfg->escape2char['t'] = '\t';
418 cfg->escape2char['n'] = '\n';
419 cfg->escape2char['f'] = '\f';
420 cfg->escape2char['r'] = '\r';
421 cfg->escape2char['u'] = 'u'; /* Unicode parsing required */
422
423
424#if 0
425 /* Initialise separate storage for pre-generated escape codes.
426 * Escapes 0-31 map directly, 34, 92, 127 follow afterwards to
427 * save memory. */
428 for (i = 0 ; i < 32; i++)
429 sprintf(cfg->escapes[i], "\\u%04x", i);
430 strcpy(cfg->escapes[8], "\b"); /* Override simpler escapes */
431 strcpy(cfg->escapes[9], "\t");
432 strcpy(cfg->escapes[10], "\n");
433 strcpy(cfg->escapes[12], "\f");
434 strcpy(cfg->escapes[13], "\r");
435 strcpy(cfg->escapes[32], "\\\""); /* chr(34) */
436 strcpy(cfg->escapes[33], "\\\\"); /* chr(92) */
437 sprintf(cfg->escapes[34], "\\u%04x", 127); /* char(127) */
438
439 /* Initialise encoding escape lookup table */
440 for (i = 0; i < 32; i++)
441 cfg->char2escape[i] = cfg->escapes[i];
442 for (i = 32; i < 256; i++)
443 cfg->char2escape[i] = NULL;
444 cfg->char2escape[34] = cfg->escapes[32];
445 cfg->char2escape[92] = cfg->escapes[33];
446 cfg->char2escape[127] = cfg->escapes[34];
447#endif
448}
449
450/* ===== ENCODING ===== */
451
452static void json_encode_exception(lua_State *l, json_config_t *cfg, int lindex,
453 const char *reason)
454{
455 if (!cfg->encode_keep_buffer)
456 strbuf_free(&cfg->encode_buf);
457 luaL_error(l, "Cannot serialise %s: %s",
458 lua_typename(l, lua_type(l, lindex)), reason);
459}
460
461/* json_append_string args:
462 * - lua_State
463 * - JSON strbuf
464 * - String (Lua stack index)
465 *
466 * Returns nothing. Doesn't remove string from Lua stack */
467static void json_append_string(lua_State *l, strbuf_t *json, int lindex)
468{
469 const char *escstr;
470 int i;
471 const char *str;
472 size_t len;
473
474 str = lua_tolstring(l, lindex, &len);
475
476 /* Worst case is len * 6 (all unicode escapes).
477 * This buffer is reused constantly for small strings
478 * If there are any excess pages, they won't be hit anyway.
479 * This gains ~5% speedup. */
480 strbuf_ensure_empty_length(json, len * 6 + 2);
481
482 strbuf_append_char_unsafe(json, '\"');
483 for (i = 0; i < len; i++) {
484 escstr = char2escape[(unsigned char)str[i]];
485 if (escstr)
486 strbuf_append_string(json, escstr);
487 else
488 strbuf_append_char_unsafe(json, str[i]);
489 }
490 strbuf_append_char_unsafe(json, '\"');
491}
492
493/* Find the size of the array on the top of the Lua stack
494 * -1 object (not a pure array)
495 * >=0 elements in array
496 */
497static int lua_array_length(lua_State *l, json_config_t *cfg)
498{
499 double k;
500 int max;
501 int items;
502
503 max = 0;
504 items = 0;
505
506 lua_pushnil(l);
507 /* table, startkey */
508 while (lua_next(l, -2) != 0) {
509 /* table, key, value */
510 if (lua_type(l, -2) == LUA_TNUMBER &&
511 (k = lua_tonumber(l, -2))) {
512 /* Integer >= 1 ? */
513 if (floor(k) == k && k >= 1) {
514 if (k > max)
515 max = k;
516 items++;
517 lua_pop(l, 1);
518 continue;
519 }
520 }
521
522 /* Must not be an array (non integer key) */
523 lua_pop(l, 2);
524 return -1;
525 }
526
527 /* Encode excessively sparse arrays as objects (if enabled) */
528 if (cfg->encode_sparse_ratio > 0 &&
529 max > items * cfg->encode_sparse_ratio &&
530 max > cfg->encode_sparse_safe) {
531 if (!cfg->encode_sparse_convert)
532 json_encode_exception(l, cfg, -1, "excessively sparse array");
533
534 return -1;
535 }
536
537 return max;
538}
539
540static void json_encode_descend(lua_State *l, json_config_t *cfg)
541{
542 cfg->current_depth++;
543
544 if (cfg->current_depth > cfg->encode_max_depth) {
545 if (!cfg->encode_keep_buffer)
546 strbuf_free(&cfg->encode_buf);
547 luaL_error(l, "Cannot serialise, excessive nesting (%d)",
548 cfg->current_depth);
549 }
550}
551
552static void json_append_data(lua_State *l, json_config_t *cfg, strbuf_t *json);
553
554/* json_append_array args:
555 * - lua_State
556 * - JSON strbuf
557 * - Size of passwd Lua array (top of stack) */
558static void json_append_array(lua_State *l, json_config_t *cfg, strbuf_t *json,
559 int array_length)
560{
561 int comma, i;
562
563 json_encode_descend(l, cfg);
564
565 strbuf_append_char(json, '[');
566
567 comma = 0;
568 for (i = 1; i <= array_length; i++) {
569 if (comma)
570 strbuf_append_char(json, ',');
571 else
572 comma = 1;
573
574 lua_rawgeti(l, -1, i);
575 json_append_data(l, cfg, json);
576 lua_pop(l, 1);
577 }
578
579 strbuf_append_char(json, ']');
580
581 cfg->current_depth--;
582}
583
584static void json_append_number(lua_State *l, strbuf_t *json, int index,
585 json_config_t *cfg)
586{
587 double num = lua_tonumber(l, index);
588
589 if (cfg->encode_refuse_badnum && (isinf(num) || isnan(num)))
590 json_encode_exception(l, cfg, index, "must not be NaN or Inf");
591
592 /* Lowest double printed with %.14g is 21 characters long:
593 * -1.7976931348623e+308
594 *
595 * Use 32 to include the \0, and a few extra just in case..
596 */
597 strbuf_append_fmt(json, 32, cfg->number_fmt, num);
598}
599
600static void json_append_object(lua_State *l, json_config_t *cfg,
601 strbuf_t *json)
602{
603 int comma, keytype;
604
605 json_encode_descend(l, cfg);
606
607 /* Object */
608 strbuf_append_char(json, '{');
609
610 lua_pushnil(l);
611 /* table, startkey */
612 comma = 0;
613 while (lua_next(l, -2) != 0) {
614 if (comma)
615 strbuf_append_char(json, ',');
616 else
617 comma = 1;
618
619 /* table, key, value */
620 keytype = lua_type(l, -2);
621 if (keytype == LUA_TNUMBER) {
622 strbuf_append_char(json, '"');
623 json_append_number(l, json, -2, cfg);
624 strbuf_append_mem(json, "\":", 2);
625 } else if (keytype == LUA_TSTRING) {
626 json_append_string(l, json, -2);
627 strbuf_append_char(json, ':');
628 } else {
629 json_encode_exception(l, cfg, -2,
630 "table key must be a number or string");
631 /* never returns */
632 }
633
634 /* table, key, value */
635 json_append_data(l, cfg, json);
636 lua_pop(l, 1);
637 /* table, key */
638 }
639
640 strbuf_append_char(json, '}');
641
642 cfg->current_depth--;
643}
644
645/* Serialise Lua data into JSON string. */
646static void json_append_data(lua_State *l, json_config_t *cfg, strbuf_t *json)
647{
648 int len;
649
650 switch (lua_type(l, -1)) {
651 case LUA_TSTRING:
652 json_append_string(l, json, -1);
653 break;
654 case LUA_TNUMBER:
655 json_append_number(l, json, -1, cfg);
656 break;
657 case LUA_TBOOLEAN:
658 if (lua_toboolean(l, -1))
659 strbuf_append_mem(json, "true", 4);
660 else
661 strbuf_append_mem(json, "false", 5);
662 break;
663 case LUA_TTABLE:
664 len = lua_array_length(l, cfg);
665 if (len > 0)
666 json_append_array(l, cfg, json, len);
667 else
668 json_append_object(l, cfg, json);
669 break;
670 case LUA_TNIL:
671 strbuf_append_mem(json, "null", 4);
672 break;
673 case LUA_TLIGHTUSERDATA:
674 if (lua_touserdata(l, -1) == NULL) {
675 strbuf_append_mem(json, "null", 4);
676 break;
677 }
678 default:
679 /* Remaining types (LUA_TFUNCTION, LUA_TUSERDATA, LUA_TTHREAD,
680 * and LUA_TLIGHTUSERDATA) cannot be serialised */
681 json_encode_exception(l, cfg, -1, "type not supported");
682 /* never returns */
683 }
684}
685
686static int json_encode(lua_State *l)
687{
688 json_config_t *cfg;
689 char *json;
690 int len;
691
692 /* Can't use json_verify_arg_count() since we need to ensure
693 * there is only 1 argument */
694 luaL_argcheck(l, lua_gettop(l) == 1, 1, "expected 1 argument");
695
696 cfg = json_fetch_config(l);
697 cfg->current_depth = 0;
698
699 /* Reset the persistent buffer if it exists.
700 * Otherwise allocate a new buffer. */
701 if (strbuf_allocated(&cfg->encode_buf))
702 strbuf_reset(&cfg->encode_buf);
703 else
704 strbuf_init(&cfg->encode_buf, 0);
705
706 json_append_data(l, cfg, &cfg->encode_buf);
707 json = strbuf_string(&cfg->encode_buf, &len);
708
709 lua_pushlstring(l, json, len);
710
711 if (!cfg->encode_keep_buffer)
712 strbuf_free(&cfg->encode_buf);
713
714 return 1;
715}
716
717/* ===== DECODING ===== */
718
719static void json_process_value(lua_State *l, json_parse_t *json,
720 json_token_t *token);
721
/* Convert one ASCII hex digit (0-9, a-f, A-F) to its value 0..15.
 * Returns -1 for any other character. */
static int hexdigit2int(char hex)
{
    char lower;

    if (hex >= '0' && hex <= '9')
        return hex - '0';

    /* Setting bit 0x20 folds 'A'..'F' onto 'a'..'f' */
    lower = hex | 0x20;
    if (lower >= 'a' && lower <= 'f')
        return lower - 'a' + 10;

    return -1;
}
734
/* Decode the 4 hex digits at "hex" into a value 0..0xFFFF.
 * Returns -1 if any of the 4 characters is not a hex digit; this
 * includes a '\0' terminator, so scanning stops at the end of a short
 * string. */
static int decode_hex4(const char *hex)
{
    int value = 0;
    int pos;

    for (pos = 0; pos < 4; pos++) {
        int nibble = hexdigit2int(hex[pos]);

        if (nibble < 0)
            return -1;

        /* Most significant digit first */
        value = (value << 4) | nibble;
    }

    return value;
}
755
/* Converts a Unicode codepoint to UTF-8.
 * Returns UTF-8 string length, and up to 4 bytes in *utf8.
 * Returns 0 (writing nothing) when the codepoint is negative or above
 * U+10FFFF, the highest codepoint Unicode defines. */
static int codepoint_to_utf8(char *utf8, int codepoint)
{
    if (codepoint < 0 || codepoint > 0x10FFFF)
        return 0;

    /* 0xxxxxxx */
    if (codepoint <= 0x7F) {
        utf8[0] = codepoint;
        return 1;
    }

    /* 110xxxxx 10xxxxxx */
    if (codepoint <= 0x7FF) {
        utf8[0] = (codepoint >> 6) | 0xC0;
        utf8[1] = (codepoint & 0x3F) | 0x80;
        return 2;
    }

    /* 1110xxxx 10xxxxxx 10xxxxxx */
    if (codepoint <= 0xFFFF) {
        utf8[0] = (codepoint >> 12) | 0xE0;
        utf8[1] = ((codepoint >> 6) & 0x3F) | 0x80;
        utf8[2] = (codepoint & 0x3F) | 0x80;
        return 3;
    }

    /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
    utf8[0] = (codepoint >> 18) | 0xF0;
    utf8[1] = ((codepoint >> 12) & 0x3F) | 0x80;
    utf8[2] = ((codepoint >> 6) & 0x3F) | 0x80;
    utf8[3] = (codepoint & 0x3F) | 0x80;
    return 4;
}
792
793
794/* Called when index pointing to beginning of UTF-16 code escape: \uXXXX
795 * \u is guaranteed to exist, but the remaining hex characters may be
796 * missing.
797 * Translate to UTF-8 and append to temporary token string.
798 * Must advance index to the next character to be processed.
799 * Returns: 0 success
800 * -1 error
801 */
802static int json_append_unicode_escape(json_parse_t *json)
803{
804 char utf8[4]; /* Surrogate pairs require 4 UTF-8 bytes */
805 int codepoint;
806 int surrogate_low;
807 int len;
808 int escape_len = 6;
809
810 /* Fetch UTF-16 code unit */
811 codepoint = decode_hex4(&json->data[json->index + 2]);
812 if (codepoint < 0)
813 return -1;
814
815 /* UTF-16 surrogate pairs take the following 2 byte form:
816 * 11011 x yyyyyyyyyy
817 * When x = 0: y is the high 10 bits of the codepoint
818 * x = 1: y is the low 10 bits of the codepoint
819 *
820 * Check for a surrogate pair (high or low) */
821 if ((codepoint & 0xF800) == 0xD800) {
822 /* Error if the 1st surrogate is not high */
823 if (codepoint & 0x400)
824 return -1;
825
826 /* Ensure the next code is a unicode escape */
827 if (json->data[json->index + escape_len] != '\\' ||
828 json->data[json->index + escape_len + 1] != 'u') {
829 return -1;
830 }
831
832 /* Fetch the next codepoint */
833 surrogate_low = decode_hex4(&json->data[json->index + 2 + escape_len]);
834 if (surrogate_low < 0)
835 return -1;
836
837 /* Error if the 2nd code is not a low surrogate */
838 if ((surrogate_low & 0xFC00) != 0xDC00)
839 return -1;
840
841 /* Calculate Unicode codepoint */
842 codepoint = (codepoint & 0x3FF) << 10;
843 surrogate_low &= 0x3FF;
844 codepoint = (codepoint | surrogate_low) + 0x10000;
845 escape_len = 12;
846 }
847
848 /* Convert codepoint to UTF-8 */
849 len = codepoint_to_utf8(utf8, codepoint);
850 if (!len)
851 return -1;
852
853 /* Append bytes and advance parse index */
854 strbuf_append_mem_unsafe(json->tmp, utf8, len);
855 json->index += escape_len;
856
857 return 0;
858}
859
860static void json_set_token_error(json_token_t *token, json_parse_t *json,
861 const char *errtype)
862{
863 token->type = T_ERROR;
864 token->index = json->index;
865 token->value.string = errtype;
866}
867
868static void json_next_string_token(json_parse_t *json, json_token_t *token)
869{
870 char *escape2char = json->cfg->escape2char;
871 char ch;
872
873 /* Caller must ensure a string is next */
874 assert(json->data[json->index] == '"');
875
876 /* Skip " */
877 json->index++;
878
879 /* json->tmp is the temporary strbuf used to accumulate the
880 * decoded string value. */
881 strbuf_reset(json->tmp);
882 while ((ch = json->data[json->index]) != '"') {
883 if (!ch) {
884 /* Premature end of the string */
885 json_set_token_error(token, json, "unexpected end of string");
886 return;
887 }
888
889 /* Handle escapes */
890 if (ch == '\\') {
891 /* Fetch escape character */
892 ch = json->data[json->index + 1];
893
894 /* Translate escape code and append to tmp string */
895 ch = escape2char[(unsigned char)ch];
896 if (ch == 'u') {
897 if (json_append_unicode_escape(json) == 0)
898 continue;
899
900 json_set_token_error(token, json,
901 "invalid unicode escape code");
902 return;
903 }
904 if (!ch) {
905 json_set_token_error(token, json, "invalid escape code");
906 return;
907 }
908
909 /* Skip '\' */
910 json->index++;
911 }
912 /* Append normal character or translated single character
913 * Unicode escapes are handled above */
914 strbuf_append_char_unsafe(json->tmp, ch);
915 json->index++;
916 }
917 json->index++; /* Eat final quote (") */
918
919 strbuf_ensure_null(json->tmp);
920
921 token->type = T_STRING;
922 token->value.string = strbuf_string(json->tmp, &token->string_len);
923}
924
925/* JSON numbers should take the following form:
926 * -?(0|[1-9]|[1-9][0-9]+)(.[0-9]+)?([eE][-+]?[0-9]+)?
927 *
928 * json_next_number_token() uses strtod() which allows other forms:
929 * - numbers starting with '+'
930 * - NaN, -NaN, infinity, -infinity
931 * - hexidecimal numbers
932 * - numbers with leading zeros
933 *
934 * json_is_invalid_number() detects "numbers" which may pass strtod()'s
935 * error checking, but should not be allowed with strict JSON.
936 *
937 * json_is_invalid_number() may pass numbers which cause strtod()
938 * to generate an error.
939 */
940static int json_is_invalid_number(json_parse_t *json)
941{
942 int i = json->index;
943
944 /* Reject numbers starting with + */
945 if (json->data[i] == '+')
946 return 1;
947
948 /* Skip minus sign if it exists */
949 if (json->data[i] == '-')
950 i++;
951
952 /* Reject numbers starting with 0x, or leading zeros */
953 if (json->data[i] == '0') {
954 int ch2 = json->data[i + 1];
955
956 if ((ch2 | 0x20) == 'x' || /* Hex */
957 ('0' <= ch2 && ch2 <= '9')) /* Leading zero */
958 return 1;
959
960 return 0;
961 } else if (json->data[i] <= '9') {
962 return 0; /* Ordinary number */
963 }
964
965
966 /* Reject inf/nan */
967 if (!strncasecmp(&json->data[i], "inf", 3))
968 return 1;
969 if (!strncasecmp(&json->data[i], "nan", 3))
970 return 1;
971
972 /* Pass all other numbers which may still be invalid, but
973 * strtod() will catch them. */
974 return 0;
975}
976
977static void json_next_number_token(json_parse_t *json, json_token_t *token)
978{
979 const char *startptr;
980 char *endptr;
981
982 token->type = T_NUMBER;
983 startptr = &json->data[json->index];
984 token->value.number = strtod(&json->data[json->index], &endptr);
985 if (startptr == endptr)
986 json_set_token_error(token, json, "invalid number");
987 else
988 json->index += endptr - startptr; /* Skip the processed number */
989
990 return;
991}
992
993/* Fills in the token struct.
994 * T_STRING will return a pointer to the json_parse_t temporary string
995 * T_ERROR will leave the json->index pointer at the error.
996 */
997static void json_next_token(json_parse_t *json, json_token_t *token)
998{
999 json_token_type_t *ch2token = json->cfg->ch2token;
1000 int ch;
1001
1002 /* Eat whitespace. FIXME: UGLY */
1003 token->type = ch2token[(unsigned char)json->data[json->index]];
1004 while (token->type == T_WHITESPACE)
1005 token->type = ch2token[(unsigned char)json->data[++json->index]];
1006
1007 token->index = json->index;
1008
1009 /* Don't advance the pointer for an error or the end */
1010 if (token->type == T_ERROR) {
1011 json_set_token_error(token, json, "invalid token");
1012 return;
1013 }
1014
1015 if (token->type == T_END) {
1016 return;
1017 }
1018
1019 /* Found a known single character token, advance index and return */
1020 if (token->type != T_UNKNOWN) {
1021 json->index++;
1022 return;
1023 }
1024
1025 /* Process characters which triggered T_UNKNOWN */
1026 ch = json->data[json->index];
1027
1028 /* Must use strncmp() to match the front of the JSON string.
1029 * JSON identifier must be lowercase.
1030 * When strict_numbers if disabled, either case is allowed for
1031 * Infinity/NaN (since we are no longer following the spec..) */
1032 if (ch == '"') {
1033 json_next_string_token(json, token);
1034 return;
1035 } else if (ch == '-' || ('0' <= ch && ch <= '9')) {
1036 if (json->cfg->decode_refuse_badnum && json_is_invalid_number(json)) {
1037 json_set_token_error(token, json, "invalid number");
1038 return;
1039 }
1040 json_next_number_token(json, token);
1041 return;
1042 } else if (!strncmp(&json->data[json->index], "true", 4)) {
1043 token->type = T_BOOLEAN;
1044 token->value.boolean = 1;
1045 json->index += 4;
1046 return;
1047 } else if (!strncmp(&json->data[json->index], "false", 5)) {
1048 token->type = T_BOOLEAN;
1049 token->value.boolean = 0;
1050 json->index += 5;
1051 return;
1052 } else if (!strncmp(&json->data[json->index], "null", 4)) {
1053 token->type = T_NULL;
1054 json->index += 4;
1055 return;
1056 } else if (!json->cfg->decode_refuse_badnum &&
1057 json_is_invalid_number(json)) {
1058 /* When refuse_badnum is disabled, only attempt to process
1059 * numbers we know are invalid JSON (Inf, NaN, hex)
1060 * This is required to generate an appropriate token error,
1061 * otherwise all bad tokens will register as "invalid number"
1062 */
1063 json_next_number_token(json, token);
1064 return;
1065 }
1066
1067 /* Token starts with t/f/n but isn't recognised above. */
1068 json_set_token_error(token, json, "invalid token");
1069}
1070
1071/* This function does not return.
1072 * DO NOT CALL WITH DYNAMIC MEMORY ALLOCATED.
1073 * The only supported exception is the temporary parser string
1074 * json->tmp struct.
1075 * json and token should exist on the stack somewhere.
1076 * luaL_error() will long_jmp and release the stack */
1077static void json_throw_parse_error(lua_State *l, json_parse_t *json,
1078 const char *exp, json_token_t *token)
1079{
1080 const char *found;
1081
1082 strbuf_free(json->tmp);
1083
1084 if (token->type == T_ERROR)
1085 found = token->value.string;
1086 else
1087 found = json_token_type_name[token->type];
1088
1089 /* Note: token->index is 0 based, display starting from 1 */
1090 luaL_error(l, "Expected %s but found %s at character %d",
1091 exp, found, token->index + 1);
1092}
1093
1094static void json_decode_checkstack(lua_State *l, json_parse_t *json, int n)
1095{
1096 if (lua_checkstack(l, n))
1097 return;
1098
1099 strbuf_free(json->tmp);
1100 luaL_error(l, "Too many nested data structures");
1101}
1102
1103static void json_parse_object_context(lua_State *l, json_parse_t *json)
1104{
1105 json_token_t token;
1106
1107 /* 3 slots required:
1108 * .., table, key, value */
1109 json_decode_checkstack(l, json, 3);
1110
1111 lua_newtable(l);
1112
1113 json_next_token(json, &token);
1114
1115 /* Handle empty objects */
1116 if (token.type == T_OBJ_END) {
1117 return;
1118 }
1119
1120 while (1) {
1121 if (token.type != T_STRING)
1122 json_throw_parse_error(l, json, "object key string", &token);
1123
1124 /* Push key */
1125 lua_pushlstring(l, token.value.string, token.string_len);
1126
1127 json_next_token(json, &token);
1128 if (token.type != T_COLON)
1129 json_throw_parse_error(l, json, "colon", &token);
1130
1131 /* Fetch value */
1132 json_next_token(json, &token);
1133 json_process_value(l, json, &token);
1134
1135 /* Set key = value */
1136 lua_rawset(l, -3);
1137
1138 json_next_token(json, &token);
1139
1140 if (token.type == T_OBJ_END)
1141 return;
1142
1143 if (token.type != T_COMMA)
1144 json_throw_parse_error(l, json, "comma or object end", &token);
1145
1146 json_next_token(json, &token);
1147 }
1148}
1149
1150/* Handle the array context */
1151static void json_parse_array_context(lua_State *l, json_parse_t *json)
1152{
1153 json_token_t token;
1154 int i;
1155
1156 /* 2 slots required:
1157 * .., table, value */
1158 json_decode_checkstack(l, json, 2);
1159
1160 lua_newtable(l);
1161
1162 json_next_token(json, &token);
1163
1164 /* Handle empty arrays */
1165 if (token.type == T_ARR_END)
1166 return;
1167
1168 for (i = 1; ; i++) {
1169 json_process_value(l, json, &token);
1170 lua_rawseti(l, -2, i); /* arr[i] = value */
1171
1172 json_next_token(json, &token);
1173
1174 if (token.type == T_ARR_END)
1175 return;
1176
1177 if (token.type != T_COMMA)
1178 json_throw_parse_error(l, json, "comma or array end", &token);
1179
1180 json_next_token(json, &token);
1181 }
1182}
1183
1184/* Handle the "value" context */
1185static void json_process_value(lua_State *l, json_parse_t *json,
1186 json_token_t *token)
1187{
1188 switch (token->type) {
1189 case T_STRING:
1190 lua_pushlstring(l, token->value.string, token->string_len);
1191 break;;
1192 case T_NUMBER:
1193 lua_pushnumber(l, token->value.number);
1194 break;;
1195 case T_BOOLEAN:
1196 lua_pushboolean(l, token->value.boolean);
1197 break;;
1198 case T_OBJ_BEGIN:
1199 json_parse_object_context(l, json);
1200 break;;
1201 case T_ARR_BEGIN:
1202 json_parse_array_context(l, json);
1203 break;;
1204 case T_NULL:
1205 /* In Lua, setting "t[k] = nil" will delete k from the table.
1206 * Hence a NULL pointer lightuserdata object is used instead */
1207 lua_pushlightuserdata(l, NULL);
1208 break;;
1209 default:
1210 json_throw_parse_error(l, json, "value", token);
1211 }
1212}
1213
1214/* json_text must be null terminated string */
1215static void lua_json_decode(lua_State *l, const char *json_text, int json_len)
1216{
1217 json_parse_t json;
1218 json_token_t token;
1219
1220 json.cfg = json_fetch_config(l);
1221 json.data = json_text;
1222 json.index = 0;
1223
1224 /* Ensure the temporary buffer can hold the entire string.
1225 * This means we no longer need to do length checks since the decoded
1226 * string must be smaller than the entire json string */
1227 json.tmp = strbuf_new(json_len);
1228
1229 json_next_token(&json, &token);
1230 json_process_value(l, &json, &token);
1231
1232 /* Ensure there is no more input left */
1233 json_next_token(&json, &token);
1234
1235 if (token.type != T_END)
1236 json_throw_parse_error(l, &json, "the end", &token);
1237
1238 strbuf_free(json.tmp);
1239}
1240
1241static int json_decode(lua_State *l)
1242{
1243 const char *json;
1244 size_t len;
1245
1246 json_verify_arg_count(l, 1);
1247
1248 json = luaL_checklstring(l, 1, &len);
1249
1250 /* Detect Unicode other than UTF-8 (see RFC 4627, Sec 3)
1251 *
1252 * CJSON can support any simple data type, hence only the first
1253 * character is guaranteed to be ASCII (at worst: '"'). This is
1254 * still enough to detect whether the wrong encoding is in use. */
1255 if (len >= 2 && (!json[0] || !json[1]))
1256 luaL_error(l, "JSON parser does not support UTF-16 or UTF-32");
1257
1258 lua_json_decode(l, json, len);
1259
1260 return 1;
1261}
1262
1263/* ===== INITIALISATION ===== */
1264
1265int luaopen_cjson(lua_State *l)
1266{
1267 luaL_Reg reg[] = {
1268 { "encode", json_encode },
1269 { "decode", json_decode },
1270 { "encode_sparse_array", json_cfg_encode_sparse_array },
1271 { "encode_max_depth", json_cfg_encode_max_depth },
1272 { "encode_number_precision", json_cfg_encode_number_precision },
1273 { "encode_keep_buffer", json_cfg_encode_keep_buffer },
1274 { "refuse_invalid_numbers", json_cfg_refuse_invalid_numbers },
1275 { NULL, NULL }
1276 };
1277
1278 /* Use json_fetch_config as a pointer.
1279 * It's faster than using a config string, and more unique */
1280 lua_pushlightuserdata(l, &json_config_key);
1281 json_create_config(l);
1282 lua_settable(l, LUA_REGISTRYINDEX);
1283
1284 luaL_register(l, "cjson", reg);
1285
1286 /* Set cjson.null */
1287 lua_pushlightuserdata(l, NULL);
1288 lua_setfield(l, -2, "null");
1289
1290 /* Set cjson.version */
1291 lua_pushliteral(l, VERSION);
1292 lua_setfield(l, -2, "version");
1293
1294 /* Return cjson table */
1295 return 1;
1296}
1297
1298/* vi:ai et sw=4 ts=4:
1299 */