/* src/redis-check-dump.c -- from redis.git (git.saurik.com gitweb blob view).
 * Snapshot commit: "More robust handling of AOF rewrite child." */
1 #include <stdlib.h>
2 #include <stdio.h>
3 #include <unistd.h>
4 #include <fcntl.h>
5 #include <sys/stat.h>
6 #include <sys/mman.h>
7 #include <string.h>
8 #include <arpa/inet.h>
9 #include <stdint.h>
10 #include <limits.h>
11 #include "lzf.h"
12
/* Object types */
enum {
    REDIS_STRING       = 0,
    REDIS_LIST         = 1,
    REDIS_SET          = 2,
    REDIS_ZSET         = 3,
    REDIS_HASH         = 4,
    REDIS_HASH_ZIPMAP  = 9,
    REDIS_LIST_ZIPLIST = 10,
    REDIS_SET_INTSET   = 11,
    REDIS_ZSET_ZIPLIST = 12,
    REDIS_HASH_ZIPLIST = 13
};
24
/* Object encodings. Some kinds of objects, like Strings and Hashes, can be
 * internally represented in multiple ways. The 'encoding' field of the object
 * is set to one of these values. */
enum {
    REDIS_ENCODING_RAW    = 0,  /* Raw representation */
    REDIS_ENCODING_INT    = 1,  /* Encoded as integer */
    REDIS_ENCODING_ZIPMAP = 2,  /* Encoded as zipmap */
    REDIS_ENCODING_HT     = 3   /* Encoded as a hash table */
};
32
/* Object types only used for dumping to disk */
enum {
    REDIS_EXPIRETIME_MS = 252,
    REDIS_EXPIRETIME    = 253,
    REDIS_SELECTDB      = 254,
    REDIS_EOF           = 255
};
38
/* Defines related to the dump file format. Storing a full 32 bit length for
 * short keys would waste a lot of space, so the two most significant bits of
 * the first byte tell how to interpret the length:
 *
 * 00|000000 => the length is the low 6 bits of this byte
 * 01|000000 00000000 => the length is 14 bits: 6 bits of this byte plus the
 *                       8 bits of the next byte
 * 10|000000 [32 bit integer] => a full 32 bit length follows
 * 11|000000 => a specially encoded object follows. The low six bits specify
 *              the kind of object. See the REDIS_RDB_ENC_* defines.
 *
 * Lengths up to 63 are stored using a single byte; most DB keys, and many
 * values, will fit inside. */
enum {
    REDIS_RDB_6BITLEN  = 0,
    REDIS_RDB_14BITLEN = 1,
    REDIS_RDB_32BITLEN = 2,
    REDIS_RDB_ENCVAL   = 3
};

/* Sentinel returned instead of a length when the length could not be read.
 * Kept as a macro because UINT_MAX does not fit in an enum constant. */
#define REDIS_RDB_LENERR UINT_MAX
57
/* When the length of a string object stored on disk has the first two bits
 * set, the remaining six bits specify a special encoding for the object,
 * according to the following values: */
enum {
    REDIS_RDB_ENC_INT8  = 0,  /* 8 bit signed integer */
    REDIS_RDB_ENC_INT16 = 1,  /* 16 bit signed integer */
    REDIS_RDB_ENC_INT32 = 2,  /* 32 bit signed integer */
    REDIS_RDB_ENC_LZF   = 3   /* LZF-compressed string */
};
65
/* Print a printf-style message to stdout and terminate the process with exit
 * status 1. Wrapped in do { } while (0) so that `ERROR(...);` is a single
 * statement and can be used safely in an unbraced if/else. */
#define ERROR(...) do { \
    printf(__VA_ARGS__); \
    exit(1); \
} while (0)
70
/* A read cursor over a memory buffer: base address, total size in bytes,
 * and the current read offset into it. */
typedef struct {
    void   *data;    /* start of the buffer */
    size_t  size;    /* number of readable bytes */
    size_t  offset;  /* current read position, relative to data */
} pos;

/* Index of the cursor in positions[] that read operations currently use. */
static unsigned char level = 0;

/* Stack of cursors; positions[level] is the active one. */
static pos positions[16];

/* Offset of the active cursor. */
#define CURR_OFFSET (positions[level].offset)
82
/* Holds a stack of error messages, each tagged with the file offset at which
 * the problem was detected. 'level' is the number of entries in use. */
typedef struct {
    char error[16][1024];
    size_t offset[16];
    size_t level;
} errors_t;
static errors_t errors;

/* Push a printf-style formatted message and its offset onto the error stack.
 *
 * The message is truncated to fit its 1024 byte slot, and the push is
 * silently dropped when all 16 slots are already in use, so the stack can
 * never be written out of bounds. The do { } while (0) wrapper makes the
 * macro behave as a single statement. */
#define SHIFT_ERROR(provided_offset, ...) do { \
    if (errors.level < sizeof(errors.error) / sizeof(errors.error[0])) { \
        snprintf(errors.error[errors.level], sizeof(errors.error[0]), \
                 __VA_ARGS__); \
        errors.offset[errors.level] = (provided_offset); \
        errors.level++; \
    } \
} while (0)
96
/* Result of parsing one opcode: the type that was read, an optional key name
 * and a success flag. Field order matters: the struct is initialized
 * positionally elsewhere in this file. */
typedef struct {
    char *key;      /* heap-allocated key name, or NULL when none was read */
    int   type;     /* type/opcode byte, or -1 when none was read */
    char  success;  /* non-zero when the entry was parsed without errors */
} entry;
103
/* Global vars that are actually used as constants. The following double
 * values are used for double on-disk serialization, and are initialized
 * at runtime to avoid strange compiler optimizations. */
static double R_Zero;
static double R_PosInf;
static double R_NegInf;
static double R_Nan;

/* Printable name of each type byte, indexed by type; used for output. */
static char types[256][16];

/* Prototypes */
uint64_t crc64(uint64_t crc, const unsigned char *s, uint64_t l);
114
115 /* Return true if 't' is a valid object type. */
116 int checkType(unsigned char t) {
117 /* In case a new object type is added, update the following
118 * condition as necessary. */
119 return
120 (t >= REDIS_HASH_ZIPMAP && t <= REDIS_HASH_ZIPLIST) ||
121 t <= REDIS_HASH ||
122 t >= REDIS_EXPIRETIME_MS;
123 }
124
125 /* when number of bytes to read is negative, do a peek */
126 int readBytes(void *target, long num) {
127 char peek = (num < 0) ? 1 : 0;
128 num = (num < 0) ? -num : num;
129
130 pos p = positions[level];
131 if (p.offset + num > p.size) {
132 return 0;
133 } else {
134 memcpy(target, (void*)((size_t)p.data + p.offset), num);
135 if (!peek) positions[level].offset += num;
136 }
137 return 1;
138 }
139
/* Read and validate the 9 byte dump header: the magic string "REDIS"
 * followed by the format version in decimal digits.
 *
 * Returns the version number. Exits the process via ERROR() when the header
 * cannot be read, the magic does not match, or the version is outside the
 * supported range 1..6. */
int processHeader() {
    /* 9 header bytes plus the terminating NUL needed by strtol(). */
    char header[10] = "_________";

    if (!readBytes(header, 9)) {
        ERROR("Cannot read header\n");
    }

    /* the first 5 bytes must spell REDIS */
    if (memcmp(header, "REDIS", 5) != 0) {
        ERROR("Wrong signature in header\n");
    }

    int version = (int)strtol(header + 5, NULL, 10);
    if (!(version >= 1 && version <= 6)) {
        ERROR("Unknown RDB format version: %d\n", version);
    }
    return version;
}
159
160 int loadType(entry *e) {
161 uint32_t offset = CURR_OFFSET;
162
163 /* this byte needs to qualify as type */
164 unsigned char t;
165 if (readBytes(&t, 1)) {
166 if (checkType(t)) {
167 e->type = t;
168 return 1;
169 } else {
170 SHIFT_ERROR(offset, "Unknown type (0x%02x)", t);
171 }
172 } else {
173 SHIFT_ERROR(offset, "Could not read type");
174 }
175
176 /* failure */
177 return 0;
178 }
179
/* Look at the next byte without consuming it.
 * Returns the byte when it is a valid type, -1 when no byte is available or
 * it is not a valid type. */
int peekType() {
    unsigned char next;

    if (!readBytes(&next, -1))
        return -1;
    if (!checkType(next))
        return -1;
    return next;
}
186
187 /* discard time, just consume the bytes */
188 int processTime(int type) {
189 uint32_t offset = CURR_OFFSET;
190 unsigned char t[8];
191 int timelen = (type == REDIS_EXPIRETIME_MS) ? 8 : 4;
192
193 if (readBytes(t,timelen)) {
194 return 1;
195 } else {
196 SHIFT_ERROR(offset, "Could not read time");
197 }
198
199 /* failure */
200 return 0;
201 }
202
203 uint32_t loadLength(int *isencoded) {
204 unsigned char buf[2];
205 uint32_t len;
206 int type;
207
208 if (isencoded) *isencoded = 0;
209 if (!readBytes(buf, 1)) return REDIS_RDB_LENERR;
210 type = (buf[0] & 0xC0) >> 6;
211 if (type == REDIS_RDB_6BITLEN) {
212 /* Read a 6 bit len */
213 return buf[0] & 0x3F;
214 } else if (type == REDIS_RDB_ENCVAL) {
215 /* Read a 6 bit len encoding type */
216 if (isencoded) *isencoded = 1;
217 return buf[0] & 0x3F;
218 } else if (type == REDIS_RDB_14BITLEN) {
219 /* Read a 14 bit len */
220 if (!readBytes(buf+1,1)) return REDIS_RDB_LENERR;
221 return ((buf[0] & 0x3F) << 8) | buf[1];
222 } else {
223 /* Read a 32 bit len */
224 if (!readBytes(&len, 4)) return REDIS_RDB_LENERR;
225 return (unsigned int)ntohl(len);
226 }
227 }
228
229 char *loadIntegerObject(int enctype) {
230 uint32_t offset = CURR_OFFSET;
231 unsigned char enc[4];
232 long long val;
233
234 if (enctype == REDIS_RDB_ENC_INT8) {
235 uint8_t v;
236 if (!readBytes(enc, 1)) return NULL;
237 v = enc[0];
238 val = (int8_t)v;
239 } else if (enctype == REDIS_RDB_ENC_INT16) {
240 uint16_t v;
241 if (!readBytes(enc, 2)) return NULL;
242 v = enc[0]|(enc[1]<<8);
243 val = (int16_t)v;
244 } else if (enctype == REDIS_RDB_ENC_INT32) {
245 uint32_t v;
246 if (!readBytes(enc, 4)) return NULL;
247 v = enc[0]|(enc[1]<<8)|(enc[2]<<16)|(enc[3]<<24);
248 val = (int32_t)v;
249 } else {
250 SHIFT_ERROR(offset, "Unknown integer encoding (0x%02x)", enctype);
251 return NULL;
252 }
253
254 /* convert val into string */
255 char *buf;
256 buf = malloc(sizeof(char) * 128);
257 sprintf(buf, "%lld", val);
258 return buf;
259 }
260
261 char* loadLzfStringObject() {
262 unsigned int slen, clen;
263 char *c, *s;
264
265 if ((clen = loadLength(NULL)) == REDIS_RDB_LENERR) return NULL;
266 if ((slen = loadLength(NULL)) == REDIS_RDB_LENERR) return NULL;
267
268 c = malloc(clen);
269 if (!readBytes(c, clen)) {
270 free(c);
271 return NULL;
272 }
273
274 s = malloc(slen+1);
275 if (lzf_decompress(c,clen,s,slen) == 0) {
276 free(c); free(s);
277 return NULL;
278 }
279
280 free(c);
281 return s;
282 }
283
284 /* returns NULL when not processable, char* when valid */
285 char* loadStringObject() {
286 uint32_t offset = CURR_OFFSET;
287 int isencoded;
288 uint32_t len;
289
290 len = loadLength(&isencoded);
291 if (isencoded) {
292 switch(len) {
293 case REDIS_RDB_ENC_INT8:
294 case REDIS_RDB_ENC_INT16:
295 case REDIS_RDB_ENC_INT32:
296 return loadIntegerObject(len);
297 case REDIS_RDB_ENC_LZF:
298 return loadLzfStringObject();
299 default:
300 /* unknown encoding */
301 SHIFT_ERROR(offset, "Unknown string encoding (0x%02x)", len);
302 return NULL;
303 }
304 }
305
306 if (len == REDIS_RDB_LENERR) return NULL;
307
308 char *buf = malloc(sizeof(char) * (len+1));
309 buf[len] = '\0';
310 if (!readBytes(buf, len)) {
311 free(buf);
312 return NULL;
313 }
314 return buf;
315 }
316
317 int processStringObject(char** store) {
318 unsigned long offset = CURR_OFFSET;
319 char *key = loadStringObject();
320 if (key == NULL) {
321 SHIFT_ERROR(offset, "Error reading string object");
322 free(key);
323 return 0;
324 }
325
326 if (store != NULL) {
327 *store = key;
328 } else {
329 free(key);
330 }
331 return 1;
332 }
333
334 double* loadDoubleValue() {
335 char buf[256];
336 unsigned char len;
337 double* val;
338
339 if (!readBytes(&len,1)) return NULL;
340
341 val = malloc(sizeof(double));
342 switch(len) {
343 case 255: *val = R_NegInf; return val;
344 case 254: *val = R_PosInf; return val;
345 case 253: *val = R_Nan; return val;
346 default:
347 if (!readBytes(buf, len)) {
348 free(val);
349 return NULL;
350 }
351 buf[len] = '\0';
352 sscanf(buf, "%lg", val);
353 return val;
354 }
355 }
356
357 int processDoubleValue(double** store) {
358 unsigned long offset = CURR_OFFSET;
359 double *val = loadDoubleValue();
360 if (val == NULL) {
361 SHIFT_ERROR(offset, "Error reading double value");
362 free(val);
363 return 0;
364 }
365
366 if (store != NULL) {
367 *store = val;
368 } else {
369 free(val);
370 }
371 return 1;
372 }
373
374 int loadPair(entry *e) {
375 uint32_t offset = CURR_OFFSET;
376 uint32_t i;
377
378 /* read key first */
379 char *key;
380 if (processStringObject(&key)) {
381 e->key = key;
382 } else {
383 SHIFT_ERROR(offset, "Error reading entry key");
384 return 0;
385 }
386
387 uint32_t length = 0;
388 if (e->type == REDIS_LIST ||
389 e->type == REDIS_SET ||
390 e->type == REDIS_ZSET ||
391 e->type == REDIS_HASH) {
392 if ((length = loadLength(NULL)) == REDIS_RDB_LENERR) {
393 SHIFT_ERROR(offset, "Error reading %s length", types[e->type]);
394 return 0;
395 }
396 }
397
398 switch(e->type) {
399 case REDIS_STRING:
400 case REDIS_HASH_ZIPMAP:
401 case REDIS_LIST_ZIPLIST:
402 case REDIS_SET_INTSET:
403 case REDIS_ZSET_ZIPLIST:
404 case REDIS_HASH_ZIPLIST:
405 if (!processStringObject(NULL)) {
406 SHIFT_ERROR(offset, "Error reading entry value");
407 return 0;
408 }
409 break;
410 case REDIS_LIST:
411 case REDIS_SET:
412 for (i = 0; i < length; i++) {
413 offset = CURR_OFFSET;
414 if (!processStringObject(NULL)) {
415 SHIFT_ERROR(offset, "Error reading element at index %d (length: %d)", i, length);
416 return 0;
417 }
418 }
419 break;
420 case REDIS_ZSET:
421 for (i = 0; i < length; i++) {
422 offset = CURR_OFFSET;
423 if (!processStringObject(NULL)) {
424 SHIFT_ERROR(offset, "Error reading element key at index %d (length: %d)", i, length);
425 return 0;
426 }
427 offset = CURR_OFFSET;
428 if (!processDoubleValue(NULL)) {
429 SHIFT_ERROR(offset, "Error reading element value at index %d (length: %d)", i, length);
430 return 0;
431 }
432 }
433 break;
434 case REDIS_HASH:
435 for (i = 0; i < length; i++) {
436 offset = CURR_OFFSET;
437 if (!processStringObject(NULL)) {
438 SHIFT_ERROR(offset, "Error reading element key at index %d (length: %d)", i, length);
439 return 0;
440 }
441 offset = CURR_OFFSET;
442 if (!processStringObject(NULL)) {
443 SHIFT_ERROR(offset, "Error reading element value at index %d (length: %d)", i, length);
444 return 0;
445 }
446 }
447 break;
448 default:
449 SHIFT_ERROR(offset, "Type not implemented");
450 return 0;
451 }
452 /* because we're done, we assume success */
453 e->success = 1;
454 return 1;
455 }
456
457 entry loadEntry() {
458 entry e = { NULL, -1, 0 };
459 uint32_t length, offset[4];
460
461 /* reset error container */
462 errors.level = 0;
463
464 offset[0] = CURR_OFFSET;
465 if (!loadType(&e)) {
466 return e;
467 }
468
469 offset[1] = CURR_OFFSET;
470 if (e.type == REDIS_SELECTDB) {
471 if ((length = loadLength(NULL)) == REDIS_RDB_LENERR) {
472 SHIFT_ERROR(offset[1], "Error reading database number");
473 return e;
474 }
475 if (length > 63) {
476 SHIFT_ERROR(offset[1], "Database number out of range (%d)", length);
477 return e;
478 }
479 } else if (e.type == REDIS_EOF) {
480 if (positions[level].offset < positions[level].size) {
481 SHIFT_ERROR(offset[0], "Unexpected EOF");
482 } else {
483 e.success = 1;
484 }
485 return e;
486 } else {
487 /* optionally consume expire */
488 if (e.type == REDIS_EXPIRETIME ||
489 e.type == REDIS_EXPIRETIME_MS) {
490 if (!processTime(e.type)) return e;
491 if (!loadType(&e)) return e;
492 }
493
494 offset[1] = CURR_OFFSET;
495 if (!loadPair(&e)) {
496 SHIFT_ERROR(offset[1], "Error for type %s", types[e.type]);
497 return e;
498 }
499 }
500
501 /* all entries are followed by a valid type:
502 * e.g. a new entry, SELECTDB, EXPIRE, EOF */
503 offset[2] = CURR_OFFSET;
504 if (peekType() == -1) {
505 SHIFT_ERROR(offset[2], "Followed by invalid type");
506 SHIFT_ERROR(offset[0], "Error for type %s", types[e.type]);
507 e.success = 0;
508 } else {
509 e.success = 1;
510 }
511
512 return e;
513 }
514
/* Print 'body' on a line of its own, framed by '=' characters:
 * 'indent' of them before it, and as many after it as are needed to reach
 * 'width' columns (counting the two separating spaces).
 *
 * Both runs are clamped to 0..255 characters, so a negative argument, an
 * oversized indent, or a body wider than the line can never overflow the
 * local buffers; a body that is too wide simply gets no trailing run. */
void printCentered(int indent, int width, char* body) {
    char head[256], tail[256];
    size_t body_len = strlen(body);
    size_t head_len = 0, tail_len = 0;

    if (indent > 0) head_len = (size_t)indent;
    if (head_len > sizeof(head) - 1) head_len = sizeof(head) - 1;

    /* tail_len = width - 2 - indent - strlen(body), computed without
     * letting the unsigned subtraction wrap around. */
    if (width > 2) {
        size_t avail = (size_t)width - 2;
        if (avail > head_len && avail - head_len > body_len)
            tail_len = avail - head_len - body_len;
    }
    if (tail_len > sizeof(tail) - 1) tail_len = sizeof(tail) - 1;

    memset(head, '=', head_len);
    head[head_len] = '\0';
    memset(tail, '=', tail_len);
    tail[tail_len] = '\0';

    printf("%s %s %s\n", head, body, tail);
}
524
/* Print a framed summary line with the number of valid opcodes processed
 * and the number of bytes they covered. */
void printValid(uint64_t ops, uint64_t bytes) {
    char line[80];
    unsigned long long nops = ops;
    unsigned long long nbytes = bytes;

    snprintf(line, sizeof(line),
        "Processed %llu valid opcodes (in %llu bytes)", nops, nbytes);
    printCentered(4, 80, line);
}
531
/* Print a framed summary line with the number of bytes skipped and the
 * offset (in hexadecimal) at which processing resumes. */
void printSkipped(uint64_t bytes, uint64_t offset) {
    char line[80];
    unsigned long long nbytes = bytes;
    unsigned long long resume = offset;

    snprintf(line, sizeof(line),
        "Skipped %llu bytes (resuming at 0x%08llx)", nbytes, resume);
    printCentered(4, 80, line);
}
538
539 void printErrorStack(entry *e) {
540 unsigned int i;
541 char body[64];
542
543 if (e->type == -1) {
544 sprintf(body, "Error trace");
545 } else if (e->type >= 253) {
546 sprintf(body, "Error trace (%s)", types[e->type]);
547 } else if (!e->key) {
548 sprintf(body, "Error trace (%s: (unknown))", types[e->type]);
549 } else {
550 char tmp[41];
551 strncpy(tmp, e->key, 40);
552
553 /* display truncation at the last 3 chars */
554 if (strlen(e->key) > 40) {
555 memset(&tmp[37], '.', 3);
556 }
557
558 /* display unprintable characters as ? */
559 for (i = 0; i < strlen(tmp); i++) {
560 if (tmp[i] <= 32) tmp[i] = '?';
561 }
562 sprintf(body, "Error trace (%s: %s)", types[e->type], tmp);
563 }
564
565 printCentered(4, 80, body);
566
567 /* display error stack */
568 for (i = 0; i < errors.level; i++) {
569 printf("0x%08lx - %s\n",
570 (unsigned long) errors.offset[i], errors.error[i]);
571 }
572 }
573
574 void process() {
575 uint64_t num_errors = 0, num_valid_ops = 0, num_valid_bytes = 0;
576 entry entry;
577 int dump_version = processHeader();
578
579 /* Exclude the final checksum for RDB >= 5. Will be checked at the end. */
580 if (dump_version >= 5) {
581 if (positions[0].size < 8) {
582 printf("RDB version >= 5 but no room for checksum.\n");
583 exit(1);
584 }
585 positions[0].size -= 8;;
586 }
587
588 level = 1;
589 while(positions[0].offset < positions[0].size) {
590 positions[1] = positions[0];
591
592 entry = loadEntry();
593 if (!entry.success) {
594 printValid(num_valid_ops, num_valid_bytes);
595 printErrorStack(&entry);
596 num_errors++;
597 num_valid_ops = 0;
598 num_valid_bytes = 0;
599
600 /* search for next valid entry */
601 uint64_t offset = positions[0].offset + 1;
602 int i = 0;
603
604 while (!entry.success && offset < positions[0].size) {
605 positions[1].offset = offset;
606
607 /* find 3 consecutive valid entries */
608 for (i = 0; i < 3; i++) {
609 entry = loadEntry();
610 if (!entry.success) break;
611 }
612 /* check if we found 3 consecutive valid entries */
613 if (i < 3) {
614 offset++;
615 }
616 }
617
618 /* print how many bytes we have skipped to find a new valid opcode */
619 if (offset < positions[0].size) {
620 printSkipped(offset - positions[0].offset, offset);
621 }
622
623 positions[0].offset = offset;
624 } else {
625 num_valid_ops++;
626 num_valid_bytes += positions[1].offset - positions[0].offset;
627
628 /* advance position */
629 positions[0] = positions[1];
630 }
631 free(entry.key);
632 }
633
634 /* because there is another potential error,
635 * print how many valid ops we have processed */
636 printValid(num_valid_ops, num_valid_bytes);
637
638 /* expect an eof */
639 if (entry.type != REDIS_EOF) {
640 /* last byte should be EOF, add error */
641 errors.level = 0;
642 SHIFT_ERROR(positions[0].offset, "Expected EOF, got %s", types[entry.type]);
643
644 /* this is an EOF error so reset type */
645 entry.type = -1;
646 printErrorStack(&entry);
647
648 num_errors++;
649 }
650
651 /* Verify checksum */
652 if (dump_version >= 5) {
653 uint64_t crc = crc64(0,positions[0].data,positions[0].size);
654 uint64_t crc2;
655 unsigned char *p = (unsigned char*)positions[0].data+positions[0].size;
656 crc2 = ((uint64_t)p[0] << 0) |
657 ((uint64_t)p[1] << 8) |
658 ((uint64_t)p[2] << 16) |
659 ((uint64_t)p[3] << 24) |
660 ((uint64_t)p[4] << 32) |
661 ((uint64_t)p[5] << 40) |
662 ((uint64_t)p[6] << 48) |
663 ((uint64_t)p[7] << 56);
664 if (crc != crc2) {
665 SHIFT_ERROR(positions[0].offset, "RDB CRC64 does not match.");
666 } else {
667 printf("CRC64 checksum is OK\n");
668 }
669 }
670
671 /* print summary on errors */
672 if (num_errors) {
673 printf("\n");
674 printf("Total unprocessable opcodes: %llu\n",
675 (unsigned long long) num_errors);
676 }
677 }
678
679 int main(int argc, char **argv) {
680 /* expect the first argument to be the dump file */
681 if (argc <= 1) {
682 printf("Usage: %s <dump.rdb>\n", argv[0]);
683 exit(0);
684 }
685
686 int fd;
687 off_t size;
688 struct stat stat;
689 void *data;
690
691 fd = open(argv[1], O_RDONLY);
692 if (fd < 1) {
693 ERROR("Cannot open file: %s\n", argv[1]);
694 }
695 if (fstat(fd, &stat) == -1) {
696 ERROR("Cannot stat: %s\n", argv[1]);
697 } else {
698 size = stat.st_size;
699 }
700
701 if (sizeof(size_t) == sizeof(int32_t) && size >= INT_MAX) {
702 ERROR("Cannot check dump files >2GB on a 32-bit platform\n");
703 }
704
705 data = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, 0);
706 if (data == MAP_FAILED) {
707 ERROR("Cannot mmap: %s\n", argv[1]);
708 }
709
710 /* Initialize static vars */
711 positions[0].data = data;
712 positions[0].size = size;
713 positions[0].offset = 0;
714 errors.level = 0;
715
716 /* Object types */
717 sprintf(types[REDIS_STRING], "STRING");
718 sprintf(types[REDIS_LIST], "LIST");
719 sprintf(types[REDIS_SET], "SET");
720 sprintf(types[REDIS_ZSET], "ZSET");
721 sprintf(types[REDIS_HASH], "HASH");
722
723 /* Object types only used for dumping to disk */
724 sprintf(types[REDIS_EXPIRETIME], "EXPIRETIME");
725 sprintf(types[REDIS_SELECTDB], "SELECTDB");
726 sprintf(types[REDIS_EOF], "EOF");
727
728 /* Double constants initialization */
729 R_Zero = 0.0;
730 R_PosInf = 1.0/R_Zero;
731 R_NegInf = -1.0/R_Zero;
732 R_Nan = R_Zero/R_Zero;
733
734 process();
735
736 munmap(data, size);
737 close(fd);
738 return 0;
739 }