]> git.saurik.com Git - redis.git/blob - src/aof.c
0ebe4457d95a3e26d823ceb5a03010dc76945124
[redis.git] / src / aof.c
1 #include "redis.h"
2
3 #include <signal.h>
4 #include <fcntl.h>
5 #include <sys/stat.h>
6 #include <sys/types.h>
7 #include <sys/time.h>
8 #include <sys/resource.h>
9 #include <sys/wait.h>
10
11 void aofUpdateCurrentSize(void);
12
13 /* Called when the user switches from "appendonly yes" to "appendonly no"
14 * at runtime using the CONFIG command. */
15 void stopAppendOnly(void) {
16 flushAppendOnlyFile();
17 aof_fsync(server.appendfd);
18 close(server.appendfd);
19
20 server.appendfd = -1;
21 server.appendseldb = -1;
22 server.appendonly = 0;
23 /* rewrite operation in progress? kill it, wait child exit */
24 if (server.bgrewritechildpid != -1) {
25 int statloc;
26
27 if (kill(server.bgrewritechildpid,SIGKILL) != -1)
28 wait3(&statloc,0,NULL);
29 /* reset the buffer accumulating changes while the child saves */
30 sdsfree(server.bgrewritebuf);
31 server.bgrewritebuf = sdsempty();
32 server.bgrewritechildpid = -1;
33 }
34 }
35
36 /* Called when the user switches from "appendonly no" to "appendonly yes"
37 * at runtime using the CONFIG command. */
38 int startAppendOnly(void) {
39 server.appendonly = 1;
40 server.lastfsync = time(NULL);
41 server.appendfd = open(server.appendfilename,O_WRONLY|O_APPEND|O_CREAT,0644);
42 if (server.appendfd == -1) {
43 redisLog(REDIS_WARNING,"Used tried to switch on AOF via CONFIG, but I can't open the AOF file: %s",strerror(errno));
44 return REDIS_ERR;
45 }
46 if (rewriteAppendOnlyFileBackground() == REDIS_ERR) {
47 server.appendonly = 0;
48 close(server.appendfd);
49 redisLog(REDIS_WARNING,"Used tried to switch on AOF via CONFIG, I can't trigger a background AOF rewrite operation. Check the above logs for more info about the error.",strerror(errno));
50 return REDIS_ERR;
51 }
52 return REDIS_OK;
53 }
54
55 /* Write the append only file buffer on disk.
56 *
57 * Since we are required to write the AOF before replying to the client,
58 * and the only way the client socket can get a write is entering when the
59 * the event loop, we accumulate all the AOF writes in a memory
60 * buffer and write it on disk using this function just before entering
61 * the event loop again. */
62 void flushAppendOnlyFile(void) {
63 ssize_t nwritten;
64
65 if (sdslen(server.aofbuf) == 0) return;
66
67 /* We want to perform a single write. This should be guaranteed atomic
68 * at least if the filesystem we are writing is a real physical one.
69 * While this will save us against the server being killed I don't think
70 * there is much to do about the whole server stopping for power problems
71 * or alike */
72 nwritten = write(server.appendfd,server.aofbuf,sdslen(server.aofbuf));
73 if (nwritten != (signed)sdslen(server.aofbuf)) {
74 /* Ooops, we are in troubles. The best thing to do for now is
75 * aborting instead of giving the illusion that everything is
76 * working as expected. */
77 if (nwritten == -1) {
78 redisLog(REDIS_WARNING,"Exiting on error writing to the append-only file: %s",strerror(errno));
79 } else {
80 redisLog(REDIS_WARNING,"Exiting on short write while writing to the append-only file: %s",strerror(errno));
81 }
82 exit(1);
83 }
84 sdsfree(server.aofbuf);
85 server.aofbuf = sdsempty();
86 server.appendonly_current_size += nwritten;
87
88 /* Don't fsync if no-appendfsync-on-rewrite is set to yes and there are
89 * children doing I/O in the background. */
90 if (server.no_appendfsync_on_rewrite &&
91 (server.bgrewritechildpid != -1 || server.bgsavechildpid != -1))
92 return;
93
94 /* Perform the fsync if needed. */
95 if (server.appendfsync == APPENDFSYNC_ALWAYS ||
96 (server.appendfsync == APPENDFSYNC_EVERYSEC &&
97 server.unixtime > server.lastfsync))
98 {
99 /* aof_fsync is defined as fdatasync() for Linux in order to avoid
100 * flushing metadata. */
101 aof_fsync(server.appendfd); /* Let's try to get this data on the disk */
102 server.lastfsync = server.unixtime;
103 }
104 }
105
106 sds catAppendOnlyGenericCommand(sds buf, int argc, robj **argv) {
107 int j;
108 buf = sdscatprintf(buf,"*%d\r\n",argc);
109 for (j = 0; j < argc; j++) {
110 robj *o = getDecodedObject(argv[j]);
111 buf = sdscatprintf(buf,"$%lu\r\n",(unsigned long)sdslen(o->ptr));
112 buf = sdscatlen(buf,o->ptr,sdslen(o->ptr));
113 buf = sdscatlen(buf,"\r\n",2);
114 decrRefCount(o);
115 }
116 return buf;
117 }
118
119 sds catAppendOnlyExpireAtCommand(sds buf, robj *key, robj *seconds) {
120 int argc = 3;
121 long when;
122 robj *argv[3];
123
124 /* Make sure we can use strtol */
125 seconds = getDecodedObject(seconds);
126 when = time(NULL)+strtol(seconds->ptr,NULL,10);
127 decrRefCount(seconds);
128
129 argv[0] = createStringObject("EXPIREAT",8);
130 argv[1] = key;
131 argv[2] = createObject(REDIS_STRING,
132 sdscatprintf(sdsempty(),"%ld",when));
133 buf = catAppendOnlyGenericCommand(buf, argc, argv);
134 decrRefCount(argv[0]);
135 decrRefCount(argv[2]);
136 return buf;
137 }
138
139 void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int argc) {
140 sds buf = sdsempty();
141 robj *tmpargv[3];
142
143 /* The DB this command was targetting is not the same as the last command
144 * we appendend. To issue a SELECT command is needed. */
145 if (dictid != server.appendseldb) {
146 char seldb[64];
147
148 snprintf(seldb,sizeof(seldb),"%d",dictid);
149 buf = sdscatprintf(buf,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n",
150 (unsigned long)strlen(seldb),seldb);
151 server.appendseldb = dictid;
152 }
153
154 if (cmd->proc == expireCommand) {
155 /* Translate EXPIRE into EXPIREAT */
156 buf = catAppendOnlyExpireAtCommand(buf,argv[1],argv[2]);
157 } else if (cmd->proc == setexCommand) {
158 /* Translate SETEX to SET and EXPIREAT */
159 tmpargv[0] = createStringObject("SET",3);
160 tmpargv[1] = argv[1];
161 tmpargv[2] = argv[3];
162 buf = catAppendOnlyGenericCommand(buf,3,tmpargv);
163 decrRefCount(tmpargv[0]);
164 buf = catAppendOnlyExpireAtCommand(buf,argv[1],argv[2]);
165 } else {
166 buf = catAppendOnlyGenericCommand(buf,argc,argv);
167 }
168
169 /* Append to the AOF buffer. This will be flushed on disk just before
170 * of re-entering the event loop, so before the client will get a
171 * positive reply about the operation performed. */
172 server.aofbuf = sdscatlen(server.aofbuf,buf,sdslen(buf));
173
174 /* If a background append only file rewriting is in progress we want to
175 * accumulate the differences between the child DB and the current one
176 * in a buffer, so that when the child process will do its work we
177 * can append the differences to the new append only file. */
178 if (server.bgrewritechildpid != -1)
179 server.bgrewritebuf = sdscatlen(server.bgrewritebuf,buf,sdslen(buf));
180
181 sdsfree(buf);
182 }
183
184 /* In Redis commands are always executed in the context of a client, so in
185 * order to load the append only file we need to create a fake client. */
186 struct redisClient *createFakeClient(void) {
187 struct redisClient *c = zmalloc(sizeof(*c));
188
189 selectDb(c,0);
190 c->fd = -1;
191 c->querybuf = sdsempty();
192 c->argc = 0;
193 c->argv = NULL;
194 c->bufpos = 0;
195 c->flags = 0;
196 /* We set the fake client as a slave waiting for the synchronization
197 * so that Redis will not try to send replies to this client. */
198 c->replstate = REDIS_REPL_WAIT_BGSAVE_START;
199 c->reply = listCreate();
200 c->watched_keys = listCreate();
201 listSetFreeMethod(c->reply,decrRefCount);
202 listSetDupMethod(c->reply,dupClientReplyValue);
203 initClientMultiState(c);
204 return c;
205 }
206
207 void freeFakeClient(struct redisClient *c) {
208 sdsfree(c->querybuf);
209 listRelease(c->reply);
210 listRelease(c->watched_keys);
211 freeClientMultiState(c);
212 zfree(c);
213 }
214
215 /* Replay the append log file. On error REDIS_OK is returned. On non fatal
216 * error (the append only file is zero-length) REDIS_ERR is returned. On
217 * fatal error an error message is logged and the program exists. */
218 int loadAppendOnlyFile(char *filename) {
219 struct redisClient *fakeClient;
220 FILE *fp = fopen(filename,"r");
221 struct redis_stat sb;
222 int appendonly = server.appendonly;
223 long loops = 0;
224
225 if (fp && redis_fstat(fileno(fp),&sb) != -1 && sb.st_size == 0) {
226 server.appendonly_current_size = 0;
227 fclose(fp);
228 return REDIS_ERR;
229 }
230
231 if (fp == NULL) {
232 redisLog(REDIS_WARNING,"Fatal error: can't open the append log file for reading: %s",strerror(errno));
233 exit(1);
234 }
235
236 /* Temporarily disable AOF, to prevent EXEC from feeding a MULTI
237 * to the same file we're about to read. */
238 server.appendonly = 0;
239
240 fakeClient = createFakeClient();
241 startLoading(fp);
242
243 while(1) {
244 int argc, j;
245 unsigned long len;
246 robj **argv;
247 char buf[128];
248 sds argsds;
249 struct redisCommand *cmd;
250
251 /* Serve the clients from time to time */
252 if (!(loops++ % 1000)) {
253 loadingProgress(ftello(fp));
254 aeProcessEvents(server.el, AE_FILE_EVENTS|AE_DONT_WAIT);
255 }
256
257 if (fgets(buf,sizeof(buf),fp) == NULL) {
258 if (feof(fp))
259 break;
260 else
261 goto readerr;
262 }
263 if (buf[0] != '*') goto fmterr;
264 argc = atoi(buf+1);
265 argv = zmalloc(sizeof(robj*)*argc);
266 for (j = 0; j < argc; j++) {
267 if (fgets(buf,sizeof(buf),fp) == NULL) goto readerr;
268 if (buf[0] != '$') goto fmterr;
269 len = strtol(buf+1,NULL,10);
270 argsds = sdsnewlen(NULL,len);
271 if (len && fread(argsds,len,1,fp) == 0) goto fmterr;
272 argv[j] = createObject(REDIS_STRING,argsds);
273 if (fread(buf,2,1,fp) == 0) goto fmterr; /* discard CRLF */
274 }
275
276 /* Command lookup */
277 cmd = lookupCommand(argv[0]->ptr);
278 if (!cmd) {
279 redisLog(REDIS_WARNING,"Unknown command '%s' reading the append only file", argv[0]->ptr);
280 exit(1);
281 }
282 /* Run the command in the context of a fake client */
283 fakeClient->argc = argc;
284 fakeClient->argv = argv;
285 cmd->proc(fakeClient);
286
287 /* The fake client should not have a reply */
288 redisAssert(fakeClient->bufpos == 0 && listLength(fakeClient->reply) == 0);
289 /* The fake client should never get blocked */
290 redisAssert((fakeClient->flags & REDIS_BLOCKED) == 0);
291
292 /* Clean up. Command code may have changed argv/argc so we use the
293 * argv/argc of the client instead of the local variables. */
294 for (j = 0; j < fakeClient->argc; j++)
295 decrRefCount(fakeClient->argv[j]);
296 zfree(fakeClient->argv);
297 }
298
299 /* This point can only be reached when EOF is reached without errors.
300 * If the client is in the middle of a MULTI/EXEC, log error and quit. */
301 if (fakeClient->flags & REDIS_MULTI) goto readerr;
302
303 fclose(fp);
304 freeFakeClient(fakeClient);
305 server.appendonly = appendonly;
306 stopLoading();
307 aofUpdateCurrentSize();
308 server.auto_aofrewrite_base_size = server.appendonly_current_size;
309 return REDIS_OK;
310
311 readerr:
312 if (feof(fp)) {
313 redisLog(REDIS_WARNING,"Unexpected end of file reading the append only file");
314 } else {
315 redisLog(REDIS_WARNING,"Unrecoverable error reading the append only file: %s", strerror(errno));
316 }
317 exit(1);
318 fmterr:
319 redisLog(REDIS_WARNING,"Bad file format reading the append only file: make a backup of your AOF file, then use ./redis-check-aof --fix <filename>");
320 exit(1);
321 }
322
323 /* Write a sequence of commands able to fully rebuild the dataset into
324 * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */
325 int rewriteAppendOnlyFile(char *filename) {
326 dictIterator *di = NULL;
327 dictEntry *de;
328 FILE *fp;
329 char tmpfile[256];
330 int j;
331 time_t now = time(NULL);
332
333 /* Note that we have to use a different temp name here compared to the
334 * one used by rewriteAppendOnlyFileBackground() function. */
335 snprintf(tmpfile,256,"temp-rewriteaof-%d.aof", (int) getpid());
336 fp = fopen(tmpfile,"w");
337 if (!fp) {
338 redisLog(REDIS_WARNING, "Failed rewriting the append only file: %s", strerror(errno));
339 return REDIS_ERR;
340 }
341 for (j = 0; j < server.dbnum; j++) {
342 char selectcmd[] = "*2\r\n$6\r\nSELECT\r\n";
343 redisDb *db = server.db+j;
344 dict *d = db->dict;
345 if (dictSize(d) == 0) continue;
346 di = dictGetSafeIterator(d);
347 if (!di) {
348 fclose(fp);
349 return REDIS_ERR;
350 }
351
352 /* SELECT the new DB */
353 if (fwrite(selectcmd,sizeof(selectcmd)-1,1,fp) == 0) goto werr;
354 if (fwriteBulkLongLong(fp,j) == 0) goto werr;
355
356 /* Iterate this DB writing every entry */
357 while((de = dictNext(di)) != NULL) {
358 sds keystr;
359 robj key, *o;
360 time_t expiretime;
361
362 keystr = dictGetEntryKey(de);
363 o = dictGetEntryVal(de);
364 initStaticStringObject(key,keystr);
365
366 expiretime = getExpire(db,&key);
367
368 /* Save the key and associated value */
369 if (o->type == REDIS_STRING) {
370 /* Emit a SET command */
371 char cmd[]="*3\r\n$3\r\nSET\r\n";
372 if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr;
373 /* Key and value */
374 if (fwriteBulkObject(fp,&key) == 0) goto werr;
375 if (fwriteBulkObject(fp,o) == 0) goto werr;
376 } else if (o->type == REDIS_LIST) {
377 /* Emit the RPUSHes needed to rebuild the list */
378 char cmd[]="*3\r\n$5\r\nRPUSH\r\n";
379 if (o->encoding == REDIS_ENCODING_ZIPLIST) {
380 unsigned char *zl = o->ptr;
381 unsigned char *p = ziplistIndex(zl,0);
382 unsigned char *vstr;
383 unsigned int vlen;
384 long long vlong;
385
386 while(ziplistGet(p,&vstr,&vlen,&vlong)) {
387 if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr;
388 if (fwriteBulkObject(fp,&key) == 0) goto werr;
389 if (vstr) {
390 if (fwriteBulkString(fp,(char*)vstr,vlen) == 0)
391 goto werr;
392 } else {
393 if (fwriteBulkLongLong(fp,vlong) == 0)
394 goto werr;
395 }
396 p = ziplistNext(zl,p);
397 }
398 } else if (o->encoding == REDIS_ENCODING_LINKEDLIST) {
399 list *list = o->ptr;
400 listNode *ln;
401 listIter li;
402
403 listRewind(list,&li);
404 while((ln = listNext(&li))) {
405 robj *eleobj = listNodeValue(ln);
406
407 if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr;
408 if (fwriteBulkObject(fp,&key) == 0) goto werr;
409 if (fwriteBulkObject(fp,eleobj) == 0) goto werr;
410 }
411 } else {
412 redisPanic("Unknown list encoding");
413 }
414 } else if (o->type == REDIS_SET) {
415 char cmd[]="*3\r\n$4\r\nSADD\r\n";
416
417 /* Emit the SADDs needed to rebuild the set */
418 if (o->encoding == REDIS_ENCODING_INTSET) {
419 int ii = 0;
420 int64_t llval;
421 while(intsetGet(o->ptr,ii++,&llval)) {
422 if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr;
423 if (fwriteBulkObject(fp,&key) == 0) goto werr;
424 if (fwriteBulkLongLong(fp,llval) == 0) goto werr;
425 }
426 } else if (o->encoding == REDIS_ENCODING_HT) {
427 dictIterator *di = dictGetIterator(o->ptr);
428 dictEntry *de;
429 while((de = dictNext(di)) != NULL) {
430 robj *eleobj = dictGetEntryKey(de);
431 if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr;
432 if (fwriteBulkObject(fp,&key) == 0) goto werr;
433 if (fwriteBulkObject(fp,eleobj) == 0) goto werr;
434 }
435 dictReleaseIterator(di);
436 } else {
437 redisPanic("Unknown set encoding");
438 }
439 } else if (o->type == REDIS_ZSET) {
440 /* Emit the ZADDs needed to rebuild the sorted set */
441 char cmd[]="*4\r\n$4\r\nZADD\r\n";
442
443 if (o->encoding == REDIS_ENCODING_ZIPLIST) {
444 unsigned char *zl = o->ptr;
445 unsigned char *eptr, *sptr;
446 unsigned char *vstr;
447 unsigned int vlen;
448 long long vll;
449 double score;
450
451 eptr = ziplistIndex(zl,0);
452 redisAssert(eptr != NULL);
453 sptr = ziplistNext(zl,eptr);
454 redisAssert(sptr != NULL);
455
456 while (eptr != NULL) {
457 redisAssert(ziplistGet(eptr,&vstr,&vlen,&vll));
458 score = zzlGetScore(sptr);
459
460 if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr;
461 if (fwriteBulkObject(fp,&key) == 0) goto werr;
462 if (fwriteBulkDouble(fp,score) == 0) goto werr;
463 if (vstr != NULL) {
464 if (fwriteBulkString(fp,(char*)vstr,vlen) == 0)
465 goto werr;
466 } else {
467 if (fwriteBulkLongLong(fp,vll) == 0)
468 goto werr;
469 }
470 zzlNext(zl,&eptr,&sptr);
471 }
472 } else if (o->encoding == REDIS_ENCODING_SKIPLIST) {
473 zset *zs = o->ptr;
474 dictIterator *di = dictGetIterator(zs->dict);
475 dictEntry *de;
476
477 while((de = dictNext(di)) != NULL) {
478 robj *eleobj = dictGetEntryKey(de);
479 double *score = dictGetEntryVal(de);
480
481 if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr;
482 if (fwriteBulkObject(fp,&key) == 0) goto werr;
483 if (fwriteBulkDouble(fp,*score) == 0) goto werr;
484 if (fwriteBulkObject(fp,eleobj) == 0) goto werr;
485 }
486 dictReleaseIterator(di);
487 } else {
488 redisPanic("Unknown sorted set encoding");
489 }
490 } else if (o->type == REDIS_HASH) {
491 char cmd[]="*4\r\n$4\r\nHSET\r\n";
492
493 /* Emit the HSETs needed to rebuild the hash */
494 if (o->encoding == REDIS_ENCODING_ZIPMAP) {
495 unsigned char *p = zipmapRewind(o->ptr);
496 unsigned char *field, *val;
497 unsigned int flen, vlen;
498
499 while((p = zipmapNext(p,&field,&flen,&val,&vlen)) != NULL) {
500 if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr;
501 if (fwriteBulkObject(fp,&key) == 0) goto werr;
502 if (fwriteBulkString(fp,(char*)field,flen) == 0)
503 goto werr;
504 if (fwriteBulkString(fp,(char*)val,vlen) == 0)
505 goto werr;
506 }
507 } else {
508 dictIterator *di = dictGetIterator(o->ptr);
509 dictEntry *de;
510
511 while((de = dictNext(di)) != NULL) {
512 robj *field = dictGetEntryKey(de);
513 robj *val = dictGetEntryVal(de);
514
515 if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr;
516 if (fwriteBulkObject(fp,&key) == 0) goto werr;
517 if (fwriteBulkObject(fp,field) == 0) goto werr;
518 if (fwriteBulkObject(fp,val) == 0) goto werr;
519 }
520 dictReleaseIterator(di);
521 }
522 } else {
523 redisPanic("Unknown object type");
524 }
525 /* Save the expire time */
526 if (expiretime != -1) {
527 char cmd[]="*3\r\n$8\r\nEXPIREAT\r\n";
528 /* If this key is already expired skip it */
529 if (expiretime < now) continue;
530 if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr;
531 if (fwriteBulkObject(fp,&key) == 0) goto werr;
532 if (fwriteBulkLongLong(fp,expiretime) == 0) goto werr;
533 }
534 }
535 dictReleaseIterator(di);
536 }
537
538 /* Make sure data will not remain on the OS's output buffers */
539 fflush(fp);
540 aof_fsync(fileno(fp));
541 fclose(fp);
542
543 /* Use RENAME to make sure the DB file is changed atomically only
544 * if the generate DB file is ok. */
545 if (rename(tmpfile,filename) == -1) {
546 redisLog(REDIS_WARNING,"Error moving temp append only file on the final destination: %s", strerror(errno));
547 unlink(tmpfile);
548 return REDIS_ERR;
549 }
550 redisLog(REDIS_NOTICE,"SYNC append only file rewrite performed");
551 return REDIS_OK;
552
553 werr:
554 fclose(fp);
555 unlink(tmpfile);
556 redisLog(REDIS_WARNING,"Write error writing append only file on disk: %s", strerror(errno));
557 if (di) dictReleaseIterator(di);
558 return REDIS_ERR;
559 }
560
561 /* This is how rewriting of the append only file in background works:
562 *
563 * 1) The user calls BGREWRITEAOF
564 * 2) Redis calls this function, that forks():
565 * 2a) the child rewrite the append only file in a temp file.
566 * 2b) the parent accumulates differences in server.bgrewritebuf.
567 * 3) When the child finished '2a' exists.
568 * 4) The parent will trap the exit code, if it's OK, will append the
569 * data accumulated into server.bgrewritebuf into the temp file, and
570 * finally will rename(2) the temp file in the actual file name.
571 * The the new file is reopened as the new append only file. Profit!
572 */
573 int rewriteAppendOnlyFileBackground(void) {
574 pid_t childpid;
575 long long start;
576
577 if (server.bgrewritechildpid != -1) return REDIS_ERR;
578 start = ustime();
579 if ((childpid = fork()) == 0) {
580 char tmpfile[256];
581
582 /* Child */
583 if (server.ipfd > 0) close(server.ipfd);
584 if (server.sofd > 0) close(server.sofd);
585 snprintf(tmpfile,256,"temp-rewriteaof-bg-%d.aof", (int) getpid());
586 if (rewriteAppendOnlyFile(tmpfile) == REDIS_OK) {
587 _exit(0);
588 } else {
589 _exit(1);
590 }
591 } else {
592 /* Parent */
593 server.stat_fork_time = ustime()-start;
594 if (childpid == -1) {
595 redisLog(REDIS_WARNING,
596 "Can't rewrite append only file in background: fork: %s",
597 strerror(errno));
598 return REDIS_ERR;
599 }
600 redisLog(REDIS_NOTICE,
601 "Background append only file rewriting started by pid %d",childpid);
602 server.bgrewritechildpid = childpid;
603 updateDictResizePolicy();
604 /* We set appendseldb to -1 in order to force the next call to the
605 * feedAppendOnlyFile() to issue a SELECT command, so the differences
606 * accumulated by the parent into server.bgrewritebuf will start
607 * with a SELECT statement and it will be safe to merge. */
608 server.appendseldb = -1;
609 return REDIS_OK;
610 }
611 return REDIS_OK; /* unreached */
612 }
613
614 void bgrewriteaofCommand(redisClient *c) {
615 if (server.bgrewritechildpid != -1) {
616 addReplyError(c,"Background append only file rewriting already in progress");
617 } else if (server.bgsavechildpid != -1) {
618 server.aofrewrite_scheduled = 1;
619 addReplyStatus(c,"Background append only file rewriting scheduled");
620 } else if (rewriteAppendOnlyFileBackground() == REDIS_OK) {
621 addReplyStatus(c,"Background append only file rewriting started");
622 } else {
623 addReply(c,shared.err);
624 }
625 }
626
/* Remove the temp file "temp-rewriteaof-bg-<childpid>.aof" from the current
 * working directory. The result of unlink() is ignored, so a missing file
 * is not reported. */
void aofRemoveTempFile(pid_t childpid) {
    char path[256];

    snprintf(path,sizeof(path),"temp-rewriteaof-bg-%d.aof", (int) childpid);
    unlink(path);
}
633
634 /* Update the server.appendonly_current_size filed explicitly using stat(2)
635 * to check the size of the file. This is useful after a rewrite or after
636 * a restart, normally the size is updated just adding the write length
637 * to the current lenght, that is much faster. */
638 void aofUpdateCurrentSize(void) {
639 struct redis_stat sb;
640
641 if (redis_fstat(server.appendfd,&sb) == -1) {
642 redisLog(REDIS_WARNING,"Unable to check the AOF length: %s",
643 strerror(errno));
644 } else {
645 server.appendonly_current_size = sb.st_size;
646 }
647 }
648
649 /* A background append only file rewriting (BGREWRITEAOF) terminated its work.
650 * Handle this. */
651 void backgroundRewriteDoneHandler(int exitcode, int bysignal) {
652 if (!bysignal && exitcode == 0) {
653 int fd;
654 char tmpfile[256];
655
656 redisLog(REDIS_NOTICE,
657 "Background append only file rewriting terminated with success");
658 /* Now it's time to flush the differences accumulated by the parent */
659 snprintf(tmpfile,256,"temp-rewriteaof-bg-%d.aof", (int) server.bgrewritechildpid);
660 fd = open(tmpfile,O_WRONLY|O_APPEND);
661 if (fd == -1) {
662 redisLog(REDIS_WARNING, "Not able to open the temp append only file produced by the child: %s", strerror(errno));
663 goto cleanup;
664 }
665 /* Flush our data... */
666 if (write(fd,server.bgrewritebuf,sdslen(server.bgrewritebuf)) !=
667 (signed) sdslen(server.bgrewritebuf)) {
668 redisLog(REDIS_WARNING, "Error or short write trying to flush the parent diff of the append log file in the child temp file: %s", strerror(errno));
669 close(fd);
670 goto cleanup;
671 }
672 redisLog(REDIS_NOTICE,"Parent diff flushed into the new append log file with success (%lu bytes)",sdslen(server.bgrewritebuf));
673 /* Now our work is to rename the temp file into the stable file. And
674 * switch the file descriptor used by the server for append only. */
675 if (rename(tmpfile,server.appendfilename) == -1) {
676 redisLog(REDIS_WARNING,"Can't rename the temp append only file into the stable one: %s", strerror(errno));
677 close(fd);
678 goto cleanup;
679 }
680 /* Mission completed... almost */
681 redisLog(REDIS_NOTICE,"Append only file successfully rewritten.");
682 if (server.appendfd != -1) {
683 /* If append only is actually enabled... */
684 close(server.appendfd);
685 server.appendfd = fd;
686 if (server.appendfsync != APPENDFSYNC_NO) aof_fsync(fd);
687 server.appendseldb = -1; /* Make sure it will issue SELECT */
688 redisLog(REDIS_NOTICE,"The new append only file was selected for future appends.");
689 aofUpdateCurrentSize();
690 server.auto_aofrewrite_base_size = server.appendonly_current_size;
691 } else {
692 /* If append only is disabled we just generate a dump in this
693 * format. Why not? */
694 close(fd);
695 }
696 } else if (!bysignal && exitcode != 0) {
697 redisLog(REDIS_WARNING, "Background append only file rewriting error");
698 } else {
699 redisLog(REDIS_WARNING,
700 "Background append only file rewriting terminated by signal %d",
701 bysignal);
702 }
703 cleanup:
704 sdsfree(server.bgrewritebuf);
705 server.bgrewritebuf = sdsempty();
706 aofRemoveTempFile(server.bgrewritechildpid);
707 server.bgrewritechildpid = -1;
708 }