X-Git-Url: https://git.saurik.com/redis.git/blobdiff_plain/50be9b97bc975439cb34703acad5ac108952e672..0e793fbe84f5796597ce1021a2ea840cdcba1924:/src/aof.c diff --git a/src/aof.c b/src/aof.c index b1b0d9dc..8d654281 100644 --- a/src/aof.c +++ b/src/aof.c @@ -11,10 +11,14 @@ void aofUpdateCurrentSize(void); +void aof_background_fsync(int fd) { + bioCreateBackgroundJob(REDIS_BIO_AOF_FSYNC,(void*)(long)fd,NULL,NULL); +} + /* Called when the user switches from "appendonly yes" to "appendonly no" * at runtime using the CONFIG command. */ void stopAppendOnly(void) { - flushAppendOnlyFile(); + flushAppendOnlyFile(1); aof_fsync(server.appendfd); close(server.appendfd); @@ -59,12 +63,51 @@ int startAppendOnly(void) { * and the only way the client socket can get a write is entering when the * the event loop, we accumulate all the AOF writes in a memory * buffer and write it on disk using this function just before entering - * the event loop again. */ -void flushAppendOnlyFile(void) { + * the event loop again. + * + * About the 'force' argument: + * + * When the fsync policy is set to 'everysec' we may delay the flush if there + * is still an fsync() going on in the background thread, since for instance + * on Linux write(2) will be blocked by the background fsync anyway. + * When this happens we remember that there is some aof buffer to be + * flushed ASAP, and will try to do that in the serverCron() function. + * + * However if force is set to 1 we'll write regardless of the background + * fsync. */ +void flushAppendOnlyFile(int force) { ssize_t nwritten; + int sync_in_progress = 0; if (sdslen(server.aofbuf) == 0) return; + if (server.appendfsync == APPENDFSYNC_EVERYSEC) + sync_in_progress = bioPendingJobsOfType(REDIS_BIO_AOF_FSYNC) != 0; + + if (server.appendfsync == APPENDFSYNC_EVERYSEC && !force) { + /* With this append fsync policy we do background fsyncing. + * If the fsync is still in progress we can try to delay + * the write for a couple of seconds. 
*/ + if (sync_in_progress) { + if (server.aof_flush_postponed_start == 0) { + /* No previous write postponing, remember that we are + * postponing the flush and return. */ + server.aof_flush_postponed_start = server.unixtime; + return; + } else if (server.unixtime - server.aof_flush_postponed_start < 2) { + /* We were already waiting for fsync to finish, but for less + * than two seconds, this is still ok. Postpone again. */ + return; + } + /* Otherwise fall through, and go write since we can't wait + * over two seconds. */ + redisLog(REDIS_NOTICE,"Asynchronous AOF fsync is taking too long (disk is busy?). Writing the AOF buffer without waiting for fsync to complete, this may slow down Redis."); + } + } + /* If you are following this code path, then we are going to write so + * reset the postponed flush sentinel to zero. */ + server.aof_flush_postponed_start = 0; + /* We want to perform a single write. This should be guaranteed atomic * at least if the filesystem we are writing is a real physical one. * While this will save us against the server being killed I don't think @@ -100,14 +143,15 @@ void flushAppendOnlyFile(void) { return; /* Perform the fsync if needed. */ - if (server.appendfsync == APPENDFSYNC_ALWAYS || - (server.appendfsync == APPENDFSYNC_EVERYSEC && - server.unixtime > server.lastfsync)) - { + if (server.appendfsync == APPENDFSYNC_ALWAYS) { /* aof_fsync is defined as fdatasync() for Linux in order to avoid * flushing metadata. 
*/ + aof_fsync(server.appendfd); /* Let's try to get this data on the disk */ server.lastfsync = server.unixtime; + } else if ((server.appendfsync == APPENDFSYNC_EVERYSEC && + server.unixtime > server.lastfsync)) { + if (!sync_in_progress) aof_background_fsync(server.appendfd); + server.lastfsync = server.unixtime; } } @@ -282,6 +326,8 @@ int loadAppendOnlyFile(char *filename) { } if (buf[0] != '*') goto fmterr; argc = atoi(buf+1); + if (argc < 1) goto fmterr; + argv = zmalloc(sizeof(robj*)*argc); for (j = 0; j < argc; j++) { if (fgets(buf,sizeof(buf),fp) == NULL) goto readerr; @@ -762,10 +808,18 @@ void backgroundRewriteDoneHandler(int exitcode, int bysignal) { /* AOF enabled, replace the old fd with the new one. */ oldfd = server.appendfd; server.appendfd = newfd; - if (server.appendfsync != APPENDFSYNC_NO) aof_fsync(newfd); + if (server.appendfsync == APPENDFSYNC_ALWAYS) + aof_fsync(newfd); + else if (server.appendfsync == APPENDFSYNC_EVERYSEC) + aof_background_fsync(newfd); server.appendseldb = -1; /* Make sure SELECT is re-issued */ aofUpdateCurrentSize(); server.auto_aofrewrite_base_size = server.appendonly_current_size; + + /* Clear regular AOF buffer since its contents were just written to + * the new AOF from the background rewrite buffer. */ + sdsfree(server.aofbuf); + server.aofbuf = sdsempty(); } redisLog(REDIS_NOTICE, "Background AOF rewrite successful");