pid_t bgsavechildpid;
pid_t bgrewritechildpid;
sds bgrewritebuf; /* buffer taken by parent during oppend only rewrite */
+ sds aofbuf; /* AOF buffer, written before entering the event loop */
struct saveparam *saveparams;
int saveparamslen;
char *logfile;
static robj *dupStringObject(robj *o);
static void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc);
static void replicationFeedMonitors(list *monitors, int dictid, robj **argv, int argc);
+static void flushAppendOnlyFile(void);
static void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int argc);
static int syncWithMaster(void);
static robj *tryObjectEncoding(robj *o);
static void beforeSleep(struct aeEventLoop *eventLoop) {
REDIS_NOTUSED(eventLoop);
+ /* Awake clients that got all the swapped keys they requested */
if (server.vm_enabled && listLength(server.io_ready_clients)) {
listIter li;
listNode *ln;
processInputBuffer(c);
}
}
+ /* Write the AOF buffer on disk */
+ flushAppendOnlyFile();
}
static void createSharedObjects(void) {
server.bgsavechildpid = -1;
server.bgrewritechildpid = -1;
server.bgrewritebuf = sdsempty();
+ server.aofbuf = sdsempty();
server.lastsave = time(NULL);
server.dirty = 0;
server.stat_numcommands = 0;
if ((server.appendonly = yesnotoi(argv[1])) == -1) {
err = "argument must be 'yes' or 'no'"; goto loaderr;
}
+ } else if (!strcasecmp(argv[0],"appendfilename") && argc == 2) {
+ zfree(server.appendfilename);
+ server.appendfilename = zstrdup(argv[1]);
} else if (!strcasecmp(argv[0],"appendfsync") && argc == 2) {
if (!strcasecmp(argv[1],"no")) {
server.appendfsync = APPENDFSYNC_NO;
} else if (o->encoding == REDIS_ENCODING_INT) {
value = (long)o->ptr;
} else {
- redisAssert(1 != 1);
+ redisPanic("Unknown string encoding");
}
}
} else if (o->encoding == REDIS_ENCODING_INT) {
value = (long)o->ptr;
} else {
- redisAssert(1 != 1);
+ redisPanic("Unknown string encoding");
}
}
}
static void existsCommand(redisClient *c) {
- addReply(c,lookupKeyRead(c->db,c->argv[1]) ? shared.cone : shared.czero);
+    robj *key = c->argv[1];
+
+    /* Run the expire check on the key first, then probe the database
+     * dictionary directly and reply 1 when an entry is found, 0 otherwise.
+     * NOTE(review): presumably the expire check keeps a key whose TTL has
+     * elapsed from being reported as existing -- confirm against
+     * expireIfNeeded(). */
+    expireIfNeeded(c->db,key);
+    addReply(c, dictFind(c->db->dict,key) ? shared.cone : shared.czero);
}
static void selectCommand(redisClient *c) {
unlink(server.pidfile);
redisLog(REDIS_WARNING,"%zu bytes used at exit",zmalloc_used_memory());
redisLog(REDIS_WARNING,"Server exit now, bye bye...");
- if (server.vm_enabled) unlink(server.vm_swap_file);
exit(0);
} else {
/* Ooops.. error saving! The best we can do is to continue
if (getLongLongFromObjectOrReply(c,c->argv[3],&incr,NULL) != REDIS_OK) return;
if ((o = hashLookupWriteOrCreate(c,c->argv[1])) == NULL) return;
if ((current = hashGet(o,c->argv[2])) != NULL) {
- if (current->encoding == REDIS_ENCODING_RAW)
- value = strtoll(current->ptr,NULL,10);
- else if (current->encoding == REDIS_ENCODING_INT)
- value = (long)current->ptr;
- else
- redisAssert(1 != 1);
+ if (getLongLongFromObjectOrReply(c,current,&value,
+ "hash value is not an integer") != REDIS_OK) {
+ decrRefCount(current);
+ return;
+ }
decrRefCount(current);
} else {
value = 0;
/* ============================== Append Only file ========================== */
+/* Write the append only file buffer on disk.
+ *
+ * Since we are required to write the AOF before replying to the client,
+ * and the only way the client socket can get a write is entering the
+ * event loop, we accumulate all the AOF writes in a memory buffer and
+ * write it on disk using this function just before entering the event
+ * loop again.
+ *
+ * Nothing is done when the buffer is empty. When the write fails or is
+ * short, the problem is logged and the process exits with status 1.
+ * After a complete write the buffer is reset to an empty string and the
+ * file is synced according to server.appendfsync. */
+static void flushAppendOnlyFile(void) {
+    time_t now;
+    ssize_t nwritten;
+    size_t buflen = sdslen(server.aofbuf);
+
+    if (buflen == 0) return;
+
+    /* We want to perform a single write. This should be guaranteed atomic
+     * at least if the filesystem we are writing is a real physical one.
+     * While this will save us against the server being killed I don't think
+     * there is much to do about the whole server stopping for power problems
+     * or alike */
+    nwritten = write(server.appendfd,server.aofbuf,buflen);
+    if (nwritten != (ssize_t)buflen) {
+        /* Ooops, we are in troubles. The best thing to do for now is
+         * aborting instead of giving the illusion that everything is
+         * working as expected. */
+        if (nwritten == -1) {
+            redisLog(REDIS_WARNING,"Exiting on error writing to the append-only file: %s",strerror(errno));
+        } else {
+            /* errno is only meaningful when write() returns -1: on a short
+             * write report how much was written instead of a stale error. */
+            redisLog(REDIS_WARNING,"Exiting on short write while writing to the append-only file: %zd of %zu bytes written",nwritten,buflen);
+        }
+        exit(1);
+    }
+    sdsfree(server.aofbuf);
+    server.aofbuf = sdsempty();
+
+    /* Fsync if needed: always when the policy is APPENDFSYNC_ALWAYS, or when
+     * it is APPENDFSYNC_EVERYSEC and more than one second elapsed since the
+     * last recorded fsync. */
+    now = time(NULL);
+    if (server.appendfsync == APPENDFSYNC_ALWAYS ||
+        (server.appendfsync == APPENDFSYNC_EVERYSEC &&
+         now-server.lastfsync > 1))
+    {
+        /* aof_fsync is defined as fdatasync() for Linux in order to avoid
+         * flushing metadata. */
+        aof_fsync(server.appendfd); /* Let's try to get this data on the disk */
+        server.lastfsync = now;
+    }
+}
+
static void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int argc) {
sds buf = sdsempty();
int j;
- ssize_t nwritten;
- time_t now;
robj *tmpargv[3];
/* The DB this command was targetting is not the same as the last command
decrRefCount(argv[j]);
}
- /* We want to perform a single write. This should be guaranteed atomic
- * at least if the filesystem we are writing is a real physical one.
- * While this will save us against the server being killed I don't think
- * there is much to do about the whole server stopping for power problems
- * or alike */
- nwritten = write(server.appendfd,buf,sdslen(buf));
- if (nwritten != (signed)sdslen(buf)) {
- /* Ooops, we are in troubles. The best thing to do for now is
- * to simply exit instead to give the illusion that everything is
- * working as expected. */
- if (nwritten == -1) {
- redisLog(REDIS_WARNING,"Exiting on error writing to the append-only file: %s",strerror(errno));
- } else {
- redisLog(REDIS_WARNING,"Exiting on short write while writing to the append-only file: %s",strerror(errno));
- }
- exit(1);
- }
+ /* Append to the AOF buffer. This will be flushed on disk just before
+ * re-entering the event loop, so before the client will get a
+ * positive reply about the operation performed. */
+ server.aofbuf = sdscatlen(server.aofbuf,buf,sdslen(buf));
+
/* If a background append only file rewriting is in progress we want to
* accumulate the differences between the child DB and the current one
* in a buffer, so that when the child process will do its work we
server.bgrewritebuf = sdscatlen(server.bgrewritebuf,buf,sdslen(buf));
sdsfree(buf);
- now = time(NULL);
- if (server.appendfsync == APPENDFSYNC_ALWAYS ||
- (server.appendfsync == APPENDFSYNC_EVERYSEC &&
- now-server.lastfsync > 1))
- {
- /* aof_fsync is defined as fdatasync() for Linux in order to avoid
- * flushing metadata. */
- aof_fsync(server.appendfd); /* Let's try to get this data on the disk */
- server.lastfsync = now;
- }
}
/* In Redis commands are always executed in the context of a client, so in
c->reply = listCreate();
listSetFreeMethod(c->reply,decrRefCount);
listSetDupMethod(c->reply,dupClientReplyValue);
+ initClientMultiState(c);
return c;
}
static void freeFakeClient(struct redisClient *c) {
sdsfree(c->querybuf);
listRelease(c->reply);
+ freeClientMultiState(c); /* NOTE(review): presumably releases the state set up by the initClientMultiState(c) call added where the fake client is built -- confirm */
zfree(c);
}
FILE *fp = fopen(filename,"r");
struct redis_stat sb;
unsigned long long loadedkeys = 0;
+ int appendonly = server.appendonly;
if (redis_fstat(fileno(fp),&sb) != -1 && sb.st_size == 0)
return REDIS_ERR;
exit(1);
}
+ /* Temporarily disable AOF, to prevent EXEC from feeding a MULTI
+ * to the same file we're about to read. */
+ server.appendonly = 0;
+
fakeClient = createFakeClient();
while(1) {
int argc, j;
}
}
}
+
+ /* This point can only be reached when EOF is reached without errors.
+ * If the client is in the middle of a MULTI/EXEC, log error and quit. */
+ if (fakeClient->flags & REDIS_MULTI) goto readerr;
+
fclose(fp);
freeFakeClient(fakeClient);
+ server.appendonly = appendonly;
return REDIS_OK;
readerr:
/* =================== Virtual Memory - Blocking Side ====================== */
-/* substitute the first occurrence of '%p' with the process pid in the
- * swap file name. */
-static void expandVmSwapFilename(void) {
- char *p = strstr(server.vm_swap_file,"%p");
- sds new;
-
- if (!p) return;
- new = sdsempty();
- *p = '\0';
- new = sdscat(new,server.vm_swap_file);
- new = sdscatprintf(new,"%ld",(long) getpid());
- new = sdscat(new,p+2);
- zfree(server.vm_swap_file);
- server.vm_swap_file = new;
-}
-
static void vmInit(void) {
off_t totsize;
int pipefds[2];
size_t stacksize;
+ struct flock fl;
if (server.vm_max_threads != 0)
zmalloc_enable_thread_safeness(); /* we need thread safe zmalloc() */
- expandVmSwapFilename();
redisLog(REDIS_NOTICE,"Using '%s' as swap file",server.vm_swap_file);
+ /* Try to open the old swap file, otherwise create it */
if ((server.vm_fp = fopen(server.vm_swap_file,"r+b")) == NULL) {
server.vm_fp = fopen(server.vm_swap_file,"w+b");
}
if (server.vm_fp == NULL) {
redisLog(REDIS_WARNING,
- "Impossible to open the swap file: %s. Exiting.",
+ "Can't open the swap file: %s. Exiting.",
strerror(errno));
exit(1);
}
server.vm_fd = fileno(server.vm_fp);
+ /* Lock the swap file for writing. This prevents another instance from
+ * using the same swap file because of a configuration error. */
+ fl.l_type = F_WRLCK;
+ fl.l_whence = SEEK_SET;
+ fl.l_start = fl.l_len = 0;
+ if (fcntl(server.vm_fd,F_SETLK,&fl) == -1) {
+ redisLog(REDIS_WARNING,
+ "Can't lock the swap file at '%s': %s. Make sure it is not used by another Redis instance.", server.vm_swap_file, strerror(errno));
+ exit(1);
+ }
+ /* Initialize */
server.vm_next_page = 0;
server.vm_near_pages = 0;
server.vm_stats_used_pages = 0;
return 1;
}
+/* Preload the keys of any command whose key positions are described by the
+ * first, last and step values of its command table entry.
+ *
+ * Nothing is done when vm_firstkey is 0. A negative vm_lastkey is taken
+ * relative to argc. Each selected argument is handed to waitForSwappedKey()
+ * after asserting that its index lies inside the argument vector. */
+static void waitForMultipleSwappedKeys(redisClient *c, struct redisCommand *cmd, int argc, robj **argv) {
+    int pos, final;
+
+    if (cmd->vm_firstkey != 0) {
+        final = (cmd->vm_lastkey < 0) ? argc + cmd->vm_lastkey
+                                      : cmd->vm_lastkey;
+        pos = cmd->vm_firstkey;
+        while (pos <= final) {
+            redisAssert(pos < argc);
+            waitForSwappedKey(c,argv[pos]);
+            pos += cmd->vm_keystep;
+        }
+    }
+}
+
/* Preload keys needed for the ZUNION and ZINTER commands. */
static void zunionInterBlockClientOnSwappedKeys(redisClient *c) {
int i, num;
* Return 1 if the client is marked as blocked, 0 if the client can
* continue as the keys it is going to access appear to be in memory. */
static int blockClientOnSwappedKeys(struct redisCommand *cmd, redisClient *c) {
- int j, last;
-
if (cmd->vm_preload_proc != NULL) {
cmd->vm_preload_proc(c);
} else {
- if (cmd->vm_firstkey == 0) return 0;
- last = cmd->vm_lastkey;
- if (last < 0) last = c->argc+last;
- for (j = cmd->vm_firstkey; j <= last; j += cmd->vm_keystep)
- waitForSwappedKey(c,c->argv[j]);
+ waitForMultipleSwappedKeys(c,cmd,c->argc,c->argv);
}
/* If the client was blocked for at least one key, mark it as blocked. */