* POSSIBILITY OF SUCH DAMAGE.
*/
-#define REDIS_VERSION "1.1.91"
+#define REDIS_VERSION "1.3.2"
#include "fmacros.h"
#include "config.h"
#define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
#define REDIS_RDB_ENC_LZF 3 /* string compressed with FASTLZ */
+/* Virtual memory object->where field. */
+#define REDIS_VM_MEMORY 0 /* The object is on memory */
+#define REDIS_VM_SWAPPED 1 /* The object is on disk */
+#define REDIS_VM_SWAPPING 2 /* Redis is swapping this object on disk */
+#define REDIS_VM_LOADING 3 /* Redis is loading this object from disk */
+
/* Client flags */
#define REDIS_CLOSE 1 /* This client connection should be closed ASAP */
#define REDIS_SLAVE 2 /* This client is a slave server */
#define REDIS_MASTER 4 /* This client is a master server */
#define REDIS_MONITOR 8 /* This client is a slave monitor, see MONITOR */
+#define REDIS_MULTI 16 /* This client is in a MULTI context */
+#define REDIS_BLOCKED 32 /* The client is waiting in a blocking operation */
/* Slave replication state - slave side */
#define REDIS_REPL_NONE 0 /* No active replication */
/*================================= Data types ============================== */
/* A redis object, that is a type able to hold a string / list / set */
+
+/* The VM object structure */
+struct redisObjectVM {
+ off_t offset; /* the page at witch the object is stored on disk */
+ int pages; /* number of pages used on disk */
+} vm;
+
+/* The actual Redis Object */
typedef struct redisObject {
void *ptr;
unsigned char type;
unsigned char encoding;
- unsigned char notused[2];
+ unsigned char storage; /* where? REDIS_VM_MEMORY, REDIS_VM_SWAPPED, ... */
+ unsigned char notused;
int refcount;
+ /* VM fields, this are only allocated if VM is active, otherwise the
+ * object allocation function will just allocate
+ * sizeof(redisObjct) minus sizeof(redisObjectVM), so using
+ * Redis without VM active will not have any overhead. */
+ struct redisObjectVM vm;
} robj;
/* Macro used to initalize a Redis object allocated on the stack.
} while(0);
typedef struct redisDb {
- dict *dict;
- dict *expires;
+ dict *dict; /* The keyspace for this DB */
+ dict *expires; /* Timeout of keys with a timeout set */
+ dict *blockingkeys; /* Keys with clients waiting for data (BLPOP) */
int id;
} redisDb;
+/* Client MULTI/EXEC state */
+typedef struct multiCmd {
+ robj **argv;
+ int argc;
+ struct redisCommand *cmd;
+} multiCmd;
+
+typedef struct multiState {
+ multiCmd *commands; /* Array of MULTI commands */
+ int count; /* Total number of MULTI commands */
+} multiState;
+
/* With multiplexing we need to take per-clinet state.
* Clients are taken in a liked list. */
typedef struct redisClient {
int sentlen;
time_t lastinteraction; /* time of the last interaction, used for timeout */
int flags; /* REDIS_CLOSE | REDIS_SLAVE | REDIS_MONITOR */
+ /* REDIS_MULTI */
int slaveseldb; /* slave selected db, if this client is a slave */
int authenticated; /* when requirepass is non-NULL */
int replstate; /* replication state if this is a slave */
int repldbfd; /* replication DB file descriptor */
- long repldboff; /* replication DB file offset */
+ long repldboff; /* replication DB file offset */
off_t repldbsize; /* replication DB file size */
+ multiState mstate; /* MULTI/EXEC state */
+ robj **blockingkeys; /* The key we waiting to terminate a blocking
+ * operation such as BLPOP. Otherwise NULL. */
+ int blockingkeysnum; /* Number of blocking keys */
+ time_t blockingto; /* Blocking operation timeout. If UNIX current time
+ * is >= blockingto then the operation timed out. */
} redisClient;
struct saveparam {
int port;
int fd;
redisDb *db;
- dict *sharingpool;
+ dict *sharingpool; /* Poll used for object sharing */
unsigned int sharingpoolsize;
long long dirty; /* changes to DB from the last save */
list *clients;
char *appendfilename;
char *requirepass;
int shareobjects;
+ int rdbcompression;
/* Replication related */
int isslave;
char *masterauth;
int replstate;
unsigned int maxclients;
unsigned long maxmemory;
+ unsigned int blockedclients;
/* Sort parameters - qsort_r() is only available under BSD so we
* have to take this state global, in order to pass it to sortCompare() */
int sort_desc;
int sort_alpha;
int sort_bypattern;
+ /* Virtual memory configuration */
+ int vm_enabled;
+ off_t vm_page_size;
+ off_t vm_pages;
+ long vm_max_memory;
+ /* Virtual memory state */
+ FILE *vm_fp;
+ int vm_fd;
+ off_t vm_next_page; /* Next probably empty page */
+ off_t vm_near_pages; /* Number of pages allocated sequentially */
};
typedef void redisCommandProc(redisClient *c);
struct sharedObjectsStruct {
robj *crlf, *ok, *err, *emptybulk, *czero, *cone, *pong, *space,
- *colon, *nullbulk, *nullmultibulk,
+ *colon, *nullbulk, *nullmultibulk, *queued,
*emptymultibulk, *wrongtypeerr, *nokeyerr, *syntaxerr, *sameobjecterr,
*outofrangeerr, *plus,
*select0, *select1, *select2, *select3, *select4,
static void zslFree(zskiplist *zsl);
static void zslInsert(zskiplist *zsl, double score, robj *obj);
static void sendReplyToClientWritev(aeEventLoop *el, int fd, void *privdata, int mask);
+static void initClientMultiState(redisClient *c);
+static void freeClientMultiState(redisClient *c);
+static void queueMultiCommand(redisClient *c, struct redisCommand *cmd);
+static void unblockClient(redisClient *c);
+static int handleClientsWaitingListPush(redisClient *c, robj *key, robj *ele);
+static void vmInit(void);
static void authCommand(redisClient *c);
static void pingCommand(redisClient *c);
static void zremCommand(redisClient *c);
static void zscoreCommand(redisClient *c);
static void zremrangebyscoreCommand(redisClient *c);
+static void multiCommand(redisClient *c);
+static void execCommand(redisClient *c);
+static void blpopCommand(redisClient *c);
+static void brpopCommand(redisClient *c);
/*================================= Globals ================================= */
{"lpush",lpushCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
{"rpop",rpopCommand,2,REDIS_CMD_INLINE},
{"lpop",lpopCommand,2,REDIS_CMD_INLINE},
+ {"brpop",brpopCommand,-3,REDIS_CMD_INLINE},
+ {"blpop",blpopCommand,-3,REDIS_CMD_INLINE},
{"llen",llenCommand,2,REDIS_CMD_INLINE},
{"lindex",lindexCommand,3,REDIS_CMD_INLINE},
{"lset",lsetCommand,4,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
{"zincrby",zincrbyCommand,4,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
{"zrem",zremCommand,3,REDIS_CMD_BULK},
{"zremrangebyscore",zremrangebyscoreCommand,4,REDIS_CMD_INLINE},
- {"zrange",zrangeCommand,4,REDIS_CMD_INLINE},
+ {"zrange",zrangeCommand,-4,REDIS_CMD_INLINE},
{"zrangebyscore",zrangebyscoreCommand,-4,REDIS_CMD_INLINE},
- {"zrevrange",zrevrangeCommand,4,REDIS_CMD_INLINE},
+ {"zrevrange",zrevrangeCommand,-4,REDIS_CMD_INLINE},
{"zcard",zcardCommand,2,REDIS_CMD_INLINE},
{"zscore",zscoreCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
{"incrby",incrbyCommand,3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
{"shutdown",shutdownCommand,1,REDIS_CMD_INLINE},
{"lastsave",lastsaveCommand,1,REDIS_CMD_INLINE},
{"type",typeCommand,2,REDIS_CMD_INLINE},
+ {"multi",multiCommand,1,REDIS_CMD_INLINE},
+ {"exec",execCommand,1,REDIS_CMD_INLINE},
{"sync",syncCommand,1,REDIS_CMD_INLINE},
{"flushdb",flushdbCommand,1,REDIS_CMD_INLINE},
{"flushall",flushallCommand,1,REDIS_CMD_INLINE},
zfree(val);
}
+static void dictListDestructor(void *privdata, void *val)
+{
+ DICT_NOTUSED(privdata);
+ listRelease((list*)val);
+}
+
static int sdsDictKeyCompare(void *privdata, const void *key1,
const void *key2)
{
dictRedisObjectDestructor /* val destructor */
};
+/* Keylist hash table type has unencoded redis objects as keys and
+ * lists as values. It's used for blocking operations (BLPOP) */
+static dictType keylistDictType = {
+ dictObjHash, /* hash function */
+ NULL, /* key dup */
+ NULL, /* val dup */
+ dictObjKeyCompare, /* key compare */
+ dictRedisObjectDestructor, /* key destructor */
+ dictListDestructor /* val destructor */
+};
+
/* ========================= Random utility functions ======================= */
/* Redis generally does not try to recover from out of memory conditions
listRewind(server.clients);
while ((ln = listYield(server.clients)) != NULL) {
c = listNodeValue(ln);
- if (!(c->flags & REDIS_SLAVE) && /* no timeout for slaves */
+ if (server.maxidletime &&
+ !(c->flags & REDIS_SLAVE) && /* no timeout for slaves */
!(c->flags & REDIS_MASTER) && /* no timeout for masters */
- (now - c->lastinteraction > server.maxidletime)) {
+ (now - c->lastinteraction > server.maxidletime))
+ {
redisLog(REDIS_DEBUG,"Closing idle client");
freeClient(c);
+ } else if (c->flags & REDIS_BLOCKED) {
+ if (c->blockingto != 0 && c->blockingto < now) {
+ addReply(c,shared.nullmultibulk);
+ unblockClient(c);
+ }
}
}
}
}
/* Close connections of timedout clients */
- if (server.maxidletime && !(loops % 10))
+ if ((server.maxidletime && !(loops % 10)) || server.blockedclients)
closeTimedoutClients();
/* Check if a background saving or AOF rewrite in progress terminated */
shared.nullbulk = createObject(REDIS_STRING,sdsnew("$-1\r\n"));
shared.nullmultibulk = createObject(REDIS_STRING,sdsnew("*-1\r\n"));
shared.emptymultibulk = createObject(REDIS_STRING,sdsnew("*0\r\n"));
- /* no such key */
shared.pong = createObject(REDIS_STRING,sdsnew("+PONG\r\n"));
+ shared.queued = createObject(REDIS_STRING,sdsnew("+QUEUED\r\n"));
shared.wrongtypeerr = createObject(REDIS_STRING,sdsnew(
"-ERR Operation against a key holding the wrong kind of value\r\n"));
shared.nokeyerr = createObject(REDIS_STRING,sdsnew(
server.appendfilename = "appendonly.aof";
server.requirepass = NULL;
server.shareobjects = 0;
+ server.rdbcompression = 1;
server.sharingpoolsize = 1024;
server.maxclients = 0;
+ server.blockedclients = 0;
server.maxmemory = 0;
+ server.vm_enabled = 0;
+ server.vm_page_size = 256; /* 256 bytes per page */
+ server.vm_pages = 1024*1024*100; /* 104 millions of pages */
+ server.vm_max_memory = 1024LL*1024*1024*1; /* 1 GB of RAM */
+
resetServerSaveParams();
appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
for (j = 0; j < server.dbnum; j++) {
server.db[j].dict = dictCreate(&hashDictType,NULL);
server.db[j].expires = dictCreate(&setDictType,NULL);
+ server.db[j].blockingkeys = dictCreate(&keylistDictType,NULL);
server.db[j].id = j;
}
server.cronloops = 0;
exit(1);
}
}
+
+ if (server.vm_enabled) vmInit();
}
/* Empty the whole database */
if ((server.shareobjects = yesnotoi(argv[1])) == -1) {
err = "argument must be 'yes' or 'no'"; goto loaderr;
}
+ } else if (!strcasecmp(argv[0],"rdbcompression") && argc == 2) {
+ if ((server.rdbcompression = yesnotoi(argv[1])) == -1) {
+ err = "argument must be 'yes' or 'no'"; goto loaderr;
+ }
} else if (!strcasecmp(argv[0],"shareobjectspoolsize") && argc == 2) {
server.sharingpoolsize = atoi(argv[1]);
if (server.sharingpoolsize < 1) {
server.pidfile = zstrdup(argv[1]);
} else if (!strcasecmp(argv[0],"dbfilename") && argc == 2) {
server.dbfilename = zstrdup(argv[1]);
+ } else if (!strcasecmp(argv[0],"vm-enabled") && argc == 2) {
+ if ((server.vm_enabled = yesnotoi(argv[1])) == -1) {
+ err = "argument must be 'yes' or 'no'"; goto loaderr;
+ }
} else {
err = "Bad directive or wrong number of arguments"; goto loaderr;
}
static void freeClient(redisClient *c) {
listNode *ln;
+ /* Note that if the client we are freeing is blocked into a blocking
+ * call, we have to set querybuf to NULL *before* to call unblockClient()
+ * to avoid processInputBuffer() will get called. Also it is important
+ * to remove the file events after this, because this call adds
+ * the READABLE event. */
+ sdsfree(c->querybuf);
+ c->querybuf = NULL;
+ if (c->flags & REDIS_BLOCKED)
+ unblockClient(c);
+
aeDeleteFileEvent(server.el,c->fd,AE_READABLE);
aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
- sdsfree(c->querybuf);
listRelease(c->reply);
freeClientArgv(c);
close(c->fd);
}
zfree(c->argv);
zfree(c->mbargv);
+ freeClientMultiState(c);
zfree(c);
}
c->multibulk = 0;
}
+/* Call() is the core of Redis execution of a command */
+static void call(redisClient *c, struct redisCommand *cmd) {
+ long long dirty;
+
+ dirty = server.dirty;
+ cmd->proc(c);
+ if (server.appendonly && server.dirty-dirty)
+ feedAppendOnlyFile(cmd,c->db->id,c->argv,c->argc);
+ if (server.dirty-dirty && listLength(server.slaves))
+ replicationFeedSlaves(server.slaves,cmd,c->db->id,c->argv,c->argc);
+ if (listLength(server.monitors))
+ replicationFeedSlaves(server.monitors,cmd,c->db->id,c->argv,c->argc);
+ server.stat_numcommands++;
+}
+
/* If this function gets called we already read a whole
* command, argments are in the client argv/argc fields.
* processCommand() execute the command or prepare the
* if 0 is returned the client was destroied (i.e. after QUIT). */
static int processCommand(redisClient *c) {
struct redisCommand *cmd;
- long long dirty;
/* Free some memory if needed (maxmemory setting) */
if (server.maxmemory) freeMemoryIfNeeded();
}
cmd = lookupCommand(c->argv[0]->ptr);
if (!cmd) {
- addReplySds(c,sdsnew("-ERR unknown command\r\n"));
+ addReplySds(c,
+ sdscatprintf(sdsempty(), "-ERR unknown command '%s'\r\n",
+ (char*)c->argv[0]->ptr));
resetClient(c);
return 1;
} else if ((cmd->arity > 0 && cmd->arity != c->argc) ||
}
/* Exec the command */
- dirty = server.dirty;
- cmd->proc(c);
- if (server.appendonly && server.dirty-dirty)
- feedAppendOnlyFile(cmd,c->db->id,c->argv,c->argc);
- if (server.dirty-dirty && listLength(server.slaves))
- replicationFeedSlaves(server.slaves,cmd,c->db->id,c->argv,c->argc);
- if (listLength(server.monitors))
- replicationFeedSlaves(server.monitors,cmd,c->db->id,c->argv,c->argc);
- server.stat_numcommands++;
+ if (c->flags & REDIS_MULTI && cmd->proc != execCommand) {
+ queueMultiCommand(c,cmd);
+ addReply(c,shared.queued);
+ } else {
+ call(c,cmd);
+ }
/* Prepare the client for the next command */
if (c->flags & REDIS_CLOSE) {
static void processInputBuffer(redisClient *c) {
again:
+ /* Before to process the input buffer, make sure the client is not
+ * waitig for a blocking operation such as BLPOP. Note that the first
+ * iteration the client is never blocked, otherwise the processInputBuffer
+ * would not be called at all, but after the execution of the first commands
+ * in the input buffer the client may be blocked, and the "goto again"
+ * will try to reiterate. The following line will make it return asap. */
+ if (c->flags & REDIS_BLOCKED) return;
if (c->bulklen == -1) {
/* Read the first line of the query */
char *p = strchr(c->querybuf,'\n');
c->authenticated = 0;
c->replstate = REDIS_REPL_NONE;
c->reply = listCreate();
+ c->blockingkeys = NULL;
+ c->blockingkeysnum = 0;
listSetFreeMethod(c->reply,decrRefCount);
listSetDupMethod(c->reply,dupClientReplyValue);
if (aeCreateFileEvent(server.el, c->fd, AE_READABLE,
return NULL;
}
listAddNodeTail(server.clients,c);
+ initClientMultiState(c);
return c;
}
o = listNodeValue(head);
listDelNode(server.objfreelist,head);
} else {
- o = zmalloc(sizeof(*o));
+ if (server.vm_enabled) {
+ o = zmalloc(sizeof(*o));
+ } else {
+ o = zmalloc(sizeof(*o)-sizeof(struct redisObjectVM));
+ }
}
o->type = type;
o->encoding = REDIS_ENCODING_RAW;
}
}
-/*============================ DB saving/loading ============================ */
+/*============================ RDB saving/loading =========================== */
static int rdbSaveType(FILE *fp, unsigned char type) {
if (fwrite(&type,1,1,fp) == 0) return -1;
/* Try LZF compression - under 20 bytes it's unable to compress even
* aaaaaaaaaaaaaaaaaa so skip it */
- if (len > 20) {
+ if (server.rdbcompression && len > 20) {
int retval;
retval = rdbSaveLzfStringObject(fp,obj);
return 0;
}
+/* Save a Redis object. */
+static int rdbSaveObject(FILE *fp, robj *o) {
+ if (o->type == REDIS_STRING) {
+ /* Save a string value */
+ if (rdbSaveStringObject(fp,o) == -1) return -1;
+ } else if (o->type == REDIS_LIST) {
+ /* Save a list value */
+ list *list = o->ptr;
+ listNode *ln;
+
+ listRewind(list);
+ if (rdbSaveLen(fp,listLength(list)) == -1) return -1;
+ while((ln = listYield(list))) {
+ robj *eleobj = listNodeValue(ln);
+
+ if (rdbSaveStringObject(fp,eleobj) == -1) return -1;
+ }
+ } else if (o->type == REDIS_SET) {
+ /* Save a set value */
+ dict *set = o->ptr;
+ dictIterator *di = dictGetIterator(set);
+ dictEntry *de;
+
+ if (rdbSaveLen(fp,dictSize(set)) == -1) return -1;
+ while((de = dictNext(di)) != NULL) {
+ robj *eleobj = dictGetEntryKey(de);
+
+ if (rdbSaveStringObject(fp,eleobj) == -1) return -1;
+ }
+ dictReleaseIterator(di);
+ } else if (o->type == REDIS_ZSET) {
+ /* Save a set value */
+ zset *zs = o->ptr;
+ dictIterator *di = dictGetIterator(zs->dict);
+ dictEntry *de;
+
+ if (rdbSaveLen(fp,dictSize(zs->dict)) == -1) return -1;
+ while((de = dictNext(di)) != NULL) {
+ robj *eleobj = dictGetEntryKey(de);
+ double *score = dictGetEntryVal(de);
+
+ if (rdbSaveStringObject(fp,eleobj) == -1) return -1;
+ if (rdbSaveDoubleValue(fp,*score) == -1) return -1;
+ }
+ dictReleaseIterator(di);
+ } else {
+ redisAssert(0 != 0);
+ }
+ return 0;
+}
+
+/* Return the length the object will have on disk if saved with
+ * the rdbSaveObject() function. Currently we use a trick to get
+ * this length with very little changes to the code. In the future
+ * we could switch to a faster solution. */
+static off_t rdbSavedObjectLen(robj *o) {
+ static FILE *fp = NULL;
+
+ if (fp == NULL) fp = fopen("/dev/null","w");
+ assert(fp != NULL);
+
+ rewind(fp);
+ assert(rdbSaveObject(fp,o) != 1);
+ return ftello(fp);
+}
+
/* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */
static int rdbSave(char *filename) {
dictIterator *di = NULL;
/* Save the key and associated value */
if (rdbSaveType(fp,o->type) == -1) goto werr;
if (rdbSaveStringObject(fp,key) == -1) goto werr;
- if (o->type == REDIS_STRING) {
- /* Save a string value */
- if (rdbSaveStringObject(fp,o) == -1) goto werr;
- } else if (o->type == REDIS_LIST) {
- /* Save a list value */
- list *list = o->ptr;
- listNode *ln;
-
- listRewind(list);
- if (rdbSaveLen(fp,listLength(list)) == -1) goto werr;
- while((ln = listYield(list))) {
- robj *eleobj = listNodeValue(ln);
-
- if (rdbSaveStringObject(fp,eleobj) == -1) goto werr;
- }
- } else if (o->type == REDIS_SET) {
- /* Save a set value */
- dict *set = o->ptr;
- dictIterator *di = dictGetIterator(set);
- dictEntry *de;
-
- if (rdbSaveLen(fp,dictSize(set)) == -1) goto werr;
- while((de = dictNext(di)) != NULL) {
- robj *eleobj = dictGetEntryKey(de);
-
- if (rdbSaveStringObject(fp,eleobj) == -1) goto werr;
- }
- dictReleaseIterator(di);
- } else if (o->type == REDIS_ZSET) {
- /* Save a set value */
- zset *zs = o->ptr;
- dictIterator *di = dictGetIterator(zs->dict);
- dictEntry *de;
-
- if (rdbSaveLen(fp,dictSize(zs->dict)) == -1) goto werr;
- while((de = dictNext(di)) != NULL) {
- robj *eleobj = dictGetEntryKey(de);
- double *score = dictGetEntryVal(de);
-
- if (rdbSaveStringObject(fp,eleobj) == -1) goto werr;
- if (rdbSaveDoubleValue(fp,*score) == -1) goto werr;
- }
- dictReleaseIterator(di);
- } else {
- redisAssert(0 != 0);
- }
+ /* Save the actual value */
+ if (rdbSaveObject(fp,o) == -1) goto werr;
}
dictReleaseIterator(di);
}
setGenericCommand(c,1);
}
-static void getCommand(redisClient *c) {
+static int getGenericCommand(redisClient *c) {
robj *o = lookupKeyRead(c->db,c->argv[1]);
if (o == NULL) {
addReply(c,shared.nullbulk);
+ return REDIS_OK;
} else {
if (o->type != REDIS_STRING) {
addReply(c,shared.wrongtypeerr);
+ return REDIS_ERR;
} else {
addReplyBulkLen(c,o);
addReply(c,o);
addReply(c,shared.crlf);
+ return REDIS_OK;
}
}
}
+static void getCommand(redisClient *c) {
+ getGenericCommand(c);
+}
+
static void getsetCommand(redisClient *c) {
- getCommand(c);
+ if (getGenericCommand(c) == REDIS_ERR) return;
if (dictAdd(c->db->dict,c->argv[1],c->argv[2]) == DICT_ERR) {
dictReplace(c->db->dict,c->argv[1],c->argv[2]);
} else {
kill(server.bgsavechildpid,SIGKILL);
rdbRemoveTempFile(server.bgsavechildpid);
}
- /* SYNC SAVE */
- if (rdbSave(server.dbfilename) == REDIS_OK) {
- if (server.daemonize)
- unlink(server.pidfile);
- redisLog(REDIS_WARNING,"%zu bytes used at exit",zmalloc_used_memory());
- redisLog(REDIS_WARNING,"Server exit now, bye bye...");
- exit(1);
+ if (server.appendonly) {
+ /* Append only file: fsync() the AOF and exit */
+ fsync(server.appendfd);
+ exit(0);
} else {
- /* Ooops.. error saving! The best we can do is to continue operating.
- * Note that if there was a background saving process, in the next
- * cron() Redis will be notified that the background saving aborted,
- * handling special stuff like slaves pending for synchronization... */
- redisLog(REDIS_WARNING,"Error trying to save the DB, can't exit");
- addReplySds(c,sdsnew("-ERR can't quit, problems saving the DB\r\n"));
+ /* Snapshotting. Perform a SYNC SAVE and exit */
+ if (rdbSave(server.dbfilename) == REDIS_OK) {
+ if (server.daemonize)
+ unlink(server.pidfile);
+ redisLog(REDIS_WARNING,"%zu bytes used at exit",zmalloc_used_memory());
+ redisLog(REDIS_WARNING,"Server exit now, bye bye...");
+ exit(0);
+ } else {
+ /* Ooops.. error saving! The best we can do is to continue operating.
+ * Note that if there was a background saving process, in the next
+ * cron() Redis will be notified that the background saving aborted,
+ * handling special stuff like slaves pending for synchronization... */
+ redisLog(REDIS_WARNING,"Error trying to save the DB, can't exit");
+ addReplySds(c,sdsnew("-ERR can't quit, problems saving the DB\r\n"));
+ }
}
}
lobj = lookupKeyWrite(c->db,c->argv[1]);
if (lobj == NULL) {
+ if (handleClientsWaitingListPush(c,c->argv[1],c->argv[2])) {
+ addReply(c,shared.ok);
+ return;
+ }
lobj = createListObject();
list = lobj->ptr;
if (where == REDIS_HEAD) {
addReply(c,shared.wrongtypeerr);
return;
}
+ if (handleClientsWaitingListPush(c,c->argv[1],c->argv[2])) {
+ addReply(c,shared.ok);
+ return;
+ }
list = lobj->ptr;
if (where == REDIS_HEAD) {
listAddNodeHead(list,c->argv[2]);
o = lookupKeyWrite(c->db,c->argv[1]);
if (o == NULL) {
- addReply(c,shared.nokeyerr);
+ addReply(c,shared.ok);
} else {
if (o->type != REDIS_LIST) {
addReply(c,shared.wrongtypeerr);
robj *ele = listNodeValue(ln);
list *dstlist;
- if (dobj == NULL) {
-
- /* Create the list if the key does not exist */
- dobj = createListObject();
- dictAdd(c->db->dict,c->argv[2],dobj);
- incrRefCount(c->argv[2]);
- } else if (dobj->type != REDIS_LIST) {
+ if (dobj && dobj->type != REDIS_LIST) {
addReply(c,shared.wrongtypeerr);
return;
}
- /* Add the element to the target list */
- dstlist = dobj->ptr;
- listAddNodeHead(dstlist,ele);
- incrRefCount(ele);
+
+ /* Add the element to the target list (unless it's directly
+ * passed to some BLPOP-ing client */
+ if (!handleClientsWaitingListPush(c,c->argv[2],ele)) {
+ if (dobj == NULL) {
+ /* Create the list if the key does not exist */
+ dobj = createListObject();
+ dictAdd(c->db->dict,c->argv[2],dobj);
+ incrRefCount(c->argv[2]);
+ }
+ dstlist = dobj->ptr;
+ listAddNodeHead(dstlist,ele);
+ incrRefCount(ele);
+ }
/* Send the element to the client as reply as well */
addReplyBulkLen(c,ele);
if (!setobj) {
zfree(dv);
if (dstkey) {
- deleteKey(c->db,dstkey);
+ if (deleteKey(c->db,dstkey))
+ server.dirty++;
addReply(c,shared.czero);
} else {
addReply(c,shared.nullmultibulk);
robj *o;
int start = atoi(c->argv[2]->ptr);
int end = atoi(c->argv[3]->ptr);
+ int withscores = 0;
+
+ if (c->argc == 5 && !strcasecmp(c->argv[4]->ptr,"withscores")) {
+ withscores = 1;
+ } else if (c->argc >= 5) {
+ addReply(c,shared.syntaxerr);
+ return;
+ }
o = lookupKeyRead(c->db,c->argv[1]);
if (o == NULL) {
ln = ln->forward[0];
}
- addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",rangelen));
+ addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",
+ withscores ? (rangelen*2) : rangelen));
for (j = 0; j < rangelen; j++) {
ele = ln->obj;
addReplyBulkLen(c,ele);
addReply(c,ele);
addReply(c,shared.crlf);
+ if (withscores)
+ addReplyDouble(c,ln->score);
ln = reverse ? ln->backward : ln->forward[0];
}
}
/* Lookup the key to sort. It must be of the right types */
sortval = lookupKeyRead(c->db,c->argv[1]);
if (sortval == NULL) {
- addReply(c,shared.nokeyerr);
+ addReply(c,shared.nullmultibulk);
return;
}
if (sortval->type != REDIS_SET && sortval->type != REDIS_LIST &&
"uptime_in_days:%ld\r\n"
"connected_clients:%d\r\n"
"connected_slaves:%d\r\n"
+ "blocked_clients:%d\r\n"
"used_memory:%zu\r\n"
"changes_since_last_save:%lld\r\n"
"bgsave_in_progress:%d\r\n"
uptime/(3600*24),
listLength(server.clients)-listLength(server.slaves),
listLength(server.slaves),
+ server.blockedclients,
server.usedmemory,
server.dirty,
server.bgsavechildpid != -1,
addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",ttl));
}
+/* ================================ MULTI/EXEC ============================== */
+
+/* Client state initialization for MULTI/EXEC */
+static void initClientMultiState(redisClient *c) {
+ c->mstate.commands = NULL;
+ c->mstate.count = 0;
+}
+
+/* Release all the resources associated with MULTI/EXEC state */
+static void freeClientMultiState(redisClient *c) {
+ int j;
+
+ for (j = 0; j < c->mstate.count; j++) {
+ int i;
+ multiCmd *mc = c->mstate.commands+j;
+
+ for (i = 0; i < mc->argc; i++)
+ decrRefCount(mc->argv[i]);
+ zfree(mc->argv);
+ }
+ zfree(c->mstate.commands);
+}
+
+/* Add a new command into the MULTI commands queue */
+static void queueMultiCommand(redisClient *c, struct redisCommand *cmd) {
+ multiCmd *mc;
+ int j;
+
+ c->mstate.commands = zrealloc(c->mstate.commands,
+ sizeof(multiCmd)*(c->mstate.count+1));
+ mc = c->mstate.commands+c->mstate.count;
+ mc->cmd = cmd;
+ mc->argc = c->argc;
+ mc->argv = zmalloc(sizeof(robj*)*c->argc);
+ memcpy(mc->argv,c->argv,sizeof(robj*)*c->argc);
+ for (j = 0; j < c->argc; j++)
+ incrRefCount(mc->argv[j]);
+ c->mstate.count++;
+}
+
+static void multiCommand(redisClient *c) {
+ c->flags |= REDIS_MULTI;
+ addReply(c,shared.ok);
+}
+
+static void execCommand(redisClient *c) {
+ int j;
+ robj **orig_argv;
+ int orig_argc;
+
+ if (!(c->flags & REDIS_MULTI)) {
+ addReplySds(c,sdsnew("-ERR EXEC without MULTI\r\n"));
+ return;
+ }
+
+ orig_argv = c->argv;
+ orig_argc = c->argc;
+ addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",c->mstate.count));
+ for (j = 0; j < c->mstate.count; j++) {
+ c->argc = c->mstate.commands[j].argc;
+ c->argv = c->mstate.commands[j].argv;
+ call(c,c->mstate.commands[j].cmd);
+ }
+ c->argv = orig_argv;
+ c->argc = orig_argc;
+ freeClientMultiState(c);
+ initClientMultiState(c);
+ c->flags &= (~REDIS_MULTI);
+}
+
+/* =========================== Blocking Operations ========================= */
+
+/* Currently Redis blocking operations support is limited to list POP ops,
+ * so the current implementation is not fully generic, but it is also not
+ * completely specific so it will not require a rewrite to support new
+ * kind of blocking operations in the future.
+ *
+ * Still it's important to note that list blocking operations can be already
+ * used as a notification mechanism in order to implement other blocking
+ * operations at application level, so there must be a very strong evidence
+ * of usefulness and generality before new blocking operations are implemented.
+ *
+ * This is how the current blocking POP works, we use BLPOP as example:
+ * - If the user calls BLPOP and the key exists and contains a non empty list
+ * then LPOP is called instead. So BLPOP is semantically the same as LPOP
+ * if there is not to block.
+ * - If instead BLPOP is called and the key does not exists or the list is
+ * empty we need to block. In order to do so we remove the notification for
+ * new data to read in the client socket (so that we'll not serve new
+ * requests if the blocking request is not served). Also we put the client
+ * in a dictionary (db->blockingkeys) mapping keys to a list of clients
+ * blocking for this keys.
+ * - If a PUSH operation against a key with blocked clients waiting is
+ * performed, we serve the first in the list: basically instead to push
+ * the new element inside the list we return it to the (first / oldest)
+ * blocking client, unblock the client, and remove it form the list.
+ *
+ * The above comment and the source code should be enough in order to understand
+ * the implementation and modify / fix it later.
+ */
+
+/* Set a client in blocking mode for the specified key, with the specified
+ * timeout */
+static void blockForKeys(redisClient *c, robj **keys, int numkeys, time_t timeout) {
+ dictEntry *de;
+ list *l;
+ int j;
+
+ c->blockingkeys = zmalloc(sizeof(robj*)*numkeys);
+ c->blockingkeysnum = numkeys;
+ c->blockingto = timeout;
+ for (j = 0; j < numkeys; j++) {
+ /* Add the key in the client structure, to map clients -> keys */
+ c->blockingkeys[j] = keys[j];
+ incrRefCount(keys[j]);
+
+ /* And in the other "side", to map keys -> clients */
+ de = dictFind(c->db->blockingkeys,keys[j]);
+ if (de == NULL) {
+ int retval;
+
+ /* For every key we take a list of clients blocked for it */
+ l = listCreate();
+ retval = dictAdd(c->db->blockingkeys,keys[j],l);
+ incrRefCount(keys[j]);
+ assert(retval == DICT_OK);
+ } else {
+ l = dictGetEntryVal(de);
+ }
+ listAddNodeTail(l,c);
+ }
+ /* Mark the client as a blocked client */
+ c->flags |= REDIS_BLOCKED;
+ aeDeleteFileEvent(server.el,c->fd,AE_READABLE);
+ server.blockedclients++;
+}
+
+/* Unblock a client that's waiting in a blocking operation such as BLPOP */
+static void unblockClient(redisClient *c) {
+ dictEntry *de;
+ list *l;
+ int j;
+
+ assert(c->blockingkeys != NULL);
+ /* The client may wait for multiple keys, so unblock it for every key. */
+ for (j = 0; j < c->blockingkeysnum; j++) {
+ /* Remove this client from the list of clients waiting for this key. */
+ de = dictFind(c->db->blockingkeys,c->blockingkeys[j]);
+ assert(de != NULL);
+ l = dictGetEntryVal(de);
+ listDelNode(l,listSearchKey(l,c));
+ /* If the list is empty we need to remove it to avoid wasting memory */
+ if (listLength(l) == 0)
+ dictDelete(c->db->blockingkeys,c->blockingkeys[j]);
+ decrRefCount(c->blockingkeys[j]);
+ }
+ /* Cleanup the client structure */
+ zfree(c->blockingkeys);
+ c->blockingkeys = NULL;
+ c->flags &= (~REDIS_BLOCKED);
+ server.blockedclients--;
+ /* Ok now we are ready to get read events from socket, note that we
+ * can't trap errors here as it's possible that unblockClients() is
+ * called from freeClient() itself, and the only thing we can do
+ * if we failed to register the READABLE event is to kill the client.
+ * Still the following function should never fail in the real world as
+ * we are sure the file descriptor is sane, and we exit on out of mem. */
+ aeCreateFileEvent(server.el, c->fd, AE_READABLE, readQueryFromClient, c);
+ /* As a final step we want to process data if there is some command waiting
+ * in the input buffer. Note that this is safe even if unblockClient()
+ * gets called from freeClient() because freeClient() will be smart
+ * enough to call this function *after* c->querybuf was set to NULL. */
+ if (c->querybuf && sdslen(c->querybuf) > 0) processInputBuffer(c);
+}
+
+/* This should be called from any function PUSHing into lists.
+ * 'c' is the "pushing client", 'key' is the key it is pushing data against,
+ * 'ele' is the element pushed.
+ *
+ * If the function returns 0 there was no client waiting for a list push
+ * against this key.
+ *
+ * If the function returns 1 there was a client waiting for a list push
+ * against this key, the element was passed to this client thus it's not
+ * needed to actually add it to the list and the caller should return asap. */
+static int handleClientsWaitingListPush(redisClient *c, robj *key, robj *ele) {
+ struct dictEntry *de;
+ redisClient *receiver;
+ list *l;
+ listNode *ln;
+
+ de = dictFind(c->db->blockingkeys,key);
+ if (de == NULL) return 0;
+ l = dictGetEntryVal(de);
+ ln = listFirst(l);
+ assert(ln != NULL);
+ receiver = ln->value;
+
+ addReplySds(receiver,sdsnew("*2\r\n"));
+ addReplyBulkLen(receiver,key);
+ addReply(receiver,key);
+ addReply(receiver,shared.crlf);
+ addReplyBulkLen(receiver,ele);
+ addReply(receiver,ele);
+ addReply(receiver,shared.crlf);
+ unblockClient(receiver);
+ return 1;
+}
+
+/* Blocking RPOP/LPOP */
+static void blockingPopGenericCommand(redisClient *c, int where) {
+ robj *o;
+ time_t timeout;
+ int j;
+
+ for (j = 1; j < c->argc-1; j++) {
+ o = lookupKeyWrite(c->db,c->argv[j]);
+ if (o != NULL) {
+ if (o->type != REDIS_LIST) {
+ addReply(c,shared.wrongtypeerr);
+ return;
+ } else {
+ list *list = o->ptr;
+ if (listLength(list) != 0) {
+ /* If the list contains elements fall back to the usual
+ * non-blocking POP operation */
+ robj *argv[2], **orig_argv;
+ int orig_argc;
+
+ /* We need to alter the command arguments before to call
+ * popGenericCommand() as the command takes a single key. */
+ orig_argv = c->argv;
+ orig_argc = c->argc;
+ argv[1] = c->argv[j];
+ c->argv = argv;
+ c->argc = 2;
+
+ /* Also the return value is different, we need to output
+ * the multi bulk reply header and the key name. The
+ * "real" command will add the last element (the value)
+ * for us. If this souds like an hack to you it's just
+ * because it is... */
+ addReplySds(c,sdsnew("*2\r\n"));
+ addReplyBulkLen(c,argv[1]);
+ addReply(c,argv[1]);
+ addReply(c,shared.crlf);
+ popGenericCommand(c,where);
+
+ /* Fix the client structure with the original stuff */
+ c->argv = orig_argv;
+ c->argc = orig_argc;
+ return;
+ }
+ }
+ }
+ }
+ /* If the list is empty or the key does not exists we must block */
+ timeout = strtol(c->argv[c->argc-1]->ptr,NULL,10);
+ if (timeout > 0) timeout += time(NULL);
+ blockForKeys(c,c->argv+1,c->argc-2,timeout);
+}
+
+static void blpopCommand(redisClient *c) {
+ blockingPopGenericCommand(c,REDIS_HEAD);
+}
+
+static void brpopCommand(redisClient *c) {
+ blockingPopGenericCommand(c,REDIS_TAIL);
+}
+
/* =============================== Replication ============================= */
static int syncWrite(int fd, char *ptr, ssize_t size, int timeout) {
obj = getDecodedObject(obj);
snprintf(buf,sizeof(buf),"$%ld\r\n",(long)sdslen(obj->ptr));
if (fwrite(buf,strlen(buf),1,fp) == 0) goto err;
- if (fwrite(obj->ptr,sdslen(obj->ptr),1,fp) == 0) goto err;
+ if (sdslen(obj->ptr) && fwrite(obj->ptr,sdslen(obj->ptr),1,fp) == 0)
+ goto err;
if (fwrite("\r\n",2,1,fp) == 0) goto err;
decrRefCount(obj);
return 1;
}
/* Save the expire time */
if (expiretime != -1) {
- char cmd[]="*3\r\n$6\r\nEXPIRE\r\n";
+ char cmd[]="*3\r\n$8\r\nEXPIREAT\r\n";
/* If this key is already expired skip it */
if (expiretime < now) continue;
if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr;
werr:
fclose(fp);
unlink(tmpfile);
- redisLog(REDIS_WARNING,"Write error writing append only fileon disk: %s", strerror(errno));
+ redisLog(REDIS_WARNING,"Write error writing append only file on disk: %s", strerror(errno));
if (di) dictReleaseIterator(di);
return REDIS_ERR;
}
unlink(tmpfile);
}
+/* =============================== Virtual Memory =========================== */
+static void vmInit(void) {
+ off_t totsize;
+
+ server.vm_fp = fopen("/tmp/redisvm","w+b");
+ if (server.vm_fp == NULL) {
+ redisLog(REDIS_WARNING,"Impossible to open the swap file. Exiting.");
+ exit(1);
+ }
+ server.vm_fd = fileno(server.vm_fp);
+ server.vm_next_page = 0;
+ server.vm_near_pages = 0;
+ totsize = server.vm_pages*server.vm_page_size;
+ redisLog(REDIS_NOTICE,"Allocating %lld bytes of swap file",totsize);
+ if (ftruncate(server.vm_fd,totsize) == -1) {
+ redisLog(REDIS_WARNING,"Can't ftruncate swap file: %s. Exiting.",
+ strerror(errno));
+ exit(1);
+ } else {
+ redisLog(REDIS_NOTICE,"Swap file allocated with success");
+ }
+ /* Try to remove the swap file, so the OS will really delete it from the
+ * file system when Redis exists. */
+ unlink("/tmp/redisvm");
+}
+
/* ================================= Debugging ============================== */
static void debugCommand(redisClient *c) {
}
redisLog(REDIS_WARNING,"DB reloaded by DEBUG RELOAD");
addReply(c,shared.ok);
+ } else if (!strcasecmp(c->argv[1]->ptr,"loadaof")) {
+ emptyDb();
+ if (loadAppendOnlyFile(server.appendfilename) != REDIS_OK) {
+ addReply(c,shared.err);
+ return;
+ }
+ redisLog(REDIS_WARNING,"Append Only File loaded by DEBUG LOADAOF");
+ addReply(c,shared.ok);
} else if (!strcasecmp(c->argv[1]->ptr,"object") && c->argc == 3) {
dictEntry *de = dictFind(c->db->dict,c->argv[2]);
robj *key, *val;
key = dictGetEntryKey(de);
val = dictGetEntryVal(de);
addReplySds(c,sdscatprintf(sdsempty(),
- "+Key at:%p refcount:%d, value at:%p refcount:%d encoding:%d\r\n",
+ "+Key at:%p refcount:%d, value at:%p refcount:%d encoding:%d serializedlength:%lld\r\n",
(void*)key, key->refcount, (void*)val, val->refcount,
- val->encoding));
+ val->encoding, rdbSavedObjectLen(val)));
} else {
addReplySds(c,sdsnew(
"-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>|RELOAD]\r\n"));
#else
return (void*) uc->uc_mcontext->__ss.__eip;
#endif
-#elif defined(__i386__) || defined(__X86_64__) /* Linux x86 */
- return (void*) uc->uc_mcontext.gregs[REG_EIP];
+#elif defined(__i386__) || defined(__X86_64__) || defined(__x86_64__)
+ return (void*) uc->uc_mcontext.gregs[REG_EIP]; /* Linux 32/64 bit */
#elif defined(__ia64__) /* Linux IA64 */
return (void*) uc->uc_mcontext.sc_ip;
#else
redisLog(REDIS_WARNING,"%d redis-server %p %s + %d", i, trace[i], fn, (unsigned int)offset);
}
}
- // free(messages); Don't call free() with possibly corrupted memory.
+ /* free(messages); Don't call free() with possibly corrupted memory. */
exit(0);
}