* POSSIBILITY OF SUCH DAMAGE.
*/
-#define REDIS_VERSION "1.3.0"
+#define REDIS_VERSION "1.3.2"
#include "fmacros.h"
#include "config.h"
#define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
#define REDIS_RDB_ENC_LZF 3 /* string compressed with FASTLZ */
+/* Virtual memory object->where field. */
+#define REDIS_VM_MEMORY 0 /* The object is on memory */
+#define REDIS_VM_SWAPPED 1 /* The object is on disk */
+#define REDIS_VM_SWAPPING 2 /* Redis is swapping this object on disk */
+#define REDIS_VM_LOADING 3 /* Redis is loading this object from disk */
+
/* Client flags */
#define REDIS_CLOSE 1 /* This client connection should be closed ASAP */
#define REDIS_SLAVE 2 /* This client is a slave server */
/*================================= Data types ============================== */
/* A redis object, that is a type able to hold a string / list / set */
+
+/* The VM object structure: records where an object is stored on disk.
+ * It is embedded in struct redisObject as the "vm" member. Only the type
+ * is declared here; no file-scope object is defined. */
+struct redisObjectVM {
+    off_t offset; /* the page at which the object is stored on disk */
+    int pages; /* number of pages used on disk */
+};
+
+/* The actual Redis Object: a reference counted, typed and encoded value
+ * pointed to by ptr. */
typedef struct redisObject {
void *ptr;
unsigned char type;
unsigned char encoding;
- unsigned char notused[2];
+ unsigned char storage; /* where? REDIS_VM_MEMORY, REDIS_VM_SWAPPED, ... */
+ unsigned char notused;
int refcount;
+ /* VM fields, these are only allocated if VM is active, otherwise the
+ * object allocation function will just allocate
+ * sizeof(struct redisObject) minus sizeof(struct redisObjectVM), so
+ * using Redis without VM active will not have any overhead.
+ * Because of this truncated allocation the field must remain the
+ * last member of the structure. */
+ struct redisObjectVM vm;
} robj;
/* Macro used to initalize a Redis object allocated on the stack.
long repldboff; /* replication DB file offset */
off_t repldbsize; /* replication DB file size */
multiState mstate; /* MULTI/EXEC state */
- robj *blockingkey; /* The key we waiting to terminate a blocking
+ robj **blockingkeys; /* The key we waiting to terminate a blocking
* operation such as BLPOP. Otherwise NULL. */
+ int blockingkeysnum; /* Number of blocking keys */
time_t blockingto; /* Blocking operation timeout. If UNIX current time
* is >= blockingto then the operation timed out. */
} redisClient;
int replstate;
unsigned int maxclients;
unsigned long maxmemory;
+ unsigned int blockedclients;
/* Sort parameters - qsort_r() is only available under BSD so we
* have to take this state global, in order to pass it to sortCompare() */
int sort_desc;
int sort_alpha;
int sort_bypattern;
+ /* Virtual memory configuration */
+ int vm_enabled;
+ off_t vm_page_size;
+ off_t vm_pages;
+ long vm_max_memory;
+ /* Virtual memory state */
+ FILE *vm_fp;
+ int vm_fd;
+ off_t vm_next_page; /* Next probably empty page */
+ off_t vm_near_pages; /* Number of pages allocated sequentially */
};
typedef void redisCommandProc(redisClient *c);
static void queueMultiCommand(redisClient *c, struct redisCommand *cmd);
static void unblockClient(redisClient *c);
static int handleClientsWaitingListPush(redisClient *c, robj *key, robj *ele);
+static void vmInit(void);
static void authCommand(redisClient *c);
static void pingCommand(redisClient *c);
{"lpush",lpushCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
{"rpop",rpopCommand,2,REDIS_CMD_INLINE},
{"lpop",lpopCommand,2,REDIS_CMD_INLINE},
- {"brpop",brpopCommand,3,REDIS_CMD_INLINE},
- {"blpop",blpopCommand,3,REDIS_CMD_INLINE},
+ {"brpop",brpopCommand,-3,REDIS_CMD_INLINE},
+ {"blpop",blpopCommand,-3,REDIS_CMD_INLINE},
{"llen",llenCommand,2,REDIS_CMD_INLINE},
{"lindex",lindexCommand,3,REDIS_CMD_INLINE},
{"lset",lsetCommand,4,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
listRewind(server.clients);
while ((ln = listYield(server.clients)) != NULL) {
c = listNodeValue(ln);
- if (!(c->flags & REDIS_SLAVE) && /* no timeout for slaves */
+ if (server.maxidletime &&
+ !(c->flags & REDIS_SLAVE) && /* no timeout for slaves */
!(c->flags & REDIS_MASTER) && /* no timeout for masters */
- (now - c->lastinteraction > server.maxidletime)) {
+ (now - c->lastinteraction > server.maxidletime))
+ {
redisLog(REDIS_DEBUG,"Closing idle client");
freeClient(c);
+ } else if (c->flags & REDIS_BLOCKED) {
+ if (c->blockingto != 0 && c->blockingto < now) {
+ addReply(c,shared.nullmultibulk);
+ unblockClient(c);
+ }
}
}
}
}
/* Close connections of timedout clients */
- if (server.maxidletime && !(loops % 10))
+ if ((server.maxidletime && !(loops % 10)) || server.blockedclients)
closeTimedoutClients();
/* Check if a background saving or AOF rewrite in progress terminated */
server.rdbcompression = 1;
server.sharingpoolsize = 1024;
server.maxclients = 0;
+ server.blockedclients = 0;
server.maxmemory = 0;
+ server.vm_enabled = 0;
+ server.vm_page_size = 256; /* 256 bytes per page */
+ server.vm_pages = 1024*1024*100; /* 104 millions of pages */
+ server.vm_max_memory = 1024LL*1024*1024*1; /* 1 GB of RAM */
+
resetServerSaveParams();
appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
exit(1);
}
}
+
+ if (server.vm_enabled) vmInit();
}
/* Empty the whole database */
server.pidfile = zstrdup(argv[1]);
} else if (!strcasecmp(argv[0],"dbfilename") && argc == 2) {
server.dbfilename = zstrdup(argv[1]);
+ } else if (!strcasecmp(argv[0],"vm-enabled") && argc == 2) {
+ if ((server.vm_enabled = yesnotoi(argv[1])) == -1) {
+ err = "argument must be 'yes' or 'no'"; goto loaderr;
+ }
} else {
err = "Bad directive or wrong number of arguments"; goto loaderr;
}
c->authenticated = 0;
c->replstate = REDIS_REPL_NONE;
c->reply = listCreate();
- c->blockingkey = NULL;
+ c->blockingkeys = NULL;
+ c->blockingkeysnum = 0;
listSetFreeMethod(c->reply,decrRefCount);
listSetDupMethod(c->reply,dupClientReplyValue);
if (aeCreateFileEvent(server.el, c->fd, AE_READABLE,
o = listNodeValue(head);
listDelNode(server.objfreelist,head);
} else {
- o = zmalloc(sizeof(*o));
+ if (server.vm_enabled) {
+ o = zmalloc(sizeof(*o));
+ } else {
+ o = zmalloc(sizeof(*o)-sizeof(struct redisObjectVM));
+ }
}
o->type = type;
o->encoding = REDIS_ENCODING_RAW;
}
}
-/*============================ DB saving/loading ============================ */
+/*============================ RDB saving/loading =========================== */
static int rdbSaveType(FILE *fp, unsigned char type) {
if (fwrite(&type,1,1,fp) == 0) return -1;
return 0;
}
+/* Save a Redis object (the value only, not its type nor its key) to 'fp'.
+ *
+ * On-disk layout by type:
+ *  - REDIS_STRING: the string itself.
+ *  - REDIS_LIST:   the list length followed by every element.
+ *  - REDIS_SET:    the set size followed by every member.
+ *  - REDIS_ZSET:   the set size followed by (member, score) pairs.
+ *
+ * Returns 0 on success, -1 as soon as one of the write helpers fails.
+ * Dictionary iterators are always released before returning, including
+ * on the error paths. */
+static int rdbSaveObject(FILE *fp, robj *o) {
+    if (o->type == REDIS_STRING) {
+        /* Save a string value */
+        if (rdbSaveStringObject(fp,o) == -1) return -1;
+    } else if (o->type == REDIS_LIST) {
+        /* Save a list value */
+        list *list = o->ptr;
+        listNode *ln;
+
+        listRewind(list);
+        if (rdbSaveLen(fp,listLength(list)) == -1) return -1;
+        while((ln = listYield(list))) {
+            robj *eleobj = listNodeValue(ln);
+
+            if (rdbSaveStringObject(fp,eleobj) == -1) return -1;
+        }
+    } else if (o->type == REDIS_SET) {
+        /* Save a set value */
+        dict *set = o->ptr;
+        dictIterator *di = dictGetIterator(set);
+        dictEntry *de;
+
+        if (rdbSaveLen(fp,dictSize(set)) == -1) {
+            dictReleaseIterator(di);
+            return -1;
+        }
+        while((de = dictNext(di)) != NULL) {
+            robj *eleobj = dictGetEntryKey(de);
+
+            if (rdbSaveStringObject(fp,eleobj) == -1) {
+                /* Don't leak the iterator on write errors */
+                dictReleaseIterator(di);
+                return -1;
+            }
+        }
+        dictReleaseIterator(di);
+    } else if (o->type == REDIS_ZSET) {
+        /* Save a sorted set value */
+        zset *zs = o->ptr;
+        dictIterator *di = dictGetIterator(zs->dict);
+        dictEntry *de;
+
+        if (rdbSaveLen(fp,dictSize(zs->dict)) == -1) {
+            dictReleaseIterator(di);
+            return -1;
+        }
+        while((de = dictNext(di)) != NULL) {
+            robj *eleobj = dictGetEntryKey(de);
+            double *score = dictGetEntryVal(de);
+
+            if (rdbSaveStringObject(fp,eleobj) == -1 ||
+                rdbSaveDoubleValue(fp,*score) == -1)
+            {
+                /* Don't leak the iterator on write errors */
+                dictReleaseIterator(di);
+                return -1;
+            }
+        }
+        dictReleaseIterator(di);
+    } else {
+        /* Unknown object type: this should never happen */
+        redisAssert(0 != 0);
+    }
+    return 0;
+}
+
+/* Return the length the object will have on disk if saved with
+ * the rdbSaveObject() function. Currently we use a trick to get
+ * this length with very little changes to the code: the object is
+ * serialized to /dev/null and the resulting stream offset is returned.
+ * In the future we could switch to a faster solution.
+ *
+ * The /dev/null stream is opened on the first call and kept in a
+ * function-static variable, so it is reused by later calls. */
+static off_t rdbSavedObjectLen(robj *o) {
+    static FILE *fp = NULL;
+    int retval;
+
+    if (fp == NULL) fp = fopen("/dev/null","w");
+    assert(fp != NULL);
+
+    rewind(fp);
+    /* The serialization is performed outside of assert(): the call has
+     * side effects and must not vanish when NDEBUG is defined. Note that
+     * rdbSaveObject() signals errors with -1 (not 1). */
+    retval = rdbSaveObject(fp,o);
+    assert(retval != -1);
+    (void)retval; /* avoid an unused variable warning under NDEBUG */
+    return ftello(fp);
+}
+
/* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */
static int rdbSave(char *filename) {
dictIterator *di = NULL;
/* Save the key and associated value */
if (rdbSaveType(fp,o->type) == -1) goto werr;
if (rdbSaveStringObject(fp,key) == -1) goto werr;
- if (o->type == REDIS_STRING) {
- /* Save a string value */
- if (rdbSaveStringObject(fp,o) == -1) goto werr;
- } else if (o->type == REDIS_LIST) {
- /* Save a list value */
- list *list = o->ptr;
- listNode *ln;
-
- listRewind(list);
- if (rdbSaveLen(fp,listLength(list)) == -1) goto werr;
- while((ln = listYield(list))) {
- robj *eleobj = listNodeValue(ln);
-
- if (rdbSaveStringObject(fp,eleobj) == -1) goto werr;
- }
- } else if (o->type == REDIS_SET) {
- /* Save a set value */
- dict *set = o->ptr;
- dictIterator *di = dictGetIterator(set);
- dictEntry *de;
-
- if (rdbSaveLen(fp,dictSize(set)) == -1) goto werr;
- while((de = dictNext(di)) != NULL) {
- robj *eleobj = dictGetEntryKey(de);
-
- if (rdbSaveStringObject(fp,eleobj) == -1) goto werr;
- }
- dictReleaseIterator(di);
- } else if (o->type == REDIS_ZSET) {
- /* Save a set value */
- zset *zs = o->ptr;
- dictIterator *di = dictGetIterator(zs->dict);
- dictEntry *de;
-
- if (rdbSaveLen(fp,dictSize(zs->dict)) == -1) goto werr;
- while((de = dictNext(di)) != NULL) {
- robj *eleobj = dictGetEntryKey(de);
- double *score = dictGetEntryVal(de);
-
- if (rdbSaveStringObject(fp,eleobj) == -1) goto werr;
- if (rdbSaveDoubleValue(fp,*score) == -1) goto werr;
- }
- dictReleaseIterator(di);
- } else {
- redisAssert(0 != 0);
- }
+ /* Save the actual value */
+ if (rdbSaveObject(fp,o) == -1) goto werr;
}
dictReleaseIterator(di);
}
lobj = lookupKeyWrite(c->db,c->argv[1]);
if (lobj == NULL) {
- if (handleClientsWaitingListPush(c,c->argv[1],c->argv[2])) return;
+ if (handleClientsWaitingListPush(c,c->argv[1],c->argv[2])) {
+ addReply(c,shared.ok);
+ return;
+ }
lobj = createListObject();
list = lobj->ptr;
if (where == REDIS_HEAD) {
addReply(c,shared.wrongtypeerr);
return;
}
- if (handleClientsWaitingListPush(c,c->argv[1],c->argv[2])) return;
+ if (handleClientsWaitingListPush(c,c->argv[1],c->argv[2])) {
+ addReply(c,shared.ok);
+ return;
+ }
list = lobj->ptr;
if (where == REDIS_HEAD) {
listAddNodeHead(list,c->argv[2]);
robj *ele = listNodeValue(ln);
list *dstlist;
- if (dobj == NULL) {
-
- /* Create the list if the key does not exist */
- dobj = createListObject();
- dictAdd(c->db->dict,c->argv[2],dobj);
- incrRefCount(c->argv[2]);
- } else if (dobj->type != REDIS_LIST) {
+ if (dobj && dobj->type != REDIS_LIST) {
addReply(c,shared.wrongtypeerr);
return;
}
- /* Add the element to the target list */
- dstlist = dobj->ptr;
- listAddNodeHead(dstlist,ele);
- incrRefCount(ele);
+
+ /* Add the element to the target list (unless it's directly
+ * passed to some BLPOP-ing client) */
+ if (!handleClientsWaitingListPush(c,c->argv[2],ele)) {
+ if (dobj == NULL) {
+ /* Create the list if the key does not exist */
+ dobj = createListObject();
+ dictAdd(c->db->dict,c->argv[2],dobj);
+ incrRefCount(c->argv[2]);
+ }
+ dstlist = dobj->ptr;
+ listAddNodeHead(dstlist,ele);
+ incrRefCount(ele);
+ }
/* Send the element to the client as reply as well */
addReplyBulkLen(c,ele);
"uptime_in_days:%ld\r\n"
"connected_clients:%d\r\n"
"connected_slaves:%d\r\n"
+ "blocked_clients:%d\r\n"
"used_memory:%zu\r\n"
"changes_since_last_save:%lld\r\n"
"bgsave_in_progress:%d\r\n"
uptime/(3600*24),
listLength(server.clients)-listLength(server.slaves),
listLength(server.slaves),
+ server.blockedclients,
server.usedmemory,
server.dirty,
server.bgsavechildpid != -1,
* empty we need to block. In order to do so we remove the notification for
* new data to read in the client socket (so that we'll not serve new
* requests if the blocking request is not served). Also we put the client
- * in a dictionary (server.blockingkeys) mapping keys to a list of clients
+ * in a dictionary (db->blockingkeys) mapping keys to a list of clients
* blocking for this keys.
* - If a PUSH operation against a key with blocked clients waiting is
* performed, we serve the first in the list: basically instead to push
/* Set a client in blocking mode for the specified key, with the specified
* timeout */
-static void blockForKey(redisClient *c, robj *key, time_t timeout) {
+static void blockForKeys(redisClient *c, robj **keys, int numkeys, time_t timeout) {
dictEntry *de;
list *l;
+ int j;
- printf("blockForKey %p %s\n", c, key->ptr);
- c->blockingkey = key;
- incrRefCount(key);
+ c->blockingkeys = zmalloc(sizeof(robj*)*numkeys);
+ c->blockingkeysnum = numkeys;
c->blockingto = timeout;
- de = dictFind(c->db->blockingkeys,key);
- if (de == NULL) {
- int retval;
+ for (j = 0; j < numkeys; j++) {
+ /* Add the key in the client structure, to map clients -> keys */
+ c->blockingkeys[j] = keys[j];
+ incrRefCount(keys[j]);
- l = listCreate();
- retval = dictAdd(c->db->blockingkeys,key,l);
- incrRefCount(key);
- assert(retval == DICT_OK);
- } else {
- l = dictGetEntryVal(de);
+ /* And in the other "side", to map keys -> clients */
+ de = dictFind(c->db->blockingkeys,keys[j]);
+ if (de == NULL) {
+ int retval;
+
+ /* For every key we take a list of clients blocked for it */
+ l = listCreate();
+ retval = dictAdd(c->db->blockingkeys,keys[j],l);
+ incrRefCount(keys[j]);
+ assert(retval == DICT_OK);
+ } else {
+ l = dictGetEntryVal(de);
+ }
+ listAddNodeTail(l,c);
}
- listAddNodeTail(l,c);
+ /* Mark the client as a blocked client */
c->flags |= REDIS_BLOCKED;
aeDeleteFileEvent(server.el,c->fd,AE_READABLE);
+ server.blockedclients++;
}
/* Unblock a client that's waiting in a blocking operation such as BLPOP */
static void unblockClient(redisClient *c) {
dictEntry *de;
list *l;
+ int j;
- printf("unblockClient %p %s\n", c, c->blockingkey->ptr);
- /* Remove this client from the list of clients waiting for this key. */
- assert(c->blockingkey != NULL);
- de = dictFind(c->db->blockingkeys,c->blockingkey);
- assert(de != NULL);
- l = dictGetEntryVal(de);
- listDelNode(l,listSearchKey(l,c));
- /* If the list is empty we need to remove it to avoid wasting memory */
- if (listLength(l) == 0)
- dictDelete(c->db->blockingkeys,c->blockingkey);
- /* Finally set the right flags in the client structure */
- decrRefCount(c->blockingkey);
- c->blockingkey = NULL;
+ assert(c->blockingkeys != NULL);
+ /* The client may wait for multiple keys, so unblock it for every key. */
+ for (j = 0; j < c->blockingkeysnum; j++) {
+ /* Remove this client from the list of clients waiting for this key. */
+ de = dictFind(c->db->blockingkeys,c->blockingkeys[j]);
+ assert(de != NULL);
+ l = dictGetEntryVal(de);
+ listDelNode(l,listSearchKey(l,c));
+ /* If the list is empty we need to remove it to avoid wasting memory */
+ if (listLength(l) == 0)
+ dictDelete(c->db->blockingkeys,c->blockingkeys[j]);
+ decrRefCount(c->blockingkeys[j]);
+ }
+ /* Cleanup the client structure */
+ zfree(c->blockingkeys);
+ c->blockingkeys = NULL;
c->flags &= (~REDIS_BLOCKED);
+ server.blockedclients--;
/* Ok now we are ready to get read events from socket, note that we
* can't trap errors here as it's possible that unblockClients() is
* called from freeClient() itself, and the only thing we can do
ln = listFirst(l);
assert(ln != NULL);
receiver = ln->value;
- listDelNode(l,ln);
- if (listLength(l) == 0)
- dictDelete(c->db->blockingkeys,key);
+ addReplySds(receiver,sdsnew("*2\r\n"));
+ addReplyBulkLen(receiver,key);
+ addReply(receiver,key);
+ addReply(receiver,shared.crlf);
addReplyBulkLen(receiver,ele);
addReply(receiver,ele);
addReply(receiver,shared.crlf);
static void blockingPopGenericCommand(redisClient *c, int where) {
robj *o;
time_t timeout;
+ int j;
- o = lookupKeyWrite(c->db,c->argv[1]);
- if (o != NULL) {
- if (o->type != REDIS_LIST) {
- popGenericCommand(c,where);
- return;
- } else {
- list *list = o->ptr;
- if (listLength(list) != 0) {
- /* If the list contains elements fall back to the usual
- * non-blocking POP operation */
- popGenericCommand(c,where);
+ for (j = 1; j < c->argc-1; j++) {
+ o = lookupKeyWrite(c->db,c->argv[j]);
+ if (o != NULL) {
+ if (o->type != REDIS_LIST) {
+ addReply(c,shared.wrongtypeerr);
return;
+ } else {
+ list *list = o->ptr;
+ if (listLength(list) != 0) {
+ /* If the list contains elements fall back to the usual
+ * non-blocking POP operation */
+ robj *argv[2], **orig_argv;
+ int orig_argc;
+
+ /* We need to alter the command arguments before to call
+ * popGenericCommand() as the command takes a single key. */
+ orig_argv = c->argv;
+ orig_argc = c->argc;
+ argv[1] = c->argv[j];
+ c->argv = argv;
+ c->argc = 2;
+
+ /* Also the return value is different, we need to output
+ * the multi bulk reply header and the key name. The
+ * "real" command will add the last element (the value)
+ * for us. If this sounds like a hack to you it's just
+ * because it is... */
+ addReplySds(c,sdsnew("*2\r\n"));
+ addReplyBulkLen(c,argv[1]);
+ addReply(c,argv[1]);
+ addReply(c,shared.crlf);
+ popGenericCommand(c,where);
+
+ /* Fix the client structure with the original stuff */
+ c->argv = orig_argv;
+ c->argc = orig_argc;
+ return;
+ }
}
}
}
/* If the list is empty or the key does not exists we must block */
- timeout = strtol(c->argv[2]->ptr,NULL,10);
+ timeout = strtol(c->argv[c->argc-1]->ptr,NULL,10);
if (timeout > 0) timeout += time(NULL);
- blockForKey(c,c->argv[1],timeout);
+ blockForKeys(c,c->argv+1,c->argc-2,timeout);
}
static void blpopCommand(redisClient *c) {
unlink(tmpfile);
}
+/* =============================== Virtual Memory =========================== */
+/* Initialize the virtual memory subsystem: open the swap file, reset the
+ * page allocation state and extend the file to
+ * server.vm_pages * server.vm_page_size bytes.
+ *
+ * Failing to open or to resize the swap file is logged and terminates
+ * the process with exit(1). */
+static void vmInit(void) {
+    off_t totsize;
+
+    /* NOTE(review): the swap file path is hard-coded; presumably it
+     * should become a configuration directive. */
+    server.vm_fp = fopen("/tmp/redisvm","w+b");
+    if (server.vm_fp == NULL) {
+        redisLog(REDIS_WARNING,"Impossible to open the swap file. Exiting.");
+        exit(1);
+    }
+    server.vm_fd = fileno(server.vm_fp);
+    server.vm_next_page = 0;
+    server.vm_near_pages = 0;
+    totsize = server.vm_pages*server.vm_page_size;
+    /* off_t has no printf conversion of its own: cast it so that the
+     * argument really matches the %lld specifier. */
+    redisLog(REDIS_NOTICE,"Allocating %lld bytes of swap file",
+        (long long) totsize);
+    if (ftruncate(server.vm_fd,totsize) == -1) {
+        redisLog(REDIS_WARNING,"Can't ftruncate swap file: %s. Exiting.",
+            strerror(errno));
+        exit(1);
+    } else {
+        redisLog(REDIS_NOTICE,"Swap file allocated with success");
+    }
+    /* Try to remove the swap file, so the OS will really delete it from the
+     * file system when Redis exits. */
+    unlink("/tmp/redisvm");
+}
+
/* ================================= Debugging ============================== */
static void debugCommand(redisClient *c) {
key = dictGetEntryKey(de);
val = dictGetEntryVal(de);
addReplySds(c,sdscatprintf(sdsempty(),
- "+Key at:%p refcount:%d, value at:%p refcount:%d encoding:%d\r\n",
+ "+Key at:%p refcount:%d, value at:%p refcount:%d encoding:%d serializedlength:%lld\r\n",
(void*)key, key->refcount, (void*)val, val->refcount,
- val->encoding));
+ val->encoding, rdbSavedObjectLen(val)));
} else {
addReplySds(c,sdsnew(
"-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>|RELOAD]\r\n"));
redisLog(REDIS_WARNING,"%d redis-server %p %s + %d", i, trace[i], fn, (unsigned int)offset);
}
}
- // free(messages); Don't call free() with possibly corrupted memory.
+ /* free(messages); Don't call free() with possibly corrupted memory. */
exit(0);
}