* POSSIBILITY OF SUCH DAMAGE.
*/
-#define REDIS_VERSION "1.050"
+#define REDIS_VERSION "1.06"
#include "fmacros.h"
#include "config.h"
#include <fcntl.h>
#include <sys/time.h>
#include <sys/resource.h>
+#include <sys/uio.h>
#include <limits.h>
#include <math.h>
#define REDIS_MAX_SYNC_TIME 60 /* Slave can't take more to sync */
#define REDIS_EXPIRELOOKUPS_PER_CRON 100 /* try to expire 100 keys/second */
#define REDIS_MAX_WRITE_PER_EVENT (1024*64)
-#define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */
+#define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */
+
+/* If more then REDIS_WRITEV_THRESHOLD write packets are pending use writev */
+#define REDIS_WRITEV_THRESHOLD 3
+/* Max number of iovecs used for each writev call */
+#define REDIS_WRITEV_IOVEC_COUNT 256
/* Hash table parameters */
#define REDIS_HT_MINFILL 10 /* Minimal hash table fill 10% */
static zskiplist *zslCreate(void);
static void zslFree(zskiplist *zsl);
static void zslInsert(zskiplist *zsl, double score, robj *obj);
+static void sendReplyToClientWritev(aeEventLoop *el, int fd, void *privdata, int mask);
static void authCommand(redisClient *c);
static void pingCommand(redisClient *c);
static void msetCommand(redisClient *c);
static void msetnxCommand(redisClient *c);
static void zaddCommand(redisClient *c);
+static void zincrbyCommand(redisClient *c);
static void zrangeCommand(redisClient *c);
static void zrangebyscoreCommand(redisClient *c);
static void zrevrangeCommand(redisClient *c);
{"sdiffstore",sdiffstoreCommand,-3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
{"smembers",sinterCommand,2,REDIS_CMD_INLINE},
{"zadd",zaddCommand,4,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
+ {"zincrby",zincrbyCommand,4,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
{"zrem",zremCommand,3,REDIS_CMD_BULK},
{"zremrangebyscore",zremrangebyscoreCommand,4,REDIS_CMD_INLINE},
{"zrange",zrangeCommand,4,REDIS_CMD_INLINE},
/* Check if a background saving in progress terminated */
if (server.bgsaveinprogress) {
int statloc;
- if (wait4(-1,&statloc,WNOHANG,NULL)) {
+ if (wait3(&statloc,WNOHANG,NULL)) {
int exitcode = WEXITSTATUS(statloc);
int bysignal = WIFSIGNALED(statloc);
zfree(c);
}
+#define GLUEREPLY_UP_TO (1024)
static void glueReplyBuffersIfNeeded(redisClient *c) {
- int totlen = 0;
+ int copylen = 0;
+ char buf[GLUEREPLY_UP_TO];
listNode *ln;
robj *o;
listRewind(c->reply);
while((ln = listYield(c->reply))) {
+ int objlen;
+
o = ln->value;
- totlen += sdslen(o->ptr);
- /* This optimization makes more sense if we don't have to copy
- * too much data */
- if (totlen > 1024) return;
- }
- if (totlen > 0) {
- char buf[1024];
- int copylen = 0;
-
- listRewind(c->reply);
- while((ln = listYield(c->reply))) {
- o = ln->value;
- memcpy(buf+copylen,o->ptr,sdslen(o->ptr));
- copylen += sdslen(o->ptr);
+ objlen = sdslen(o->ptr);
+ if (copylen + objlen <= GLUEREPLY_UP_TO) {
+ memcpy(buf+copylen,o->ptr,objlen);
+ copylen += objlen;
listDelNode(c->reply,ln);
+ } else {
+ if (copylen == 0) return;
+ break;
}
- /* Now the output buffer is empty, add the new single element */
- o = createObject(REDIS_STRING,sdsnewlen(buf,totlen));
- listAddNodeTail(c->reply,o);
}
+ /* Now the output buffer is empty, add the new single element */
+ o = createObject(REDIS_STRING,sdsnewlen(buf,copylen));
+ listAddNodeHead(c->reply,o);
}
static void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) {
REDIS_NOTUSED(el);
REDIS_NOTUSED(mask);
- if (server.glueoutputbuf && listLength(c->reply) > 1)
- glueReplyBuffersIfNeeded(c);
+ /* Use writev() if we have enough buffers to send */
+ if (!server.glueoutputbuf &&
+ listLength(c->reply) > REDIS_WRITEV_THRESHOLD &&
+ !(c->flags & REDIS_MASTER))
+ {
+ sendReplyToClientWritev(el, fd, privdata, mask);
+ return;
+ }
+
while(listLength(c->reply)) {
+ if (server.glueoutputbuf && listLength(c->reply) > 1)
+ glueReplyBuffersIfNeeded(c);
+
o = listNodeValue(listFirst(c->reply));
objlen = sdslen(o->ptr);
}
}
+static void sendReplyToClientWritev(aeEventLoop *el, int fd, void *privdata, int mask)
+{
+ redisClient *c = privdata;
+ int nwritten = 0, totwritten = 0, objlen, willwrite;
+ robj *o;
+ struct iovec iov[REDIS_WRITEV_IOVEC_COUNT];
+ int offset, ion = 0;
+ REDIS_NOTUSED(el);
+ REDIS_NOTUSED(mask);
+
+ listNode *node;
+ while (listLength(c->reply)) {
+ offset = c->sentlen;
+ ion = 0;
+ willwrite = 0;
+
+ /* fill-in the iov[] array */
+ for(node = listFirst(c->reply); node; node = listNextNode(node)) {
+ o = listNodeValue(node);
+ objlen = sdslen(o->ptr);
+
+ if (totwritten + objlen - offset > REDIS_MAX_WRITE_PER_EVENT)
+ break;
+
+ if(ion == REDIS_WRITEV_IOVEC_COUNT)
+ break; /* no more iovecs */
+
+ iov[ion].iov_base = ((char*)o->ptr) + offset;
+ iov[ion].iov_len = objlen - offset;
+ willwrite += objlen - offset;
+ offset = 0; /* just for the first item */
+ ion++;
+ }
+
+ if(willwrite == 0)
+ break;
+
+ /* write all collected blocks at once */
+ if((nwritten = writev(fd, iov, ion)) < 0) {
+ if (errno != EAGAIN) {
+ redisLog(REDIS_DEBUG,
+ "Error writing to client: %s", strerror(errno));
+ freeClient(c);
+ return;
+ }
+ break;
+ }
+
+ totwritten += nwritten;
+ offset = c->sentlen;
+
+ /* remove written robjs from c->reply */
+ while (nwritten && listLength(c->reply)) {
+ o = listNodeValue(listFirst(c->reply));
+ objlen = sdslen(o->ptr);
+
+ if(nwritten >= objlen - offset) {
+ listDelNode(c->reply, listFirst(c->reply));
+ nwritten -= objlen - offset;
+ c->sentlen = 0;
+ } else {
+ /* partial write */
+ c->sentlen += nwritten;
+ break;
+ }
+ offset = 0;
+ }
+ }
+
+ if (totwritten > 0)
+ c->lastinteraction = time(NULL);
+
+ if (listLength(c->reply) == 0) {
+ c->sentlen = 0;
+ aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
+ }
+}
+
static struct redisCommand *lookupCommand(char *name) {
int j = 0;
while(cmdTable[j].name != NULL) {
listSetFreeMethod(c->reply,decrRefCount);
listSetDupMethod(c->reply,dupClientReplyValue);
if (aeCreateFileEvent(server.el, c->fd, AE_READABLE,
- readQueryFromClient, c, NULL) == AE_ERR) {
+ readQueryFromClient, c) == AE_ERR) {
freeClient(c);
return NULL;
}
(c->replstate == REDIS_REPL_NONE ||
c->replstate == REDIS_REPL_ONLINE) &&
aeCreateFileEvent(server.el, c->fd, AE_WRITABLE,
- sendReplyToClient, c, NULL) == AE_ERR) return;
+ sendReplyToClient, c) == AE_ERR) return;
if (obj->encoding != REDIS_ENCODING_RAW) {
obj = getDecodedObject(obj);
} else {
decrRefCount(o);
}
+static void addReplyDouble(redisClient *c, double d) {
+ char buf[128];
+
+ snprintf(buf,sizeof(buf),"%.17g",d);
+ addReplySds(c,sdscatprintf(sdsempty(),"$%d\r\n%s\r\n",
+ strlen(buf),buf));
+}
+
static void addReplyBulkLen(redisClient *c, robj *obj) {
size_t len;
buf[0] = (val < 0) ? 255 : 254;
} else {
snprintf((char*)buf+1,sizeof(buf)-1,"%.17g",val);
- buf[0] = strlen((char*)buf);
+ buf[0] = strlen((char*)buf+1);
len = buf[0]+1;
}
if (fwrite(buf,len,1,fp) == 0) return -1;
}
}
+static void msetGenericCommand(redisClient *c, int nx) {
+ int j;
+
+ if ((c->argc % 2) == 0) {
+ addReplySds(c,sdsnew("-ERR wrong number of arguments\r\n"));
+ return;
+ }
+ /* Handle the NX flag. The MSETNX semantic is to return zero and don't
+ * set nothing at all if at least one already key exists. */
+ if (nx) {
+ for (j = 1; j < c->argc; j += 2) {
+ if (dictFind(c->db->dict,c->argv[j]) != NULL) {
+ addReply(c, shared.czero);
+ return;
+ }
+ }
+ }
+
+ for (j = 1; j < c->argc; j += 2) {
+ int retval;
+
+ retval = dictAdd(c->db->dict,c->argv[j],c->argv[j+1]);
+ if (retval == DICT_ERR) {
+ dictReplace(c->db->dict,c->argv[j],c->argv[j+1]);
+ incrRefCount(c->argv[j+1]);
+ } else {
+ incrRefCount(c->argv[j]);
+ incrRefCount(c->argv[j+1]);
+ }
+ removeExpire(c->db,c->argv[j]);
+ }
+ server.dirty += (c->argc-1)/2;
+ addReply(c, nx ? shared.cone : shared.ok);
+}
+
+static void msetCommand(redisClient *c) {
+ msetGenericCommand(c,0);
+}
+
+static void msetnxCommand(redisClient *c) {
+ msetGenericCommand(c,1);
+}
+
static void incrDecrCommand(redisClient *c, long long incr) {
long long value;
int retval;
/* The actual Z-commands implementations */
-static void zaddCommand(redisClient *c) {
+/* This generic command implements both ZADD and ZINCRBY.
+ * scoreval is the score if the operation is a ZADD (doincrement == 0) or
+ * the increment if the operation is a ZINCRBY (doincrement == 1). */
+static void zaddGenericCommand(redisClient *c, robj *key, robj *ele, double scoreval, int doincrement) {
robj *zsetobj;
zset *zs;
double *score;
- zsetobj = lookupKeyWrite(c->db,c->argv[1]);
+ zsetobj = lookupKeyWrite(c->db,key);
if (zsetobj == NULL) {
zsetobj = createZsetObject();
- dictAdd(c->db->dict,c->argv[1],zsetobj);
- incrRefCount(c->argv[1]);
+ dictAdd(c->db->dict,key,zsetobj);
+ incrRefCount(key);
} else {
if (zsetobj->type != REDIS_ZSET) {
addReply(c,shared.wrongtypeerr);
return;
}
}
- score = zmalloc(sizeof(double));
- *score = strtod(c->argv[2]->ptr,NULL);
zs = zsetobj->ptr;
- if (dictAdd(zs->dict,c->argv[3],score) == DICT_OK) {
+
+ /* Ok now since we implement both ZADD and ZINCRBY here the code
+ * needs to handle the two different conditions. It's all about setting
+ * '*score', that is, the new score to set, to the right value. */
+ score = zmalloc(sizeof(double));
+ if (doincrement) {
+ dictEntry *de;
+
+ /* Read the old score. If the element was not present starts from 0 */
+ de = dictFind(zs->dict,ele);
+ if (de) {
+ double *oldscore = dictGetEntryVal(de);
+ *score = *oldscore + scoreval;
+ } else {
+ *score = scoreval;
+ }
+ } else {
+ *score = scoreval;
+ }
+
+ /* What follows is a simple remove and re-insert operation that is common
+ * to both ZADD and ZINCRBY... */
+ if (dictAdd(zs->dict,ele,score) == DICT_OK) {
/* case 1: New element */
- incrRefCount(c->argv[3]); /* added to hash */
- zslInsert(zs->zsl,*score,c->argv[3]);
- incrRefCount(c->argv[3]); /* added to skiplist */
+ incrRefCount(ele); /* added to hash */
+ zslInsert(zs->zsl,*score,ele);
+ incrRefCount(ele); /* added to skiplist */
server.dirty++;
- addReply(c,shared.cone);
+ if (doincrement)
+ addReplyDouble(c,*score);
+ else
+ addReply(c,shared.cone);
} else {
dictEntry *de;
double *oldscore;
/* case 2: Score update operation */
- de = dictFind(zs->dict,c->argv[3]);
+ de = dictFind(zs->dict,ele);
assert(de != NULL);
oldscore = dictGetEntryVal(de);
if (*score != *oldscore) {
int deleted;
- deleted = zslDelete(zs->zsl,*oldscore,c->argv[3]);
+ /* Remove and insert the element in the skip list with new score */
+ deleted = zslDelete(zs->zsl,*oldscore,ele);
assert(deleted != 0);
- zslInsert(zs->zsl,*score,c->argv[3]);
- incrRefCount(c->argv[3]);
- dictReplace(zs->dict,c->argv[3],score);
+ zslInsert(zs->zsl,*score,ele);
+ incrRefCount(ele);
+ /* Update the score in the hash table */
+ dictReplace(zs->dict,ele,score);
server.dirty++;
} else {
zfree(score);
}
- addReply(c,shared.czero);
+ if (doincrement)
+ addReplyDouble(c,*score);
+ else
+ addReply(c,shared.czero);
}
}
+static void zaddCommand(redisClient *c) {
+ double scoreval;
+
+ scoreval = strtod(c->argv[2]->ptr,NULL);
+ zaddGenericCommand(c,c->argv[1],c->argv[3],scoreval,0);
+}
+
+static void zincrbyCommand(redisClient *c) {
+ double scoreval;
+
+ scoreval = strtod(c->argv[2]->ptr,NULL);
+ zaddGenericCommand(c,c->argv[1],c->argv[3],scoreval,1);
+}
+
static void zremCommand(redisClient *c) {
robj *zsetobj;
zset *zs;
if (!de) {
addReply(c,shared.nullbulk);
} else {
- char buf[128];
double *score = dictGetEntryVal(de);
- snprintf(buf,sizeof(buf),"%.17g",*score);
- addReplySds(c,sdscatprintf(sdsempty(),"$%d\r\n%s\r\n",
- strlen(buf),buf));
+ addReplyDouble(c,*score);
}
}
}
char buf[REDIS_SORTKEY_MAX+1];
} keyname;
+ /* If the pattern is "#" return the substitution object itself in order
+ * to implement the "SORT ... GET #" feature. */
+ spat = pattern->ptr;
+ if (spat[0] == '#' && spat[1] == '\0') {
+ return subst;
+ }
+
+ /* The substitution object may be specially encoded. If so we create
+ * a decoded object on the fly. */
if (subst->encoding == REDIS_ENCODING_RAW)
+ /* If we don't need to get a decoded object increment the refcount
+ * so that the final decrRefCount() call will restore the original
+ * count */
incrRefCount(subst);
else {
subst = getDecodedObject(subst);
}
- spat = pattern->ptr;
ssub = subst->ptr;
if (sdslen(spat)+sdslen(ssub)-1 > REDIS_SORTKEY_MAX) return NULL;
p = strchr(spat,'*');
- if (!p) return NULL;
+ if (!p) {
+ decrRefCount(subst);
+ return NULL;
+ }
prefixlen = p-spat;
sublen = sdslen(ssub);
addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",ttl));
}
-static void msetGenericCommand(redisClient *c, int nx) {
- int j;
-
- if ((c->argc % 2) == 0) {
- addReplySds(c,sdsnew("-ERR wrong number of arguments\r\n"));
- return;
- }
- /* Handle the NX flag. The MSETNX semantic is to return zero and don't
- * set nothing at all if at least one already key exists. */
- if (nx) {
- for (j = 1; j < c->argc; j += 2) {
- if (dictFind(c->db->dict,c->argv[j]) != NULL) {
- addReply(c, shared.czero);
- return;
- }
- }
- }
-
- for (j = 1; j < c->argc; j += 2) {
- int retval;
-
- retval = dictAdd(c->db->dict,c->argv[j],c->argv[j+1]);
- if (retval == DICT_ERR) {
- dictReplace(c->db->dict,c->argv[j],c->argv[j+1]);
- incrRefCount(c->argv[j+1]);
- } else {
- incrRefCount(c->argv[j]);
- incrRefCount(c->argv[j+1]);
- }
- removeExpire(c->db,c->argv[j]);
- }
- server.dirty += (c->argc-1)/2;
- addReply(c, nx ? shared.cone : shared.ok);
-}
-
-static void msetCommand(redisClient *c) {
- msetGenericCommand(c,0);
-}
-
-static void msetnxCommand(redisClient *c) {
- msetGenericCommand(c,1);
-}
-
/* =============================== Replication ============================= */
static int syncWrite(int fd, char *ptr, ssize_t size, int timeout) {
aeDeleteFileEvent(server.el,slave->fd,AE_WRITABLE);
slave->replstate = REDIS_REPL_ONLINE;
if (aeCreateFileEvent(server.el, slave->fd, AE_WRITABLE,
- sendReplyToClient, slave, NULL) == AE_ERR) {
+ sendReplyToClient, slave) == AE_ERR) {
freeClient(slave);
return;
}
slave->repldbsize = buf.st_size;
slave->replstate = REDIS_REPL_SEND_BULK;
aeDeleteFileEvent(server.el,slave->fd,AE_WRITABLE);
- if (aeCreateFileEvent(server.el, slave->fd, AE_WRITABLE, sendBulkToSlave, slave, NULL) == AE_ERR) {
+ if (aeCreateFileEvent(server.el, slave->fd, AE_WRITABLE, sendBulkToSlave, slave) == AE_ERR) {
freeClient(slave);
continue;
}
redisLog(REDIS_NOTICE,"DB loaded from disk");
}
if (aeCreateFileEvent(server.el, server.fd, AE_READABLE,
- acceptHandler, NULL, NULL) == AE_ERR) oom("creating file event");
+ acceptHandler, NULL) == AE_ERR) oom("creating file event");
redisLog(REDIS_NOTICE,"The server is now ready to accept connections on port %d", server.port);
aeMain(server.el);
aeDeleteEventLoop(server.el);