#include "zmalloc.h" /* total memory usage aware version of malloc/free */
#include "lzf.h" /* LZF compression library */
#include "pqsort.h" /* Partial qsort for SORT+LIMIT */
+#include "zipmap.h"
/* Error codes */
#define REDIS_OK 0
#define REDIS_ZSET 3
#define REDIS_HASH 4
-/* Objects encoding */
+/* Objects encoding. Some kind of objects like Strings and Hashes can be
+ * internally represented in multiple ways. The 'encoding' field of the object
+ * is set to one of this fields for this object. */
#define REDIS_ENCODING_RAW 0 /* Raw representation */
#define REDIS_ENCODING_INT 1 /* Encoded as integer */
+#define REDIS_ENCODING_ZIPMAP 2 /* Encoded as zipmap */
+#define REDIS_ENCODING_HT 3 /* Encoded as an hash table */
/* Object types only used for dumping to disk */
#define REDIS_EXPIRETIME 253
#define APPENDFSYNC_ALWAYS 1
#define APPENDFSYNC_EVERYSEC 2
+/* Hashes related defaults */
+#define REDIS_HASH_MAX_ZIPMAP_ENTRIES 64
+#define REDIS_HASH_MAX_ZIPMAP_VALUE 512
+
/* We can print the stacktrace, so our assert is defined this way: */
#define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1)))
static void _redisAssert(char *estr, char *file, int line);
off_t vm_page_size;
off_t vm_pages;
unsigned long long vm_max_memory;
+ /* Hashes config */
+ size_t hash_max_zipmap_entries;
+ size_t hash_max_zipmap_value;
/* Virtual memory state */
FILE *vm_fp;
int vm_fd;
static void appendCommand(redisClient *c);
static void substrCommand(redisClient *c);
static void zrankCommand(redisClient *c);
+static void hsetCommand(redisClient *c);
+static void hgetCommand(redisClient *c);
+static void zunionCommand(redisClient *c);
+static void zinterCommand(redisClient *c);
/*================================= Globals ================================= */
{"zincrby",zincrbyCommand,4,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,1,1,1},
{"zrem",zremCommand,3,REDIS_CMD_BULK,1,1,1},
{"zremrangebyscore",zremrangebyscoreCommand,4,REDIS_CMD_INLINE,1,1,1},
+ {"zunion",zunionCommand,-4,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,0,0,0},
+ {"zinter",zinterCommand,-4,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,0,0,0},
{"zrange",zrangeCommand,-4,REDIS_CMD_INLINE,1,1,1},
{"zrangebyscore",zrangebyscoreCommand,-4,REDIS_CMD_INLINE,1,1,1},
{"zcount",zcountCommand,4,REDIS_CMD_INLINE,1,1,1},
{"zcard",zcardCommand,2,REDIS_CMD_INLINE,1,1,1},
{"zscore",zscoreCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,1,1,1},
{"zrank",zrankCommand,3,REDIS_CMD_INLINE,1,1,1},
+ {"hset",hsetCommand,4,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,1,1,1},
+ {"hget",hgetCommand,3,REDIS_CMD_BULK,1,1,1},
{"incrby",incrbyCommand,3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,1,1,1},
{"decrby",decrbyCommand,3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,1,1,1},
{"getset",getsetCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,1,1,1},
};
/* Db->dict */
-static dictType hashDictType = {
+static dictType dbDictType = {
dictObjHash, /* hash function */
NULL, /* key dup */
NULL, /* val dup */
NULL /* val destructor */
};
+/* Hash type hash table (note that small hashes are represented with zimpaps) */
+static dictType hashDictType = {
+ dictEncObjHash, /* hash function */
+ NULL, /* key dup */
+ NULL, /* val dup */
+ dictEncObjKeyCompare, /* key compare */
+ dictRedisObjectDestructor, /* key destructor */
+ dictRedisObjectDestructor /* val destructor */
+};
+
/* Keylist hash table type has unencoded redis objects as keys and
* lists as values. It's used for blocking operations (BLPOP) and to
* map swapped keys to a list of clients waiting for this keys to be loaded. */
server.vm_max_memory = 1024LL*1024*1024*1; /* 1 GB of RAM */
server.vm_max_threads = 4;
server.vm_blocked_clients = 0;
+ server.hash_max_zipmap_entries = REDIS_HASH_MAX_ZIPMAP_ENTRIES;
+ server.hash_max_zipmap_value = REDIS_HASH_MAX_ZIPMAP_VALUE;
resetServerSaveParams();
exit(1);
}
for (j = 0; j < server.dbnum; j++) {
- server.db[j].dict = dictCreate(&hashDictType,NULL);
+ server.db[j].dict = dictCreate(&dbDictType,NULL);
server.db[j].expires = dictCreate(&keyptrDictType,NULL);
server.db[j].blockingkeys = dictCreate(&keylistDictType,NULL);
if (server.vm_enabled)
server.vm_pages = strtoll(argv[1], NULL, 10);
} else if (!strcasecmp(argv[0],"vm-max-threads") && argc == 2) {
server.vm_max_threads = strtoll(argv[1], NULL, 10);
+ } else if (!strcasecmp(argv[0],"hash-max-zipmap-entries") && argc == 2){
+ server.hash_max_zipmap_entries = strtol(argv[1], NULL, 10);
+ } else if (!strcasecmp(argv[0],"hash-max-zipmap-value") && argc == 2){
+ server.hash_max_zipmap_value = strtol(argv[1], NULL, 10);
+ } else if (!strcasecmp(argv[0],"vm-max-threads") && argc == 2) {
+ server.vm_max_threads = strtoll(argv[1], NULL, 10);
} else {
err = "Bad directive or wrong number of arguments"; goto loaderr;
}
return createObject(REDIS_SET,d);
}
+static robj *createHashObject(void) {
+ /* All the Hashes start as zipmaps. Will be automatically converted
+ * into hash tables if there are enough elements or big elements
+ * inside. */
+ unsigned char *zm = zipmapNew();
+ robj *o = createObject(REDIS_HASH,zm);
+ o->encoding = REDIS_ENCODING_ZIPMAP;
+ return o;
+}
+
static robj *createZsetObject(void) {
zset *zs = zmalloc(sizeof(*zs));
}
static void freeHashObject(robj *o) {
- dictRelease((dict*) o->ptr);
+ switch (o->encoding) {
+ case REDIS_ENCODING_HT:
+ dictRelease((dict*) o->ptr);
+ break;
+ case REDIS_ENCODING_ZIPMAP:
+ zfree(o->ptr);
+ break;
+ default:
+ redisAssert(0);
+ break;
+ }
}
static void incrRefCount(robj *o) {
* to overwrite the old. So we delete the old key in the database.
* This will also make sure that swap pages about the old object
* will be marked as free. */
- if (deleteIfSwapped(c->db,c->argv[1]))
+ if (server.vm_enabled && deleteIfSwapped(c->db,c->argv[1]))
incrRefCount(c->argv[1]);
dictReplace(c->db->dict,c->argv[1],c->argv[2]);
incrRefCount(c->argv[2]);
lobj = lookupKeyWrite(c->db,c->argv[1]);
if (lobj == NULL) {
if (handleClientsWaitingListPush(c,c->argv[1],c->argv[2])) {
- addReply(c,shared.ok);
+ addReply(c,shared.cone);
return;
}
lobj = createListObject();
return;
}
if (handleClientsWaitingListPush(c,c->argv[1],c->argv[2])) {
- addReply(c,shared.ok);
+ addReply(c,shared.cone);
return;
}
list = lobj->ptr;
incrRefCount(c->argv[2]);
}
server.dirty++;
- addReply(c,shared.ok);
+ addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",listLength(list)));
}
static void lpushCommand(redisClient *c) {
#define REDIS_OP_UNION 0
#define REDIS_OP_DIFF 1
+#define REDIS_OP_INTER 2
static void sunionDiffGenericCommand(redisClient *c, robj **setskeys, int setsnum, robj *dstkey, int op) {
dict **dv = zmalloc(sizeof(dict*)*setsnum);
zskiplistNode *zn = zmalloc(sizeof(*zn));
zn->forward = zmalloc(sizeof(zskiplistNode*) * level);
- zn->span = zmalloc(sizeof(unsigned int) * level);
+ if (level > 0)
+ zn->span = zmalloc(sizeof(unsigned int) * (level - 1));
zn->score = score;
zn->obj = obj;
return zn;
zsl->header = zslCreateNode(ZSKIPLIST_MAXLEVEL,0,NULL);
for (j = 0; j < ZSKIPLIST_MAXLEVEL; j++) {
zsl->header->forward[j] = NULL;
- zsl->header->span[j] = 0;
+
+ /* span has space for ZSKIPLIST_MAXLEVEL-1 elements */
+ if (j < ZSKIPLIST_MAXLEVEL-1)
+ zsl->header->span[j] = 0;
}
zsl->header->backward = NULL;
zsl->tail = NULL;
static void zslInsert(zskiplist *zsl, double score, robj *obj) {
zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
- unsigned int span[ZSKIPLIST_MAXLEVEL];
+ unsigned int rank[ZSKIPLIST_MAXLEVEL];
int i, level;
x = zsl->header;
for (i = zsl->level-1; i >= 0; i--) {
- /* store span that is crossed to reach the insert position */
- span[i] = i == (zsl->level-1) ? 0 : span[i+1];
+ /* store rank that is crossed to reach the insert position */
+ rank[i] = i == (zsl->level-1) ? 0 : rank[i+1];
while (x->forward[i] &&
(x->forward[i]->score < score ||
(x->forward[i]->score == score &&
compareStringObjects(x->forward[i]->obj,obj) < 0))) {
- span[i] += x->span[i];
+ rank[i] += i > 0 ? x->span[i-1] : 1;
x = x->forward[i];
}
update[i] = x;
level = zslRandomLevel();
if (level > zsl->level) {
for (i = zsl->level; i < level; i++) {
- span[i] = 0;
+ rank[i] = 0;
update[i] = zsl->header;
- update[i]->span[i] = zsl->length;
+ update[i]->span[i-1] = zsl->length;
}
zsl->level = level;
}
update[i]->forward[i] = x;
/* update span covered by update[i] as x is inserted here */
- x->span[i] = update[i]->span[i] - (span[0] - span[i]);
- update[i]->span[i] = (span[0] - span[i]) + 1;
+ if (i > 0) {
+ x->span[i-1] = update[i]->span[i-1] - (rank[0] - rank[i]);
+ update[i]->span[i-1] = (rank[0] - rank[i]) + 1;
+ }
}
/* increment span for untouched levels */
for (i = level; i < zsl->level; i++) {
- update[i]->span[i]++;
+ update[i]->span[i-1]++;
}
x->backward = (update[0] == zsl->header) ? NULL : update[0];
if (x && score == x->score && compareStringObjects(x->obj,obj) == 0) {
for (i = 0; i < zsl->level; i++) {
if (update[i]->forward[i] == x) {
- update[i]->span[i] += x->span[i] - 1;
+ if (i > 0) {
+ update[i]->span[i-1] += x->span[i-1] - 1;
+ }
update[i]->forward[i] = x->forward[i];
} else {
- update[i]->span[i] -= 1;
+ /* invariant: i > 0, because update[0]->forward[0]
+ * is always equal to x */
+ update[i]->span[i-1] -= 1;
}
}
if (x->forward[0]) {
for (i = 0; i < zsl->level; i++) {
if (update[i]->forward[i] == x) {
- update[i]->span[i] += x->span[i] - 1;
+ if (i > 0) {
+ update[i]->span[i-1] += x->span[i-1] - 1;
+ }
update[i]->forward[i] = x->forward[i];
} else {
- update[i]->span[i] -= 1;
+ /* invariant: i > 0, because update[0]->forward[0]
+ * is always equal to x */
+ update[i]->span[i-1] -= 1;
}
}
if (x->forward[0]) {
(x->forward[i]->score < score ||
(x->forward[i]->score == score &&
compareStringObjects(x->forward[i]->obj,o) <= 0))) {
- rank += x->span[i];
+ rank += i > 0 ? x->span[i-1] : 1;
x = x->forward[i];
}
x = zsl->header;
for (i = zsl->level-1; i >= 0; i--) {
- while (x->forward[i] && (traversed + x->span[i]) <= rank) {
- traversed += x->span[i];
+ while (x->forward[i] && (traversed + (i > 0 ? x->span[i-1] : 1)) <= rank) {
+ traversed += i > 0 ? x->span[i-1] : 1;
x = x->forward[i];
}
}
}
+static void zunionInterGenericCommand(redisClient *c, robj *dstkey, int op) {
+ int i, j, k, zsetnum;
+ dict **srcdicts;
+ double *weights;
+ robj *dstobj;
+ zset *dstzset;
+ dictIterator *di;
+ dictEntry *de;
+
+ /* expect zsetnum input keys to be given */
+ zsetnum = atoi(c->argv[2]->ptr);
+ if (zsetnum < 1) {
+ addReplySds(c,sdsnew("-ERR at least 1 input key is needed for ZUNION/ZINTER\r\n"));
+ return;
+ }
+
+ /* test if the expected number of keys would overflow */
+ if (3+zsetnum > c->argc) {
+ addReply(c,shared.syntaxerr);
+ return;
+ }
+
+ /* read keys to be used for input */
+ srcdicts = zmalloc(sizeof(dict*) * zsetnum);
+ weights = zmalloc(sizeof(double) * zsetnum);
+ for (i = 0, j = 3; i < zsetnum; i++, j++) {
+ robj *zsetobj = lookupKeyWrite(c->db,c->argv[j]);
+ if (!zsetobj) {
+ srcdicts[i] = NULL;
+ } else {
+ if (zsetobj->type != REDIS_ZSET) {
+ zfree(srcdicts);
+ zfree(weights);
+ addReply(c,shared.wrongtypeerr);
+ return;
+ }
+ srcdicts[i] = ((zset*)zsetobj->ptr)->dict;
+ }
+
+ /* default all weights to 1 */
+ weights[i] = 1.0;
+ }
+
+ /* parse optional extra arguments */
+ if (j < c->argc) {
+ int remaining = c->argc-j;
+
+ while (remaining) {
+ if (!strcasecmp(c->argv[j]->ptr,"weights")) {
+ j++; remaining--;
+ if (remaining < zsetnum) {
+ zfree(srcdicts);
+ zfree(weights);
+ addReplySds(c,sdsnew("-ERR not enough weights for ZUNION/ZINTER\r\n"));
+ return;
+ }
+ for (i = 0; i < zsetnum; i++, j++, remaining--) {
+ weights[i] = strtod(c->argv[j]->ptr, NULL);
+ }
+ } else {
+ zfree(srcdicts);
+ zfree(weights);
+ addReply(c,shared.syntaxerr);
+ return;
+ }
+ }
+ }
+
+ dstobj = createZsetObject();
+ dstzset = dstobj->ptr;
+
+ if (op == REDIS_OP_INTER) {
+ /* store index of smallest zset in variable j */
+ for (i = 0, j = 0; i < zsetnum; i++) {
+ if (!srcdicts[i] || dictSize(srcdicts[i]) == 0) {
+ break;
+ }
+ if (dictSize(srcdicts[i]) < dictSize(srcdicts[j])) {
+ j = i;
+ }
+ }
+ /* skip going over all entries if at least one dict was NULL or empty */
+ if (i == zsetnum) {
+ /* precondition: all srcdicts are non-NULL and non-empty */
+ di = dictGetIterator(srcdicts[j]);
+ while((de = dictNext(di)) != NULL) {
+ double *score = zmalloc(sizeof(double));
+ *score = 0.0;
+
+ for (k = 0; k < zsetnum; k++) {
+ dictEntry *other = (k == j) ? de : dictFind(srcdicts[k],dictGetEntryKey(de));
+ if (other) {
+ *score = *score + weights[k] * (*(double*)dictGetEntryVal(other));
+ } else {
+ break;
+ }
+ }
+
+ /* skip entry when not present in every source dict */
+ if (k != zsetnum) {
+ zfree(score);
+ } else {
+ robj *o = dictGetEntryKey(de);
+ dictAdd(dstzset->dict,o,score);
+ incrRefCount(o); /* added to dictionary */
+ zslInsert(dstzset->zsl,*score,o);
+ incrRefCount(o); /* added to skiplist */
+ }
+ }
+ dictReleaseIterator(di);
+ }
+ } else if (op == REDIS_OP_UNION) {
+ for (i = 0; i < zsetnum; i++) {
+ if (!srcdicts[i]) continue;
+
+ di = dictGetIterator(srcdicts[i]);
+ while((de = dictNext(di)) != NULL) {
+ /* skip key when already processed */
+ if (dictFind(dstzset->dict,dictGetEntryKey(de)) != NULL) continue;
+
+ double *score = zmalloc(sizeof(double));
+ *score = 0.0;
+ for (j = 0; j < zsetnum; j++) {
+ if (!srcdicts[j]) continue;
+
+ dictEntry *other = (i == j) ? de : dictFind(srcdicts[j],dictGetEntryKey(de));
+ if (other) {
+ *score = *score + weights[j] * (*(double*)dictGetEntryVal(other));
+ }
+ }
+
+ robj *o = dictGetEntryKey(de);
+ dictAdd(dstzset->dict,o,score);
+ incrRefCount(o); /* added to dictionary */
+ zslInsert(dstzset->zsl,*score,o);
+ incrRefCount(o); /* added to skiplist */
+ }
+ dictReleaseIterator(di);
+ }
+ } else {
+ /* unknown operator */
+ redisAssert(op == REDIS_OP_INTER || op == REDIS_OP_UNION);
+ }
+
+ deleteKey(c->db,dstkey);
+ dictAdd(c->db->dict,dstkey,dstobj);
+ incrRefCount(dstkey);
+
+ addReplyLong(c, dstzset->zsl->length);
+ server.dirty++;
+ zfree(srcdicts);
+ zfree(weights);
+}
+
+static void zunionCommand(redisClient *c) {
+ zunionInterGenericCommand(c,c->argv[1], REDIS_OP_UNION);
+}
+
+static void zinterCommand(redisClient *c) {
+ zunionInterGenericCommand(c,c->argv[1], REDIS_OP_INTER);
+}
+
static void zrangeGenericCommand(redisClient *c, int reverse) {
robj *o;
int start = atoi(c->argv[2]->ptr);
if (end >= llen) end = llen-1;
rangelen = (end-start)+1;
- /* Return the result in form of a multi-bulk reply */
+ /* check if starting point is trivial, before searching
+ * the element in log(N) time */
if (reverse) {
- ln = zslGetElementByRank(zsl, llen - start);
+ ln = start == 0 ? zsl->tail : zslGetElementByRank(zsl, llen - start);
} else {
- ln = zslGetElementByRank(zsl, start + 1);
+ ln = start == 0 ? zsl->header->forward[0] : zslGetElementByRank(zsl, start + 1);
}
+ /* Return the result in form of a multi-bulk reply */
addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",
withscores ? (rangelen*2) : rangelen));
for (j = 0; j < rangelen; j++) {
}
}
+/* =================================== Hashes =============================== */
+static void hsetCommand(redisClient *c) {
+ int update = 0;
+ robj *o = lookupKeyWrite(c->db,c->argv[1]);
+
+ if (o == NULL) {
+ o = createHashObject();
+ dictAdd(c->db->dict,c->argv[1],o);
+ incrRefCount(c->argv[1]);
+ } else {
+ if (o->type != REDIS_HASH) {
+ addReply(c,shared.wrongtypeerr);
+ return;
+ }
+ }
+ if (o->encoding == REDIS_ENCODING_ZIPMAP) {
+ unsigned char *zm = o->ptr;
+
+ zm = zipmapSet(zm,c->argv[2]->ptr,sdslen(c->argv[2]->ptr),
+ c->argv[3]->ptr,sdslen(c->argv[3]->ptr),&update);
+ o->ptr = zm;
+ } else {
+ if (dictAdd(o->ptr,c->argv[2],c->argv[3]) == DICT_OK) {
+ incrRefCount(c->argv[2]);
+ } else {
+ update = 1;
+ }
+ incrRefCount(c->argv[3]);
+ }
+ server.dirty++;
+ addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",update == 0));
+}
+
+static void hgetCommand(redisClient *c) {
+ robj *o = lookupKeyRead(c->db,c->argv[1]);
+
+ if (o == NULL) {
+ addReply(c,shared.nullbulk);
+ return;
+ } else {
+ if (o->encoding == REDIS_ENCODING_ZIPMAP) {
+ unsigned char *zm = o->ptr;
+ unsigned char *val;
+ unsigned int vlen;
+
+ if (zipmapGet(zm,c->argv[2]->ptr,sdslen(c->argv[2]->ptr), &val,&vlen)) {
+ addReplySds(c,sdscatprintf(sdsempty(),"$%u\r\n", vlen));
+ addReplySds(c,sdsnewlen(val,vlen));
+ addReply(c,shared.crlf);
+ return;
+ } else {
+ addReply(c,shared.nullbulk);
+ return;
+ }
+ } else {
+ struct dictEntry *de;
+
+ de = dictFind(o->ptr,c->argv[2]);
+ if (de == NULL) {
+ addReply(c,shared.nullbulk);
+ } else {
+ robj *e = dictGetEntryVal(de);
+
+ addReplyBulkLen(c,e);
+ addReply(c,e);
+ addReply(c,shared.crlf);
+ }
+ }
+ }
+}
+
/* ========================= Non type-specific commands ==================== */
static void flushdbCommand(redisClient *c) {
static int syncWithMaster(void) {
char buf[1024], tmpfile[256], authcmd[1024];
- int dumpsize;
+ long dumpsize;
int fd = anetTcpConnect(NULL,server.masterhost,server.masterport);
int dfd;
redisLog(REDIS_WARNING,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?");
return REDIS_ERR;
}
- dumpsize = atoi(buf+1);
- redisLog(REDIS_NOTICE,"Receiving %d bytes data dump from MASTER",dumpsize);
+ dumpsize = strtol(buf+1,NULL,10);
+ redisLog(REDIS_NOTICE,"Receiving %ld bytes data dump from MASTER",dumpsize);
/* Read the bulk write data on a temp file */
snprintf(tmpfile,256,"temp-%d.%ld.rdb",(int)time(NULL),(long int)random());
dfd = open(tmpfile,O_CREAT|O_WRONLY,0644);