/* Anti-warning macro... */
#define REDIS_NOTUSED(V) ((void) V)
+#define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */
+#define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */
/*================================= Data types ============================== */
robj *pattern;
} redisSortOperation;
+/* ZSETs use a specialized version of Skiplists */
+
+typedef struct zskiplistNode {
+ struct zskiplistNode **forward;
+ double score;
+ robj *obj;
+} zskiplistNode;
+
+typedef struct zskiplist {
+ struct zskiplistNode *header;
+ long length;
+ int level;
+} zskiplist;
+
typedef struct zset {
dict *dict;
- /* tree *tree; */
+ zskiplist *zsl;
} zset;
+/* Our shared "common" objects */
+
struct sharedObjectsStruct {
robj *crlf, *ok, *err, *emptybulk, *czero, *cone, *pong, *space,
*colon, *nullbulk, *nullmultibulk,
static void rdbRemoveTempFile(pid_t childpid);
static size_t stringObjectLen(robj *o);
static void processInputBuffer(redisClient *c);
+static zskiplist *zslCreate(void);
static void authCommand(redisClient *c);
static void pingCommand(redisClient *c);
* keys and radis objects as values (objects can hold SDS strings,
* lists, sets). */
-#if 0
static void dictVanillaFree(void *privdata, void *val)
{
DICT_NOTUSED(privdata);
zfree(val);
}
-#endif
static int sdsDictKeyCompare(void *privdata, const void *key1,
const void *key2)
NULL /* val destructor */
};
-#if 0
static dictType zsetDictType = {
dictEncObjHash, /* hash function */
NULL, /* key dup */
dictRedisObjectDestructor, /* key destructor */
dictVanillaFree /* val destructor */
};
-#endif
static dictType hashDictType = {
dictObjHash, /* hash function */
static void appendServerSaveParams(time_t seconds, int changes) {
server.saveparams = zrealloc(server.saveparams,sizeof(struct saveparam)*(server.saveparamslen+1));
- if (server.saveparams == NULL) oom("appendServerSaveParams");
server.saveparams[server.saveparamslen].seconds = seconds;
server.saveparams[server.saveparamslen].changes = changes;
server.saveparamslen++;
server.el = aeCreateEventLoop();
server.db = zmalloc(sizeof(redisDb)*server.dbnum);
server.sharingpool = dictCreate(&setDictType,NULL);
- if (!server.db || !server.clients || !server.slaves || !server.monitors || !server.el || !server.objfreelist)
- oom("server initialization"); /* Fatal OOM */
server.fd = anetTcpServer(server.neterr, server.port, server.bindaddr);
if (server.fd == -1) {
redisLog(REDIS_WARNING, "Opening TCP port: %s", server.neterr);
}
/* Now the output buffer is empty, add the new single element */
o = createObject(REDIS_STRING,sdsnewlen(buf,totlen));
- if (!listAddNodeTail(c->reply,o)) oom("listAddNodeTail");
+ listAddNodeTail(c->reply,o);
}
}
outv = static_outv;
} else {
outv = zmalloc(sizeof(robj*)*(argc*2+1));
- if (!outv) oom("replicationFeedSlaves");
}
for (j = 0; j < argc; j++) {
return;
}
argv = sdssplitlen(query,sdslen(query)," ",1,&argc);
- if (argv == NULL) oom("sdssplitlen");
sdsfree(query);
if (c->argv) zfree(c->argv);
c->argv = zmalloc(sizeof(robj*)*argc);
- if (c->argv == NULL) oom("allocating arguments list for client");
for (j = 0; j < argc; j++) {
if (sdslen(argv[j])) {
c->lastinteraction = time(NULL);
c->authenticated = 0;
c->replstate = REDIS_REPL_NONE;
- if ((c->reply = listCreate()) == NULL) oom("listCreate");
+ c->reply = listCreate();
listSetFreeMethod(c->reply,decrRefCount);
listSetDupMethod(c->reply,dupClientReplyValue);
if (aeCreateFileEvent(server.el, c->fd, AE_READABLE,
freeClient(c);
return NULL;
}
- if (!listAddNodeTail(server.clients,c)) oom("listAddNodeTail");
+ listAddNodeTail(server.clients,c);
return c;
}
} else {
incrRefCount(obj);
}
- if (!listAddNodeTail(c->reply,obj)) oom("listAddNodeTail");
+ listAddNodeTail(c->reply,obj);
}
static void addReplySds(redisClient *c, sds s) {
} else {
o = zmalloc(sizeof(*o));
}
- if (!o) oom("createObject");
o->type = type;
o->encoding = REDIS_ENCODING_RAW;
o->ptr = ptr;
static robj *createListObject(void) {
list *l = listCreate();
- if (!l) oom("listCreate");
listSetFreeMethod(l,decrRefCount);
return createObject(REDIS_LIST,l);
}
static robj *createSetObject(void) {
dict *d = dictCreate(&setDictType,NULL);
- if (!d) oom("dictCreate");
return createObject(REDIS_SET,d);
}
-#if 0
static robj *createZsetObject(void) {
- dict *d = dictCreate(&zsetDictType,NULL);
- if (!d) oom("dictCreate");
- return createObject(REDIS_ZSET,d);
+ zset *zs = zmalloc(sizeof(*zs));
+
+ zs->dict = dictCreate(&zsetDictType,NULL);
+ zs->zsl = zslCreate();
+ return createObject(REDIS_ZSET,zs);
}
-#endif
static void freeStringObject(robj *o) {
if (o->encoding == REDIS_ENCODING_RAW) {
dictIterator *di = dictGetIterator(set);
dictEntry *de;
- if (!set) oom("dictGetIteraotr");
if (rdbSaveLen(fp,dictSize(set)) == -1) goto werr;
while((de = dictNext(di)) != NULL) {
robj *eleobj = dictGetEntryKey(de);
if ((ele = rdbLoadStringObject(fp,rdbver)) == NULL) goto eoferr;
tryObjectEncoding(ele);
if (type == REDIS_LIST) {
- if (!listAddNodeTail((list*)o->ptr,ele))
- oom("listAddNodeTail");
+ listAddNodeTail((list*)o->ptr,ele);
} else {
- if (dictAdd((dict*)o->ptr,ele,NULL) == DICT_ERR)
- oom("dictAdd");
+ dictAdd((dict*)o->ptr,ele,NULL);
}
}
} else {
robj *lenobj = createObject(REDIS_STRING,NULL);
di = dictGetIterator(c->db->dict);
- if (!di) oom("dictGetIterator");
addReply(c,lenobj);
decrRefCount(lenobj);
while((de = dictNext(di)) != NULL) {
lobj = createListObject();
list = lobj->ptr;
if (where == REDIS_HEAD) {
- if (!listAddNodeHead(list,c->argv[2])) oom("listAddNodeHead");
+ listAddNodeHead(list,c->argv[2]);
} else {
- if (!listAddNodeTail(list,c->argv[2])) oom("listAddNodeTail");
+ listAddNodeTail(list,c->argv[2]);
}
dictAdd(c->db->dict,c->argv[1],lobj);
incrRefCount(c->argv[1]);
}
list = lobj->ptr;
if (where == REDIS_HEAD) {
- if (!listAddNodeHead(list,c->argv[2])) oom("listAddNodeHead");
+ listAddNodeHead(list,c->argv[2]);
} else {
- if (!listAddNodeTail(list,c->argv[2])) oom("listAddNodeTail");
+ listAddNodeTail(list,c->argv[2]);
}
incrRefCount(c->argv[2]);
}
robj *lenobj = NULL, *dstset = NULL;
int j, cardinality = 0;
- if (!dv) oom("sinterGenericCommand");
for (j = 0; j < setsnum; j++) {
robj *setobj;
* the element against all the other sets, if at least one set does
* not include the element it is discarded */
di = dictGetIterator(dv[0]);
- if (!di) oom("dictGetIterator");
while((de = dictNext(di)) != NULL) {
robj *ele;
robj *dstset = NULL;
int j, cardinality = 0;
- if (!dv) oom("sunionDiffGenericCommand");
for (j = 0; j < setsnum; j++) {
robj *setobj;
if (!dv[j]) continue; /* non existing keys are like empty sets */
di = dictGetIterator(dv[j]);
- if (!di) oom("dictGetIterator");
while((de = dictNext(di)) != NULL) {
robj *ele;
if (!dstkey) {
addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",cardinality));
di = dictGetIterator(dstset->ptr);
- if (!di) oom("dictGetIterator");
while((de = dictNext(di)) != NULL) {
robj *ele;
sunionDiffGenericCommand(c,c->argv+2,c->argc-2,c->argv[1],REDIS_OP_DIFF);
}
+/* ==================================== ZSets =============================== */
+
+/* ZSETs are ordered sets using two data structures to hold the same elements
+ * in order to get O(log(N)) INSERT and REMOVE operations into a sorted
+ * data structure.
+ *
+ * The elements are added to an hash table mapping Redis objects to scores.
+ * At the same time the elements are added to a skip list mapping scores
+ * to Redis objects (so objects are sorted by scores in this "view"). */
+
+/* This skiplist implementation is almost a C translation of the original
+ * algorithm described by William Pugh in "Skip Lists: A Probabilistic
+ * Alternative to Balanced Trees", modified in three ways:
+ * a) this implementation allows for repeated values.
+ * b) the comparison is not just by key (our 'score') but by satellite data.
+ * c) there is a back pointer, so it's a doubly linked list with the back
+ * pointers being only at "level 1". This allows to traverse the list
+ * from tail to head, useful for ZREVRANGE. */
+
+static zskiplistNode *zslCreateNode(int level, double score, robj *obj) {
+ zskiplistNode *zn = zmalloc(sizeof(*zn));
+
+ zn->forward = zmalloc(sizeof(zskiplistNode*) * level);
+ zn->score = score;
+ zn->obj = obj;
+ return zn;
+}
+
+static zskiplist *zslCreate(void) {
+ int j;
+ zskiplist *zsl;
+
+ zsl = zmalloc(sizeof(*zsl));
+ zsl->level = 1;
+ zsl->header = zslCreateNode(ZSKIPLIST_MAXLEVEL,0,NULL);
+ for (j = 0; j < ZSKIPLIST_MAXLEVEL; j++)
+ zsl->header->forward[j] = NULL;
+ return zsl;
+}
+
+static int zslRandomLevel(void) {
+ int level = 1;
+ while ((random()&0xFFFF) < (ZSKIPLIST_P * 0xFFFF))
+ level += 1;
+ return level;
+}
+
+static void zslInsert(zskiplist *zsl, double score, robj *obj) {
+ zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
+ int i, level;
+
+ x = zsl->header;
+ for (i = zsl->level-1; i >= 0; i--) {
+ while (x->forward[i]->score < score)
+ x = x->forward[i];
+ update[i] = x;
+ }
+ x = x->forward[1];
+ /* we assume the key is not already inside, since we allow duplicated
+ * scores, and the re-insertion of score and redis object should never
+ * happpen since the caller of zslInsert() should test in the hash table
+ * if the element is already inside or not. */
+ level = zslRandomLevel();
+ if (level > zsl->level) {
+ for (i = zsl->level; i < level; i++)
+ update[i] = zsl->header;
+ zsl->level = level;
+ }
+ x = zslCreateNode(level,score,obj);
+ for (i = 0; i < level; i++) {
+ x->forward[i] = update[i]->forward[i];
+ update[i]->forward[i] = x;
+ }
+}
+
+/* ========================= Non type-specific commands ==================== */
+
static void flushdbCommand(redisClient *c) {
server.dirty += dictSize(c->db->dict);
dictEmpty(c->db->dict);
static redisSortOperation *createSortOperation(int type, robj *pattern) {
redisSortOperation *so = zmalloc(sizeof(*so));
- if (!so) oom("createSortOperation");
so->type = type;
so->pattern = pattern;
return so;
listLength((list*)sortval->ptr) :
dictSize((dict*)sortval->ptr);
vector = zmalloc(sizeof(redisSortObject)*vectorlen);
- if (!vector) oom("allocating objects vector for SORT");
j = 0;
if (sortval->type == REDIS_LIST) {
list *list = sortval->ptr;
dictEntry *setele;
di = dictGetIterator(set);
- if (!di) oom("dictGetIterator");
while((setele = dictNext(di)) != NULL) {
vector[j].obj = dictGetEntryKey(setele);
vector[j].u.score = 0;
c->flags |= (REDIS_SLAVE|REDIS_MONITOR);
c->slaveseldb = 0;
- if (!listAddNodeTail(server.monitors,c)) oom("listAddNodeTail");
+ listAddNodeTail(server.monitors,c);
addReply(c,shared.ok);
}
* another slave. Set the right state, and copy the buffer. */
listRelease(c->reply);
c->reply = listDup(slave->reply);
- if (!c->reply) oom("listDup copying slave reply list");
c->replstate = REDIS_REPL_WAIT_BGSAVE_END;
redisLog(REDIS_NOTICE,"Waiting for end of BGSAVE for SYNC");
} else {
c->repldbfd = -1;
c->flags |= REDIS_SLAVE;
c->slaveseldb = 0;
- if (!listAddNodeTail(server.slaves,c)) oom("listAddNodeTail");
+ listAddNodeTail(server.slaves,c);
return;
}