+static void zrangeGenericCommand(redisClient *c, int reverse) {
+ robj *o;
+ int start = atoi(c->argv[2]->ptr);
+ int end = atoi(c->argv[3]->ptr);
+
+ o = lookupKeyRead(c->db,c->argv[1]);
+ if (o == NULL) {
+ addReply(c,shared.nullmultibulk);
+ } else {
+ if (o->type != REDIS_ZSET) {
+ addReply(c,shared.wrongtypeerr);
+ } else {
+ zset *zsetobj = o->ptr;
+ zskiplist *zsl = zsetobj->zsl;
+ zskiplistNode *ln;
+
+ int llen = zsl->length;
+ int rangelen, j;
+ robj *ele;
+
+ /* convert negative indexes */
+ if (start < 0) start = llen+start;
+ if (end < 0) end = llen+end;
+ if (start < 0) start = 0;
+ if (end < 0) end = 0;
+
+ /* indexes sanity checks */
+ if (start > end || start >= llen) {
+ /* Out of range start or start > end result in empty list */
+ addReply(c,shared.emptymultibulk);
+ return;
+ }
+ if (end >= llen) end = llen-1;
+ rangelen = (end-start)+1;
+
+ /* Return the result in form of a multi-bulk reply */
+ if (reverse) {
+ ln = zsl->tail;
+ while (start--)
+ ln = ln->backward;
+ } else {
+ ln = zsl->header->forward[0];
+ while (start--)
+ ln = ln->forward[0];
+ }
+
+ addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",rangelen));
+ for (j = 0; j < rangelen; j++) {
+ ele = ln->obj;
+ addReplyBulkLen(c,ele);
+ addReply(c,ele);
+ addReply(c,shared.crlf);
+ ln = reverse ? ln->backward : ln->forward[0];
+ }
+ }
+ }
+}
+
+static void zrangeCommand(redisClient *c) {
+ zrangeGenericCommand(c,0);
+}
+
+static void zrevrangeCommand(redisClient *c) {
+ zrangeGenericCommand(c,1);
+}
+
+static void zrangebyscoreCommand(redisClient *c) {
+ robj *o;
+ double min = strtod(c->argv[2]->ptr,NULL);
+ double max = strtod(c->argv[3]->ptr,NULL);
+
+ o = lookupKeyRead(c->db,c->argv[1]);
+ if (o == NULL) {
+ addReply(c,shared.nullmultibulk);
+ } else {
+ if (o->type != REDIS_ZSET) {
+ addReply(c,shared.wrongtypeerr);
+ } else {
+ zset *zsetobj = o->ptr;
+ zskiplist *zsl = zsetobj->zsl;
+ zskiplistNode *ln;
+ robj *ele, *lenobj;
+ unsigned int rangelen = 0;
+
+ /* Get the first node with the score >= min */
+ ln = zslFirstWithScore(zsl,min);
+ if (ln == NULL) {
+ /* No element matching the speciifed interval */
+ addReply(c,shared.emptymultibulk);
+ return;
+ }
+
+ /* We don't know in advance how many matching elements there
+ * are in the list, so we push this object that will represent
+ * the multi-bulk length in the output buffer, and will "fix"
+ * it later */
+ lenobj = createObject(REDIS_STRING,NULL);
+ addReply(c,lenobj);
+
+ while(ln && ln->score <= max) {
+ ele = ln->obj;
+ addReplyBulkLen(c,ele);
+ addReply(c,ele);
+ addReply(c,shared.crlf);
+ ln = ln->forward[0];
+ rangelen++;
+ }
+ lenobj->ptr = sdscatprintf(sdsempty(),"*%d\r\n",rangelen);
+ }
+ }
+}
+
+static void zcardCommand(redisClient *c) {
+ robj *o;
+ zset *zs;
+
+ o = lookupKeyRead(c->db,c->argv[1]);
+ if (o == NULL) {
+ addReply(c,shared.czero);
+ return;
+ } else {
+ if (o->type != REDIS_ZSET) {
+ addReply(c,shared.wrongtypeerr);
+ } else {
+ zs = o->ptr;
+ addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",zs->zsl->length));
+ }
+ }
+}
+
+static void zscoreCommand(redisClient *c) {
+ robj *o;
+ zset *zs;
+
+ o = lookupKeyRead(c->db,c->argv[1]);
+ if (o == NULL) {
+ addReply(c,shared.nullbulk);
+ return;
+ } else {
+ if (o->type != REDIS_ZSET) {
+ addReply(c,shared.wrongtypeerr);
+ } else {
+ dictEntry *de;
+
+ zs = o->ptr;
+ de = dictFind(zs->dict,c->argv[2]);
+ if (!de) {
+ addReply(c,shared.nullbulk);
+ } else {
+ double *score = dictGetEntryVal(de);
+
+ addReplyDouble(c,*score);
+ }
+ }
+ }
+}
+
+/* ========================= Non type-specific commands ==================== */
+
+static void flushdbCommand(redisClient *c) {
+ server.dirty += dictSize(c->db->dict);
+ dictEmpty(c->db->dict);
+ dictEmpty(c->db->expires);
+ addReply(c,shared.ok);
+}
+
+static void flushallCommand(redisClient *c) {
+ server.dirty += emptyDb();
+ addReply(c,shared.ok);
+ rdbSave(server.dbfilename);
+ server.dirty++;
+}
+
+static redisSortOperation *createSortOperation(int type, robj *pattern) {
+ redisSortOperation *so = zmalloc(sizeof(*so));
+ so->type = type;
+ so->pattern = pattern;
+ return so;
+}
+
+/* Return the value associated to the key with a name obtained
+ * substituting the first occurence of '*' in 'pattern' with 'subst' */
+static robj *lookupKeyByPattern(redisDb *db, robj *pattern, robj *subst) {
+ char *p;
+ sds spat, ssub;
+ robj keyobj;
+ int prefixlen, sublen, postfixlen;
+ /* Expoit the internal sds representation to create a sds string allocated on the stack in order to make this function faster */
+ struct {
+ long len;
+ long free;
+ char buf[REDIS_SORTKEY_MAX+1];
+ } keyname;
+
+ /* If the pattern is "#" return the substitution object itself in order
+ * to implement the "SORT ... GET #" feature. */
+ spat = pattern->ptr;
+ if (spat[0] == '#' && spat[1] == '\0') {
+ return subst;
+ }
+
+ /* The substitution object may be specially encoded. If so we create
+ * a decoded object on the fly. Otherwise getDecodedObject will just
+ * increment the ref count, that we'll decrement later. */
+ subst = getDecodedObject(subst);
+
+ ssub = subst->ptr;
+ if (sdslen(spat)+sdslen(ssub)-1 > REDIS_SORTKEY_MAX) return NULL;
+ p = strchr(spat,'*');
+ if (!p) {
+ decrRefCount(subst);
+ return NULL;
+ }
+
+ prefixlen = p-spat;
+ sublen = sdslen(ssub);
+ postfixlen = sdslen(spat)-(prefixlen+1);
+ memcpy(keyname.buf,spat,prefixlen);
+ memcpy(keyname.buf+prefixlen,ssub,sublen);
+ memcpy(keyname.buf+prefixlen+sublen,p+1,postfixlen);
+ keyname.buf[prefixlen+sublen+postfixlen] = '\0';
+ keyname.len = prefixlen+sublen+postfixlen;
+
+ keyobj.refcount = 1;
+ keyobj.type = REDIS_STRING;
+ keyobj.ptr = ((char*)&keyname)+(sizeof(long)*2);
+
+ decrRefCount(subst);
+
+ /* printf("lookup '%s' => %p\n", keyname.buf,de); */
+ return lookupKeyRead(db,&keyobj);
+}
+
+/* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with
+ * the additional parameter is not standard but a BSD-specific we have to
+ * pass sorting parameters via the global 'server' structure */
+static int sortCompare(const void *s1, const void *s2) {
+ const redisSortObject *so1 = s1, *so2 = s2;
+ int cmp;
+
+ if (!server.sort_alpha) {
+ /* Numeric sorting. Here it's trivial as we precomputed scores */
+ if (so1->u.score > so2->u.score) {
+ cmp = 1;
+ } else if (so1->u.score < so2->u.score) {
+ cmp = -1;
+ } else {
+ cmp = 0;
+ }
+ } else {
+ /* Alphanumeric sorting */
+ if (server.sort_bypattern) {
+ if (!so1->u.cmpobj || !so2->u.cmpobj) {
+ /* At least one compare object is NULL */
+ if (so1->u.cmpobj == so2->u.cmpobj)
+ cmp = 0;
+ else if (so1->u.cmpobj == NULL)
+ cmp = -1;
+ else
+ cmp = 1;
+ } else {
+ /* We have both the objects, use strcoll */
+ cmp = strcoll(so1->u.cmpobj->ptr,so2->u.cmpobj->ptr);
+ }
+ } else {
+ /* Compare elements directly */
+ robj *dec1, *dec2;
+
+ dec1 = getDecodedObject(so1->obj);
+ dec2 = getDecodedObject(so2->obj);
+ cmp = strcoll(dec1->ptr,dec2->ptr);
+ decrRefCount(dec1);
+ decrRefCount(dec2);
+ }
+ }
+ return server.sort_desc ? -cmp : cmp;
+}
+
+/* The SORT command is the most complex command in Redis. Warning: this code
+ * is optimized for speed and a bit less for readability */
+static void sortCommand(redisClient *c) {
+ list *operations;
+ int outputlen = 0;
+ int desc = 0, alpha = 0;
+ int limit_start = 0, limit_count = -1, start, end;
+ int j, dontsort = 0, vectorlen;
+ int getop = 0; /* GET operation counter */
+ robj *sortval, *sortby = NULL, *storekey = NULL;
+ redisSortObject *vector; /* Resulting vector to sort */
+
+ /* Lookup the key to sort. It must be of the right types */
+ sortval = lookupKeyRead(c->db,c->argv[1]);
+ if (sortval == NULL) {
+ addReply(c,shared.nokeyerr);
+ return;
+ }
+ if (sortval->type != REDIS_SET && sortval->type != REDIS_LIST) {
+ addReply(c,shared.wrongtypeerr);
+ return;
+ }
+
+ /* Create a list of operations to perform for every sorted element.
+ * Operations can be GET/DEL/INCR/DECR */
+ operations = listCreate();
+ listSetFreeMethod(operations,zfree);
+ j = 2;
+
+ /* Now we need to protect sortval incrementing its count, in the future
+ * SORT may have options able to overwrite/delete keys during the sorting
+ * and the sorted key itself may get destroied */
+ incrRefCount(sortval);
+
+ /* The SORT command has an SQL-alike syntax, parse it */
+ while(j < c->argc) {
+ int leftargs = c->argc-j-1;
+ if (!strcasecmp(c->argv[j]->ptr,"asc")) {
+ desc = 0;
+ } else if (!strcasecmp(c->argv[j]->ptr,"desc")) {
+ desc = 1;
+ } else if (!strcasecmp(c->argv[j]->ptr,"alpha")) {
+ alpha = 1;
+ } else if (!strcasecmp(c->argv[j]->ptr,"limit") && leftargs >= 2) {
+ limit_start = atoi(c->argv[j+1]->ptr);
+ limit_count = atoi(c->argv[j+2]->ptr);
+ j+=2;
+ } else if (!strcasecmp(c->argv[j]->ptr,"store") && leftargs >= 1) {
+ storekey = c->argv[j+1];
+ j++;
+ } else if (!strcasecmp(c->argv[j]->ptr,"by") && leftargs >= 1) {
+ sortby = c->argv[j+1];
+ /* If the BY pattern does not contain '*', i.e. it is constant,
+ * we don't need to sort nor to lookup the weight keys. */
+ if (strchr(c->argv[j+1]->ptr,'*') == NULL) dontsort = 1;
+ j++;
+ } else if (!strcasecmp(c->argv[j]->ptr,"get") && leftargs >= 1) {
+ listAddNodeTail(operations,createSortOperation(
+ REDIS_SORT_GET,c->argv[j+1]));
+ getop++;
+ j++;
+ } else {
+ decrRefCount(sortval);
+ listRelease(operations);
+ addReply(c,shared.syntaxerr);
+ return;
+ }
+ j++;
+ }
+
+ /* Load the sorting vector with all the objects to sort */
+ vectorlen = (sortval->type == REDIS_LIST) ?
+ listLength((list*)sortval->ptr) :
+ dictSize((dict*)sortval->ptr);
+ vector = zmalloc(sizeof(redisSortObject)*vectorlen);
+ j = 0;
+ if (sortval->type == REDIS_LIST) {
+ list *list = sortval->ptr;
+ listNode *ln;
+
+ listRewind(list);
+ while((ln = listYield(list))) {
+ robj *ele = ln->value;
+ vector[j].obj = ele;
+ vector[j].u.score = 0;
+ vector[j].u.cmpobj = NULL;
+ j++;
+ }
+ } else {
+ dict *set = sortval->ptr;
+ dictIterator *di;
+ dictEntry *setele;
+
+ di = dictGetIterator(set);
+ while((setele = dictNext(di)) != NULL) {
+ vector[j].obj = dictGetEntryKey(setele);
+ vector[j].u.score = 0;
+ vector[j].u.cmpobj = NULL;
+ j++;
+ }
+ dictReleaseIterator(di);
+ }
+ assert(j == vectorlen);
+
+ /* Now it's time to load the right scores in the sorting vector */
+ if (dontsort == 0) {
+ for (j = 0; j < vectorlen; j++) {
+ if (sortby) {
+ robj *byval;
+
+ byval = lookupKeyByPattern(c->db,sortby,vector[j].obj);
+ if (!byval || byval->type != REDIS_STRING) continue;
+ if (alpha) {
+ vector[j].u.cmpobj = getDecodedObject(byval);
+ } else {
+ if (byval->encoding == REDIS_ENCODING_RAW) {
+ vector[j].u.score = strtod(byval->ptr,NULL);
+ } else {
+ /* Don't need to decode the object if it's
+ * integer-encoded (the only encoding supported) so
+ * far. We can just cast it */
+ if (byval->encoding == REDIS_ENCODING_INT) {
+ vector[j].u.score = (long)byval->ptr;
+ } else
+ assert(1 != 1);
+ }
+ }
+ } else {
+ if (!alpha) {
+ if (vector[j].obj->encoding == REDIS_ENCODING_RAW)
+ vector[j].u.score = strtod(vector[j].obj->ptr,NULL);
+ else {
+ if (vector[j].obj->encoding == REDIS_ENCODING_INT)
+ vector[j].u.score = (long) vector[j].obj->ptr;
+ else
+ assert(1 != 1);
+ }
+ }
+ }
+ }
+ }
+
+ /* We are ready to sort the vector... perform a bit of sanity check
+ * on the LIMIT option too. We'll use a partial version of quicksort. */
+ start = (limit_start < 0) ? 0 : limit_start;
+ end = (limit_count < 0) ? vectorlen-1 : start+limit_count-1;
+ if (start >= vectorlen) {
+ start = vectorlen-1;
+ end = vectorlen-2;
+ }
+ if (end >= vectorlen) end = vectorlen-1;
+
+ if (dontsort == 0) {
+ server.sort_desc = desc;
+ server.sort_alpha = alpha;
+ server.sort_bypattern = sortby ? 1 : 0;
+ if (sortby && (start != 0 || end != vectorlen-1))
+ pqsort(vector,vectorlen,sizeof(redisSortObject),sortCompare, start,end);
+ else
+ qsort(vector,vectorlen,sizeof(redisSortObject),sortCompare);
+ }
+
+ /* Send command output to the output buffer, performing the specified
+ * GET/DEL/INCR/DECR operations if any. */
+ outputlen = getop ? getop*(end-start+1) : end-start+1;
+ if (storekey == NULL) {
+ /* STORE option not specified, sent the sorting result to client */
+ addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",outputlen));
+ for (j = start; j <= end; j++) {
+ listNode *ln;
+ if (!getop) {
+ addReplyBulkLen(c,vector[j].obj);
+ addReply(c,vector[j].obj);
+ addReply(c,shared.crlf);
+ }
+ listRewind(operations);
+ while((ln = listYield(operations))) {
+ redisSortOperation *sop = ln->value;
+ robj *val = lookupKeyByPattern(c->db,sop->pattern,
+ vector[j].obj);
+
+ if (sop->type == REDIS_SORT_GET) {
+ if (!val || val->type != REDIS_STRING) {
+ addReply(c,shared.nullbulk);
+ } else {
+ addReplyBulkLen(c,val);
+ addReply(c,val);
+ addReply(c,shared.crlf);
+ }
+ } else {
+ assert(sop->type == REDIS_SORT_GET); /* always fails */
+ }
+ }
+ }
+ } else {
+ robj *listObject = createListObject();
+ list *listPtr = (list*) listObject->ptr;
+
+ /* STORE option specified, set the sorting result as a List object */
+ for (j = start; j <= end; j++) {
+ listNode *ln;
+ if (!getop) {
+ listAddNodeTail(listPtr,vector[j].obj);
+ incrRefCount(vector[j].obj);
+ }
+ listRewind(operations);
+ while((ln = listYield(operations))) {
+ redisSortOperation *sop = ln->value;
+ robj *val = lookupKeyByPattern(c->db,sop->pattern,
+ vector[j].obj);
+
+ if (sop->type == REDIS_SORT_GET) {
+ if (!val || val->type != REDIS_STRING) {
+ listAddNodeTail(listPtr,createStringObject("",0));
+ } else {
+ listAddNodeTail(listPtr,val);
+ incrRefCount(val);
+ }
+ } else {
+ assert(sop->type == REDIS_SORT_GET); /* always fails */
+ }
+ }
+ }
+ if (dictReplace(c->db->dict,storekey,listObject)) {
+ incrRefCount(storekey);
+ }
+ /* Note: we add 1 because the DB is dirty anyway since even if the
+ * SORT result is empty a new key is set and maybe the old content
+ * replaced. */
+ server.dirty += 1+outputlen;
+ addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",outputlen));
+ }
+
+ /* Cleanup */
+ decrRefCount(sortval);
+ listRelease(operations);
+ for (j = 0; j < vectorlen; j++) {
+ if (sortby && alpha && vector[j].u.cmpobj)
+ decrRefCount(vector[j].u.cmpobj);
+ }
+ zfree(vector);
+}
+
+static void infoCommand(redisClient *c) {
+ sds info;
+ time_t uptime = time(NULL)-server.stat_starttime;
+ int j;
+
+ info = sdscatprintf(sdsempty(),
+ "redis_version:%s\r\n"
+ "arch_bits:%s\r\n"
+ "uptime_in_seconds:%d\r\n"
+ "uptime_in_days:%d\r\n"
+ "connected_clients:%d\r\n"
+ "connected_slaves:%d\r\n"
+ "used_memory:%zu\r\n"
+ "changes_since_last_save:%lld\r\n"
+ "bgsave_in_progress:%d\r\n"
+ "last_save_time:%d\r\n"
+ "total_connections_received:%lld\r\n"
+ "total_commands_processed:%lld\r\n"
+ "role:%s\r\n"
+ ,REDIS_VERSION,
+ (sizeof(long) == 8) ? "64" : "32",
+ uptime,
+ uptime/(3600*24),
+ listLength(server.clients)-listLength(server.slaves),
+ listLength(server.slaves),
+ server.usedmemory,
+ server.dirty,
+ server.bgsavechildpid != -1,
+ server.lastsave,
+ server.stat_numconnections,
+ server.stat_numcommands,
+ server.masterhost == NULL ? "master" : "slave"
+ );
+ if (server.masterhost) {
+ info = sdscatprintf(info,
+ "master_host:%s\r\n"
+ "master_port:%d\r\n"
+ "master_link_status:%s\r\n"
+ "master_last_io_seconds_ago:%d\r\n"
+ ,server.masterhost,
+ server.masterport,
+ (server.replstate == REDIS_REPL_CONNECTED) ?
+ "up" : "down",
+ server.master ? ((int)(time(NULL)-server.master->lastinteraction)) : -1
+ );
+ }
+ for (j = 0; j < server.dbnum; j++) {
+ long long keys, vkeys;
+
+ keys = dictSize(server.db[j].dict);
+ vkeys = dictSize(server.db[j].expires);
+ if (keys || vkeys) {
+ info = sdscatprintf(info, "db%d:keys=%lld,expires=%lld\r\n",
+ j, keys, vkeys);
+ }
+ }
+ addReplySds(c,sdscatprintf(sdsempty(),"$%d\r\n",sdslen(info)));
+ addReplySds(c,info);
+ addReply(c,shared.crlf);
+}
+
+static void monitorCommand(redisClient *c) {
+ /* ignore MONITOR if aleady slave or in monitor mode */
+ if (c->flags & REDIS_SLAVE) return;
+
+ c->flags |= (REDIS_SLAVE|REDIS_MONITOR);
+ c->slaveseldb = 0;
+ listAddNodeTail(server.monitors,c);
+ addReply(c,shared.ok);
+}
+
+/* ================================= Expire ================================= */
+static int removeExpire(redisDb *db, robj *key) {
+ if (dictDelete(db->expires,key) == DICT_OK) {
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
+static int setExpire(redisDb *db, robj *key, time_t when) {
+ if (dictAdd(db->expires,key,(void*)when) == DICT_ERR) {
+ return 0;
+ } else {
+ incrRefCount(key);
+ return 1;
+ }
+}
+
+/* Return the expire time of the specified key, or -1 if no expire
+ * is associated with this key (i.e. the key is non volatile) */
+static time_t getExpire(redisDb *db, robj *key) {
+ dictEntry *de;
+
+ /* No expire? return ASAP */
+ if (dictSize(db->expires) == 0 ||
+ (de = dictFind(db->expires,key)) == NULL) return -1;
+
+ return (time_t) dictGetEntryVal(de);
+}
+
+static int expireIfNeeded(redisDb *db, robj *key) {
+ time_t when;
+ dictEntry *de;
+
+ /* No expire? return ASAP */
+ if (dictSize(db->expires) == 0 ||
+ (de = dictFind(db->expires,key)) == NULL) return 0;
+
+ /* Lookup the expire */
+ when = (time_t) dictGetEntryVal(de);
+ if (time(NULL) <= when) return 0;
+
+ /* Delete the key */
+ dictDelete(db->expires,key);
+ return dictDelete(db->dict,key) == DICT_OK;
+}
+
+static int deleteIfVolatile(redisDb *db, robj *key) {
+ dictEntry *de;
+
+ /* No expire? return ASAP */
+ if (dictSize(db->expires) == 0 ||
+ (de = dictFind(db->expires,key)) == NULL) return 0;
+
+ /* Delete the key */
+ server.dirty++;
+ dictDelete(db->expires,key);
+ return dictDelete(db->dict,key) == DICT_OK;
+}
+
+static void expireGenericCommand(redisClient *c, robj *key, time_t seconds) {
+ dictEntry *de;
+
+ de = dictFind(c->db->dict,key);
+ if (de == NULL) {
+ addReply(c,shared.czero);
+ return;
+ }
+ if (seconds < 0) {
+ if (deleteKey(c->db,key)) server.dirty++;
+ addReply(c, shared.cone);
+ return;
+ } else {
+ time_t when = time(NULL)+seconds;
+ if (setExpire(c->db,key,when)) {
+ addReply(c,shared.cone);
+ server.dirty++;
+ } else {
+ addReply(c,shared.czero);
+ }
+ return;
+ }
+}
+
+static void expireCommand(redisClient *c) {
+ expireGenericCommand(c,c->argv[1],strtol(c->argv[2]->ptr,NULL,10));
+}
+
+static void expireatCommand(redisClient *c) {
+ expireGenericCommand(c,c->argv[1],strtol(c->argv[2]->ptr,NULL,10)-time(NULL));
+}
+
+static void ttlCommand(redisClient *c) {
+ time_t expire;
+ int ttl = -1;
+
+ expire = getExpire(c->db,c->argv[1]);
+ if (expire != -1) {
+ ttl = (int) (expire-time(NULL));
+ if (ttl < 0) ttl = -1;
+ }
+ addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",ttl));
+}
+
+/* =============================== Replication ============================= */
+
+static int syncWrite(int fd, char *ptr, ssize_t size, int timeout) {
+ ssize_t nwritten, ret = size;
+ time_t start = time(NULL);
+
+ timeout++;
+ while(size) {
+ if (aeWait(fd,AE_WRITABLE,1000) & AE_WRITABLE) {
+ nwritten = write(fd,ptr,size);
+ if (nwritten == -1) return -1;
+ ptr += nwritten;
+ size -= nwritten;
+ }
+ if ((time(NULL)-start) > timeout) {
+ errno = ETIMEDOUT;
+ return -1;
+ }
+ }
+ return ret;
+}
+
+static int syncRead(int fd, char *ptr, ssize_t size, int timeout) {
+ ssize_t nread, totread = 0;
+ time_t start = time(NULL);
+
+ timeout++;
+ while(size) {
+ if (aeWait(fd,AE_READABLE,1000) & AE_READABLE) {
+ nread = read(fd,ptr,size);
+ if (nread == -1) return -1;
+ ptr += nread;
+ size -= nread;
+ totread += nread;
+ }
+ if ((time(NULL)-start) > timeout) {
+ errno = ETIMEDOUT;
+ return -1;
+ }
+ }
+ return totread;
+}
+
+static int syncReadLine(int fd, char *ptr, ssize_t size, int timeout) {
+ ssize_t nread = 0;
+
+ size--;
+ while(size) {
+ char c;
+
+ if (syncRead(fd,&c,1,timeout) == -1) return -1;
+ if (c == '\n') {
+ *ptr = '\0';
+ if (nread && *(ptr-1) == '\r') *(ptr-1) = '\0';
+ return nread;
+ } else {
+ *ptr++ = c;
+ *ptr = '\0';
+ nread++;
+ }
+ }
+ return nread;
+}
+
+static void syncCommand(redisClient *c) {
+ /* ignore SYNC if aleady slave or in monitor mode */
+ if (c->flags & REDIS_SLAVE) return;
+
+ /* SYNC can't be issued when the server has pending data to send to
+ * the client about already issued commands. We need a fresh reply
+ * buffer registering the differences between the BGSAVE and the current
+ * dataset, so that we can copy to other slaves if needed. */
+ if (listLength(c->reply) != 0) {
+ addReplySds(c,sdsnew("-ERR SYNC is invalid with pending input\r\n"));
+ return;
+ }
+
+ redisLog(REDIS_NOTICE,"Slave ask for synchronization");
+ /* Here we need to check if there is a background saving operation
+ * in progress, or if it is required to start one */
+ if (server.bgsavechildpid != -1) {
+ /* Ok a background save is in progress. Let's check if it is a good
+ * one for replication, i.e. if there is another slave that is
+ * registering differences since the server forked to save */
+ redisClient *slave;
+ listNode *ln;
+
+ listRewind(server.slaves);
+ while((ln = listYield(server.slaves))) {
+ slave = ln->value;
+ if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_END) break;
+ }
+ if (ln) {
+ /* Perfect, the server is already registering differences for
+ * another slave. Set the right state, and copy the buffer. */
+ listRelease(c->reply);
+ c->reply = listDup(slave->reply);
+ c->replstate = REDIS_REPL_WAIT_BGSAVE_END;
+ redisLog(REDIS_NOTICE,"Waiting for end of BGSAVE for SYNC");
+ } else {
+ /* No way, we need to wait for the next BGSAVE in order to
+ * register differences */
+ c->replstate = REDIS_REPL_WAIT_BGSAVE_START;
+ redisLog(REDIS_NOTICE,"Waiting for next BGSAVE for SYNC");
+ }
+ } else {
+ /* Ok we don't have a BGSAVE in progress, let's start one */
+ redisLog(REDIS_NOTICE,"Starting BGSAVE for SYNC");
+ if (rdbSaveBackground(server.dbfilename) != REDIS_OK) {
+ redisLog(REDIS_NOTICE,"Replication failed, can't BGSAVE");
+ addReplySds(c,sdsnew("-ERR Unalbe to perform background save\r\n"));
+ return;
+ }
+ c->replstate = REDIS_REPL_WAIT_BGSAVE_END;
+ }
+ c->repldbfd = -1;
+ c->flags |= REDIS_SLAVE;
+ c->slaveseldb = 0;
+ listAddNodeTail(server.slaves,c);
+ return;
+}
+
+static void sendBulkToSlave(aeEventLoop *el, int fd, void *privdata, int mask) {
+ redisClient *slave = privdata;
+ REDIS_NOTUSED(el);
+ REDIS_NOTUSED(mask);
+ char buf[REDIS_IOBUF_LEN];
+ ssize_t nwritten, buflen;
+
+ if (slave->repldboff == 0) {
+ /* Write the bulk write count before to transfer the DB. In theory here
+ * we don't know how much room there is in the output buffer of the
+ * socket, but in pratice SO_SNDLOWAT (the minimum count for output
+ * operations) will never be smaller than the few bytes we need. */
+ sds bulkcount;
+
+ bulkcount = sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long)
+ slave->repldbsize);
+ if (write(fd,bulkcount,sdslen(bulkcount)) != (signed)sdslen(bulkcount))
+ {
+ sdsfree(bulkcount);
+ freeClient(slave);
+ return;
+ }
+ sdsfree(bulkcount);
+ }
+ lseek(slave->repldbfd,slave->repldboff,SEEK_SET);
+ buflen = read(slave->repldbfd,buf,REDIS_IOBUF_LEN);
+ if (buflen <= 0) {
+ redisLog(REDIS_WARNING,"Read error sending DB to slave: %s",
+ (buflen == 0) ? "premature EOF" : strerror(errno));
+ freeClient(slave);
+ return;
+ }
+ if ((nwritten = write(fd,buf,buflen)) == -1) {
+ redisLog(REDIS_DEBUG,"Write error sending DB to slave: %s",
+ strerror(errno));
+ freeClient(slave);
+ return;
+ }
+ slave->repldboff += nwritten;
+ if (slave->repldboff == slave->repldbsize) {
+ close(slave->repldbfd);
+ slave->repldbfd = -1;
+ aeDeleteFileEvent(server.el,slave->fd,AE_WRITABLE);
+ slave->replstate = REDIS_REPL_ONLINE;
+ if (aeCreateFileEvent(server.el, slave->fd, AE_WRITABLE,
+ sendReplyToClient, slave) == AE_ERR) {
+ freeClient(slave);
+ return;
+ }
+ addReplySds(slave,sdsempty());
+ redisLog(REDIS_NOTICE,"Synchronization with slave succeeded");
+ }
+}
+
+/* This function is called at the end of every backgrond saving.
+ * The argument bgsaveerr is REDIS_OK if the background saving succeeded
+ * otherwise REDIS_ERR is passed to the function.
+ *
+ * The goal of this function is to handle slaves waiting for a successful
+ * background saving in order to perform non-blocking synchronization. */
+static void updateSlavesWaitingBgsave(int bgsaveerr) {
+ listNode *ln;
+ int startbgsave = 0;
+
+ listRewind(server.slaves);
+ while((ln = listYield(server.slaves))) {
+ redisClient *slave = ln->value;
+
+ if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START) {
+ startbgsave = 1;
+ slave->replstate = REDIS_REPL_WAIT_BGSAVE_END;
+ } else if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_END) {
+ struct redis_stat buf;
+
+ if (bgsaveerr != REDIS_OK) {
+ freeClient(slave);
+ redisLog(REDIS_WARNING,"SYNC failed. BGSAVE child returned an error");
+ continue;
+ }
+ if ((slave->repldbfd = open(server.dbfilename,O_RDONLY)) == -1 ||
+ redis_fstat(slave->repldbfd,&buf) == -1) {
+ freeClient(slave);
+ redisLog(REDIS_WARNING,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno));
+ continue;
+ }
+ slave->repldboff = 0;
+ slave->repldbsize = buf.st_size;
+ slave->replstate = REDIS_REPL_SEND_BULK;
+ aeDeleteFileEvent(server.el,slave->fd,AE_WRITABLE);
+ if (aeCreateFileEvent(server.el, slave->fd, AE_WRITABLE, sendBulkToSlave, slave) == AE_ERR) {
+ freeClient(slave);
+ continue;
+ }
+ }
+ }
+ if (startbgsave) {
+ if (rdbSaveBackground(server.dbfilename) != REDIS_OK) {
+ listRewind(server.slaves);
+ redisLog(REDIS_WARNING,"SYNC failed. BGSAVE failed");
+ while((ln = listYield(server.slaves))) {
+ redisClient *slave = ln->value;
+
+ if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START)
+ freeClient(slave);
+ }
+ }
+ }
+}
+
+static int syncWithMaster(void) {
+ char buf[1024], tmpfile[256], authcmd[1024];
+ int dumpsize;
+ int fd = anetTcpConnect(NULL,server.masterhost,server.masterport);
+ int dfd;
+
+ if (fd == -1) {
+ redisLog(REDIS_WARNING,"Unable to connect to MASTER: %s",
+ strerror(errno));
+ return REDIS_ERR;
+ }
+
+ /* AUTH with the master if required. */
+ if(server.masterauth) {
+ snprintf(authcmd, 1024, "AUTH %s\r\n", server.masterauth);
+ if (syncWrite(fd, authcmd, strlen(server.masterauth)+7, 5) == -1) {
+ close(fd);
+ redisLog(REDIS_WARNING,"Unable to AUTH to MASTER: %s",
+ strerror(errno));
+ return REDIS_ERR;
+ }
+ /* Read the AUTH result. */
+ if (syncReadLine(fd,buf,1024,3600) == -1) {
+ close(fd);
+ redisLog(REDIS_WARNING,"I/O error reading auth result from MASTER: %s",
+ strerror(errno));
+ return REDIS_ERR;
+ }
+ if (buf[0] != '+') {
+ close(fd);
+ redisLog(REDIS_WARNING,"Cannot AUTH to MASTER, is the masterauth password correct?");
+ return REDIS_ERR;
+ }
+ }
+
+ /* Issue the SYNC command */
+ if (syncWrite(fd,"SYNC \r\n",7,5) == -1) {
+ close(fd);
+ redisLog(REDIS_WARNING,"I/O error writing to MASTER: %s",
+ strerror(errno));
+ return REDIS_ERR;
+ }
+ /* Read the bulk write count */
+ if (syncReadLine(fd,buf,1024,3600) == -1) {
+ close(fd);
+ redisLog(REDIS_WARNING,"I/O error reading bulk count from MASTER: %s",
+ strerror(errno));
+ return REDIS_ERR;
+ }
+ if (buf[0] != '$') {
+ close(fd);
+ redisLog(REDIS_WARNING,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?");
+ return REDIS_ERR;
+ }
+ dumpsize = atoi(buf+1);
+ redisLog(REDIS_NOTICE,"Receiving %d bytes data dump from MASTER",dumpsize);
+ /* Read the bulk write data on a temp file */
+ snprintf(tmpfile,256,"temp-%d.%ld.rdb",(int)time(NULL),(long int)random());
+ dfd = open(tmpfile,O_CREAT|O_WRONLY,0644);
+ if (dfd == -1) {
+ close(fd);
+ redisLog(REDIS_WARNING,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno));
+ return REDIS_ERR;
+ }
+ while(dumpsize) {
+ int nread, nwritten;
+
+ nread = read(fd,buf,(dumpsize < 1024)?dumpsize:1024);
+ if (nread == -1) {
+ redisLog(REDIS_WARNING,"I/O error trying to sync with MASTER: %s",
+ strerror(errno));
+ close(fd);
+ close(dfd);
+ return REDIS_ERR;
+ }
+ nwritten = write(dfd,buf,nread);
+ if (nwritten == -1) {
+ redisLog(REDIS_WARNING,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno));
+ close(fd);
+ close(dfd);
+ return REDIS_ERR;
+ }
+ dumpsize -= nread;
+ }
+ close(dfd);
+ if (rename(tmpfile,server.dbfilename) == -1) {
+ redisLog(REDIS_WARNING,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno));
+ unlink(tmpfile);
+ close(fd);
+ return REDIS_ERR;
+ }
+ emptyDb();
+ if (rdbLoad(server.dbfilename) != REDIS_OK) {
+ redisLog(REDIS_WARNING,"Failed trying to load the MASTER synchronization DB from disk");
+ close(fd);
+ return REDIS_ERR;
+ }
+ server.master = createClient(fd);
+ server.master->flags |= REDIS_MASTER;
+ server.replstate = REDIS_REPL_CONNECTED;
+ return REDIS_OK;
+}
+
+static void slaveofCommand(redisClient *c) {
+ if (!strcasecmp(c->argv[1]->ptr,"no") &&
+ !strcasecmp(c->argv[2]->ptr,"one")) {
+ if (server.masterhost) {
+ sdsfree(server.masterhost);
+ server.masterhost = NULL;
+ if (server.master) freeClient(server.master);
+ server.replstate = REDIS_REPL_NONE;
+ redisLog(REDIS_NOTICE,"MASTER MODE enabled (user request)");
+ }
+ } else {
+ sdsfree(server.masterhost);
+ server.masterhost = sdsdup(c->argv[1]->ptr);
+ server.masterport = atoi(c->argv[2]->ptr);
+ if (server.master) freeClient(server.master);
+ server.replstate = REDIS_REPL_CONNECT;
+ redisLog(REDIS_NOTICE,"SLAVE OF %s:%d enabled (user request)",
+ server.masterhost, server.masterport);
+ }
+ addReply(c,shared.ok);
+}
+
+/* ============================ Maxmemory directive ======================== */
+
+/* This function gets called when 'maxmemory' is set on the config file to limit
+ * the max memory used by the server, and we are out of memory.
+ * This function will try to, in order:
+ *
+ * - Free objects from the free list
+ * - Try to remove keys with an EXPIRE set
+ *
+ * It is not possible to free enough memory to reach used-memory < maxmemory
+ * the server will start refusing commands that will enlarge even more the
+ * memory usage.
+ */
+static void freeMemoryIfNeeded(void) {
+ while (server.maxmemory && zmalloc_used_memory() > server.maxmemory) {
+ if (listLength(server.objfreelist)) {
+ robj *o;
+
+ listNode *head = listFirst(server.objfreelist);
+ o = listNodeValue(head);
+ listDelNode(server.objfreelist,head);
+ zfree(o);
+ } else {
+ int j, k, freed = 0;
+
+ for (j = 0; j < server.dbnum; j++) {
+ int minttl = -1;
+ robj *minkey = NULL;
+ struct dictEntry *de;
+
+ if (dictSize(server.db[j].expires)) {
+ freed = 1;
+ /* From a sample of three keys drop the one nearest to
+ * the natural expire */
+ for (k = 0; k < 3; k++) {
+ time_t t;
+
+ de = dictGetRandomKey(server.db[j].expires);
+ t = (time_t) dictGetEntryVal(de);
+ if (minttl == -1 || t < minttl) {
+ minkey = dictGetEntryKey(de);
+ minttl = t;
+ }
+ }
+ deleteKey(server.db+j,minkey);
+ }
+ }
+ if (!freed) return; /* nothing to free... */
+ }
+ }
+}
+
+/* ============================== Append Only file ========================== */
+
+static void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int argc) {
+ sds buf = sdsempty();
+ int j;
+ ssize_t nwritten;
+ time_t now;
+ robj *tmpargv[3];
+
+ /* The DB this command was targetting is not the same as the last command
+ * we appendend. To issue a SELECT command is needed. */
+ if (dictid != server.appendseldb) {
+ char seldb[64];
+
+ snprintf(seldb,sizeof(seldb),"%d",dictid);
+ buf = sdscatprintf(buf,"*2\r\n$6\r\nSELECT\r\n$%d\r\n%s\r\n",
+ strlen(seldb),seldb);
+ server.appendseldb = dictid;
+ }
+
+ /* "Fix" the argv vector if the command is EXPIRE. We want to translate
+ * EXPIREs into EXPIREATs calls */
+ if (cmd->proc == expireCommand) {
+ long when;
+
+ tmpargv[0] = createStringObject("EXPIREAT",8);
+ tmpargv[1] = argv[1];
+ incrRefCount(argv[1]);
+ when = time(NULL)+strtol(argv[2]->ptr,NULL,10);
+ tmpargv[2] = createObject(REDIS_STRING,
+ sdscatprintf(sdsempty(),"%ld",when));
+ argv = tmpargv;
+ }
+
+ /* Append the actual command */
+ buf = sdscatprintf(buf,"*%d\r\n",argc);
+ for (j = 0; j < argc; j++) {
+ robj *o = argv[j];
+
+ o = getDecodedObject(o);
+ buf = sdscatprintf(buf,"$%d\r\n",sdslen(o->ptr));
+ buf = sdscatlen(buf,o->ptr,sdslen(o->ptr));
+ buf = sdscatlen(buf,"\r\n",2);
+ decrRefCount(o);
+ }
+
+ /* Free the objects from the modified argv for EXPIREAT */
+ if (cmd->proc == expireCommand) {
+ for (j = 0; j < 3; j++)
+ decrRefCount(argv[j]);
+ }
+
+ /* We want to perform a single write. This should be guaranteed atomic
+ * at least if the filesystem we are writing is a real physical one.
+ * While this will save us against the server being killed I don't think
+ * there is much to do about the whole server stopping for power problems
+ * or alike */
+ nwritten = write(server.appendfd,buf,sdslen(buf));
+ if (nwritten != (signed)sdslen(buf)) {
+ /* Ooops, we are in troubles. The best thing to do for now is
+ * to simply exit instead to give the illusion that everything is
+ * working as expected. */
+ if (nwritten == -1) {
+ redisLog(REDIS_WARNING,"Exiting on error writing to the append-only file: %s",strerror(errno));
+ } else {
+ redisLog(REDIS_WARNING,"Exiting on short write while writing to the append-only file: %s",strerror(errno));
+ }
+ exit(1);
+ }
+ /* If a background append only file rewriting is in progress we want to
+ * accumulate the differences between the child DB and the current one
+ * in a buffer, so that when the child process will do its work we
+ * can append the differences to the new append only file. */
+ if (server.bgrewritechildpid != -1)
+ server.bgrewritebuf = sdscatlen(server.bgrewritebuf,buf,sdslen(buf));
+
+ sdsfree(buf);
+ now = time(NULL);
+ if (server.appendfsync == APPENDFSYNC_ALWAYS ||
+ (server.appendfsync == APPENDFSYNC_EVERYSEC &&
+ now-server.lastfsync > 1))
+ {
+ fsync(server.appendfd); /* Let's try to get this data on the disk */
+ server.lastfsync = now;
+ }
+}
+
+/* In Redis commands are always executed in the context of a client, so in
+ * order to load the append only file we need to create a fake client. */
+static struct redisClient *createFakeClient(void) {
+ struct redisClient *c = zmalloc(sizeof(*c));
+
+ selectDb(c,0);
+ c->fd = -1;
+ c->querybuf = sdsempty();
+ c->argc = 0;
+ c->argv = NULL;
+ c->flags = 0;
+ /* We set the fake client as a slave waiting for the synchronization
+ * so that Redis will not try to send replies to this client. */
+ c->replstate = REDIS_REPL_WAIT_BGSAVE_START;
+ c->reply = listCreate();
+ listSetFreeMethod(c->reply,decrRefCount);
+ listSetDupMethod(c->reply,dupClientReplyValue);
+ return c;
+}
+
+static void freeFakeClient(struct redisClient *c) {
+ sdsfree(c->querybuf);
+ listRelease(c->reply);
+ zfree(c);
+}
+
+/* Replay the append log file. On error REDIS_OK is returned. On non fatal
+ * error (the append only file is zero-length) REDIS_ERR is returned. On
+ * fatal error an error message is logged and the program exists. */
+int loadAppendOnlyFile(char *filename) {
+ struct redisClient *fakeClient;
+ FILE *fp = fopen(filename,"r");
+ struct redis_stat sb;
+
+ if (redis_fstat(fileno(fp),&sb) != -1 && sb.st_size == 0)
+ return REDIS_ERR;
+
+ if (fp == NULL) {
+ redisLog(REDIS_WARNING,"Fatal error: can't open the append log file for reading: %s",strerror(errno));
+ exit(1);
+ }
+
+ fakeClient = createFakeClient();
+ while(1) {
+ int argc, j;
+ unsigned long len;
+ robj **argv;
+ char buf[128];
+ sds argsds;
+ struct redisCommand *cmd;
+
+ if (fgets(buf,sizeof(buf),fp) == NULL) {
+ if (feof(fp))
+ break;
+ else
+ goto readerr;
+ }
+ if (buf[0] != '*') goto fmterr;
+ argc = atoi(buf+1);
+ argv = zmalloc(sizeof(robj*)*argc);
+ for (j = 0; j < argc; j++) {
+ if (fgets(buf,sizeof(buf),fp) == NULL) goto readerr;
+ if (buf[0] != '$') goto fmterr;
+ len = strtol(buf+1,NULL,10);
+ argsds = sdsnewlen(NULL,len);
+ if (len && fread(argsds,len,1,fp) == 0) goto fmterr;
+ argv[j] = createObject(REDIS_STRING,argsds);
+ if (fread(buf,2,1,fp) == 0) goto fmterr; /* discard CRLF */
+ }
+
+ /* Command lookup */
+ cmd = lookupCommand(argv[0]->ptr);
+ if (!cmd) {
+ redisLog(REDIS_WARNING,"Unknown command '%s' reading the append only file", argv[0]->ptr);
+ exit(1);
+ }
+ /* Try object sharing and encoding */
+ if (server.shareobjects) {
+ int j;
+ for(j = 1; j < argc; j++)
+ argv[j] = tryObjectSharing(argv[j]);
+ }
+ if (cmd->flags & REDIS_CMD_BULK)
+ tryObjectEncoding(argv[argc-1]);
+ /* Run the command in the context of a fake client */
+ fakeClient->argc = argc;
+ fakeClient->argv = argv;
+ cmd->proc(fakeClient);
+ /* Discard the reply objects list from the fake client */
+ while(listLength(fakeClient->reply))
+ listDelNode(fakeClient->reply,listFirst(fakeClient->reply));
+ /* Clean up, ready for the next command */
+ for (j = 0; j < argc; j++) decrRefCount(argv[j]);
+ zfree(argv);
+ }
+ fclose(fp);
+ freeFakeClient(fakeClient);
+ return REDIS_OK;
+
+readerr:
+ if (feof(fp)) {
+ redisLog(REDIS_WARNING,"Unexpected end of file reading the append only file");
+ } else {
+ redisLog(REDIS_WARNING,"Unrecoverable error reading the append only file: %s", strerror(errno));
+ }
+ exit(1);
+fmterr:
+ redisLog(REDIS_WARNING,"Bad file format reading the append only file");
+ exit(1);
+}
+
+/* Write an object into a file in the bulk format $<count>\r\n<payload>\r\n */
+static int fwriteBulk(FILE *fp, robj *obj) {
+ char buf[128];
+ obj = getDecodedObject(obj);
+ snprintf(buf,sizeof(buf),"$%ld\r\n",(long)sdslen(obj->ptr));
+ if (fwrite(buf,strlen(buf),1,fp) == 0) goto err;
+ if (fwrite(obj->ptr,sdslen(obj->ptr),1,fp) == 0) goto err;
+ if (fwrite("\r\n",2,1,fp) == 0) goto err;
+ decrRefCount(obj);
+ return 1;
+err:
+ decrRefCount(obj);
+ return 0;
+}
+
+/* Write a double value in bulk format $<count>\r\n<payload>\r\n */
+static int fwriteBulkDouble(FILE *fp, double d) {
+ char buf[128], dbuf[128];
+
+ snprintf(dbuf,sizeof(dbuf),"%.17g\r\n",d);
+ snprintf(buf,sizeof(buf),"$%lu\r\n",(unsigned long)strlen(dbuf)-2);
+ if (fwrite(buf,strlen(buf),1,fp) == 0) return 0;
+ if (fwrite(dbuf,strlen(dbuf),1,fp) == 0) return 0;
+ return 1;
+}
+
+/* Write a long value in bulk format $<count>\r\n<payload>\r\n */
+static int fwriteBulkLong(FILE *fp, long l) {
+ char buf[128], lbuf[128];
+
+ snprintf(lbuf,sizeof(lbuf),"%ld\r\n",l);
+ snprintf(buf,sizeof(buf),"$%lu\r\n",(unsigned long)strlen(lbuf)-2);
+ if (fwrite(buf,strlen(buf),1,fp) == 0) return 0;
+ if (fwrite(lbuf,strlen(lbuf),1,fp) == 0) return 0;
+ return 1;
+}
+
+/* Write a sequence of commands able to fully rebuild the dataset into
+ * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */
+static int rewriteAppendOnlyFile(char *filename) {
+ dictIterator *di = NULL;
+ dictEntry *de;
+ FILE *fp;
+ char tmpfile[256];
+ int j;
+ time_t now = time(NULL);
+
+ /* Note that we have to use a different temp name here compared to the
+ * one used by rewriteAppendOnlyFileBackground() function. */
+ snprintf(tmpfile,256,"temp-rewriteaof-%d.aof", (int) getpid());
+ fp = fopen(tmpfile,"w");
+ if (!fp) {
+ redisLog(REDIS_WARNING, "Failed rewriting the append only file: %s", strerror(errno));
+ return REDIS_ERR;
+ }
+ for (j = 0; j < server.dbnum; j++) {
+ char selectcmd[] = "*2\r\n$6\r\nSELECT\r\n";
+ redisDb *db = server.db+j;
+ dict *d = db->dict;
+ if (dictSize(d) == 0) continue;
+ di = dictGetIterator(d);
+ if (!di) {
+ fclose(fp);
+ return REDIS_ERR;
+ }
+
+ /* SELECT the new DB */
+ if (fwrite(selectcmd,sizeof(selectcmd)-1,1,fp) == 0) goto werr;
+ if (fwriteBulkLong(fp,j) == 0) goto werr;
+
+ /* Iterate this DB writing every entry */
+ while((de = dictNext(di)) != NULL) {
+ robj *key = dictGetEntryKey(de);
+ robj *o = dictGetEntryVal(de);
+ time_t expiretime = getExpire(db,key);
+
+ /* Save the key and associated value */
+ if (o->type == REDIS_STRING) {
+ /* Emit a SET command */
+ char cmd[]="*3\r\n$3\r\nSET\r\n";
+ if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr;
+ /* Key and value */
+ if (fwriteBulk(fp,key) == 0) goto werr;
+ if (fwriteBulk(fp,o) == 0) goto werr;
+ } else if (o->type == REDIS_LIST) {
+ /* Emit the RPUSHes needed to rebuild the list */
+ list *list = o->ptr;
+ listNode *ln;
+
+ listRewind(list);
+ while((ln = listYield(list))) {
+ char cmd[]="*3\r\n$5\r\nRPUSH\r\n";
+ robj *eleobj = listNodeValue(ln);