X-Git-Url: https://git.saurik.com/redis.git/blobdiff_plain/98d2e23be23f9af02653672081896747baf985f7..4d16bb253c8807e45abcd776fcf6521c525a3f2b:/redis.c?ds=sidebyside diff --git a/redis.c b/redis.c index 465398ad..b5bbd04d 100644 --- a/redis.c +++ b/redis.c @@ -57,6 +57,7 @@ #include #include #include +#include #include #include @@ -73,7 +74,8 @@ #include "zmalloc.h" /* total memory usage aware version of malloc/free */ #include "lzf.h" /* LZF compression library */ #include "pqsort.h" /* Partial qsort for SORT+LIMIT */ -#include "zipmap.h" +#include "zipmap.h" /* Compact dictionary-alike data structure */ +#include "sha1.h" /* SHA1 is used for DEBUG DIGEST */ /* Error codes */ #define REDIS_OK 0 @@ -370,6 +372,7 @@ struct redisServer { pid_t bgsavechildpid; pid_t bgrewritechildpid; sds bgrewritebuf; /* buffer taken by parent during oppend only rewrite */ + sds aofbuf; /* AOF buffer, written before entering the event loop */ struct saveparam *saveparams; int saveparamslen; char *logfile; @@ -449,6 +452,7 @@ typedef struct pubsubPattern { } pubsubPattern; typedef void redisCommandProc(redisClient *c); +typedef void redisVmPreloadProc(redisClient *c, struct redisCommand *cmd, int argc, robj **argv); struct redisCommand { char *name; redisCommandProc *proc; @@ -457,7 +461,7 @@ struct redisCommand { /* Use a function to determine which keys need to be loaded * in the background prior to executing this command. Takes precedence * over vm_firstkey and others, ignored when NULL */ - redisCommandProc *vm_preload_proc; + redisVmPreloadProc *vm_preload_proc; /* What keys should be loaded in background when calling this command? */ int vm_firstkey; /* The first argument that's a key (0 = no keys) */ int vm_lastkey; /* THe last argument that's a key */ @@ -557,6 +561,7 @@ static robj *createStringObject(char *ptr, size_t len); static robj *dupStringObject(robj *o); static void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc); static void replicationFeedMonitors(list *monitors, int dictid, robj **argv, int argc); +static void flushAppendOnlyFile(void); static void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int argc); static int syncWithMaster(void); static robj *tryObjectEncoding(robj *o); @@ -606,8 +611,9 @@ static robj *vmReadObjectFromSwap(off_t page, int type); static void waitEmptyIOJobsQueue(void); static void vmReopenSwapFile(void); static int vmFreePage(off_t page); -static void zunionInterBlockClientOnSwappedKeys(redisClient *c); -static int blockClientOnSwappedKeys(struct redisCommand *cmd, redisClient *c); +static void zunionInterBlockClientOnSwappedKeys(redisClient *c, struct redisCommand *cmd, int argc, robj **argv); +static void execBlockClientOnSwappedKeys(redisClient *c, struct redisCommand *cmd, int argc, robj **argv); +static int blockClientOnSwappedKeys(redisClient *c, struct redisCommand *cmd); static int dontWaitForSwappedKey(redisClient *c, robj *key); static void handleClientsBlockedOnSwappedKey(redisDb *db, robj *key); static void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask); @@ -620,8 +626,10 @@ static int pubsubUnsubscribeAllPatterns(redisClient *c, int notify); static void freePubsubPattern(void *p); static int listMatchPubsubPattern(void *a, void *b); static int compareStringObjects(robj *a, robj *b); +static int equalStringObjects(robj *a, robj *b); static void usage(); static int rewriteAppendOnlyFileBackground(void); +static int vmSwapObjectBlocking(robj *key, robj *val); static void authCommand(redisClient *c); static void pingCommand(redisClient *c); @@ -715,8 +723,8 @@ static void hmgetCommand(redisClient *c); static void hdelCommand(redisClient *c); static void hlenCommand(redisClient *c); static void zremrangebyrankCommand(redisClient *c); -static void zunionCommand(redisClient *c); -static void zinterCommand(redisClient *c); +static void zunionstoreCommand(redisClient *c); +static void zinterstoreCommand(redisClient *c); static void hkeysCommand(redisClient *c); static void hvalsCommand(redisClient *c); static void hgetallCommand(redisClient *c); @@ -777,8 +785,8 @@ static struct redisCommand cmdTable[] = { {"zrem",zremCommand,3,REDIS_CMD_BULK,NULL,1,1,1}, {"zremrangebyscore",zremrangebyscoreCommand,4,REDIS_CMD_INLINE,NULL,1,1,1}, {"zremrangebyrank",zremrangebyrankCommand,4,REDIS_CMD_INLINE,NULL,1,1,1}, - {"zunion",zunionCommand,-4,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,zunionInterBlockClientOnSwappedKeys,0,0,0}, - {"zinter",zinterCommand,-4,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,zunionInterBlockClientOnSwappedKeys,0,0,0}, + {"zunionstore",zunionstoreCommand,-4,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,zunionInterBlockClientOnSwappedKeys,0,0,0}, + {"zinterstore",zinterstoreCommand,-4,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,zunionInterBlockClientOnSwappedKeys,0,0,0}, {"zrange",zrangeCommand,-4,REDIS_CMD_INLINE,NULL,1,1,1}, {"zrangebyscore",zrangebyscoreCommand,-4,REDIS_CMD_INLINE,NULL,1,1,1}, {"zcount",zcountCommand,4,REDIS_CMD_INLINE,NULL,1,1,1}, @@ -823,7 +831,7 @@ static struct redisCommand cmdTable[] = { {"lastsave",lastsaveCommand,1,REDIS_CMD_INLINE,NULL,0,0,0}, {"type",typeCommand,2,REDIS_CMD_INLINE,NULL,1,1,1}, {"multi",multiCommand,1,REDIS_CMD_INLINE,NULL,0,0,0}, - {"exec",execCommand,1,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,NULL,0,0,0}, + {"exec",execCommand,1,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,execBlockClientOnSwappedKeys,0,0,0}, {"discard",discardCommand,1,REDIS_CMD_INLINE,NULL,0,0,0}, {"sync",syncCommand,1,REDIS_CMD_INLINE,NULL,0,0,0}, {"flushdb",flushdbCommand,1,REDIS_CMD_INLINE,NULL,0,0,0}, @@ -1019,6 +1027,30 @@ static long long memtoll(const char *p, int *err) { return val*mul; } +/* Convert a long long into a string. Returns the number of + * characters needed to represent the number, that can be shorter if passed + * buffer length is not enough to store the whole number. */ +static int ll2string(char *s, size_t len, long long value) { + char buf[32], *p; + unsigned long long v; + size_t l; + + if (len == 0) return 0; + v = (value < 0) ? -value : value; + p = buf+31; /* point to the last character */ + do { + *p-- = '0'+(v%10); + v /= 10; + } while(v); + if (value < 0) *p-- = '-'; + p++; + l = 32-(p-buf); + if (l+1 > len) l = len-1; /* Make sure it fits, including the nul term */ + memcpy(s,p,l); + s[l] = '\0'; + return l; +} + static void redisLog(int level, const char *fmt, ...) { va_list ap; FILE *fp; @@ -1101,8 +1133,8 @@ static int dictEncObjKeyCompare(void *privdata, const void *key1, int cmp; if (o1->encoding == REDIS_ENCODING_INT && - o2->encoding == REDIS_ENCODING_INT && - o1->ptr == o2->ptr) return 1; + o2->encoding == REDIS_ENCODING_INT) + return o1->ptr == o2->ptr; o1 = getDecodedObject(o1); o2 = getDecodedObject(o2); @@ -1122,7 +1154,7 @@ static unsigned int dictEncObjHash(const void *key) { char buf[32]; int len; - len = snprintf(buf,32,"%ld",(long)o->ptr); + len = ll2string(buf,32,(long)o->ptr); return dictGenHashFunction((unsigned char*)buf, len); } else { unsigned int hash; @@ -1528,6 +1560,7 @@ static int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientD static void beforeSleep(struct aeEventLoop *eventLoop) { REDIS_NOTUSED(eventLoop); + /* Awake clients that got all the swapped keys they requested */ if (server.vm_enabled && listLength(server.io_ready_clients)) { listIter li; listNode *ln; @@ -1552,6 +1585,8 @@ static void beforeSleep(struct aeEventLoop *eventLoop) { processInputBuffer(c); } } + /* Write the AOF buffer on disk */ + flushAppendOnlyFile(); } static void createSharedObjects(void) { @@ -1629,7 +1664,7 @@ static void initServerConfig() { server.glueoutputbuf = 1; server.daemonize = 0; server.appendonly = 0; - server.appendfsync = APPENDFSYNC_ALWAYS; + server.appendfsync = APPENDFSYNC_EVERYSEC; server.lastfsync = time(NULL); server.appendfd = -1; server.appendseldb = -1; /* Make sure the first time will not match */ @@ -1712,6 +1747,7 @@ static void initServer() { server.bgsavechildpid = -1; server.bgrewritechildpid = -1; server.bgrewritebuf = sdsempty(); + server.aofbuf = sdsempty(); server.lastsave = time(NULL); server.dirty = 0; server.stat_numcommands = 0; @@ -1880,6 +1916,9 @@ static void loadServerConfig(char *filename) { if ((server.appendonly = yesnotoi(argv[1])) == -1) { err = "argument must be 'yes' or 'no'"; goto loaderr; } + } else if (!strcasecmp(argv[0],"appendfilename") && argc == 2) { + zfree(server.appendfilename); + server.appendfilename = zstrdup(argv[1]); } else if (!strcasecmp(argv[0],"appendfsync") && argc == 2) { if (!strcasecmp(argv[1],"no")) { server.appendfsync = APPENDFSYNC_NO; @@ -2385,7 +2424,7 @@ static int processCommand(redisClient *c) { addReply(c,shared.queued); } else { if (server.vm_enabled && server.vm_max_threads > 0 && - blockClientOnSwappedKeys(cmd,c)) return 1; + blockClientOnSwappedKeys(c,cmd)) return 1; call(c,cmd); } @@ -2649,7 +2688,7 @@ static void *dupClientReplyValue(void *o) { } static int listMatchObjects(void *a, void *b) { - return compareStringObjects(a,b) == 0; + return equalStringObjects(a,b); } static redisClient *createClient(int fd) { @@ -2889,7 +2928,7 @@ static robj *createStringObjectFromLongLong(long long value) { o->encoding = REDIS_ENCODING_INT; o->ptr = (void*)((long)value); } else { - o->ptr = sdscatprintf(sdsempty(),"%lld",value); + o = createObject(REDIS_STRING,sdsfromlonglong(value)); } } return o; @@ -3104,7 +3143,7 @@ static int isStringRepresentableAsLong(sds s, long *longval) { value = strtol(s, &endptr, 10); if (endptr[0] != '\0') return REDIS_ERR; - slen = snprintf(buf,32,"%ld",value); + slen = ll2string(buf,32,value); /* If the number converted back into a string is not identical * then it's not possible to encode the string as integer */ @@ -3157,7 +3196,7 @@ static robj *getDecodedObject(robj *o) { if (o->type == REDIS_STRING && o->encoding == REDIS_ENCODING_INT) { char buf[32]; - snprintf(buf,32,"%ld",(long)o->ptr); + ll2string(buf,32,(long)o->ptr); dec = createStringObject(buf,strlen(buf)); return dec; } else { @@ -3167,7 +3206,7 @@ static robj *getDecodedObject(robj *o) { /* Compare two string objects via strcmp() or alike. * Note that the objects may be integer-encoded. In such a case we - * use snprintf() to get a string representation of the numbers on the stack + * use ll2string() to get a string representation of the numbers on the stack * and compare the strings, it's much faster than calling getDecodedObject(). * * Important note: if objects are not integer encoded, but binary-safe strings, @@ -3180,14 +3219,14 @@ static int compareStringObjects(robj *a, robj *b) { if (a == b) return 0; if (a->encoding != REDIS_ENCODING_RAW) { - snprintf(bufa,sizeof(bufa),"%ld",(long) a->ptr); + ll2string(bufa,sizeof(bufa),(long) a->ptr); astr = bufa; bothsds = 0; } else { astr = a->ptr; } if (b->encoding != REDIS_ENCODING_RAW) { - snprintf(bufb,sizeof(bufb),"%ld",(long) b->ptr); + ll2string(bufb,sizeof(bufb),(long) b->ptr); bstr = bufb; bothsds = 0; } else { @@ -3196,6 +3235,18 @@ static int compareStringObjects(robj *a, robj *b) { return bothsds ? sdscmp(astr,bstr) : strcmp(astr,bstr); } +/* Equal string objects return 1 if the two objects are the same from the + * point of view of a string comparison, otherwise 0 is returned. Note that + * this function is faster then checking for (compareStringObject(a,b) == 0) + * because it can perform some more optimization. */ +static int equalStringObjects(robj *a, robj *b) { + if (a->encoding != REDIS_ENCODING_RAW && b->encoding != REDIS_ENCODING_RAW){ + return a->ptr == b->ptr; + } else { + return compareStringObjects(a,b) == 0; + } +} + static size_t stringObjectLen(robj *o) { redisAssert(o->type == REDIS_STRING); if (o->encoding == REDIS_ENCODING_RAW) { @@ -3203,7 +3254,7 @@ static size_t stringObjectLen(robj *o) { } else { char buf[32]; - return snprintf(buf,32,"%ld",(long)o->ptr); + return ll2string(buf,32,(long)o->ptr); } } @@ -3334,22 +3385,12 @@ static int rdbSaveLen(FILE *fp, uint32_t len) { return 0; } -/* String objects in the form "2391" "-100" without any space and with a - * range of values that can fit in an 8, 16 or 32 bit signed value can be - * encoded as integers to save space */ -static int rdbTryIntegerEncoding(char *s, size_t len, unsigned char *enc) { - long long value; - char *endptr, buf[32]; - - /* Check if it's possible to encode this value as a number */ - value = strtoll(s, &endptr, 10); - if (endptr[0] != '\0') return 0; - snprintf(buf,32,"%lld",value); - - /* If the number converted back into a string is not identical - * then it's not possible to encode the string as integer */ - if (strlen(buf) != len || memcmp(buf,s,len)) return 0; - +/* Encode 'value' as an integer if possible (if integer will fit the + * supported range). If the function sucessful encoded the integer + * then the (up to 5 bytes) encoded representation is written in the + * string pointed by 'enc' and the length is returned. Otherwise + * 0 is returned. */ +static int rdbEncodeInteger(long long value, unsigned char *enc) { /* Finally check if it fits in our ranges */ if (value >= -(1<<7) && value <= (1<<7)-1) { enc[0] = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_INT8; @@ -3372,6 +3413,25 @@ static int rdbTryIntegerEncoding(char *s, size_t len, unsigned char *enc) { } } +/* String objects in the form "2391" "-100" without any space and with a + * range of values that can fit in an 8, 16 or 32 bit signed value can be + * encoded as integers to save space */ +static int rdbTryIntegerEncoding(char *s, size_t len, unsigned char *enc) { + long long value; + char *endptr, buf[32]; + + /* Check if it's possible to encode this value as a number */ + value = strtoll(s, &endptr, 10); + if (endptr[0] != '\0') return 0; + ll2string(buf,32,value); + + /* If the number converted back into a string is not identical + * then it's not possible to encode the string as integer */ + if (strlen(buf) != len || memcmp(buf,s,len)) return 0; + + return rdbEncodeInteger(value,enc); +} + static int rdbSaveLzfStringObject(FILE *fp, unsigned char *s, size_t len) { size_t comprlen, outlen; unsigned char byte; @@ -3435,6 +3495,21 @@ static int rdbSaveRawString(FILE *fp, unsigned char *s, size_t len) { static int rdbSaveStringObject(FILE *fp, robj *obj) { int retval; + /* Avoid to decode the object, then encode it again, if the + * object is alrady integer encoded. */ + if (obj->encoding == REDIS_ENCODING_INT) { + long val = (long) obj->ptr; + unsigned char buf[5]; + int enclen; + + if ((enclen = rdbEncodeInteger(val,buf)) > 0) { + if (fwrite(buf,enclen,1,fp) == 0) return -1; + return 0; + } + /* otherwise... fall throught and continue with the usual + * code path. */ + } + /* Avoid incr/decr ref count business when possible. * This plays well with copy-on-write given that we are probably * in a child process (BGSAVE). Also this makes sure key objects @@ -3469,7 +3544,23 @@ static int rdbSaveDoubleValue(FILE *fp, double val) { len = 1; buf[0] = (val < 0) ? 255 : 254; } else { - snprintf((char*)buf+1,sizeof(buf)-1,"%.17g",val); +#if (DBL_MANT_DIG >= 52) && (LLONG_MAX == 0x7fffffffffffffffLL) + /* Check if the float is in a safe range to be casted into a + * long long. We are assuming that long long is 64 bit here. + * Also we are assuming that there are no implementations around where + * double has precision < 52 bit. + * + * Under this assumptions we test if a double is inside an interval + * where casting to long long is safe. Then using two castings we + * make sure the decimal part is zero. If all this is true we use + * integer printing function that is much faster. */ + double min = -4503599627370495; /* (2^52)-1 */ + double max = 4503599627370496; /* -(2^52) */ + if (val > min && val < max && val == ((double)((long long)val))) + ll2string((char*)buf+1,sizeof(buf),(long long)val); + else +#endif + snprintf((char*)buf+1,sizeof(buf)-1,"%.17g",val); buf[0] = strlen((char*)buf+1); len = buf[0]+1; } @@ -3753,7 +3844,11 @@ static uint32_t rdbLoadLen(FILE *fp, int *isencoded) { } } -static robj *rdbLoadIntegerObject(FILE *fp, int enctype) { +/* Load an integer-encoded object from file 'fp', with the specified + * encoding type 'enctype'. If encode is true the function may return + * an integer-encoded object as reply, otherwise the returned object + * will always be encoded as a raw string. */ +static robj *rdbLoadIntegerObject(FILE *fp, int enctype, int encode) { unsigned char enc[4]; long long val; @@ -3774,7 +3869,10 @@ static robj *rdbLoadIntegerObject(FILE *fp, int enctype) { val = 0; /* anti-warning */ redisPanic("Unknown RDB integer encoding type"); } - return createObject(REDIS_STRING,sdscatprintf(sdsempty(),"%lld",val)); + if (encode) + return createStringObjectFromLongLong(val); + else + return createObject(REDIS_STRING,sdsfromlonglong(val)); } static robj *rdbLoadLzfStringObject(FILE*fp) { @@ -3796,7 +3894,7 @@ err: return NULL; } -static robj *rdbLoadStringObject(FILE*fp) { +static robj *rdbGenericLoadStringObject(FILE*fp, int encode) { int isencoded; uint32_t len; sds val; @@ -3807,7 +3905,7 @@ static robj *rdbLoadStringObject(FILE*fp) { case REDIS_RDB_ENC_INT8: case REDIS_RDB_ENC_INT16: case REDIS_RDB_ENC_INT32: - return rdbLoadIntegerObject(fp,len); + return rdbLoadIntegerObject(fp,len,encode); case REDIS_RDB_ENC_LZF: return rdbLoadLzfStringObject(fp); default: @@ -3824,6 +3922,14 @@ static robj *rdbLoadStringObject(FILE*fp) { return createObject(REDIS_STRING,val); } +static robj *rdbLoadStringObject(FILE *fp) { + return rdbGenericLoadStringObject(fp,0); +} + +static robj *rdbLoadEncodedStringObject(FILE *fp) { + return rdbGenericLoadStringObject(fp,1); +} + /* For information about double serialization check rdbSaveDoubleValue() */ static int rdbLoadDoubleValue(FILE *fp, double *val) { char buf[128]; @@ -3850,7 +3956,7 @@ static robj *rdbLoadObject(int type, FILE *fp) { redisLog(REDIS_DEBUG,"LOADING OBJECT %d (at %d)\n",type,ftell(fp)); if (type == REDIS_STRING) { /* Read string value */ - if ((o = rdbLoadStringObject(fp)) == NULL) return NULL; + if ((o = rdbLoadEncodedStringObject(fp)) == NULL) return NULL; o = tryObjectEncoding(o); } else if (type == REDIS_LIST || type == REDIS_SET) { /* Read list/set value */ @@ -3866,7 +3972,7 @@ static robj *rdbLoadObject(int type, FILE *fp) { while(listlen--) { robj *ele; - if ((ele = rdbLoadStringObject(fp)) == NULL) return NULL; + if ((ele = rdbLoadEncodedStringObject(fp)) == NULL) return NULL; ele = tryObjectEncoding(ele); if (type == REDIS_LIST) { listAddNodeTail((list*)o->ptr,ele); @@ -3887,7 +3993,7 @@ static robj *rdbLoadObject(int type, FILE *fp) { robj *ele; double *score = zmalloc(sizeof(double)); - if ((ele = rdbLoadStringObject(fp)) == NULL) return NULL; + if ((ele = rdbLoadEncodedStringObject(fp)) == NULL) return NULL; ele = tryObjectEncoding(ele); if (rdbLoadDoubleValue(fp,score) == -1) return NULL; dictAdd(zs->dict,ele,score); @@ -3940,13 +4046,13 @@ static robj *rdbLoadObject(int type, FILE *fp) { static int rdbLoad(char *filename) { FILE *fp; - robj *keyobj = NULL; uint32_t dbid; int type, retval, rdbver; + int swap_all_values = 0; dict *d = server.db[0].dict; redisDb *db = server.db+0; char buf[1024]; - time_t expiretime = -1, now = time(NULL); + time_t expiretime, now = time(NULL); long long loadedkeys = 0; fp = fopen(filename,"r"); @@ -3965,8 +4071,9 @@ static int rdbLoad(char *filename) { return REDIS_ERR; } while(1) { - robj *o; + robj *key, *val; + expiretime = -1; /* Read type. */ if ((type = rdbLoadType(fp)) == -1) goto eoferr; if (type == REDIS_EXPIRETIME) { @@ -3988,36 +4095,61 @@ static int rdbLoad(char *filename) { continue; } /* Read key */ - if ((keyobj = rdbLoadStringObject(fp)) == NULL) goto eoferr; + if ((key = rdbLoadStringObject(fp)) == NULL) goto eoferr; /* Read value */ - if ((o = rdbLoadObject(type,fp)) == NULL) goto eoferr; + if ((val = rdbLoadObject(type,fp)) == NULL) goto eoferr; + /* Check if the key already expired */ + if (expiretime != -1 && expiretime < now) { + decrRefCount(key); + decrRefCount(val); + continue; + } /* Add the new object in the hash table */ - retval = dictAdd(d,keyobj,o); + retval = dictAdd(d,key,val); if (retval == DICT_ERR) { - redisLog(REDIS_WARNING,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", keyobj->ptr); + redisLog(REDIS_WARNING,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", key->ptr); exit(1); } + loadedkeys++; /* Set the expire time if needed */ - if (expiretime != -1) { - setExpire(db,keyobj,expiretime); - /* Delete this key if already expired */ - if (expiretime < now) deleteKey(db,keyobj); - expiretime = -1; - } - keyobj = o = NULL; + if (expiretime != -1) setExpire(db,key,expiretime); + /* Handle swapping while loading big datasets when VM is on */ - loadedkeys++; - if (server.vm_enabled && (loadedkeys % 5000) == 0) { + + /* If we detecter we are hopeless about fitting something in memory + * we just swap every new key on disk. Directly... + * Note that's important to check for this condition before resorting + * to random sampling, otherwise we may try to swap already + * swapped keys. */ + if (swap_all_values) { + dictEntry *de = dictFind(d,key); + + /* de may be NULL since the key already expired */ + if (de) { + key = dictGetEntryKey(de); + val = dictGetEntryVal(de); + + if (vmSwapObjectBlocking(key,val) == REDIS_OK) { + dictGetEntryVal(de) = NULL; + } + } + continue; + } + + /* If we have still some hope of having some value fitting memory + * then we try random sampling. */ + if (!swap_all_values && server.vm_enabled && (loadedkeys % 5000) == 0) { while (zmalloc_used_memory() > server.vm_max_memory) { if (vmSwapOneObjectBlocking() == REDIS_ERR) break; } + if (zmalloc_used_memory() > server.vm_max_memory) + swap_all_values = 1; /* We are already using too much mem */ } } fclose(fp); return REDIS_OK; eoferr: /* unexpected end of file is handled here with a fatal exit */ - if (keyobj) decrRefCount(keyobj); redisLog(REDIS_WARNING,"Short read or OOM loading DB. Unrecoverable error, aborting now."); exit(1); return REDIS_ERR; /* Just to avoid warning */ @@ -4198,8 +4330,8 @@ static void incrDecrCommand(redisClient *c, long long incr) { robj *o; o = lookupKeyWrite(c->db,c->argv[1]); - - if (getLongLongFromObjectOrReply(c, o, &value, NULL) != REDIS_OK) return; + if (o != NULL && checkType(c,o,REDIS_STRING)) return; + if (getLongLongFromObjectOrReply(c,o,&value,NULL) != REDIS_OK) return; value += incr; o = createObject(REDIS_STRING,sdscatprintf(sdsempty(),"%lld",value)); @@ -4337,7 +4469,12 @@ static void delCommand(redisClient *c) { } static void existsCommand(redisClient *c) { - addReply(c,lookupKeyRead(c->db,c->argv[1]) ? shared.cone : shared.czero); + expireIfNeeded(c->db,c->argv[1]); + if (dictFind(c->db->dict,c->argv[1])) { + addReply(c, shared.cone); + } else { + addReply(c, shared.czero); + } } static void selectCommand(redisClient *c) { @@ -4479,7 +4616,6 @@ static void shutdownCommand(redisClient *c) { unlink(server.pidfile); redisLog(REDIS_WARNING,"%zu bytes used at exit",zmalloc_used_memory()); redisLog(REDIS_WARNING,"Server exit now, bye bye..."); - if (server.vm_enabled) unlink(server.vm_swap_file); exit(0); } else { /* Ooops.. error saving! The best we can do is to continue @@ -4818,7 +4954,7 @@ static void lremCommand(redisClient *c) { robj *ele = listNodeValue(ln); next = fromtail ? ln->prev : ln->next; - if (compareStringObjects(ele,c->argv[3]) == 0) { + if (equalStringObjects(ele,c->argv[3])) { listDelNode(list,ln); server.dirty++; removed++; @@ -5422,7 +5558,7 @@ static int zslDelete(zskiplist *zsl, double score, robj *obj) { /* We may have multiple elements with the same score, what we need * is to find the element with both the right score and object. */ x = x->forward[0]; - if (x && score == x->score && compareStringObjects(x->obj,obj) == 0) { + if (x && score == x->score && equalStringObjects(x->obj,obj)) { zslDeleteNode(zsl, x, update); zslFreeNode(x); return 1; @@ -5527,7 +5663,7 @@ static unsigned long zslGetRank(zskiplist *zsl, double score, robj *o) { } /* x might be equal to zsl->header, so test if obj is non-NULL */ - if (x->obj && compareStringObjects(x->obj,o) == 0) { + if (x->obj && equalStringObjects(x->obj,o)) { return rank; } } @@ -5781,7 +5917,7 @@ static void zunionInterGenericCommand(redisClient *c, robj *dstkey, int op) { /* expect zsetnum input keys to be given */ zsetnum = atoi(c->argv[2]->ptr); if (zsetnum < 1) { - addReplySds(c,sdsnew("-ERR at least 1 input key is needed for ZUNION/ZINTER\r\n")); + addReplySds(c,sdsnew("-ERR at least 1 input key is needed for ZUNIONSTORE/ZINTERSTORE\r\n")); return; } @@ -5931,11 +6067,11 @@ static void zunionInterGenericCommand(redisClient *c, robj *dstkey, int op) { zfree(src); } -static void zunionCommand(redisClient *c) { +static void zunionstoreCommand(redisClient *c) { zunionInterGenericCommand(c,c->argv[1], REDIS_OP_UNION); } -static void zinterCommand(redisClient *c) { +static void zinterstoreCommand(redisClient *c) { zunionInterGenericCommand(c,c->argv[1], REDIS_OP_INTER); } @@ -8007,65 +8143,28 @@ static void freeMemoryIfNeeded(void) { /* ============================== Append Only file ========================== */ -static void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int argc) { - sds buf = sdsempty(); - int j; - ssize_t nwritten; +/* Write the append only file buffer on disk. + * + * Since we are required to write the AOF before replying to the client, + * and the only way the client socket can get a write is entering when the + * the event loop, we accumulate all the AOF writes in a memory + * buffer and write it on disk using this function just before entering + * the event loop again. */ +static void flushAppendOnlyFile(void) { time_t now; - robj *tmpargv[3]; - - /* The DB this command was targetting is not the same as the last command - * we appendend. To issue a SELECT command is needed. */ - if (dictid != server.appendseldb) { - char seldb[64]; - - snprintf(seldb,sizeof(seldb),"%d",dictid); - buf = sdscatprintf(buf,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n", - (unsigned long)strlen(seldb),seldb); - server.appendseldb = dictid; - } - - /* "Fix" the argv vector if the command is EXPIRE. We want to translate - * EXPIREs into EXPIREATs calls */ - if (cmd->proc == expireCommand) { - long when; - - tmpargv[0] = createStringObject("EXPIREAT",8); - tmpargv[1] = argv[1]; - incrRefCount(argv[1]); - when = time(NULL)+strtol(argv[2]->ptr,NULL,10); - tmpargv[2] = createObject(REDIS_STRING, - sdscatprintf(sdsempty(),"%ld",when)); - argv = tmpargv; - } - - /* Append the actual command */ - buf = sdscatprintf(buf,"*%d\r\n",argc); - for (j = 0; j < argc; j++) { - robj *o = argv[j]; - - o = getDecodedObject(o); - buf = sdscatprintf(buf,"$%lu\r\n",(unsigned long)sdslen(o->ptr)); - buf = sdscatlen(buf,o->ptr,sdslen(o->ptr)); - buf = sdscatlen(buf,"\r\n",2); - decrRefCount(o); - } + ssize_t nwritten; - /* Free the objects from the modified argv for EXPIREAT */ - if (cmd->proc == expireCommand) { - for (j = 0; j < 3; j++) - decrRefCount(argv[j]); - } + if (sdslen(server.aofbuf) == 0) return; /* We want to perform a single write. This should be guaranteed atomic * at least if the filesystem we are writing is a real physical one. * While this will save us against the server being killed I don't think * there is much to do about the whole server stopping for power problems * or alike */ - nwritten = write(server.appendfd,buf,sdslen(buf)); - if (nwritten != (signed)sdslen(buf)) { + nwritten = write(server.appendfd,server.aofbuf,sdslen(server.aofbuf)); + if (nwritten != (signed)sdslen(server.aofbuf)) { /* Ooops, we are in troubles. The best thing to do for now is - * to simply exit instead to give the illusion that everything is + * aborting instead of giving the illusion that everything is * working as expected. */ if (nwritten == -1) { redisLog(REDIS_WARNING,"Exiting on error writing to the append-only file: %s",strerror(errno)); @@ -8074,14 +8173,10 @@ static void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv } exit(1); } - /* If a background append only file rewriting is in progress we want to - * accumulate the differences between the child DB and the current one - * in a buffer, so that when the child process will do its work we - * can append the differences to the new append only file. */ - if (server.bgrewritechildpid != -1) - server.bgrewritebuf = sdscatlen(server.bgrewritebuf,buf,sdslen(buf)); + sdsfree(server.aofbuf); + server.aofbuf = sdsempty(); - sdsfree(buf); + /* Fsync if needed */ now = time(NULL); if (server.appendfsync == APPENDFSYNC_ALWAYS || (server.appendfsync == APPENDFSYNC_EVERYSEC && @@ -8094,6 +8189,84 @@ static void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv } } +static sds catAppendOnlyGenericCommand(sds buf, int argc, robj **argv) { + int j; + buf = sdscatprintf(buf,"*%d\r\n",argc); + for (j = 0; j < argc; j++) { + robj *o = getDecodedObject(argv[j]); + buf = sdscatprintf(buf,"$%lu\r\n",(unsigned long)sdslen(o->ptr)); + buf = sdscatlen(buf,o->ptr,sdslen(o->ptr)); + buf = sdscatlen(buf,"\r\n",2); + decrRefCount(o); + } + return buf; +} + +static sds catAppendOnlyExpireAtCommand(sds buf, robj *key, robj *seconds) { + int argc = 3; + long when; + robj *argv[3]; + + /* Make sure we can use strtol */ + seconds = getDecodedObject(seconds); + when = time(NULL)+strtol(seconds->ptr,NULL,10); + decrRefCount(seconds); + + argv[0] = createStringObject("EXPIREAT",8); + argv[1] = key; + argv[2] = createObject(REDIS_STRING, + sdscatprintf(sdsempty(),"%ld",when)); + buf = catAppendOnlyGenericCommand(buf, argc, argv); + decrRefCount(argv[0]); + decrRefCount(argv[2]); + return buf; +} + +static void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int argc) { + sds buf = sdsempty(); + robj *tmpargv[3]; + + /* The DB this command was targetting is not the same as the last command + * we appendend. To issue a SELECT command is needed. */ + if (dictid != server.appendseldb) { + char seldb[64]; + + snprintf(seldb,sizeof(seldb),"%d",dictid); + buf = sdscatprintf(buf,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n", + (unsigned long)strlen(seldb),seldb); + server.appendseldb = dictid; + } + + if (cmd->proc == expireCommand) { + /* Translate EXPIRE into EXPIREAT */ + buf = catAppendOnlyExpireAtCommand(buf,argv[1],argv[2]); + } else if (cmd->proc == setexCommand) { + /* Translate SETEX to SET and EXPIREAT */ + tmpargv[0] = createStringObject("SET",3); + tmpargv[1] = argv[1]; + tmpargv[2] = argv[3]; + buf = catAppendOnlyGenericCommand(buf,3,tmpargv); + decrRefCount(tmpargv[0]); + buf = catAppendOnlyExpireAtCommand(buf,argv[1],argv[2]); + } else { + buf = catAppendOnlyGenericCommand(buf,argc,argv); + } + + /* Append to the AOF buffer. This will be flushed on disk just before + * of re-entering the event loop, so before the client will get a + * positive reply about the operation performed. */ + server.aofbuf = sdscatlen(server.aofbuf,buf,sdslen(buf)); + + /* If a background append only file rewriting is in progress we want to + * accumulate the differences between the child DB and the current one + * in a buffer, so that when the child process will do its work we + * can append the differences to the new append only file. */ + if (server.bgrewritechildpid != -1) + server.bgrewritebuf = sdscatlen(server.bgrewritebuf,buf,sdslen(buf)); + + sdsfree(buf); +} + /* In Redis commands are always executed in the context of a client, so in * order to load the append only file we need to create a fake client. */ static struct redisClient *createFakeClient(void) { @@ -8111,12 +8284,14 @@ static struct redisClient *createFakeClient(void) { c->reply = listCreate(); listSetFreeMethod(c->reply,decrRefCount); listSetDupMethod(c->reply,dupClientReplyValue); + initClientMultiState(c); return c; } static void freeFakeClient(struct redisClient *c) { sdsfree(c->querybuf); listRelease(c->reply); + freeClientMultiState(c); zfree(c); } @@ -8128,6 +8303,7 @@ int loadAppendOnlyFile(char *filename) { FILE *fp = fopen(filename,"r"); struct redis_stat sb; unsigned long long loadedkeys = 0; + int appendonly = server.appendonly; if (redis_fstat(fileno(fp),&sb) != -1 && sb.st_size == 0) return REDIS_ERR; @@ -8137,6 +8313,10 @@ int loadAppendOnlyFile(char *filename) { exit(1); } + /* Temporarily disable AOF, to prevent EXEC from feeding a MULTI + * to the same file we're about to read. */ + server.appendonly = 0; + fakeClient = createFakeClient(); while(1) { int argc, j; @@ -8192,8 +8372,14 @@ int loadAppendOnlyFile(char *filename) { } } } + + /* This point can only be reached when EOF is reached without errors. + * If the client is in the middle of a MULTI/EXEC, log error and quit. */ + if (fakeClient->flags & REDIS_MULTI) goto readerr; + fclose(fp); freeFakeClient(fakeClient); + server.appendonly = appendonly; return REDIS_OK; readerr: @@ -8543,42 +8729,38 @@ static void aofRemoveTempFile(pid_t childpid) { /* =================== Virtual Memory - Blocking Side ====================== */ -/* substitute the first occurrence of '%p' with the process pid in the - * swap file name. */ -static void expandVmSwapFilename(void) { - char *p = strstr(server.vm_swap_file,"%p"); - sds new; - - if (!p) return; - new = sdsempty(); - *p = '\0'; - new = sdscat(new,server.vm_swap_file); - new = sdscatprintf(new,"%ld",(long) getpid()); - new = sdscat(new,p+2); - zfree(server.vm_swap_file); - server.vm_swap_file = new; -} - static void vmInit(void) { off_t totsize; int pipefds[2]; size_t stacksize; + struct flock fl; if (server.vm_max_threads != 0) zmalloc_enable_thread_safeness(); /* we need thread safe zmalloc() */ - expandVmSwapFilename(); redisLog(REDIS_NOTICE,"Using '%s' as swap file",server.vm_swap_file); + /* Try to open the old swap file, otherwise create it */ if ((server.vm_fp = fopen(server.vm_swap_file,"r+b")) == NULL) { server.vm_fp = fopen(server.vm_swap_file,"w+b"); } if (server.vm_fp == NULL) { redisLog(REDIS_WARNING, - "Impossible to open the swap file: %s. Exiting.", + "Can't open the swap file: %s. Exiting.", strerror(errno)); exit(1); } server.vm_fd = fileno(server.vm_fp); + /* Lock the swap file for writing, this is useful in order to avoid + * another instance to use the same swap file for a config error. */ + fl.l_type = F_WRLCK; + fl.l_whence = SEEK_SET; + fl.l_start = fl.l_len = 0; + if (fcntl(server.vm_fd,F_SETLK,&fl) == -1) { + redisLog(REDIS_WARNING, + "Can't lock the swap file at '%s': %s. Make sure it is not used by another Redis instance.", server.vm_swap_file, strerror(errno)); + exit(1); + } + /* Initialize */ server.vm_next_page = 0; server.vm_near_pages = 0; server.vm_stats_used_pages = 0; @@ -9504,12 +9686,56 @@ static int waitForSwappedKey(redisClient *c, robj *key) { return 1; } -/* Preload keys needed for the ZUNION and ZINTER commands. */ -static void zunionInterBlockClientOnSwappedKeys(redisClient *c) { +/* Preload keys for any command with first, last and step values for + * the command keys prototype, as defined in the command table. */ +static void waitForMultipleSwappedKeys(redisClient *c, struct redisCommand *cmd, int argc, robj **argv) { + int j, last; + if (cmd->vm_firstkey == 0) return; + last = cmd->vm_lastkey; + if (last < 0) last = argc+last; + for (j = cmd->vm_firstkey; j <= last; j += cmd->vm_keystep) { + redisAssert(j < argc); + waitForSwappedKey(c,argv[j]); + } +} + +/* Preload keys needed for the ZUNIONSTORE and ZINTERSTORE commands. + * Note that the number of keys to preload is user-defined, so we need to + * apply a sanity check against argc. */ +static void zunionInterBlockClientOnSwappedKeys(redisClient *c, struct redisCommand *cmd, int argc, robj **argv) { int i, num; - num = atoi(c->argv[2]->ptr); + REDIS_NOTUSED(cmd); + + num = atoi(argv[2]->ptr); + if (num > (argc-3)) return; for (i = 0; i < num; i++) { - waitForSwappedKey(c,c->argv[3+i]); + waitForSwappedKey(c,argv[3+i]); + } +} + +/* Preload keys needed to execute the entire MULTI/EXEC block. + * + * This function is called by blockClientOnSwappedKeys when EXEC is issued, + * and will block the client when any command requires a swapped out value. */ +static void execBlockClientOnSwappedKeys(redisClient *c, struct redisCommand *cmd, int argc, robj **argv) { + int i, margc; + struct redisCommand *mcmd; + robj **margv; + REDIS_NOTUSED(cmd); + REDIS_NOTUSED(argc); + REDIS_NOTUSED(argv); + + if (!(c->flags & REDIS_MULTI)) return; + for (i = 0; i < c->mstate.count; i++) { + mcmd = c->mstate.commands[i].cmd; + margc = c->mstate.commands[i].argc; + margv = c->mstate.commands[i].argv; + + if (mcmd->vm_preload_proc != NULL) { + mcmd->vm_preload_proc(c,mcmd,margc,margv); + } else { + waitForMultipleSwappedKeys(c,mcmd,margc,margv); + } } } @@ -9523,17 +9749,11 @@ static void zunionInterBlockClientOnSwappedKeys(redisClient *c) { * * Return 1 if the client is marked as blocked, 0 if the client can * continue as the keys it is going to access appear to be in memory. */ -static int blockClientOnSwappedKeys(struct redisCommand *cmd, redisClient *c) { - int j, last; - +static int blockClientOnSwappedKeys(redisClient *c, struct redisCommand *cmd) { if (cmd->vm_preload_proc != NULL) { - cmd->vm_preload_proc(c); + cmd->vm_preload_proc(c,cmd,c->argc,c->argv); } else { - if (cmd->vm_firstkey == 0) return 0; - last = cmd->vm_lastkey; - if (last < 0) last = c->argc+last; - for (j = cmd->vm_firstkey; j <= last; j += cmd->vm_keystep) - waitForSwappedKey(c,c->argv[j]); + waitForMultipleSwappedKeys(c,cmd,c->argc,c->argv); } /* If the client was blocked for at least one key, mark it as blocked. */ @@ -9560,7 +9780,7 @@ static int dontWaitForSwappedKey(redisClient *c, robj *key) { /* Remove the key from the list of keys this client is waiting for. */ listRewind(c->io_keys,&li); while ((ln = listNext(&li)) != NULL) { - if (compareStringObjects(ln->value,key) == 0) { + if (equalStringObjects(ln->value,key)) { listDelNode(c->io_keys,ln); break; } @@ -9620,6 +9840,50 @@ static void configSetCommand(redisClient *c) { server.masterauth = zstrdup(o->ptr); } else if (!strcasecmp(c->argv[2]->ptr,"maxmemory")) { server.maxmemory = strtoll(o->ptr, NULL, 10); + } else if (!strcasecmp(c->argv[2]->ptr,"appendfsync")) { + if (!strcasecmp(o->ptr,"no")) { + server.appendfsync = APPENDFSYNC_NO; + } else if (!strcasecmp(o->ptr,"everysec")) { + server.appendfsync = APPENDFSYNC_EVERYSEC; + } else if (!strcasecmp(o->ptr,"always")) { + server.appendfsync = APPENDFSYNC_ALWAYS; + } else { + goto badfmt; + } + } else if (!strcasecmp(c->argv[2]->ptr,"save")) { + int vlen, j; + sds *v = sdssplitlen(o->ptr,sdslen(o->ptr)," ",1,&vlen); + + /* Perform sanity check before setting the new config: + * - Even number of args + * - Seconds >= 1, changes >= 0 */ + if (vlen & 1) { + sdsfreesplitres(v,vlen); + goto badfmt; + } + for (j = 0; j < vlen; j++) { + char *eptr; + long val; + + val = strtoll(v[j], &eptr, 10); + if (eptr[0] != '\0' || + ((j & 1) == 0 && val < 1) || + ((j & 1) == 1 && val < 0)) { + sdsfreesplitres(v,vlen); + goto badfmt; + } + } + /* Finally set the new config */ + resetServerSaveParams(); + for (j = 0; j < vlen; j += 2) { + time_t seconds; + int changes; + + seconds = strtoll(v[j],NULL,10); + changes = strtoll(v[j+1],NULL,10); + appendServerSaveParams(seconds, changes); + } + sdsfreesplitres(v,vlen); } else { addReplySds(c,sdscatprintf(sdsempty(), "-ERR not supported CONFIG parameter %s\r\n", @@ -9629,6 +9893,14 @@ static void configSetCommand(redisClient *c) { } decrRefCount(o); addReply(c,shared.ok); + return; + +badfmt: /* Bad format errors */ + addReplySds(c,sdscatprintf(sdsempty(), + "-ERR invalid argument '%s' for CONFIG SET '%s'\r\n", + (char*)o->ptr, + (char*)c->argv[2]->ptr)); + decrRefCount(o); } static void configGetCommand(redisClient *c) { @@ -9663,6 +9935,35 @@ static void configGetCommand(redisClient *c) { addReplyBulkCString(c,buf); matches++; } + if (stringmatch(pattern,"appendfsync",0)) { + char *policy; + + switch(server.appendfsync) { + case APPENDFSYNC_NO: policy = "no"; break; + case APPENDFSYNC_EVERYSEC: policy = "everysec"; break; + case APPENDFSYNC_ALWAYS: policy = "always"; break; + default: policy = "unknown"; break; /* too harmless to panic */ + } + addReplyBulkCString(c,"appendfsync"); + addReplyBulkCString(c,policy); + matches++; + } + if (stringmatch(pattern,"save",0)) { + sds buf = sdsempty(); + int j; + + for (j = 0; j < server.saveparamslen; j++) { + buf = sdscatprintf(buf,"%ld %d", + server.saveparams[j].seconds, + server.saveparams[j].changes); + if (j != server.saveparamslen-1) + buf = sdscatlen(buf," ",1); + } + addReplyBulkCString(c,"save"); + addReplyBulkCString(c,buf); + sdsfree(buf); + matches++; + } decrRefCount(o); lenobj->ptr = sdscatprintf(sdsempty(),"*%d\r\n",matches*2); } @@ -9706,7 +10007,7 @@ static int listMatchPubsubPattern(void *a, void *b) { pubsubPattern *pa = a, *pb = b; return (pa->client == pb->client) && - (compareStringObjects(pa->pattern,pb->pattern) == 0); + (equalStringObjects(pa->pattern,pb->pattern)); } /* Subscribe a client to a channel. Returns 1 if the operation succeeded, or @@ -9955,6 +10256,180 @@ static void publishCommand(redisClient *c) { /* ================================= Debugging ============================== */ +/* Compute the sha1 of string at 's' with 'len' bytes long. + * The SHA1 is then xored againt the string pointed by digest. + * Since xor is commutative, this operation is used in order to + * "add" digests relative to unordered elements. + * + * So digest(a,b,c,d) will be the same of digest(b,a,c,d) */ +static void xorDigest(unsigned char *digest, void *ptr, size_t len) { + SHA1_CTX ctx; + unsigned char hash[20], *s = ptr; + int j; + + SHA1Init(&ctx); + SHA1Update(&ctx,s,len); + SHA1Final(hash,&ctx); + + for (j = 0; j < 20; j++) + digest[j] ^= hash[j]; +} + +static void xorObjectDigest(unsigned char *digest, robj *o) { + o = getDecodedObject(o); + xorDigest(digest,o->ptr,sdslen(o->ptr)); + decrRefCount(o); +} + +/* This function instead of just computing the SHA1 and xoring it + * against diget, also perform the digest of "digest" itself and + * replace the old value with the new one. + * + * So the final digest will be: + * + * digest = SHA1(digest xor SHA1(data)) + * + * This function is used every time we want to preserve the order so + * that digest(a,b,c,d) will be different than digest(b,c,d,a) + * + * Also note that mixdigest("foo") followed by mixdigest("bar") + * will lead to a different digest compared to "fo", "obar". + */ +static void mixDigest(unsigned char *digest, void *ptr, size_t len) { + SHA1_CTX ctx; + char *s = ptr; + + xorDigest(digest,s,len); + SHA1Init(&ctx); + SHA1Update(&ctx,digest,20); + SHA1Final(digest,&ctx); +} + +static void mixObjectDigest(unsigned char *digest, robj *o) { + o = getDecodedObject(o); + mixDigest(digest,o->ptr,sdslen(o->ptr)); + decrRefCount(o); +} + +/* Compute the dataset digest. Since keys, sets elements, hashes elements + * are not ordered, we use a trick: every aggregate digest is the xor + * of the digests of their elements. This way the order will not change + * the result. For list instead we use a feedback entering the output digest + * as input in order to ensure that a different ordered list will result in + * a different digest. */ +static void computeDatasetDigest(unsigned char *final) { + unsigned char digest[20]; + char buf[128]; + dictIterator *di = NULL; + dictEntry *de; + int j; + uint32_t aux; + + memset(final,0,20); /* Start with a clean result */ + + for (j = 0; j < server.dbnum; j++) { + redisDb *db = server.db+j; + + if (dictSize(db->dict) == 0) continue; + di = dictGetIterator(db->dict); + + /* hash the DB id, so the same dataset moved in a different + * DB will lead to a different digest */ + aux = htonl(j); + mixDigest(final,&aux,sizeof(aux)); + + /* Iterate this DB writing every entry */ + while((de = dictNext(di)) != NULL) { + robj *key, *o; + time_t expiretime; + + memset(digest,0,20); /* This key-val digest */ + key = dictGetEntryKey(de); + mixObjectDigest(digest,key); + if (!server.vm_enabled || key->storage == REDIS_VM_MEMORY || + key->storage == REDIS_VM_SWAPPING) { + o = dictGetEntryVal(de); + incrRefCount(o); + } else { + o = vmPreviewObject(key); + } + aux = htonl(o->type); + mixDigest(digest,&aux,sizeof(aux)); + expiretime = getExpire(db,key); + + /* Save the key and associated value */ + if (o->type == REDIS_STRING) { + mixObjectDigest(digest,o); + } else if (o->type == REDIS_LIST) { + list *list = o->ptr; + listNode *ln; + listIter li; + + listRewind(list,&li); + while((ln = listNext(&li))) { + robj *eleobj = listNodeValue(ln); + + mixObjectDigest(digest,eleobj); + } + } else if (o->type == REDIS_SET) { + dict *set = o->ptr; + dictIterator *di = dictGetIterator(set); + dictEntry *de; + + while((de = dictNext(di)) != NULL) { + robj *eleobj = dictGetEntryKey(de); + + xorObjectDigest(digest,eleobj); + } + dictReleaseIterator(di); + } else if (o->type == REDIS_ZSET) { + zset *zs = o->ptr; + dictIterator *di = dictGetIterator(zs->dict); + dictEntry *de; + + while((de = dictNext(di)) != NULL) { + robj *eleobj = dictGetEntryKey(de); + double *score = dictGetEntryVal(de); + unsigned char eledigest[20]; + + snprintf(buf,sizeof(buf),"%.17g",*score); + memset(eledigest,0,20); + mixObjectDigest(eledigest,eleobj); + mixDigest(eledigest,buf,strlen(buf)); + xorDigest(digest,eledigest,20); + } + dictReleaseIterator(di); + } else if (o->type == REDIS_HASH) { + hashIterator *hi; + robj *obj; + + hi = hashInitIterator(o); + while (hashNext(hi) != REDIS_ERR) { + unsigned char eledigest[20]; + + memset(eledigest,0,20); + obj = hashCurrent(hi,REDIS_HASH_KEY); + mixObjectDigest(eledigest,obj); + decrRefCount(obj); + obj = hashCurrent(hi,REDIS_HASH_VALUE); + mixObjectDigest(eledigest,obj); + decrRefCount(obj); + xorDigest(digest,eledigest,20); + } + hashReleaseIterator(hi); + } else { + redisPanic("Unknown object type"); + } + decrRefCount(o); + /* If the key has an expire, add it to the mix */ + if (expiretime != -1) xorDigest(digest,"!!expire!!",10); + /* We can finally xor the key-val digest to the final digest */ + xorDigest(final,digest,20); + } + dictReleaseIterator(di); + } +} + static void debugCommand(redisClient *c) { if (!strcasecmp(c->argv[1]->ptr,"segfault")) { *((char*)-1) = 'x'; @@ -10043,6 +10518,36 @@ static void debugCommand(redisClient *c) { } else { addReply(c,shared.err); } + } else if (!strcasecmp(c->argv[1]->ptr,"populate") && c->argc == 3) { + long keys, j; + robj *key, *val; + char buf[128]; + + if (getLongFromObjectOrReply(c, c->argv[2], &keys, NULL) != REDIS_OK) + return; + for (j = 0; j < keys; j++) { + snprintf(buf,sizeof(buf),"key:%lu",j); + key = createStringObject(buf,strlen(buf)); + if (lookupKeyRead(c->db,key) != NULL) { + decrRefCount(key); + continue; + } + snprintf(buf,sizeof(buf),"value:%lu",j); + val = createStringObject(buf,strlen(buf)); + dictAdd(c->db->dict,key,val); + } + addReply(c,shared.ok); + } else if (!strcasecmp(c->argv[1]->ptr,"digest") && c->argc == 2) { + unsigned char digest[20]; + sds d = sdsnew("+"); + int j; + + computeDatasetDigest(digest); + for (j = 0; j < 20; j++) + d = sdscatprintf(d, "%02x",digest[j]); + + d = sdscatlen(d,"\r\n",2); + addReplySds(c,d); } else { addReplySds(c,sdsnew( "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT |SWAPIN |SWAPOUT |RELOAD]\r\n"));