From: antirez Date: Thu, 28 Oct 2010 20:59:47 +0000 (+0200) Subject: merge conflict resolved X-Git-Url: https://git.saurik.com/redis.git/commitdiff_plain/21dbc6499a538af07f52a41742cf1683f3fc9c23?hp=4794d88f15ee4e4175e712b411bb7cebee7aff09 merge conflict resolved --- diff --git a/README b/README index 1f0a1fe6..9ea1456f 100644 --- a/README +++ b/README @@ -27,6 +27,23 @@ After you build Redis is a good idea to test it, using: % make test +Buliding using tcmalloc +----------------------- + +tcmalloc is a fast and space efficient implementation (for little objects) +of malloc(). Compiling Redis with it can improve performances and memeory +usage. You can read more about it here: + +http://goog-perftools.sourceforge.net/doc/tcmalloc.html + +In order to compile Redis with tcmalloc support install tcmalloc on your system +and then use: + + % make USE_TCMALLOC=yes + +Note that you can pass any other target to make, as long as you append +USE_TCMALLOC=yes at the end. + Running Redis ------------- diff --git a/redis.conf b/redis.conf index b087417a..8d69f929 100644 --- a/redis.conf +++ b/redis.conf @@ -148,6 +148,25 @@ dir ./ # # maxmemory +# MAXMEMORY POLICY: how Redis will select what to remove when maxmemory +# is reached? You can select among five behavior: +# +# volatile-lru -> remove the key with an expire set using an LRU algorithm +# allkeys-lru -> remove any key accordingly to the LRU algorithm +# volatile-random -> remove a random key with an expire set +# allkeys->random -> remove a random key, any key +# volatile-ttl -> remove the key with the nearest expire time (minor TTL) +# +# maxmemory-policy volatile-lru + +# LRU and minimal TTL algorithms are not precise algorithms but approximated +# algorithms (in order to save memory), so you can select as well the sample +# size to check. For instance for default Redis will check three keys and +# pick the one that was used less recently, you can change the sample size +# using the following configuration directive. +# +# maxmemory-samples 3 + ############################## APPEND ONLY MODE ############################### # By default Redis asynchronously dumps the dataset on disk. If you can live diff --git a/src/Makefile b/src/Makefile index 3add2925..ba6ecf4d 100644 --- a/src/Makefile +++ b/src/Makefile @@ -12,6 +12,11 @@ else CFLAGS?= -std=c99 -pedantic $(OPTIMIZATION) -Wall -W $(ARCH) $(PROF) CCLINK?= -lm -pthread endif + +ifeq ($(USE_TCMALLOC),yes) + CCLINK+= -ltcmalloc + CFLAGS+= -DUSE_TCMALLOC +endif CCOPT= $(CFLAGS) $(CCLINK) $(ARCH) $(PROF) DEBUG?= -g -rdynamic -ggdb @@ -19,7 +24,7 @@ PREFIX= /usr/local INSTALL_BIN= $(PREFIX)/bin INSTALL= cp -p -OBJ = adlist.o ae.o anet.o dict.o redis.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o vm.o pubsub.o multi.o debug.o sort.o intset.o +OBJ = adlist.o ae.o anet.o dict.o redis.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o vm.o pubsub.o multi.o debug.o sort.o intset.o syncio.o BENCHOBJ = ae.o anet.o redis-benchmark.o sds.o adlist.o zmalloc.o CLIOBJ = anet.o sds.o adlist.o redis-cli.o zmalloc.o linenoise.o CHECKDUMPOBJ = redis-check-dump.o lzf_c.o lzf_d.o @@ -33,7 +38,6 @@ CHECKAOFPRGNAME = redis-check-aof all: redis-server redis-benchmark redis-cli redis-check-dump redis-check-aof - # Deps (use make dep to generate this) adlist.o: adlist.c adlist.h zmalloc.h ae.o: ae.c ae.h zmalloc.h config.h ae_kqueue.c @@ -43,6 +47,7 @@ ae_select.o: ae_select.c anet.o: anet.c fmacros.h anet.h aof.o: aof.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h +chprgname.o: chprgname.c config.o: config.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h db.o: db.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ @@ -80,6 +85,7 @@ sds.o: sds.c sds.h zmalloc.h sha1.o: sha1.c sha1.h sort.o: sort.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h pqsort.h +syncio.o: syncio.c t_hash.o: t_hash.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h t_list.o: t_list.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ diff --git a/src/aof.c b/src/aof.c index 36d97e70..4dbce394 100644 --- a/src/aof.c +++ b/src/aof.c @@ -311,55 +311,6 @@ fmterr: exit(1); } -/* Write binary-safe string into a file in the bulkformat - * $\r\n\r\n */ -int fwriteBulkString(FILE *fp, char *s, unsigned long len) { - char cbuf[128]; - int clen; - cbuf[0] = '$'; - clen = 1+ll2string(cbuf+1,sizeof(cbuf)-1,len); - cbuf[clen++] = '\r'; - cbuf[clen++] = '\n'; - if (fwrite(cbuf,clen,1,fp) == 0) return 0; - if (len > 0 && fwrite(s,len,1,fp) == 0) return 0; - if (fwrite("\r\n",2,1,fp) == 0) return 0; - return 1; -} - -/* Write a double value in bulk format $\r\n\r\n */ -int fwriteBulkDouble(FILE *fp, double d) { - char buf[128], dbuf[128]; - - snprintf(dbuf,sizeof(dbuf),"%.17g\r\n",d); - snprintf(buf,sizeof(buf),"$%lu\r\n",(unsigned long)strlen(dbuf)-2); - if (fwrite(buf,strlen(buf),1,fp) == 0) return 0; - if (fwrite(dbuf,strlen(dbuf),1,fp) == 0) return 0; - return 1; -} - -/* Write a long value in bulk format $\r\n\r\n */ -int fwriteBulkLongLong(FILE *fp, long long l) { - char bbuf[128], lbuf[128]; - unsigned int blen, llen; - llen = ll2string(lbuf,32,l); - blen = snprintf(bbuf,sizeof(bbuf),"$%u\r\n%s\r\n",llen,lbuf); - if (fwrite(bbuf,blen,1,fp) == 0) return 0; - return 1; -} - -/* Delegate writing an object to writing a bulk string or bulk long long. */ -int fwriteBulkObject(FILE *fp, robj *obj) { - /* Avoid using getDecodedObject to help copy-on-write (we are often - * in a child process when this function is called). */ - if (obj->encoding == REDIS_ENCODING_INT) { - return fwriteBulkLongLong(fp,(long)obj->ptr); - } else if (obj->encoding == REDIS_ENCODING_RAW) { - return fwriteBulkString(fp,obj->ptr,sdslen(obj->ptr)); - } else { - redisPanic("Unknown string encoding"); - } -} - /* Write a sequence of commands able to fully rebuild the dataset into * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */ int rewriteAppendOnlyFile(char *filename) { diff --git a/src/config.c b/src/config.c index c979162b..db58a236 100644 --- a/src/config.c +++ b/src/config.c @@ -123,6 +123,27 @@ void loadServerConfig(char *filename) { server.maxclients = atoi(argv[1]); } else if (!strcasecmp(argv[0],"maxmemory") && argc == 2) { server.maxmemory = memtoll(argv[1],NULL); + } else if (!strcasecmp(argv[0],"maxmemory-policy") && argc == 2) { + if (!strcasecmp(argv[1],"volatile-lru")) { + server.maxmemory_policy = REDIS_MAXMEMORY_VOLATILE_LRU; + } else if (!strcasecmp(argv[1],"volatile-random")) { + server.maxmemory_policy = REDIS_MAXMEMORY_VOLATILE_RANDOM; + } else if (!strcasecmp(argv[1],"volatile-ttl")) { + server.maxmemory_policy = REDIS_MAXMEMORY_VOLATILE_TTL; + } else if (!strcasecmp(argv[1],"allkeys-lru")) { + server.maxmemory_policy = REDIS_MAXMEMORY_ALLKEYS_LRU; + } else if (!strcasecmp(argv[1],"allkeys-random")) { + server.maxmemory_policy = REDIS_MAXMEMORY_ALLKEYS_RANDOM; + } else { + err = "Invalid maxmemory policy"; + goto loaderr; + } + } else if (!strcasecmp(argv[0],"maxmemory-samples") && argc == 2) { + server.maxmemory_samples = atoi(argv[1]); + if (server.maxmemory_samples <= 0) { + err = "maxmemory-samples must be 1 or greater"; + goto loaderr; + } } else if (!strcasecmp(argv[0],"slaveof") && argc == 3) { server.masterhost = sdsnew(argv[1]); server.masterport = atoi(argv[2]); @@ -245,6 +266,24 @@ void configSetCommand(redisClient *c) { ll < 0) goto badfmt; server.maxmemory = ll; if (server.maxmemory) freeMemoryIfNeeded(); + } else if (!strcasecmp(c->argv[2]->ptr,"maxmemory-policy")) { + if (!strcasecmp(o->ptr,"volatile-lru")) { + server.maxmemory_policy = REDIS_MAXMEMORY_VOLATILE_LRU; + } else if (!strcasecmp(o->ptr,"volatile-random")) { + server.maxmemory_policy = REDIS_MAXMEMORY_VOLATILE_RANDOM; + } else if (!strcasecmp(o->ptr,"volatile-ttl")) { + server.maxmemory_policy = REDIS_MAXMEMORY_VOLATILE_TTL; + } else if (!strcasecmp(o->ptr,"allkeys-lru")) { + server.maxmemory_policy = REDIS_MAXMEMORY_ALLKEYS_LRU; + } else if (!strcasecmp(o->ptr,"allkeys-random")) { + server.maxmemory_policy = REDIS_MAXMEMORY_ALLKEYS_RANDOM; + } else { + goto badfmt; + } + } else if (!strcasecmp(c->argv[2]->ptr,"maxmemory-samples")) { + if (getLongLongFromObject(o,&ll) == REDIS_ERR || + ll <= 0) goto badfmt; + server.maxmemory_samples = ll; } else if (!strcasecmp(c->argv[2]->ptr,"timeout")) { if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0 || ll > LONG_MAX) goto badfmt; @@ -332,6 +371,7 @@ void configGetCommand(redisClient *c) { robj *o = c->argv[2]; void *replylen = addDeferredMultiBulkLength(c); char *pattern = o->ptr; + char buf[128]; int matches = 0; redisAssert(o->encoding == REDIS_ENCODING_RAW); @@ -351,17 +391,34 @@ void configGetCommand(redisClient *c) { matches++; } if (stringmatch(pattern,"maxmemory",0)) { - char buf[128]; - - ll2string(buf,128,server.maxmemory); + ll2string(buf,sizeof(buf),server.maxmemory); addReplyBulkCString(c,"maxmemory"); addReplyBulkCString(c,buf); matches++; } - if (stringmatch(pattern,"timeout",0)) { - char buf[128]; + if (stringmatch(pattern,"maxmemory-policy",0)) { + char *s; - ll2string(buf,128,server.maxidletime); + switch(server.maxmemory_policy) { + case REDIS_MAXMEMORY_VOLATILE_LRU: s = "volatile-lru"; break; + case REDIS_MAXMEMORY_VOLATILE_TTL: s = "volatile-ttl"; break; + case REDIS_MAXMEMORY_VOLATILE_RANDOM: s = "volatile-random"; break; + case REDIS_MAXMEMORY_ALLKEYS_LRU: s = "allkeys-lru"; break; + case REDIS_MAXMEMORY_ALLKEYS_RANDOM: s = "allkeys-random"; break; + default: s = "unknown"; break; /* too harmless to panic */ + } + addReplyBulkCString(c,"maxmemory-policy"); + addReplyBulkCString(c,s); + matches++; + } + if (stringmatch(pattern,"maxmemory-samples",0)) { + ll2string(buf,sizeof(buf),server.maxmemory_samples); + addReplyBulkCString(c,"maxmemory-samples"); + addReplyBulkCString(c,buf); + matches++; + } + if (stringmatch(pattern,"timeout",0)) { + ll2string(buf,sizeof(buf),server.maxidletime); addReplyBulkCString(c,"timeout"); addReplyBulkCString(c,buf); matches++; @@ -417,10 +474,11 @@ void configCommand(redisClient *c) { configGetCommand(c); } else if (!strcasecmp(c->argv[1]->ptr,"resetstat")) { if (c->argc != 2) goto badarity; + server.stat_keyspace_hits = 0; + server.stat_keyspace_misses = 0; server.stat_numcommands = 0; server.stat_numconnections = 0; server.stat_expiredkeys = 0; - server.stat_starttime = time(NULL); addReply(c,shared.ok); } else { addReplyError(c, diff --git a/src/config.h b/src/config.h index e2d84818..40f22fa5 100644 --- a/src/config.h +++ b/src/config.h @@ -5,8 +5,17 @@ #include #endif -/* test for malloc_size() */ -#ifdef __APPLE__ +/* Use tcmalloc's malloc_size() when available. + * When tcmalloc is used, native OSX malloc_size() may never be used because + * this expects a different allocation scheme. Therefore, *exclusively* use + * either tcmalloc or OSX's malloc_size()! */ +#if defined(USE_TCMALLOC) +#include +#if TC_VERSION_MAJOR >= 1 && TC_VERSION_MINOR >= 6 +#define HAVE_MALLOC_SIZE 1 +#define redis_malloc_size(p) tc_malloc_size(p) +#endif +#elif defined(__APPLE__) #include #define HAVE_MALLOC_SIZE 1 #define redis_malloc_size(p) malloc_size(p) diff --git a/src/db.c b/src/db.c index 44507847..a39a03bb 100644 --- a/src/db.c +++ b/src/db.c @@ -11,6 +11,9 @@ robj *lookupKey(redisDb *db, robj *key) { if (de) { robj *val = dictGetEntryVal(de); + /* Update the access time for the aging algorithm. */ + val->lru = server.lruclock; + if (server.vm_enabled) { if (val->storage == REDIS_VM_MEMORY || val->storage == REDIS_VM_SWAPPING) @@ -18,8 +21,6 @@ robj *lookupKey(redisDb *db, robj *key) { /* If we were swapping the object out, cancel the operation */ if (val->storage == REDIS_VM_SWAPPING) vmCancelThreadedIOJob(val); - /* Update the access time for the aging algorithm. */ - val->lru = server.lruclock; } else { int notify = (val->storage == REDIS_VM_LOADING); @@ -33,8 +34,10 @@ robj *lookupKey(redisDb *db, robj *key) { if (notify) handleClientsBlockedOnSwappedKey(db,key); } } + server.stat_keyspace_hits++; return val; } else { + server.stat_keyspace_misses++; return NULL; } } @@ -467,7 +470,6 @@ int expireIfNeeded(redisDb *db, robj *key) { /* Delete the key */ server.stat_expiredkeys++; - server.dirty++; propagateExpire(db,key); return dbDelete(db,key); } diff --git a/src/debug.c b/src/debug.c index 2f7ab58f..b364dd16 100644 --- a/src/debug.c +++ b/src/debug.c @@ -213,9 +213,11 @@ void debugCommand(redisClient *c) { strenc = strEncoding(val->encoding); addReplyStatusFormat(c, "Value at:%p refcount:%d " - "encoding:%s serializedlength:%lld", + "encoding:%s serializedlength:%lld " + "lru:%d lru_seconds_idle:%lu", (void*)val, val->refcount, - strenc, (long long) rdbSavedObjectLen(val,NULL)); + strenc, (long long) rdbSavedObjectLen(val,NULL), + val->lru, estimateObjectIdleTime(val)); } else { vmpointer *vp = (vmpointer*) val; addReplyStatusFormat(c, diff --git a/src/object.c b/src/object.c index c1a08245..b1eae963 100644 --- a/src/object.c +++ b/src/object.c @@ -19,14 +19,19 @@ robj *createObject(int type, void *ptr) { o->encoding = REDIS_ENCODING_RAW; o->ptr = ptr; o->refcount = 1; - if (server.vm_enabled) { - /* Note that this code may run in the context of an I/O thread - * and accessing server.lruclock in theory is an error - * (no locks). But in practice this is safe, and even if we read - * garbage Redis will not fail. */ - o->lru = server.lruclock; - o->storage = REDIS_VM_MEMORY; - } + /* Set the LRU to the current lruclock (minutes resolution). + * We do this regardless of the fact VM is active as LRU is also + * used for the maxmemory directive when Redis is used as cache. + * + * Note that this code may run in the context of an I/O thread + * and accessing server.lruclock in theory is an error + * (no locks). But in practice this is safe, and even if we read + * garbage Redis will not fail. */ + o->lru = server.lruclock; + /* The following is only needed if VM is active, but since the conditional + * is probably more costly than initializing the field it's better to + * have every field properly initialized anyway. */ + o->storage = REDIS_VM_MEMORY; return o; } @@ -240,8 +245,12 @@ robj *tryObjectEncoding(robj *o) { * range and if this is the main thread, since when VM is enabled we * have the constraint that I/O thread should only handle non-shared * objects, in order to avoid race conditions (we don't have per-object - * locking). */ - if (value >= 0 && value < REDIS_SHARED_INTEGERS && + * locking). + * + * Note that we also avoid using shared integers when maxmemory is used + * because very object needs to have a private LRU field for the LRU + * algorithm to work well. */ + if (server.maxmemory == 0 && value >= 0 && value < REDIS_SHARED_INTEGERS && pthread_equal(pthread_self(),server.mainthread)) { decrRefCount(o); incrRefCount(shared.integers[value]); @@ -433,3 +442,14 @@ char *strEncoding(int encoding) { default: return "unknown"; } } + +/* Given an object returns the min number of seconds the object was never + * requested, using an approximated LRU algorithm. */ +unsigned long estimateObjectIdleTime(robj *o) { + if (server.lruclock >= o->lru) { + return (server.lruclock - o->lru) * REDIS_LRU_CLOCK_RESOLUTION; + } else { + return ((REDIS_LRU_CLOCK_MAX - o->lru) + server.lruclock) * + REDIS_LRU_CLOCK_RESOLUTION; + } +} diff --git a/src/redis.c b/src/redis.c index 1f8d71a7..4aa19560 100644 --- a/src/redis.c +++ b/src/redis.c @@ -120,6 +120,7 @@ struct redisCommand readonlyCommandTable[] = { {"zinterstore",zinterstoreCommand,-4,REDIS_CMD_DENYOOM,zunionInterBlockClientOnSwappedKeys,0,0,0}, {"zrange",zrangeCommand,-4,0,NULL,1,1,1}, {"zrangebyscore",zrangebyscoreCommand,-4,0,NULL,1,1,1}, + {"zrevrangebyscore",zrevrangebyscoreCommand,-4,0,NULL,1,1,1}, {"zcount",zcountCommand,4,0,NULL,1,1,1}, {"zrevrange",zrevrangeCommand,-4,0,NULL,1,1,1}, {"zcard",zcardCommand,2,0,NULL,1,1,1}, @@ -478,6 +479,10 @@ void activeExpireCycle(void) { } } +void updateLRUClock(void) { + server.lruclock = (time(NULL)/REDIS_LRU_CLOCK_RESOLUTION) & + REDIS_LRU_CLOCK_MAX; +} int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { int j, loops = server.cronloops++; @@ -490,19 +495,19 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { * in objects at every object access, and accuracy is not needed. * To access a global var is faster than calling time(NULL) */ server.unixtime = time(NULL); - /* We have just 21 bits per object for LRU information. - * So we use an (eventually wrapping) LRU clock with minutes resolution. + /* We have just 22 bits per object for LRU information. + * So we use an (eventually wrapping) LRU clock with 10 seconds resolution. + * 2^22 bits with 10 seconds resoluton is more or less 1.5 years. * - * When we need to select what object to swap, we compute the minimum - * time distance between the current lruclock and the object last access - * lruclock info. Even if clocks will wrap on overflow, there is - * the interesting property that we are sure that at least - * ABS(A-B) minutes passed between current time and timestamp B. + * Note that even if this will wrap after 1.5 years it's not a problem, + * everything will still work but just some object will appear younger + * to Redis. But for this to happen a given object should never be touched + * for 1.5 years. * - * This is not precise but we don't need at all precision, but just - * something statistically reasonable. + * Note that you can change the resolution altering the + * REDIS_LRU_CLOCK_RESOLUTION define. */ - server.lruclock = (time(NULL)/60)&((1<<21)-1); + updateLRUClock(); /* We received a SIGTERM, shutting down here in a safe way, as it is * not ok doing so inside the signal handler. */ @@ -733,6 +738,8 @@ void initServerConfig() { server.maxclients = 0; server.blpop_blocked_clients = 0; server.maxmemory = 0; + server.maxmemory_policy = REDIS_MAXMEMORY_VOLATILE_LRU; + server.maxmemory_samples = 3; server.vm_enabled = 0; server.vm_swap_file = zstrdup("/tmp/redis-%p.vm"); server.vm_page_size = 256; /* 256 bytes per page */ @@ -747,6 +754,7 @@ void initServerConfig() { server.set_max_intset_entries = REDIS_SET_MAX_INTSET_ENTRIES; server.shutdown_asap = 0; + updateLRUClock(); resetServerSaveParams(); appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */ @@ -816,6 +824,8 @@ void initServer() { server.stat_numconnections = 0; server.stat_expiredkeys = 0; server.stat_starttime = time(NULL); + server.stat_keyspace_misses = 0; + server.stat_keyspace_hits = 0; server.unixtime = time(NULL); aeCreateTimeEvent(server.el, 1, serverCron, NULL, NULL); if (aeCreateFileEvent(server.el, server.fd, AE_READABLE, @@ -972,7 +982,7 @@ int prepareForShutdown() { /* Append only file: fsync() the AOF and exit */ aof_fsync(server.appendfd); if (server.vm_enabled) unlink(server.vm_swap_file); - } else { + } else if (server.saveparamslen > 0) { /* Snapshotting. Perform a SYNC SAVE and exit */ if (rdbSave(server.dbfilename) != REDIS_OK) { /* Ooops.. error saving! The best we can do is to continue @@ -983,6 +993,8 @@ int prepareForShutdown() { redisLog(REDIS_WARNING,"Error trying to save the DB, can't exit"); return REDIS_ERR; } + } else { + redisLog(REDIS_WARNING,"Not saving DB."); } if (server.daemonize) unlink(server.pidfile); redisLog(REDIS_WARNING,"Server exit now, bye bye..."); @@ -1053,6 +1065,7 @@ sds genRedisInfoString(void) { "process_id:%ld\r\n" "uptime_in_seconds:%ld\r\n" "uptime_in_days:%ld\r\n" + "lru_clock:%ld\r\n" "used_cpu_sys:%.2f\r\n" "used_cpu_user:%.2f\r\n" "used_cpu_sys_childrens:%.2f\r\n" @@ -1063,6 +1076,7 @@ sds genRedisInfoString(void) { "used_memory:%zu\r\n" "used_memory_human:%s\r\n" "mem_fragmentation_ratio:%.2f\r\n" + "use_tcmalloc:%d\r\n" "changes_since_last_save:%lld\r\n" "bgsave_in_progress:%d\r\n" "last_save_time:%ld\r\n" @@ -1070,6 +1084,8 @@ sds genRedisInfoString(void) { "total_connections_received:%lld\r\n" "total_commands_processed:%lld\r\n" "expired_keys:%lld\r\n" + "keyspace_hits:%lld\r\n" + "keyspace_misses:%lld\r\n" "hash_max_zipmap_entries:%zu\r\n" "hash_max_zipmap_value:%zu\r\n" "pubsub_channels:%ld\r\n" @@ -1084,6 +1100,7 @@ sds genRedisInfoString(void) { (long) getpid(), uptime, uptime/(3600*24), + (unsigned long) server.lruclock, (float)self_ru.ru_utime.tv_sec+(float)self_ru.ru_utime.tv_usec/1000000, (float)self_ru.ru_stime.tv_sec+(float)self_ru.ru_stime.tv_usec/1000000, (float)c_ru.ru_utime.tv_sec+(float)c_ru.ru_utime.tv_usec/1000000, @@ -1094,6 +1111,11 @@ sds genRedisInfoString(void) { zmalloc_used_memory(), hmem, zmalloc_get_fragmentation_ratio(), +#ifdef USE_TCMALLOC + 1, +#else + 0, +#endif server.dirty, server.bgsavechildpid != -1, server.lastsave, @@ -1101,6 +1123,8 @@ sds genRedisInfoString(void) { server.stat_numconnections, server.stat_numcommands, server.stat_expiredkeys, + server.stat_keyspace_hits, + server.stat_keyspace_misses, server.hash_max_zipmap_entries, server.hash_max_zipmap_value, dictSize(server.pubsub_channels), @@ -1217,10 +1241,93 @@ int tryFreeOneObjectFromFreelist(void) { * memory usage. */ void freeMemoryIfNeeded(void) { + /* Remove keys accordingly to the active policy as long as we are + * over the memory limit. */ while (server.maxmemory && zmalloc_used_memory() > server.maxmemory) { int j, k, freed = 0; + /* Basic strategy -- remove objects from the free list. */ if (tryFreeOneObjectFromFreelist() == REDIS_OK) continue; + + for (j = 0; j < server.dbnum; j++) { + long bestval; + sds bestkey = NULL; + struct dictEntry *de; + redisDb *db = server.db+j; + dict *dict; + + if (server.maxmemory_policy == REDIS_MAXMEMORY_ALLKEYS_LRU || + server.maxmemory_policy == REDIS_MAXMEMORY_ALLKEYS_RANDOM) + { + dict = server.db[j].dict; + } else { + dict = server.db[j].expires; + } + if (dictSize(dict) == 0) continue; + + /* volatile-random and allkeys-random policy */ + if (server.maxmemory_policy == REDIS_MAXMEMORY_ALLKEYS_RANDOM || + server.maxmemory_policy == REDIS_MAXMEMORY_VOLATILE_RANDOM) + { + de = dictGetRandomKey(dict); + bestkey = dictGetEntryKey(de); + } + + /* volatile-lru and allkeys-lru policy */ + else if (server.maxmemory_policy == REDIS_MAXMEMORY_ALLKEYS_LRU || + server.maxmemory_policy == REDIS_MAXMEMORY_VOLATILE_LRU) + { + for (k = 0; k < server.maxmemory_samples; k++) { + sds thiskey; + long thisval; + robj *o; + + de = dictGetRandomKey(dict); + thiskey = dictGetEntryKey(de); + o = dictGetEntryVal(de); + thisval = estimateObjectIdleTime(o); + + /* Higher idle time is better candidate for deletion */ + if (bestkey == NULL || thisval > bestval) { + bestkey = thiskey; + bestval = thisval; + } + } + } + + /* volatile-ttl */ + else if (server.maxmemory_policy == REDIS_MAXMEMORY_VOLATILE_TTL) { + for (k = 0; k < server.maxmemory_samples; k++) { + sds thiskey; + long thisval; + + de = dictGetRandomKey(dict); + thiskey = dictGetEntryKey(de); + thisval = (long) dictGetEntryVal(de); + + /* Expire sooner (minor expire unix timestamp) is better + * candidate for deletion */ + if (bestkey == NULL || thisval < bestval) { + bestkey = thiskey; + bestval = thisval; + } + } + } + + /* Finally remove the selected key. */ + if (bestkey) { + robj *keyobj = createStringObject(bestkey,sdslen(bestkey)); + dbDelete(db,keyobj); + server.stat_expiredkeys++; + decrRefCount(keyobj); + freed++; + } + } + if (!freed) return; /* nothing to free... */ + } + + while(0) { + int j, k, freed = 0; for (j = 0; j < server.dbnum; j++) { int minttl = -1; sds minkey = NULL; @@ -1388,6 +1495,7 @@ void segvHandler(int sig, siginfo_t *info, void *secret) { int i, trace_size = 0; ucontext_t *uc = (ucontext_t*) secret; sds infostring; + struct sigaction act; REDIS_NOTUSED(info); redisLog(REDIS_WARNING, @@ -1409,7 +1517,16 @@ void segvHandler(int sig, siginfo_t *info, void *secret) { /* free(messages); Don't call free() with possibly corrupted memory. */ if (server.daemonize) unlink(server.pidfile); - _exit(0); + + /* Make sure we exit with the right signal at the end. So for instance + * the core will be dumped if enabled. */ + sigemptyset (&act.sa_mask); + /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction + * is used. Otherwise, sa_handler is used */ + act.sa_flags = SA_NODEFER | SA_ONSTACK | SA_RESETHAND; + act.sa_handler = SIG_DFL; + sigaction (sig, &act, NULL); + kill(getpid(),sig); } void sigtermHandler(int sig) { diff --git a/src/redis.h b/src/redis.h index 44857569..cf21447d 100644 --- a/src/redis.h +++ b/src/redis.h @@ -203,6 +203,13 @@ #define REDIS_OP_DIFF 1 #define REDIS_OP_INTER 2 +/* Redis maxmemory strategies */ +#define REDIS_MAXMEMORY_VOLATILE_LRU 0 +#define REDIS_MAXMEMORY_VOLATILE_TTL 1 +#define REDIS_MAXMEMORY_VOLATILE_RANDOM 2 +#define REDIS_MAXMEMORY_ALLKEYS_LRU 3 +#define REDIS_MAXMEMORY_ALLKEYS_RANDOM 4 + /* We can print the stacktrace, so our assert is defined this way: */ #define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1))) #define redisPanic(_e) _redisPanic(#_e,__FILE__,__LINE__),_exit(1) @@ -216,6 +223,8 @@ void _redisPanic(char *msg, char *file, int line); /* A redis object, that is a type able to hold a string / list / set */ /* The actual Redis Object */ +#define REDIS_LRU_CLOCK_MAX ((1<<21)-1) /* Max value of obj->lru */ +#define REDIS_LRU_CLOCK_RESOLUTION 10 /* LRU clock resolution in seconds */ typedef struct redisObject { unsigned type:4; unsigned storage:2; /* REDIS_VM_MEMORY or REDIS_VM_SWAPPING */ @@ -353,12 +362,14 @@ struct redisServer { aeEventLoop *el; int cronloops; /* number of times the cron function run */ list *objfreelist; /* A list of freed objects to avoid malloc() */ - time_t lastsave; /* Unix time of last save succeeede */ + time_t lastsave; /* Unix time of last save succeeede */ /* Fields used only for stats */ - time_t stat_starttime; /* server start time */ - long long stat_numcommands; /* number of processed commands */ - long long stat_numconnections; /* number of connections received */ - long long stat_expiredkeys; /* number of expired keys */ + time_t stat_starttime; /* server start time */ + long long stat_numcommands; /* number of processed commands */ + long long stat_numconnections; /* number of connections received */ + long long stat_expiredkeys; /* number of expired keys */ + long long stat_keyspace_hits; /* number of successful lookups of keys */ + long long stat_keyspace_misses; /* number of failed lookups of keys */ /* Configuration */ int verbosity; int glueoutputbuf; @@ -395,6 +406,8 @@ struct redisServer { int replstate; unsigned int maxclients; unsigned long long maxmemory; + int maxmemory_policy; + int maxmemory_samples; unsigned int blpop_blocked_clients; unsigned int vm_blocked_clients; /* Sort parameters - qsort_r() is only available under BSD so we @@ -683,6 +696,16 @@ int getLongLongFromObject(robj *o, long long *target); char *strEncoding(int encoding); int compareStringObjects(robj *a, robj *b); int equalStringObjects(robj *a, robj *b); +unsigned long estimateObjectIdleTime(robj *o); + +/* Synchronous I/O with timeout */ +int syncWrite(int fd, char *ptr, ssize_t size, int timeout); +int syncRead(int fd, char *ptr, ssize_t size, int timeout); +int syncReadLine(int fd, char *ptr, ssize_t size, int timeout); +int fwriteBulkString(FILE *fp, char *s, unsigned long len); +int fwriteBulkDouble(FILE *fp, double d); +int fwriteBulkLongLong(FILE *fp, long long l); +int fwriteBulkObject(FILE *fp, robj *obj); /* Replication */ void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc); @@ -901,6 +924,7 @@ void zaddCommand(redisClient *c); void zincrbyCommand(redisClient *c); void zrangeCommand(redisClient *c); void zrangebyscoreCommand(redisClient *c); +void zrevrangebyscoreCommand(redisClient *c); void zcountCommand(redisClient *c); void zrevrangeCommand(redisClient *c); void zcardCommand(redisClient *c); diff --git a/src/replication.c b/src/replication.c index 8c629006..7687206a 100644 --- a/src/replication.c +++ b/src/replication.c @@ -110,68 +110,6 @@ void replicationFeedMonitors(list *monitors, int dictid, robj **argv, int argc) decrRefCount(cmdobj); } -int syncWrite(int fd, char *ptr, ssize_t size, int timeout) { - ssize_t nwritten, ret = size; - time_t start = time(NULL); - - timeout++; - while(size) { - if (aeWait(fd,AE_WRITABLE,1000) & AE_WRITABLE) { - nwritten = write(fd,ptr,size); - if (nwritten == -1) return -1; - ptr += nwritten; - size -= nwritten; - } - if ((time(NULL)-start) > timeout) { - errno = ETIMEDOUT; - return -1; - } - } - return ret; -} - -int syncRead(int fd, char *ptr, ssize_t size, int timeout) { - ssize_t nread, totread = 0; - time_t start = time(NULL); - - timeout++; - while(size) { - if (aeWait(fd,AE_READABLE,1000) & AE_READABLE) { - nread = read(fd,ptr,size); - if (nread <= 0) return -1; - ptr += nread; - size -= nread; - totread += nread; - } - if ((time(NULL)-start) > timeout) { - errno = ETIMEDOUT; - return -1; - } - } - return totread; -} - -int syncReadLine(int fd, char *ptr, ssize_t size, int timeout) { - ssize_t nread = 0; - - size--; - while(size) { - char c; - - if (syncRead(fd,&c,1,timeout) == -1) return -1; - if (c == '\n') { - *ptr = '\0'; - if (nread && *(ptr-1) == '\r') *(ptr-1) = '\0'; - return nread; - } else { - *ptr++ = c; - *ptr = '\0'; - nread++; - } - } - return nread; -} - void syncCommand(redisClient *c) { /* ignore SYNC if aleady slave or in monitor mode */ if (c->flags & REDIS_SLAVE) return; diff --git a/src/syncio.c b/src/syncio.c new file mode 100644 index 00000000..28ac1811 --- /dev/null +++ b/src/syncio.c @@ -0,0 +1,154 @@ +/* Synchronous socket and file I/O operations useful across the core. + * + * Copyright (c) 2009-2010, Salvatore Sanfilippo + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Redis nor the names of its contributors may be used + * to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "redis.h" + +/* ----------------- Blocking sockets I/O with timeouts --------------------- */ + +/* Redis performs most of the I/O in a nonblocking way, with the exception + * of the SYNC command where the slave does it in a blocking way, and + * the MIGRATE command that must be blocking in order to be atomic from the + * point of view of the two instances (one migrating the key and one receiving + * the key). This is why need the following blocking I/O functions. */ + +int syncWrite(int fd, char *ptr, ssize_t size, int timeout) { + ssize_t nwritten, ret = size; + time_t start = time(NULL); + + timeout++; + while(size) { + if (aeWait(fd,AE_WRITABLE,1000) & AE_WRITABLE) { + nwritten = write(fd,ptr,size); + if (nwritten == -1) return -1; + ptr += nwritten; + size -= nwritten; + } + if ((time(NULL)-start) > timeout) { + errno = ETIMEDOUT; + return -1; + } + } + return ret; +} + +int syncRead(int fd, char *ptr, ssize_t size, int timeout) { + ssize_t nread, totread = 0; + time_t start = time(NULL); + + timeout++; + while(size) { + if (aeWait(fd,AE_READABLE,1000) & AE_READABLE) { + nread = read(fd,ptr,size); + if (nread <= 0) return -1; + ptr += nread; + size -= nread; + totread += nread; + } + if ((time(NULL)-start) > timeout) { + errno = ETIMEDOUT; + return -1; + } + } + return totread; +} + +int syncReadLine(int fd, char *ptr, ssize_t size, int timeout) { + ssize_t nread = 0; + + size--; + while(size) { + char c; + + if (syncRead(fd,&c,1,timeout) == -1) return -1; + if (c == '\n') { + *ptr = '\0'; + if (nread && *(ptr-1) == '\r') *(ptr-1) = '\0'; + return nread; + } else { + *ptr++ = c; + *ptr = '\0'; + nread++; + } + } + return nread; +} + +/* ----------------- Blocking sockets I/O with timeouts --------------------- */ + +/* Write binary-safe string into a file in the bulkformat + * $\r\n\r\n */ +int fwriteBulkString(FILE *fp, char *s, unsigned long len) { + char cbuf[128]; + int clen; + cbuf[0] = '$'; + clen = 1+ll2string(cbuf+1,sizeof(cbuf)-1,len); + cbuf[clen++] = '\r'; + cbuf[clen++] = '\n'; + if (fwrite(cbuf,clen,1,fp) == 0) return 0; + if (len > 0 && fwrite(s,len,1,fp) == 0) return 0; + if (fwrite("\r\n",2,1,fp) == 0) return 0; + return 1; +} + +/* Write a double value in bulk format $\r\n\r\n */ +int fwriteBulkDouble(FILE *fp, double d) { + char buf[128], dbuf[128]; + + snprintf(dbuf,sizeof(dbuf),"%.17g\r\n",d); + snprintf(buf,sizeof(buf),"$%lu\r\n",(unsigned long)strlen(dbuf)-2); + if (fwrite(buf,strlen(buf),1,fp) == 0) return 0; + if (fwrite(dbuf,strlen(dbuf),1,fp) == 0) return 0; + return 1; +} + +/* Write a long value in bulk format $\r\n\r\n */ +int fwriteBulkLongLong(FILE *fp, long long l) { + char bbuf[128], lbuf[128]; + unsigned int blen, llen; + llen = ll2string(lbuf,32,l); + blen = snprintf(bbuf,sizeof(bbuf),"$%u\r\n%s\r\n",llen,lbuf); + if (fwrite(bbuf,blen,1,fp) == 0) return 0; + return 1; +} + +/* Delegate writing an object to writing a bulk string or bulk long long. */ +int fwriteBulkObject(FILE *fp, robj *obj) { + /* Avoid using getDecodedObject to help copy-on-write (we are often + * in a child process when this function is called). */ + if (obj->encoding == REDIS_ENCODING_INT) { + return fwriteBulkLongLong(fp,(long)obj->ptr); + } else if (obj->encoding == REDIS_ENCODING_RAW) { + return fwriteBulkString(fp,obj->ptr,sdslen(obj->ptr)); + } else { + redisPanic("Unknown string encoding"); + } +} + + diff --git a/src/t_hash.c b/src/t_hash.c index 5cef1cab..071b7754 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -310,6 +310,7 @@ void hmgetCommand(redisClient *c) { o = lookupKeyRead(c->db,c->argv[1]); if (o != NULL && o->type != REDIS_HASH) { addReply(c,shared.wrongtypeerr); + return; } /* Note the check for o != NULL happens inside the loop. This is diff --git a/src/t_zset.c b/src/t_zset.c index ba05c278..8139b53d 100644 --- a/src/t_zset.c +++ b/src/t_zset.c @@ -174,25 +174,35 @@ int zslDelete(zskiplist *zsl, double score, robj *obj) { return 0; /* not found */ } +/* Struct to hold a inclusive/exclusive range spec. */ +typedef struct { + double min, max; + int minex, maxex; /* are min or max exclusive? */ +} zrangespec; + /* Delete all the elements with score between min and max from the skiplist. * Min and mx are inclusive, so a score >= min || score <= max is deleted. * Note that this function takes the reference to the hash table view of the * sorted set, in order to remove the elements from the hash table too. */ -unsigned long zslDeleteRangeByScore(zskiplist *zsl, double min, double max, dict *dict) { +unsigned long zslDeleteRangeByScore(zskiplist *zsl, zrangespec range, dict *dict) { zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x; unsigned long removed = 0; int i; x = zsl->header; for (i = zsl->level-1; i >= 0; i--) { - while (x->level[i].forward && x->level[i].forward->score < min) - x = x->level[i].forward; + while (x->level[i].forward && (range.minex ? + x->level[i].forward->score <= range.min : + x->level[i].forward->score < range.min)) + x = x->level[i].forward; update[i] = x; } - /* We may have multiple elements with the same score, what we need - * is to find the element with both the right score and object. */ + + /* Current node is the last with score < or <= min. */ x = x->level[0].forward; - while (x && x->score <= max) { + + /* Delete nodes while in range. */ + while (x && (range.maxex ? x->score < range.max : x->score <= range.max)) { zskiplistNode *next = x->level[0].forward; zslDeleteNode(zsl,x,update); dictDelete(dict,x->obj); @@ -200,7 +210,7 @@ unsigned long zslDeleteRangeByScore(zskiplist *zsl, double min, double max, dict removed++; x = next; } - return removed; /* not found */ + return removed; } /* Delete all the elements with rank between start and end from the skiplist. @@ -296,6 +306,44 @@ zskiplistNode* zslistTypeGetElementByRank(zskiplist *zsl, unsigned long rank) { return NULL; } +/* Populate the rangespec according to the objects min and max. */ +static int zslParseRange(robj *min, robj *max, zrangespec *spec) { + char *eptr; + spec->minex = spec->maxex = 0; + + /* Parse the min-max interval. If one of the values is prefixed + * by the "(" character, it's considered "open". For instance + * ZRANGEBYSCORE zset (1.5 (2.5 will match min < x < max + * ZRANGEBYSCORE zset 1.5 2.5 will instead match min <= x <= max */ + if (min->encoding == REDIS_ENCODING_INT) { + spec->min = (long)min->ptr; + } else { + if (((char*)min->ptr)[0] == '(') { + spec->min = strtod((char*)min->ptr+1,&eptr); + if (eptr[0] != '\0' || isnan(spec->min)) return REDIS_ERR; + spec->minex = 1; + } else { + spec->min = strtod((char*)min->ptr,&eptr); + if (eptr[0] != '\0' || isnan(spec->min)) return REDIS_ERR; + } + } + if (max->encoding == REDIS_ENCODING_INT) { + spec->max = (long)max->ptr; + } else { + if (((char*)max->ptr)[0] == '(') { + spec->max = strtod((char*)max->ptr+1,&eptr); + if (eptr[0] != '\0' || isnan(spec->max)) return REDIS_ERR; + spec->maxex = 1; + } else { + spec->max = strtod((char*)max->ptr,&eptr); + if (eptr[0] != '\0' || isnan(spec->max)) return REDIS_ERR; + } + } + + return REDIS_OK; +} + + /*----------------------------------------------------------------------------- * Sorted set commands *----------------------------------------------------------------------------*/ @@ -435,20 +483,22 @@ void zremCommand(redisClient *c) { } void zremrangebyscoreCommand(redisClient *c) { - double min; - double max; + zrangespec range; long deleted; - robj *zsetobj; + robj *o; zset *zs; - if ((getDoubleFromObjectOrReply(c, c->argv[2], &min, NULL) != REDIS_OK) || - (getDoubleFromObjectOrReply(c, c->argv[3], &max, NULL) != REDIS_OK)) return; + /* Parse the range arguments. */ + if (zslParseRange(c->argv[2],c->argv[3],&range) != REDIS_OK) { + addReplyError(c,"min or max is not a double"); + return; + } - if ((zsetobj = lookupKeyWriteOrReply(c,c->argv[1],shared.czero)) == NULL || - checkType(c,zsetobj,REDIS_ZSET)) return; + if ((o = lookupKeyWriteOrReply(c,c->argv[1],shared.czero)) == NULL || + checkType(c,o,REDIS_ZSET)) return; - zs = zsetobj->ptr; - deleted = zslDeleteRangeByScore(zs->zsl,min,max,zs->dict); + zs = o->ptr; + deleted = zslDeleteRangeByScore(zs->zsl,range,zs->dict); if (htNeedsResize(zs->dict)) dictResize(zs->dict); if (dictSize(zs->dict) == 0) dbDelete(c->db,c->argv[1]); if (deleted) touchWatchedKey(c->db,c->argv[1]); @@ -635,13 +685,13 @@ void zunionInterGenericCommand(redisClient *c, robj *dstkey, int op) { dictEntry *other = dictFind(src[j].dict,dictGetEntryKey(de)); if (other) { value = src[j].weight * zunionInterDictValue(other); - zunionInterAggregate(&score, value, aggregate); + zunionInterAggregate(&score,value,aggregate); } else { break; } } - /* accept entry only when present in every source dict */ + /* Only continue when present in every source dict. */ if (j == setnum) { robj *o = dictGetEntryKey(de); znode = zslInsert(dstzset->zsl,score,o); @@ -663,6 +713,8 @@ void zunionInterGenericCommand(redisClient *c, robj *dstkey, int op) { /* skip key when already processed */ if (dictFind(dstzset->dict,dictGetEntryKey(de)) != NULL) continue; + + /* initialize score */ score = src[i].weight * zunionInterDictValue(de); /* because the zsets are sorted by size, its only possible @@ -671,7 +723,7 @@ void zunionInterGenericCommand(redisClient *c, robj *dstkey, int op) { dictEntry *other = dictFind(src[j].dict,dictGetEntryKey(de)); if (other) { value = src[j].weight * zunionInterDictValue(other); - zunionInterAggregate(&score, value, aggregate); + zunionInterAggregate(&score,value,aggregate); } } @@ -783,125 +835,156 @@ void zrevrangeCommand(redisClient *c) { zrangeGenericCommand(c,1); } -/* This command implements both ZRANGEBYSCORE and ZCOUNT. - * If justcount is non-zero, just the count is returned. */ -void genericZrangebyscoreCommand(redisClient *c, int justcount) { - robj *o; - double min, max; - int minex = 0, maxex = 0; /* are min or max exclusive? */ +/* This command implements ZRANGEBYSCORE, ZREVRANGEBYSCORE and ZCOUNT. + * If "justcount", only the number of elements in the range is returned. */ +void genericZrangebyscoreCommand(redisClient *c, int reverse, int justcount) { + zrangespec range; + robj *o, *emptyreply; + zset *zsetobj; + zskiplist *zsl; + zskiplistNode *ln; int offset = 0, limit = -1; int withscores = 0; - int badsyntax = 0; + unsigned long rangelen = 0; + void *replylen = NULL; - /* Parse the min-max interval. If one of the values is prefixed - * by the "(" character, it's considered "open". For instance - * ZRANGEBYSCORE zset (1.5 (2.5 will match min < x < max - * ZRANGEBYSCORE zset 1.5 2.5 will instead match min <= x <= max */ - if (((char*)c->argv[2]->ptr)[0] == '(') { - min = strtod((char*)c->argv[2]->ptr+1,NULL); - minex = 1; - } else { - min = strtod(c->argv[2]->ptr,NULL); - } - if (((char*)c->argv[3]->ptr)[0] == '(') { - max = strtod((char*)c->argv[3]->ptr+1,NULL); - maxex = 1; - } else { - max = strtod(c->argv[3]->ptr,NULL); + /* Parse the range arguments. */ + if (zslParseRange(c->argv[2],c->argv[3],&range) != REDIS_OK) { + addReplyError(c,"min or max is not a double"); + return; } - /* Parse "WITHSCORES": note that if the command was called with - * the name ZCOUNT then we are sure that c->argc == 4, so we'll never - * enter the following paths to parse WITHSCORES and LIMIT. */ - if (c->argc == 5 || c->argc == 8) { - if (strcasecmp(c->argv[c->argc-1]->ptr,"withscores") == 0) - withscores = 1; - else - badsyntax = 1; + /* Parse optional extra arguments. Note that ZCOUNT will exactly have + * 4 arguments, so we'll never enter the following code path. */ + if (c->argc > 4) { + int remaining = c->argc - 4; + int pos = 4; + + while (remaining) { + if (remaining >= 1 && !strcasecmp(c->argv[pos]->ptr,"withscores")) { + pos++; remaining--; + withscores = 1; + } else if (remaining >= 3 && !strcasecmp(c->argv[pos]->ptr,"limit")) { + offset = atoi(c->argv[pos+1]->ptr); + limit = atoi(c->argv[pos+2]->ptr); + pos += 3; remaining -= 3; + } else { + addReply(c,shared.syntaxerr); + return; + } + } } - if (c->argc != (4 + withscores) && c->argc != (7 + withscores)) - badsyntax = 1; - if (badsyntax) { - addReplyError(c,"wrong number of arguments for ZRANGEBYSCORE"); - return; + + /* Ok, lookup the key and get the range */ + emptyreply = justcount ? shared.czero : shared.emptymultibulk; + if ((o = lookupKeyReadOrReply(c,c->argv[1],emptyreply)) == NULL || + checkType(c,o,REDIS_ZSET)) return; + zsetobj = o->ptr; + zsl = zsetobj->zsl; + + /* If reversed, assume the elements are sorted from high to low score. */ + ln = zslFirstWithScore(zsl,range.min); + if (reverse) { + /* If range.min is out of range, ln will be NULL and we need to use + * the tail of the skiplist as first node of the range. */ + if (ln == NULL) ln = zsl->tail; + + /* zslFirstWithScore returns the first element with where with + * score >= range.min, so backtrack to make sure the element we use + * here has score <= range.min. */ + while (ln && ln->score > range.min) ln = ln->backward; + + /* Move to the right element according to the range spec. */ + if (range.minex) { + /* Find last element with score < range.min */ + while (ln && ln->score == range.min) ln = ln->backward; + } else { + /* Find last element with score <= range.min */ + while (ln && ln->level[0].forward && + ln->level[0].forward->score == range.min) + ln = ln->level[0].forward; + } + } else { + if (range.minex) { + /* Find first element with score > range.min */ + while (ln && ln->score == range.min) ln = ln->level[0].forward; + } } - /* Parse "LIMIT" */ - if (c->argc == (7 + withscores) && strcasecmp(c->argv[4]->ptr,"limit")) { - addReply(c,shared.syntaxerr); + /* No "first" element in the specified interval. */ + if (ln == NULL) { + addReply(c,emptyreply); return; - } else if (c->argc == (7 + withscores)) { - offset = atoi(c->argv[5]->ptr); - limit = atoi(c->argv[6]->ptr); - if (offset < 0) offset = 0; } - /* Ok, lookup the key and get the range */ - o = lookupKeyRead(c->db,c->argv[1]); - if (o == NULL) { - addReply(c,justcount ? shared.czero : shared.emptymultibulk); - } else { - if (o->type != REDIS_ZSET) { - addReply(c,shared.wrongtypeerr); - } else { - zset *zsetobj = o->ptr; - zskiplist *zsl = zsetobj->zsl; - zskiplistNode *ln; - robj *ele; - void *replylen = NULL; - unsigned long rangelen = 0; - - /* Get the first node with the score >= min, or with - * score > min if 'minex' is true. */ - ln = zslFirstWithScore(zsl,min); - while (minex && ln && ln->score == min) ln = ln->level[0].forward; - - if (ln == NULL) { - /* No element matching the speciifed interval */ - addReply(c,justcount ? shared.czero : shared.emptymultibulk); - return; - } + /* We don't know in advance how many matching elements there + * are in the list, so we push this object that will represent + * the multi-bulk length in the output buffer, and will "fix" + * it later */ + if (!justcount) + replylen = addDeferredMultiBulkLength(c); + + /* If there is an offset, just traverse the number of elements without + * checking the score because that is done in the next loop. */ + while(ln && offset--) { + if (reverse) + ln = ln->backward; + else + ln = ln->level[0].forward; + } - /* We don't know in advance how many matching elements there - * are in the list, so we push this object that will represent - * the multi-bulk length in the output buffer, and will "fix" - * it later */ - if (!justcount) - replylen = addDeferredMultiBulkLength(c); - - while(ln && (maxex ? (ln->score < max) : (ln->score <= max))) { - if (offset) { - offset--; - ln = ln->level[0].forward; - continue; - } - if (limit == 0) break; - if (!justcount) { - ele = ln->obj; - addReplyBulk(c,ele); - if (withscores) - addReplyDouble(c,ln->score); - } - ln = ln->level[0].forward; - rangelen++; - if (limit > 0) limit--; + while (ln && limit--) { + /* Check if this this element is in range. */ + if (reverse) { + if (range.maxex) { + /* Element should have score > range.max */ + if (ln->score <= range.max) break; + } else { + /* Element should have score >= range.max */ + if (ln->score < range.max) break; } - if (justcount) { - addReplyLongLong(c,(long)rangelen); + } else { + if (range.maxex) { + /* Element should have score < range.max */ + if (ln->score >= range.max) break; } else { - setDeferredMultiBulkLength(c,replylen, - withscores ? (rangelen*2) : rangelen); + /* Element should have score <= range.max */ + if (ln->score > range.max) break; } } + + /* Do our magic */ + rangelen++; + if (!justcount) { + addReplyBulk(c,ln->obj); + if (withscores) + addReplyDouble(c,ln->score); + } + + if (reverse) + ln = ln->backward; + else + ln = ln->level[0].forward; + } + + if (justcount) { + addReplyLongLong(c,(long)rangelen); + } else { + setDeferredMultiBulkLength(c,replylen, + withscores ? (rangelen*2) : rangelen); } } void zrangebyscoreCommand(redisClient *c) { - genericZrangebyscoreCommand(c,0); + genericZrangebyscoreCommand(c,0,0); +} + +void zrevrangebyscoreCommand(redisClient *c) { + genericZrangebyscoreCommand(c,1,0); } void zcountCommand(redisClient *c) { - genericZrangebyscoreCommand(c,1); + genericZrangebyscoreCommand(c,0,1); } void zcardCommand(redisClient *c) { diff --git a/src/version.h b/src/version.h index 80decef1..b139f9f8 100644 --- a/src/version.h +++ b/src/version.h @@ -1 +1 @@ -#define REDIS_VERSION "2.1.4" +#define REDIS_VERSION "2.1.5" diff --git a/src/vm.c b/src/vm.c index ee831fb9..1aad95d7 100644 --- a/src/vm.c +++ b/src/vm.c @@ -362,7 +362,7 @@ robj *vmPreviewObject(robj *o) { double computeObjectSwappability(robj *o) { /* actual age can be >= minage, but not < minage. As we use wrapping * 21 bit clocks with minutes resolution for the LRU. */ - time_t minage = abs(server.lruclock - o->lru); + time_t minage = estimateObjectIdleTime(o); long asize = 0, elesize; robj *ele; list *l; diff --git a/src/ziplist.c b/src/ziplist.c index 4f44bd58..98ad3113 100644 --- a/src/ziplist.c +++ b/src/ziplist.c @@ -409,6 +409,7 @@ static unsigned char *__ziplistCascadeUpdate(unsigned char *zl, unsigned char *p /* Advance the cursor */ p += rawlen; + curlen += extra; } else { if (next.prevrawlensize > rawlensize) { /* This would result in shrinking, which we want to avoid. @@ -781,6 +782,10 @@ void ziplistRepr(unsigned char *zl) { #ifdef ZIPLIST_TEST_MAIN #include +#include "adlist.h" +#include "sds.h" + +#define debug(f, ...) { if (DEBUG) printf(f, __VA_ARGS__); } unsigned char *createList() { unsigned char *zl = ziplistNew(); @@ -864,6 +869,32 @@ void pop(unsigned char *zl, int where) { } } +void randstring(char *target, unsigned int min, unsigned int max) { + int p, len = min+rand()%(max-min+1); + int minval, maxval; + switch(rand() % 3) { + case 0: + minval = 0; + maxval = 255; + break; + case 1: + minval = 48; + maxval = 122; + break; + case 2: + minval = 48; + maxval = 52; + break; + default: + assert(NULL); + } + + while(p < len) + target[p++] = minval+rand()%(maxval-minval+1); + return; +} + + int main(int argc, char **argv) { unsigned char *zl, *p; unsigned char *entry; @@ -1137,10 +1168,10 @@ int main(int argc, char **argv) { /* Pop values again and compare their value. */ p = ziplistIndex(zl,0); assert(ziplistGet(p,&entry,&elen,&value)); - assert(strncmp(v1,entry,elen) == 0); + assert(strncmp(v1,(char*)entry,elen) == 0); p = ziplistIndex(zl,1); assert(ziplistGet(p,&entry,&elen,&value)); - assert(strncmp(v2,entry,elen) == 0); + assert(strncmp(v2,(char*)entry,elen) == 0); printf("SUCCESS\n\n"); } @@ -1192,50 +1223,78 @@ int main(int argc, char **argv) { printf("Stress with random payloads of different encoding:\n"); { - int i, idx, where, len; - long long v; + int i,j,len,where; unsigned char *p; - char buf[0x4041]; /* max length of generated string */ - zl = ziplistNew(); - for (i = 0; i < 100000; i++) { - where = (rand() & 1) ? ZIPLIST_HEAD : ZIPLIST_TAIL; - if (rand() & 1) { - /* equally likely create a 16, 32 or 64 bit int */ - v = (rand() & INT16_MAX) + ((1ll << 32) >> ((rand() % 3)*16)); - v *= 2*(rand() & 1)-1; /* randomly flip sign */ - sprintf(buf, "%lld", v); + char buf[1024]; + list *ref; + listNode *refnode; + + /* Hold temp vars from ziplist */ + unsigned char *sstr; + unsigned int slen; + long long sval; + + /* In the regression for the cascade bug, it was triggered + * with a random seed of 2. */ + srand(2); + + for (i = 0; i < 20000; i++) { + zl = ziplistNew(); + ref = listCreate(); + listSetFreeMethod(ref,sdsfree); + len = rand() % 256; + + /* Create lists */ + for (j = 0; j < len; j++) { + where = (rand() & 1) ? ZIPLIST_HEAD : ZIPLIST_TAIL; + switch(rand() % 4) { + case 0: + sprintf(buf,"%lld",(0LL + rand()) >> 20); + break; + case 1: + sprintf(buf,"%lld",(0LL + rand())); + break; + case 2: + sprintf(buf,"%lld",(0LL + rand()) << 20); + break; + case 3: + randstring(buf,0,256); + break; + default: + assert(NULL); + } + + /* Add to ziplist */ zl = ziplistPush(zl, (unsigned char*)buf, strlen(buf), where); - } else { - /* equally likely generate 6, 14 or >14 bit length */ - v = rand() & 0x3f; - v += 0x4000 >> ((rand() % 3)*8); - memset(buf, 'x', v); - zl = ziplistPush(zl, (unsigned char*)buf, v, where); - } - /* delete a random element */ - if ((len = ziplistLen(zl)) >= 10) { - idx = rand() % len; - // printf("Delete index %d\n", idx); - // ziplistRepr(zl); - ziplistDeleteRange(zl, idx, 1); - // ziplistRepr(zl); - len--; + /* Add to reference list */ + if (where == ZIPLIST_HEAD) { + listAddNodeHead(ref,sdsnew(buf)); + } else if (where == ZIPLIST_TAIL) { + listAddNodeTail(ref,sdsnew(buf)); + } else { + assert(NULL); + } } - /* iterate from front to back */ - idx = 0; - p = ziplistIndex(zl, 0); - while((p = ziplistNext(zl,p))) - idx++; - assert(len == idx+1); - - /* iterate from back to front */ - idx = 0; - p = ziplistIndex(zl, -1); - while((p = ziplistPrev(zl,p))) - idx++; - assert(len == idx+1); + assert(listLength(ref) == ziplistLen(zl)); + for (j = 0; j < len; j++) { + /* Naive way to get elements, but similar to the stresser + * executed from the Tcl test suite. */ + p = ziplistIndex(zl,j); + refnode = listIndex(ref,j); + + assert(ziplistGet(p,&sstr,&slen,&sval)); + if (sstr == NULL) { + sprintf(buf,"%lld",sval); + } else { + memcpy(buf,sstr,slen); + buf[slen] = '\0'; + } + assert(strcmp(buf,listNodeValue(refnode)) == 0); + } + zfree(zl); + listRelease(ref); } printf("SUCCESS\n\n"); } diff --git a/src/zmalloc.c b/src/zmalloc.c index 544155e7..8cfb18d9 100644 --- a/src/zmalloc.c +++ b/src/zmalloc.c @@ -32,13 +32,24 @@ #include #include #include - #include "config.h" +#ifdef HAVE_MALLOC_SIZE +#define PREFIX_SIZE (0) +#else #if defined(__sun) -#define PREFIX_SIZE sizeof(long long) +#define PREFIX_SIZE (sizeof(long long)) #else -#define PREFIX_SIZE sizeof(size_t) +#define PREFIX_SIZE (sizeof(size_t)) +#endif +#endif + +/* Explicitly override malloc/free etc when using tcmalloc. */ +#if defined(USE_TCMALLOC) +#define malloc(size) tc_malloc(size) +#define calloc(count,size) tc_calloc(count,size) +#define realloc(ptr,size) tc_realloc(ptr,size) +#define free(ptr) tc_free(ptr) #endif #define increment_used_memory(__n) do { \ diff --git a/tests/unit/type/hash.tcl b/tests/unit/type/hash.tcl index 2c0bd534..8559dc3c 100644 --- a/tests/unit/type/hash.tcl +++ b/tests/unit/type/hash.tcl @@ -140,6 +140,11 @@ start_server {tags {"hash"}} { set _ $rv } {{{} {}} {{} {}} {{} {}}} + test {HMGET against wrong type} { + r set wrongtype somevalue + assert_error "*wrong*" {r hmget wrongtype field1 field2} + } + test {HMGET - small hash} { set keys {} set vals {} diff --git a/tests/unit/type/zset.tcl b/tests/unit/type/zset.tcl index 642922e9..6b8fc54a 100644 --- a/tests/unit/type/zset.tcl +++ b/tests/unit/type/zset.tcl @@ -199,26 +199,65 @@ start_server {tags {"zset"}} { list $v1 $v2 [r zscore zset foo] [r zscore zset bar] } {{bar foo} {foo bar} -2 6} - test {ZRANGEBYSCORE and ZCOUNT basics} { - r del zset - r zadd zset 1 a - r zadd zset 2 b - r zadd zset 3 c - r zadd zset 4 d - r zadd zset 5 e - list [r zrangebyscore zset 2 4] [r zrangebyscore zset (2 (4] \ - [r zcount zset 2 4] [r zcount zset (2 (4] - } {{b c d} c 3 1} - - test {ZRANGEBYSCORE withscores} { - r del zset - r zadd zset 1 a - r zadd zset 2 b - r zadd zset 3 c - r zadd zset 4 d - r zadd zset 5 e - r zrangebyscore zset 2 4 withscores - } {b 2 c 3 d 4} + proc create_default_zset {} { + create_zset zset {-inf a 1 b 2 c 3 d 4 e 5 f +inf g} + } + + test "ZRANGEBYSCORE/ZREVRANGEBYSCORE/ZCOUNT basics" { + create_default_zset + + # inclusive range + assert_equal {a b c} [r zrangebyscore zset -inf 2] + assert_equal {b c d} [r zrangebyscore zset 0 3] + assert_equal {d e f} [r zrangebyscore zset 3 6] + assert_equal {e f g} [r zrangebyscore zset 4 +inf] + assert_equal {c b a} [r zrevrangebyscore zset 2 -inf] + assert_equal {d c b} [r zrevrangebyscore zset 3 0] + assert_equal {f e d} [r zrevrangebyscore zset 6 3] + assert_equal {g f e} [r zrevrangebyscore zset +inf 4] + assert_equal 3 [r zcount zset 0 3] + + # exclusive range + assert_equal {b} [r zrangebyscore zset (-inf (2] + assert_equal {b c} [r zrangebyscore zset (0 (3] + assert_equal {e f} [r zrangebyscore zset (3 (6] + assert_equal {f} [r zrangebyscore zset (4 (+inf] + assert_equal {b} [r zrevrangebyscore zset (2 (-inf] + assert_equal {c b} [r zrevrangebyscore zset (3 (0] + assert_equal {f e} [r zrevrangebyscore zset (6 (3] + assert_equal {f} [r zrevrangebyscore zset (+inf (4] + assert_equal 2 [r zcount zset (0 (3] + } + + test "ZRANGEBYSCORE with WITHSCORES" { + create_default_zset + assert_equal {b 1 c 2 d 3} [r zrangebyscore zset 0 3 withscores] + assert_equal {d 3 c 2 b 1} [r zrevrangebyscore zset 3 0 withscores] + } + + test "ZRANGEBYSCORE with LIMIT" { + create_default_zset + assert_equal {b c} [r zrangebyscore zset 0 10 LIMIT 0 2] + assert_equal {d e f} [r zrangebyscore zset 0 10 LIMIT 2 3] + assert_equal {d e f} [r zrangebyscore zset 0 10 LIMIT 2 10] + assert_equal {} [r zrangebyscore zset 0 10 LIMIT 20 10] + assert_equal {f e} [r zrevrangebyscore zset 10 0 LIMIT 0 2] + assert_equal {d c b} [r zrevrangebyscore zset 10 0 LIMIT 2 3] + assert_equal {d c b} [r zrevrangebyscore zset 10 0 LIMIT 2 10] + assert_equal {} [r zrevrangebyscore zset 10 0 LIMIT 20 10] + } + + test "ZRANGEBYSCORE with LIMIT and WITHSCORES" { + create_default_zset + assert_equal {e 4 f 5} [r zrangebyscore zset 2 5 LIMIT 2 3 WITHSCORES] + assert_equal {d 3 c 2} [r zrevrangebyscore zset 5 2 LIMIT 2 3 WITHSCORES] + } + + test "ZRANGEBYSCORE with non-value min or max" { + assert_error "*not a double*" {r zrangebyscore fooz str 1} + assert_error "*not a double*" {r zrangebyscore fooz 1 str} + assert_error "*not a double*" {r zrangebyscore fooz 1 NaN} + } tags {"slow"} { test {ZRANGEBYSCORE fuzzy test, 100 ranges in 1000 elements sorted set} { @@ -302,49 +341,62 @@ start_server {tags {"zset"}} { } {} } - test {ZRANGEBYSCORE with LIMIT} { - r del zset - r zadd zset 1 a - r zadd zset 2 b - r zadd zset 3 c - r zadd zset 4 d - r zadd zset 5 e - list \ - [r zrangebyscore zset 0 10 LIMIT 0 2] \ - [r zrangebyscore zset 0 10 LIMIT 2 3] \ - [r zrangebyscore zset 0 10 LIMIT 2 10] \ - [r zrangebyscore zset 0 10 LIMIT 20 10] - } {{a b} {c d e} {c d e} {}} - - test {ZRANGEBYSCORE with LIMIT and withscores} { - r del zset - r zadd zset 10 a - r zadd zset 20 b - r zadd zset 30 c - r zadd zset 40 d - r zadd zset 50 e - r zrangebyscore zset 20 50 LIMIT 2 3 withscores - } {d 40 e 50} - - test {ZREMRANGEBYSCORE basics} { - r del zset - r zadd zset 1 a - r zadd zset 2 b - r zadd zset 3 c - r zadd zset 4 d - r zadd zset 5 e - list [r zremrangebyscore zset 2 4] [r zrange zset 0 -1] - } {3 {a e}} - - test {ZREMRANGEBYSCORE from -inf to +inf} { - r del zset - r zadd zset 1 a - r zadd zset 2 b - r zadd zset 3 c - r zadd zset 4 d - r zadd zset 5 e - list [r zremrangebyscore zset -inf +inf] [r zrange zset 0 -1] - } {5 {}} + test "ZREMRANGEBYSCORE basics" { + proc remrangebyscore {min max} { + create_zset zset {1 a 2 b 3 c 4 d 5 e} + r zremrangebyscore zset $min $max + } + + # inner range + assert_equal 3 [remrangebyscore 2 4] + assert_equal {a e} [r zrange zset 0 -1] + + # start underflow + assert_equal 1 [remrangebyscore -10 1] + assert_equal {b c d e} [r zrange zset 0 -1] + + # end overflow + assert_equal 1 [remrangebyscore 5 10] + assert_equal {a b c d} [r zrange zset 0 -1] + + # switch min and max + assert_equal 0 [remrangebyscore 4 2] + assert_equal {a b c d e} [r zrange zset 0 -1] + + # -inf to mid + assert_equal 3 [remrangebyscore -inf 3] + assert_equal {d e} [r zrange zset 0 -1] + + # mid to +inf + assert_equal 3 [remrangebyscore 3 +inf] + assert_equal {a b} [r zrange zset 0 -1] + + # -inf to +inf + assert_equal 5 [remrangebyscore -inf +inf] + assert_equal {} [r zrange zset 0 -1] + + # exclusive min + assert_equal 4 [remrangebyscore (1 5] + assert_equal {a} [r zrange zset 0 -1] + assert_equal 3 [remrangebyscore (2 5] + assert_equal {a b} [r zrange zset 0 -1] + + # exclusive max + assert_equal 4 [remrangebyscore 1 (5] + assert_equal {e} [r zrange zset 0 -1] + assert_equal 3 [remrangebyscore 1 (4] + assert_equal {d e} [r zrange zset 0 -1] + + # exclusive min and max + assert_equal 3 [remrangebyscore (1 (5] + assert_equal {a e} [r zrange zset 0 -1] + } + + test "ZREMRANGEBYSCORE with non-value min or max" { + assert_error "*not a double*" {r zremrangebyscore fooz str 1} + assert_error "*not a double*" {r zremrangebyscore fooz 1 str} + assert_error "*not a double*" {r zremrangebyscore fooz 1 NaN} + } test "ZREMRANGEBYRANK basics" { proc remrangebyrank {min max} {