From: antirez Date: Thu, 28 Oct 2010 12:12:25 +0000 (+0200) Subject: Merge remote branch 'remotes/pietern/zrevrangebyscore' X-Git-Url: https://git.saurik.com/redis.git/commitdiff_plain/73abd0a9d2a956af34351c59e15eba603ab29c6e?hp=7236fdb22f7fdb60833c32121b0828f11a13883c Merge remote branch 'remotes/pietern/zrevrangebyscore' --- diff --git a/README b/README index 1f0a1fe6..9ea1456f 100644 --- a/README +++ b/README @@ -27,6 +27,23 @@ After you build Redis is a good idea to test it, using: % make test +Buliding using tcmalloc +----------------------- + +tcmalloc is a fast and space efficient implementation (for little objects) +of malloc(). Compiling Redis with it can improve performances and memeory +usage. You can read more about it here: + +http://goog-perftools.sourceforge.net/doc/tcmalloc.html + +In order to compile Redis with tcmalloc support install tcmalloc on your system +and then use: + + % make USE_TCMALLOC=yes + +Note that you can pass any other target to make, as long as you append +USE_TCMALLOC=yes at the end. + Running Redis ------------- diff --git a/redis.conf b/redis.conf index b087417a..8d69f929 100644 --- a/redis.conf +++ b/redis.conf @@ -148,6 +148,25 @@ dir ./ # # maxmemory +# MAXMEMORY POLICY: how Redis will select what to remove when maxmemory +# is reached? You can select among five behavior: +# +# volatile-lru -> remove the key with an expire set using an LRU algorithm +# allkeys-lru -> remove any key accordingly to the LRU algorithm +# volatile-random -> remove a random key with an expire set +# allkeys->random -> remove a random key, any key +# volatile-ttl -> remove the key with the nearest expire time (minor TTL) +# +# maxmemory-policy volatile-lru + +# LRU and minimal TTL algorithms are not precise algorithms but approximated +# algorithms (in order to save memory), so you can select as well the sample +# size to check. For instance for default Redis will check three keys and +# pick the one that was used less recently, you can change the sample size +# using the following configuration directive. +# +# maxmemory-samples 3 + ############################## APPEND ONLY MODE ############################### # By default Redis asynchronously dumps the dataset on disk. If you can live diff --git a/src/Makefile b/src/Makefile index e1e989c6..6f09200e 100644 --- a/src/Makefile +++ b/src/Makefile @@ -12,6 +12,11 @@ else CFLAGS?= -std=c99 -pedantic $(OPTIMIZATION) -Wall -W $(ARCH) $(PROF) CCLINK?= -lm -pthread endif + +ifeq ($(USE_TCMALLOC),yes) + CCLINK+= -ltcmalloc + CFLAGS+= -DUSE_TCMALLOC +endif CCOPT= $(CFLAGS) $(CCLINK) $(ARCH) $(PROF) DEBUG?= -g -rdynamic -ggdb @@ -19,7 +24,7 @@ PREFIX= /usr/local INSTALL_BIN= $(PREFIX)/bin INSTALL= cp -p -OBJ = adlist.o ae.o anet.o dict.o redis.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o vm.o pubsub.o multi.o debug.o sort.o intset.o +OBJ = adlist.o ae.o anet.o dict.o redis.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o vm.o pubsub.o multi.o debug.o sort.o intset.o syncio.o BENCHOBJ = ae.o anet.o redis-benchmark.o sds.o adlist.o zmalloc.o CLIOBJ = anet.o sds.o adlist.o redis-cli.o zmalloc.o linenoise.o CHECKDUMPOBJ = redis-check-dump.o lzf_c.o lzf_d.o @@ -33,7 +38,6 @@ CHECKAOFPRGNAME = redis-check-aof all: redis-server redis-benchmark redis-cli redis-check-dump redis-check-aof - # Deps (use make dep to generate this) adlist.o: adlist.c adlist.h zmalloc.h ae.o: ae.c ae.h zmalloc.h config.h ae_kqueue.c @@ -43,6 +47,7 @@ ae_select.o: ae_select.c anet.o: anet.c fmacros.h anet.h aof.o: aof.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h +chprgname.o: chprgname.c config.o: config.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h db.o: db.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ @@ -80,6 +85,7 @@ sds.o: sds.c sds.h zmalloc.h sha1.o: sha1.c sha1.h sort.o: sort.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h pqsort.h +syncio.o: syncio.c t_hash.o: t_hash.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h t_list.o: t_list.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ diff --git a/src/aof.c b/src/aof.c index 16713481..f7a0c453 100644 --- a/src/aof.c +++ b/src/aof.c @@ -314,55 +314,6 @@ fmterr: exit(1); } -/* Write binary-safe string into a file in the bulkformat - * $\r\n\r\n */ -int fwriteBulkString(FILE *fp, char *s, unsigned long len) { - char cbuf[128]; - int clen; - cbuf[0] = '$'; - clen = 1+ll2string(cbuf+1,sizeof(cbuf)-1,len); - cbuf[clen++] = '\r'; - cbuf[clen++] = '\n'; - if (fwrite(cbuf,clen,1,fp) == 0) return 0; - if (len > 0 && fwrite(s,len,1,fp) == 0) return 0; - if (fwrite("\r\n",2,1,fp) == 0) return 0; - return 1; -} - -/* Write a double value in bulk format $\r\n\r\n */ -int fwriteBulkDouble(FILE *fp, double d) { - char buf[128], dbuf[128]; - - snprintf(dbuf,sizeof(dbuf),"%.17g\r\n",d); - snprintf(buf,sizeof(buf),"$%lu\r\n",(unsigned long)strlen(dbuf)-2); - if (fwrite(buf,strlen(buf),1,fp) == 0) return 0; - if (fwrite(dbuf,strlen(dbuf),1,fp) == 0) return 0; - return 1; -} - -/* Write a long value in bulk format $\r\n\r\n */ -int fwriteBulkLongLong(FILE *fp, long long l) { - char bbuf[128], lbuf[128]; - unsigned int blen, llen; - llen = ll2string(lbuf,32,l); - blen = snprintf(bbuf,sizeof(bbuf),"$%u\r\n%s\r\n",llen,lbuf); - if (fwrite(bbuf,blen,1,fp) == 0) return 0; - return 1; -} - -/* Delegate writing an object to writing a bulk string or bulk long long. */ -int fwriteBulkObject(FILE *fp, robj *obj) { - /* Avoid using getDecodedObject to help copy-on-write (we are often - * in a child process when this function is called). */ - if (obj->encoding == REDIS_ENCODING_INT) { - return fwriteBulkLongLong(fp,(long)obj->ptr); - } else if (obj->encoding == REDIS_ENCODING_RAW) { - return fwriteBulkString(fp,obj->ptr,sdslen(obj->ptr)); - } else { - redisPanic("Unknown string encoding"); - } -} - /* Write a sequence of commands able to fully rebuild the dataset into * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */ int rewriteAppendOnlyFile(char *filename) { diff --git a/src/config.c b/src/config.c index 1bd678c7..9655917c 100644 --- a/src/config.c +++ b/src/config.c @@ -123,6 +123,27 @@ void loadServerConfig(char *filename) { server.maxclients = atoi(argv[1]); } else if (!strcasecmp(argv[0],"maxmemory") && argc == 2) { server.maxmemory = memtoll(argv[1],NULL); + } else if (!strcasecmp(argv[0],"maxmemory-policy") && argc == 2) { + if (!strcasecmp(argv[1],"volatile-lru")) { + server.maxmemory_policy = REDIS_MAXMEMORY_VOLATILE_LRU; + } else if (!strcasecmp(argv[1],"volatile-random")) { + server.maxmemory_policy = REDIS_MAXMEMORY_VOLATILE_RANDOM; + } else if (!strcasecmp(argv[1],"volatile-ttl")) { + server.maxmemory_policy = REDIS_MAXMEMORY_VOLATILE_TTL; + } else if (!strcasecmp(argv[1],"allkeys-lru")) { + server.maxmemory_policy = REDIS_MAXMEMORY_ALLKEYS_LRU; + } else if (!strcasecmp(argv[1],"allkeys-random")) { + server.maxmemory_policy = REDIS_MAXMEMORY_ALLKEYS_RANDOM; + } else { + err = "Invalid maxmemory policy"; + goto loaderr; + } + } else if (!strcasecmp(argv[0],"maxmemory-samples") && argc == 2) { + server.maxmemory_samples = atoi(argv[1]); + if (server.maxmemory_samples <= 0) { + err = "maxmemory-samples must be 1 or greater"; + goto loaderr; + } } else if (!strcasecmp(argv[0],"slaveof") && argc == 3) { server.masterhost = sdsnew(argv[1]); server.masterport = atoi(argv[2]); @@ -242,6 +263,24 @@ void configSetCommand(redisClient *c) { ll < 0) goto badfmt; server.maxmemory = ll; if (server.maxmemory) freeMemoryIfNeeded(); + } else if (!strcasecmp(c->argv[2]->ptr,"maxmemory-policy")) { + if (!strcasecmp(o->ptr,"volatile-lru")) { + server.maxmemory_policy = REDIS_MAXMEMORY_VOLATILE_LRU; + } else if (!strcasecmp(o->ptr,"volatile-random")) { + server.maxmemory_policy = REDIS_MAXMEMORY_VOLATILE_RANDOM; + } else if (!strcasecmp(o->ptr,"volatile-ttl")) { + server.maxmemory_policy = REDIS_MAXMEMORY_VOLATILE_TTL; + } else if (!strcasecmp(o->ptr,"allkeys-lru")) { + server.maxmemory_policy = REDIS_MAXMEMORY_ALLKEYS_LRU; + } else if (!strcasecmp(o->ptr,"allkeys-random")) { + server.maxmemory_policy = REDIS_MAXMEMORY_ALLKEYS_RANDOM; + } else { + goto badfmt; + } + } else if (!strcasecmp(c->argv[2]->ptr,"maxmemory-samples")) { + if (getLongLongFromObject(o,&ll) == REDIS_ERR || + ll <= 0) goto badfmt; + server.maxmemory_samples = ll; } else if (!strcasecmp(c->argv[2]->ptr,"timeout")) { if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0 || ll > LONG_MAX) goto badfmt; @@ -333,6 +372,7 @@ void configGetCommand(redisClient *c) { robj *o = getDecodedObject(c->argv[2]); void *replylen = addDeferredMultiBulkLength(c); char *pattern = o->ptr; + char buf[128]; int matches = 0; if (stringmatch(pattern,"dbfilename",0)) { @@ -351,17 +391,34 @@ void configGetCommand(redisClient *c) { matches++; } if (stringmatch(pattern,"maxmemory",0)) { - char buf[128]; - - ll2string(buf,128,server.maxmemory); + ll2string(buf,sizeof(buf),server.maxmemory); addReplyBulkCString(c,"maxmemory"); addReplyBulkCString(c,buf); matches++; } - if (stringmatch(pattern,"timeout",0)) { - char buf[128]; + if (stringmatch(pattern,"maxmemory-policy",0)) { + char *s; - ll2string(buf,128,server.maxidletime); + switch(server.maxmemory_policy) { + case REDIS_MAXMEMORY_VOLATILE_LRU: s = "volatile-lru"; break; + case REDIS_MAXMEMORY_VOLATILE_TTL: s = "volatile-ttl"; break; + case REDIS_MAXMEMORY_VOLATILE_RANDOM: s = "volatile-random"; break; + case REDIS_MAXMEMORY_ALLKEYS_LRU: s = "allkeys-lru"; break; + case REDIS_MAXMEMORY_ALLKEYS_RANDOM: s = "allkeys-random"; break; + default: s = "unknown"; break; /* too harmless to panic */ + } + addReplyBulkCString(c,"maxmemory-policy"); + addReplyBulkCString(c,s); + matches++; + } + if (stringmatch(pattern,"maxmemory-samples",0)) { + ll2string(buf,sizeof(buf),server.maxmemory_samples); + addReplyBulkCString(c,"maxmemory-samples"); + addReplyBulkCString(c,buf); + matches++; + } + if (stringmatch(pattern,"timeout",0)) { + ll2string(buf,sizeof(buf),server.maxidletime); addReplyBulkCString(c,"timeout"); addReplyBulkCString(c,buf); matches++; @@ -418,10 +475,11 @@ void configCommand(redisClient *c) { configGetCommand(c); } else if (!strcasecmp(c->argv[1]->ptr,"resetstat")) { if (c->argc != 2) goto badarity; + server.stat_keyspace_hits = 0; + server.stat_keyspace_misses = 0; server.stat_numcommands = 0; server.stat_numconnections = 0; server.stat_expiredkeys = 0; - server.stat_starttime = time(NULL); addReply(c,shared.ok); } else { addReplyError(c, diff --git a/src/config.h b/src/config.h index e2d84818..40f22fa5 100644 --- a/src/config.h +++ b/src/config.h @@ -5,8 +5,17 @@ #include #endif -/* test for malloc_size() */ -#ifdef __APPLE__ +/* Use tcmalloc's malloc_size() when available. + * When tcmalloc is used, native OSX malloc_size() may never be used because + * this expects a different allocation scheme. Therefore, *exclusively* use + * either tcmalloc or OSX's malloc_size()! */ +#if defined(USE_TCMALLOC) +#include +#if TC_VERSION_MAJOR >= 1 && TC_VERSION_MINOR >= 6 +#define HAVE_MALLOC_SIZE 1 +#define redis_malloc_size(p) tc_malloc_size(p) +#endif +#elif defined(__APPLE__) #include #define HAVE_MALLOC_SIZE 1 #define redis_malloc_size(p) malloc_size(p) diff --git a/src/db.c b/src/db.c index 44507847..a39a03bb 100644 --- a/src/db.c +++ b/src/db.c @@ -11,6 +11,9 @@ robj *lookupKey(redisDb *db, robj *key) { if (de) { robj *val = dictGetEntryVal(de); + /* Update the access time for the aging algorithm. */ + val->lru = server.lruclock; + if (server.vm_enabled) { if (val->storage == REDIS_VM_MEMORY || val->storage == REDIS_VM_SWAPPING) @@ -18,8 +21,6 @@ robj *lookupKey(redisDb *db, robj *key) { /* If we were swapping the object out, cancel the operation */ if (val->storage == REDIS_VM_SWAPPING) vmCancelThreadedIOJob(val); - /* Update the access time for the aging algorithm. */ - val->lru = server.lruclock; } else { int notify = (val->storage == REDIS_VM_LOADING); @@ -33,8 +34,10 @@ robj *lookupKey(redisDb *db, robj *key) { if (notify) handleClientsBlockedOnSwappedKey(db,key); } } + server.stat_keyspace_hits++; return val; } else { + server.stat_keyspace_misses++; return NULL; } } @@ -467,7 +470,6 @@ int expireIfNeeded(redisDb *db, robj *key) { /* Delete the key */ server.stat_expiredkeys++; - server.dirty++; propagateExpire(db,key); return dbDelete(db,key); } diff --git a/src/debug.c b/src/debug.c index 2f7ab58f..b364dd16 100644 --- a/src/debug.c +++ b/src/debug.c @@ -213,9 +213,11 @@ void debugCommand(redisClient *c) { strenc = strEncoding(val->encoding); addReplyStatusFormat(c, "Value at:%p refcount:%d " - "encoding:%s serializedlength:%lld", + "encoding:%s serializedlength:%lld " + "lru:%d lru_seconds_idle:%lu", (void*)val, val->refcount, - strenc, (long long) rdbSavedObjectLen(val,NULL)); + strenc, (long long) rdbSavedObjectLen(val,NULL), + val->lru, estimateObjectIdleTime(val)); } else { vmpointer *vp = (vmpointer*) val; addReplyStatusFormat(c, diff --git a/src/object.c b/src/object.c index c1a08245..b1eae963 100644 --- a/src/object.c +++ b/src/object.c @@ -19,14 +19,19 @@ robj *createObject(int type, void *ptr) { o->encoding = REDIS_ENCODING_RAW; o->ptr = ptr; o->refcount = 1; - if (server.vm_enabled) { - /* Note that this code may run in the context of an I/O thread - * and accessing server.lruclock in theory is an error - * (no locks). But in practice this is safe, and even if we read - * garbage Redis will not fail. */ - o->lru = server.lruclock; - o->storage = REDIS_VM_MEMORY; - } + /* Set the LRU to the current lruclock (minutes resolution). + * We do this regardless of the fact VM is active as LRU is also + * used for the maxmemory directive when Redis is used as cache. + * + * Note that this code may run in the context of an I/O thread + * and accessing server.lruclock in theory is an error + * (no locks). But in practice this is safe, and even if we read + * garbage Redis will not fail. */ + o->lru = server.lruclock; + /* The following is only needed if VM is active, but since the conditional + * is probably more costly than initializing the field it's better to + * have every field properly initialized anyway. */ + o->storage = REDIS_VM_MEMORY; return o; } @@ -240,8 +245,12 @@ robj *tryObjectEncoding(robj *o) { * range and if this is the main thread, since when VM is enabled we * have the constraint that I/O thread should only handle non-shared * objects, in order to avoid race conditions (we don't have per-object - * locking). */ - if (value >= 0 && value < REDIS_SHARED_INTEGERS && + * locking). + * + * Note that we also avoid using shared integers when maxmemory is used + * because very object needs to have a private LRU field for the LRU + * algorithm to work well. */ + if (server.maxmemory == 0 && value >= 0 && value < REDIS_SHARED_INTEGERS && pthread_equal(pthread_self(),server.mainthread)) { decrRefCount(o); incrRefCount(shared.integers[value]); @@ -433,3 +442,14 @@ char *strEncoding(int encoding) { default: return "unknown"; } } + +/* Given an object returns the min number of seconds the object was never + * requested, using an approximated LRU algorithm. */ +unsigned long estimateObjectIdleTime(robj *o) { + if (server.lruclock >= o->lru) { + return (server.lruclock - o->lru) * REDIS_LRU_CLOCK_RESOLUTION; + } else { + return ((REDIS_LRU_CLOCK_MAX - o->lru) + server.lruclock) * + REDIS_LRU_CLOCK_RESOLUTION; + } +} diff --git a/src/redis.c b/src/redis.c index 8f208926..08bba542 100644 --- a/src/redis.c +++ b/src/redis.c @@ -479,6 +479,10 @@ void activeExpireCycle(void) { } } +void updateLRUClock(void) { + server.lruclock = (time(NULL)/REDIS_LRU_CLOCK_RESOLUTION) & + REDIS_LRU_CLOCK_MAX; +} int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { int j, loops = server.cronloops++; @@ -491,19 +495,19 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { * in objects at every object access, and accuracy is not needed. * To access a global var is faster than calling time(NULL) */ server.unixtime = time(NULL); - /* We have just 21 bits per object for LRU information. - * So we use an (eventually wrapping) LRU clock with minutes resolution. + /* We have just 22 bits per object for LRU information. + * So we use an (eventually wrapping) LRU clock with 10 seconds resolution. + * 2^22 bits with 10 seconds resoluton is more or less 1.5 years. * - * When we need to select what object to swap, we compute the minimum - * time distance between the current lruclock and the object last access - * lruclock info. Even if clocks will wrap on overflow, there is - * the interesting property that we are sure that at least - * ABS(A-B) minutes passed between current time and timestamp B. + * Note that even if this will wrap after 1.5 years it's not a problem, + * everything will still work but just some object will appear younger + * to Redis. But for this to happen a given object should never be touched + * for 1.5 years. * - * This is not precise but we don't need at all precision, but just - * something statistically reasonable. + * Note that you can change the resolution altering the + * REDIS_LRU_CLOCK_RESOLUTION define. */ - server.lruclock = (time(NULL)/60)&((1<<21)-1); + updateLRUClock(); /* We received a SIGTERM, shutting down here in a safe way, as it is * not ok doing so inside the signal handler. */ @@ -734,6 +738,8 @@ void initServerConfig() { server.maxclients = 0; server.blpop_blocked_clients = 0; server.maxmemory = 0; + server.maxmemory_policy = REDIS_MAXMEMORY_VOLATILE_LRU; + server.maxmemory_samples = 3; server.vm_enabled = 0; server.vm_swap_file = zstrdup("/tmp/redis-%p.vm"); server.vm_page_size = 256; /* 256 bytes per page */ @@ -748,6 +754,7 @@ void initServerConfig() { server.set_max_intset_entries = REDIS_SET_MAX_INTSET_ENTRIES; server.shutdown_asap = 0; + updateLRUClock(); resetServerSaveParams(); appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */ @@ -817,6 +824,8 @@ void initServer() { server.stat_numconnections = 0; server.stat_expiredkeys = 0; server.stat_starttime = time(NULL); + server.stat_keyspace_misses = 0; + server.stat_keyspace_hits = 0; server.unixtime = time(NULL); aeCreateTimeEvent(server.el, 1, serverCron, NULL, NULL); if (aeCreateFileEvent(server.el, server.fd, AE_READABLE, @@ -1085,7 +1094,7 @@ int prepareForShutdown() { /* Append only file: fsync() the AOF and exit */ aof_fsync(server.appendfd); if (server.vm_enabled) unlink(server.vm_swap_file); - } else { + } else if (server.saveparamslen > 0) { /* Snapshotting. Perform a SYNC SAVE and exit */ if (rdbSave(server.dbfilename) != REDIS_OK) { /* Ooops.. error saving! The best we can do is to continue @@ -1096,6 +1105,8 @@ int prepareForShutdown() { redisLog(REDIS_WARNING,"Error trying to save the DB, can't exit"); return REDIS_ERR; } + } else { + redisLog(REDIS_WARNING,"Not saving DB."); } if (server.daemonize) unlink(server.pidfile); redisLog(REDIS_WARNING,"Server exit now, bye bye..."); @@ -1166,6 +1177,7 @@ sds genRedisInfoString(void) { "process_id:%ld\r\n" "uptime_in_seconds:%ld\r\n" "uptime_in_days:%ld\r\n" + "lru_clock:%ld\r\n" "used_cpu_sys:%.2f\r\n" "used_cpu_user:%.2f\r\n" "used_cpu_sys_childrens:%.2f\r\n" @@ -1176,6 +1188,7 @@ sds genRedisInfoString(void) { "used_memory:%zu\r\n" "used_memory_human:%s\r\n" "mem_fragmentation_ratio:%.2f\r\n" + "use_tcmalloc:%d\r\n" "changes_since_last_save:%lld\r\n" "bgsave_in_progress:%d\r\n" "last_save_time:%ld\r\n" @@ -1183,6 +1196,8 @@ sds genRedisInfoString(void) { "total_connections_received:%lld\r\n" "total_commands_processed:%lld\r\n" "expired_keys:%lld\r\n" + "keyspace_hits:%lld\r\n" + "keyspace_misses:%lld\r\n" "hash_max_zipmap_entries:%zu\r\n" "hash_max_zipmap_value:%zu\r\n" "pubsub_channels:%ld\r\n" @@ -1197,6 +1212,7 @@ sds genRedisInfoString(void) { (long) getpid(), uptime, uptime/(3600*24), + (unsigned long) server.lruclock, (float)self_ru.ru_utime.tv_sec+(float)self_ru.ru_utime.tv_usec/1000000, (float)self_ru.ru_stime.tv_sec+(float)self_ru.ru_stime.tv_usec/1000000, (float)c_ru.ru_utime.tv_sec+(float)c_ru.ru_utime.tv_usec/1000000, @@ -1207,6 +1223,11 @@ sds genRedisInfoString(void) { zmalloc_used_memory(), hmem, zmalloc_get_fragmentation_ratio(), +#ifdef USE_TCMALLOC + 1, +#else + 0, +#endif server.dirty, server.bgsavechildpid != -1, server.lastsave, @@ -1214,6 +1235,8 @@ sds genRedisInfoString(void) { server.stat_numconnections, server.stat_numcommands, server.stat_expiredkeys, + server.stat_keyspace_hits, + server.stat_keyspace_misses, server.hash_max_zipmap_entries, server.hash_max_zipmap_value, dictSize(server.pubsub_channels), @@ -1330,10 +1353,93 @@ int tryFreeOneObjectFromFreelist(void) { * memory usage. */ void freeMemoryIfNeeded(void) { + /* Remove keys accordingly to the active policy as long as we are + * over the memory limit. */ while (server.maxmemory && zmalloc_used_memory() > server.maxmemory) { int j, k, freed = 0; + /* Basic strategy -- remove objects from the free list. */ if (tryFreeOneObjectFromFreelist() == REDIS_OK) continue; + + for (j = 0; j < server.dbnum; j++) { + long bestval; + sds bestkey = NULL; + struct dictEntry *de; + redisDb *db = server.db+j; + dict *dict; + + if (server.maxmemory_policy == REDIS_MAXMEMORY_ALLKEYS_LRU || + server.maxmemory_policy == REDIS_MAXMEMORY_ALLKEYS_RANDOM) + { + dict = server.db[j].dict; + } else { + dict = server.db[j].expires; + } + if (dictSize(dict) == 0) continue; + + /* volatile-random and allkeys-random policy */ + if (server.maxmemory_policy == REDIS_MAXMEMORY_ALLKEYS_RANDOM || + server.maxmemory_policy == REDIS_MAXMEMORY_VOLATILE_RANDOM) + { + de = dictGetRandomKey(dict); + bestkey = dictGetEntryKey(de); + } + + /* volatile-lru and allkeys-lru policy */ + else if (server.maxmemory_policy == REDIS_MAXMEMORY_ALLKEYS_LRU || + server.maxmemory_policy == REDIS_MAXMEMORY_VOLATILE_LRU) + { + for (k = 0; k < server.maxmemory_samples; k++) { + sds thiskey; + long thisval; + robj *o; + + de = dictGetRandomKey(dict); + thiskey = dictGetEntryKey(de); + o = dictGetEntryVal(de); + thisval = estimateObjectIdleTime(o); + + /* Higher idle time is better candidate for deletion */ + if (bestkey == NULL || thisval > bestval) { + bestkey = thiskey; + bestval = thisval; + } + } + } + + /* volatile-ttl */ + else if (server.maxmemory_policy == REDIS_MAXMEMORY_VOLATILE_TTL) { + for (k = 0; k < server.maxmemory_samples; k++) { + sds thiskey; + long thisval; + + de = dictGetRandomKey(dict); + thiskey = dictGetEntryKey(de); + thisval = (long) dictGetEntryVal(de); + + /* Expire sooner (minor expire unix timestamp) is better + * candidate for deletion */ + if (bestkey == NULL || thisval < bestval) { + bestkey = thiskey; + bestval = thisval; + } + } + } + + /* Finally remove the selected key. */ + if (bestkey) { + robj *keyobj = createStringObject(bestkey,sdslen(bestkey)); + dbDelete(db,keyobj); + server.stat_expiredkeys++; + decrRefCount(keyobj); + freed++; + } + } + if (!freed) return; /* nothing to free... */ + } + + while(0) { + int j, k, freed = 0; for (j = 0; j < server.dbnum; j++) { int minttl = -1; sds minkey = NULL; @@ -1501,6 +1607,7 @@ void segvHandler(int sig, siginfo_t *info, void *secret) { int i, trace_size = 0; ucontext_t *uc = (ucontext_t*) secret; sds infostring; + struct sigaction act; REDIS_NOTUSED(info); redisLog(REDIS_WARNING, @@ -1522,7 +1629,16 @@ void segvHandler(int sig, siginfo_t *info, void *secret) { /* free(messages); Don't call free() with possibly corrupted memory. */ if (server.daemonize) unlink(server.pidfile); - _exit(0); + + /* Make sure we exit with the right signal at the end. So for instance + * the core will be dumped if enabled. */ + sigemptyset (&act.sa_mask); + /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction + * is used. Otherwise, sa_handler is used */ + act.sa_flags = SA_NODEFER | SA_ONSTACK | SA_RESETHAND; + act.sa_handler = SIG_DFL; + sigaction (sig, &act, NULL); + kill(getpid(),sig); } void sigtermHandler(int sig) { diff --git a/src/redis.h b/src/redis.h index 24dfb9b5..bc961a71 100644 --- a/src/redis.h +++ b/src/redis.h @@ -198,6 +198,13 @@ #define REDIS_OP_DIFF 1 #define REDIS_OP_INTER 2 +/* Redis maxmemory strategies */ +#define REDIS_MAXMEMORY_VOLATILE_LRU 0 +#define REDIS_MAXMEMORY_VOLATILE_TTL 1 +#define REDIS_MAXMEMORY_VOLATILE_RANDOM 2 +#define REDIS_MAXMEMORY_ALLKEYS_LRU 3 +#define REDIS_MAXMEMORY_ALLKEYS_RANDOM 4 + /* We can print the stacktrace, so our assert is defined this way: */ #define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1))) #define redisPanic(_e) _redisPanic(#_e,__FILE__,__LINE__),_exit(1) @@ -211,6 +218,8 @@ void _redisPanic(char *msg, char *file, int line); /* A redis object, that is a type able to hold a string / list / set */ /* The actual Redis Object */ +#define REDIS_LRU_CLOCK_MAX ((1<<21)-1) /* Max value of obj->lru */ +#define REDIS_LRU_CLOCK_RESOLUTION 10 /* LRU clock resolution in seconds */ typedef struct redisObject { unsigned type:4; unsigned storage:2; /* REDIS_VM_MEMORY or REDIS_VM_SWAPPING */ @@ -348,12 +357,14 @@ struct redisServer { aeEventLoop *el; int cronloops; /* number of times the cron function run */ list *objfreelist; /* A list of freed objects to avoid malloc() */ - time_t lastsave; /* Unix time of last save succeeede */ + time_t lastsave; /* Unix time of last save succeeede */ /* Fields used only for stats */ - time_t stat_starttime; /* server start time */ - long long stat_numcommands; /* number of processed commands */ - long long stat_numconnections; /* number of connections received */ - long long stat_expiredkeys; /* number of expired keys */ + time_t stat_starttime; /* server start time */ + long long stat_numcommands; /* number of processed commands */ + long long stat_numconnections; /* number of connections received */ + long long stat_expiredkeys; /* number of expired keys */ + long long stat_keyspace_hits; /* number of successful lookups of keys */ + long long stat_keyspace_misses; /* number of failed lookups of keys */ /* Configuration */ int verbosity; int glueoutputbuf; @@ -390,6 +401,8 @@ struct redisServer { int replstate; unsigned int maxclients; unsigned long long maxmemory; + int maxmemory_policy; + int maxmemory_samples; unsigned int blpop_blocked_clients; unsigned int vm_blocked_clients; /* Sort parameters - qsort_r() is only available under BSD so we @@ -678,6 +691,16 @@ int getLongLongFromObject(robj *o, long long *target); char *strEncoding(int encoding); int compareStringObjects(robj *a, robj *b); int equalStringObjects(robj *a, robj *b); +unsigned long estimateObjectIdleTime(robj *o); + +/* Synchronous I/O with timeout */ +int syncWrite(int fd, char *ptr, ssize_t size, int timeout); +int syncRead(int fd, char *ptr, ssize_t size, int timeout); +int syncReadLine(int fd, char *ptr, ssize_t size, int timeout); +int fwriteBulkString(FILE *fp, char *s, unsigned long len); +int fwriteBulkDouble(FILE *fp, double d); +int fwriteBulkLongLong(FILE *fp, long long l); +int fwriteBulkObject(FILE *fp, robj *obj); /* Replication */ void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc); diff --git a/src/replication.c b/src/replication.c index 8c629006..7687206a 100644 --- a/src/replication.c +++ b/src/replication.c @@ -110,68 +110,6 @@ void replicationFeedMonitors(list *monitors, int dictid, robj **argv, int argc) decrRefCount(cmdobj); } -int syncWrite(int fd, char *ptr, ssize_t size, int timeout) { - ssize_t nwritten, ret = size; - time_t start = time(NULL); - - timeout++; - while(size) { - if (aeWait(fd,AE_WRITABLE,1000) & AE_WRITABLE) { - nwritten = write(fd,ptr,size); - if (nwritten == -1) return -1; - ptr += nwritten; - size -= nwritten; - } - if ((time(NULL)-start) > timeout) { - errno = ETIMEDOUT; - return -1; - } - } - return ret; -} - -int syncRead(int fd, char *ptr, ssize_t size, int timeout) { - ssize_t nread, totread = 0; - time_t start = time(NULL); - - timeout++; - while(size) { - if (aeWait(fd,AE_READABLE,1000) & AE_READABLE) { - nread = read(fd,ptr,size); - if (nread <= 0) return -1; - ptr += nread; - size -= nread; - totread += nread; - } - if ((time(NULL)-start) > timeout) { - errno = ETIMEDOUT; - return -1; - } - } - return totread; -} - -int syncReadLine(int fd, char *ptr, ssize_t size, int timeout) { - ssize_t nread = 0; - - size--; - while(size) { - char c; - - if (syncRead(fd,&c,1,timeout) == -1) return -1; - if (c == '\n') { - *ptr = '\0'; - if (nread && *(ptr-1) == '\r') *(ptr-1) = '\0'; - return nread; - } else { - *ptr++ = c; - *ptr = '\0'; - nread++; - } - } - return nread; -} - void syncCommand(redisClient *c) { /* ignore SYNC if aleady slave or in monitor mode */ if (c->flags & REDIS_SLAVE) return; diff --git a/src/syncio.c b/src/syncio.c new file mode 100644 index 00000000..28ac1811 --- /dev/null +++ b/src/syncio.c @@ -0,0 +1,154 @@ +/* Synchronous socket and file I/O operations useful across the core. + * + * Copyright (c) 2009-2010, Salvatore Sanfilippo + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Redis nor the names of its contributors may be used + * to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "redis.h" + +/* ----------------- Blocking sockets I/O with timeouts --------------------- */ + +/* Redis performs most of the I/O in a nonblocking way, with the exception + * of the SYNC command where the slave does it in a blocking way, and + * the MIGRATE command that must be blocking in order to be atomic from the + * point of view of the two instances (one migrating the key and one receiving + * the key). This is why need the following blocking I/O functions. */ + +int syncWrite(int fd, char *ptr, ssize_t size, int timeout) { + ssize_t nwritten, ret = size; + time_t start = time(NULL); + + timeout++; + while(size) { + if (aeWait(fd,AE_WRITABLE,1000) & AE_WRITABLE) { + nwritten = write(fd,ptr,size); + if (nwritten == -1) return -1; + ptr += nwritten; + size -= nwritten; + } + if ((time(NULL)-start) > timeout) { + errno = ETIMEDOUT; + return -1; + } + } + return ret; +} + +int syncRead(int fd, char *ptr, ssize_t size, int timeout) { + ssize_t nread, totread = 0; + time_t start = time(NULL); + + timeout++; + while(size) { + if (aeWait(fd,AE_READABLE,1000) & AE_READABLE) { + nread = read(fd,ptr,size); + if (nread <= 0) return -1; + ptr += nread; + size -= nread; + totread += nread; + } + if ((time(NULL)-start) > timeout) { + errno = ETIMEDOUT; + return -1; + } + } + return totread; +} + +int syncReadLine(int fd, char *ptr, ssize_t size, int timeout) { + ssize_t nread = 0; + + size--; + while(size) { + char c; + + if (syncRead(fd,&c,1,timeout) == -1) return -1; + if (c == '\n') { + *ptr = '\0'; + if (nread && *(ptr-1) == '\r') *(ptr-1) = '\0'; + return nread; + } else { + *ptr++ = c; + *ptr = '\0'; + nread++; + } + } + return nread; +} + +/* ----------------- Blocking sockets I/O with timeouts --------------------- */ + +/* Write binary-safe string into a file in the bulkformat + * $\r\n\r\n */ +int fwriteBulkString(FILE *fp, char *s, unsigned long len) { + char cbuf[128]; + int clen; + cbuf[0] = '$'; + clen = 1+ll2string(cbuf+1,sizeof(cbuf)-1,len); + cbuf[clen++] = '\r'; + cbuf[clen++] = '\n'; + if (fwrite(cbuf,clen,1,fp) == 0) return 0; + if (len > 0 && fwrite(s,len,1,fp) == 0) return 0; + if (fwrite("\r\n",2,1,fp) == 0) return 0; + return 1; +} + +/* Write a double value in bulk format $\r\n\r\n */ +int fwriteBulkDouble(FILE *fp, double d) { + char buf[128], dbuf[128]; + + snprintf(dbuf,sizeof(dbuf),"%.17g\r\n",d); + snprintf(buf,sizeof(buf),"$%lu\r\n",(unsigned long)strlen(dbuf)-2); + if (fwrite(buf,strlen(buf),1,fp) == 0) return 0; + if (fwrite(dbuf,strlen(dbuf),1,fp) == 0) return 0; + return 1; +} + +/* Write a long value in bulk format $\r\n\r\n */ +int fwriteBulkLongLong(FILE *fp, long long l) { + char bbuf[128], lbuf[128]; + unsigned int blen, llen; + llen = ll2string(lbuf,32,l); + blen = snprintf(bbuf,sizeof(bbuf),"$%u\r\n%s\r\n",llen,lbuf); + if (fwrite(bbuf,blen,1,fp) == 0) return 0; + return 1; +} + +/* Delegate writing an object to writing a bulk string or bulk long long. */ +int fwriteBulkObject(FILE *fp, robj *obj) { + /* Avoid using getDecodedObject to help copy-on-write (we are often + * in a child process when this function is called). */ + if (obj->encoding == REDIS_ENCODING_INT) { + return fwriteBulkLongLong(fp,(long)obj->ptr); + } else if (obj->encoding == REDIS_ENCODING_RAW) { + return fwriteBulkString(fp,obj->ptr,sdslen(obj->ptr)); + } else { + redisPanic("Unknown string encoding"); + } +} + + diff --git a/src/t_hash.c b/src/t_hash.c index 5cef1cab..071b7754 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -310,6 +310,7 @@ void hmgetCommand(redisClient *c) { o = lookupKeyRead(c->db,c->argv[1]); if (o != NULL && o->type != REDIS_HASH) { addReply(c,shared.wrongtypeerr); + return; } /* Note the check for o != NULL happens inside the loop. This is diff --git a/src/version.h b/src/version.h index 80decef1..b139f9f8 100644 --- a/src/version.h +++ b/src/version.h @@ -1 +1 @@ -#define REDIS_VERSION "2.1.4" +#define REDIS_VERSION "2.1.5" diff --git a/src/vm.c b/src/vm.c index ee831fb9..1aad95d7 100644 --- a/src/vm.c +++ b/src/vm.c @@ -362,7 +362,7 @@ robj *vmPreviewObject(robj *o) { double computeObjectSwappability(robj *o) { /* actual age can be >= minage, but not < minage. As we use wrapping * 21 bit clocks with minutes resolution for the LRU. */ - time_t minage = abs(server.lruclock - o->lru); + time_t minage = estimateObjectIdleTime(o); long asize = 0, elesize; robj *ele; list *l; diff --git a/src/ziplist.c b/src/ziplist.c index 4f44bd58..98ad3113 100644 --- a/src/ziplist.c +++ b/src/ziplist.c @@ -409,6 +409,7 @@ static unsigned char *__ziplistCascadeUpdate(unsigned char *zl, unsigned char *p /* Advance the cursor */ p += rawlen; + curlen += extra; } else { if (next.prevrawlensize > rawlensize) { /* This would result in shrinking, which we want to avoid. @@ -781,6 +782,10 @@ void ziplistRepr(unsigned char *zl) { #ifdef ZIPLIST_TEST_MAIN #include +#include "adlist.h" +#include "sds.h" + +#define debug(f, ...) { if (DEBUG) printf(f, __VA_ARGS__); } unsigned char *createList() { unsigned char *zl = ziplistNew(); @@ -864,6 +869,32 @@ void pop(unsigned char *zl, int where) { } } +void randstring(char *target, unsigned int min, unsigned int max) { + int p, len = min+rand()%(max-min+1); + int minval, maxval; + switch(rand() % 3) { + case 0: + minval = 0; + maxval = 255; + break; + case 1: + minval = 48; + maxval = 122; + break; + case 2: + minval = 48; + maxval = 52; + break; + default: + assert(NULL); + } + + while(p < len) + target[p++] = minval+rand()%(maxval-minval+1); + return; +} + + int main(int argc, char **argv) { unsigned char *zl, *p; unsigned char *entry; @@ -1137,10 +1168,10 @@ int main(int argc, char **argv) { /* Pop values again and compare their value. */ p = ziplistIndex(zl,0); assert(ziplistGet(p,&entry,&elen,&value)); - assert(strncmp(v1,entry,elen) == 0); + assert(strncmp(v1,(char*)entry,elen) == 0); p = ziplistIndex(zl,1); assert(ziplistGet(p,&entry,&elen,&value)); - assert(strncmp(v2,entry,elen) == 0); + assert(strncmp(v2,(char*)entry,elen) == 0); printf("SUCCESS\n\n"); } @@ -1192,50 +1223,78 @@ int main(int argc, char **argv) { printf("Stress with random payloads of different encoding:\n"); { - int i, idx, where, len; - long long v; + int i,j,len,where; unsigned char *p; - char buf[0x4041]; /* max length of generated string */ - zl = ziplistNew(); - for (i = 0; i < 100000; i++) { - where = (rand() & 1) ? ZIPLIST_HEAD : ZIPLIST_TAIL; - if (rand() & 1) { - /* equally likely create a 16, 32 or 64 bit int */ - v = (rand() & INT16_MAX) + ((1ll << 32) >> ((rand() % 3)*16)); - v *= 2*(rand() & 1)-1; /* randomly flip sign */ - sprintf(buf, "%lld", v); + char buf[1024]; + list *ref; + listNode *refnode; + + /* Hold temp vars from ziplist */ + unsigned char *sstr; + unsigned int slen; + long long sval; + + /* In the regression for the cascade bug, it was triggered + * with a random seed of 2. */ + srand(2); + + for (i = 0; i < 20000; i++) { + zl = ziplistNew(); + ref = listCreate(); + listSetFreeMethod(ref,sdsfree); + len = rand() % 256; + + /* Create lists */ + for (j = 0; j < len; j++) { + where = (rand() & 1) ? ZIPLIST_HEAD : ZIPLIST_TAIL; + switch(rand() % 4) { + case 0: + sprintf(buf,"%lld",(0LL + rand()) >> 20); + break; + case 1: + sprintf(buf,"%lld",(0LL + rand())); + break; + case 2: + sprintf(buf,"%lld",(0LL + rand()) << 20); + break; + case 3: + randstring(buf,0,256); + break; + default: + assert(NULL); + } + + /* Add to ziplist */ zl = ziplistPush(zl, (unsigned char*)buf, strlen(buf), where); - } else { - /* equally likely generate 6, 14 or >14 bit length */ - v = rand() & 0x3f; - v += 0x4000 >> ((rand() % 3)*8); - memset(buf, 'x', v); - zl = ziplistPush(zl, (unsigned char*)buf, v, where); - } - /* delete a random element */ - if ((len = ziplistLen(zl)) >= 10) { - idx = rand() % len; - // printf("Delete index %d\n", idx); - // ziplistRepr(zl); - ziplistDeleteRange(zl, idx, 1); - // ziplistRepr(zl); - len--; + /* Add to reference list */ + if (where == ZIPLIST_HEAD) { + listAddNodeHead(ref,sdsnew(buf)); + } else if (where == ZIPLIST_TAIL) { + listAddNodeTail(ref,sdsnew(buf)); + } else { + assert(NULL); + } } - /* iterate from front to back */ - idx = 0; - p = ziplistIndex(zl, 0); - while((p = ziplistNext(zl,p))) - idx++; - assert(len == idx+1); - - /* iterate from back to front */ - idx = 0; - p = ziplistIndex(zl, -1); - while((p = ziplistPrev(zl,p))) - idx++; - assert(len == idx+1); + assert(listLength(ref) == ziplistLen(zl)); + for (j = 0; j < len; j++) { + /* Naive way to get elements, but similar to the stresser + * executed from the Tcl test suite. */ + p = ziplistIndex(zl,j); + refnode = listIndex(ref,j); + + assert(ziplistGet(p,&sstr,&slen,&sval)); + if (sstr == NULL) { + sprintf(buf,"%lld",sval); + } else { + memcpy(buf,sstr,slen); + buf[slen] = '\0'; + } + assert(strcmp(buf,listNodeValue(refnode)) == 0); + } + zfree(zl); + listRelease(ref); } printf("SUCCESS\n\n"); } diff --git a/src/zmalloc.c b/src/zmalloc.c index 544155e7..8cfb18d9 100644 --- a/src/zmalloc.c +++ b/src/zmalloc.c @@ -32,13 +32,24 @@ #include #include #include - #include "config.h" +#ifdef HAVE_MALLOC_SIZE +#define PREFIX_SIZE (0) +#else #if defined(__sun) -#define PREFIX_SIZE sizeof(long long) +#define PREFIX_SIZE (sizeof(long long)) #else -#define PREFIX_SIZE sizeof(size_t) +#define PREFIX_SIZE (sizeof(size_t)) +#endif +#endif + +/* Explicitly override malloc/free etc when using tcmalloc. */ +#if defined(USE_TCMALLOC) +#define malloc(size) tc_malloc(size) +#define calloc(count,size) tc_calloc(count,size) +#define realloc(ptr,size) tc_realloc(ptr,size) +#define free(ptr) tc_free(ptr) #endif #define increment_used_memory(__n) do { \ diff --git a/tests/unit/type/hash.tcl b/tests/unit/type/hash.tcl index 2c0bd534..8559dc3c 100644 --- a/tests/unit/type/hash.tcl +++ b/tests/unit/type/hash.tcl @@ -140,6 +140,11 @@ start_server {tags {"hash"}} { set _ $rv } {{{} {}} {{} {}} {{} {}}} + test {HMGET against wrong type} { + r set wrongtype somevalue + assert_error "*wrong*" {r hmget wrongtype field1 field2} + } + test {HMGET - small hash} { set keys {} set vals {}