Active rehashing

author antirez <antirez@gmail.com>

Thu, 15 Apr 2010 16:07:57 +0000 (18:07 +0200)

committer antirez <antirez@gmail.com>

Thu, 15 Apr 2010 16:07:57 +0000 (18:07 +0200)
author antirez <antirez@gmail.com>
Thu, 15 Apr 2010 16:07:57 +0000 (18:07 +0200)
committer antirez <antirez@gmail.com>
Thu, 15 Apr 2010 16:07:57 +0000 (18:07 +0200)
diff --git a/dict.c b/dict.c

index 0d332b3bd74eef6db602667e0caf93618f43b129..08bffbff8e44cf631c71883cd5057148f6c5cfe1 100644 (file)
--- a/dict.c
+++ b/dict.c
@@ -41,6 +41,7 @@
  #include <stdarg.h>
  #include <assert.h>
  #include <limits.h>
+#include <sys/time.h>
  
  #include "dict.h"
  #include "zmalloc.h"
@@ -237,6 +238,25 @@ int dictRehash(dict *d, int n) {
      return 1;
  }
  
+long long timeInMilliseconds(void) {
+    struct timeval tv;
+
+    gettimeofday(&tv,NULL);
+    return (((long long)tv.tv_sec)*1000)+(tv.tv_usec/1000);
+}
+
+/* Rehash for an amount of time between ms milliseconds and ms+1 milliseconds */
+int dictRehashMilliseconds(dict *d, int ms) {
+    long long start = timeInMilliseconds();
+    int rehashes = 0;
+
+    while(dictRehash(d,100)) {
+        rehashes += 100;
+        if (timeInMilliseconds()-start > ms) break;
+    }
+    return rehashes;
+}
+
  /* This function performs just a step of rehashing, and only if there are
   * not iterators bound to our hash table. When we have iterators in the middle
   * of a rehashing we can't mess with the two hash tables otherwise some element
@@ -527,7 +547,7 @@ static unsigned long _dictNextPower(unsigned long size)
   * index is always returned in the context of the second (new) hash table. */
  static int _dictKeyIndex(dict *d, const void *key)
  {
-    unsigned int h, h1, h2;
+    unsigned int h, idx, table;
      dictEntry *he;
  
      /* Expand the hashtable if needed */
@@ -535,24 +555,18 @@ static int _dictKeyIndex(dict *d, const void *key)
          return -1;
      /* Compute the key hash value */
      h = dictHashKey(d, key);
-    h1 = h & d->ht[0].sizemask;
-    h2 = h & d->ht[1].sizemask;
-    /* Search if this slot does not already contain the given key */
-    he = d->ht[0].table[h1];
-    while(he) {
-        if (dictCompareHashKeys(d, key, he->key))
-            return -1;
-        he = he->next;
-    }
-    if (!dictIsRehashing(d)) return h1;
-    /* Check the second hash table */
-    he = d->ht[1].table[h2];
-    while(he) {
-        if (dictCompareHashKeys(d, key, he->key))
-            return -1;
-        he = he->next;
+    for (table = 0; table <= 1; table++) {
+        idx = h & d->ht[table].sizemask;
+        /* Search if this slot does not already contain the given key */
+        he = d->ht[table].table[idx];
+        while(he) {
+            if (dictCompareHashKeys(d, key, he->key))
+                return -1;
+            he = he->next;
+        }
+        if (!dictIsRehashing(d)) break;
      }
-    return h2;
+    return idx;
  }
  
  void dictEmpty(dict *d) {
diff --git a/dict.h b/dict.h

index 15dbb37f3e9ec9e8581a7ecea1cb7ebc39401b85..ba8f86951b5dc414c6450d06abd59e9b9f7baf89 100644 (file)
--- a/dict.h
+++ b/dict.h
@@ -122,24 +122,25 @@ typedef struct dictIterator {
  
  /* API */
  dict *dictCreate(dictType *type, void *privDataPtr);
-int dictExpand(dict *ht, unsigned long size);
-int dictAdd(dict *ht, void *key, void *val);
-int dictReplace(dict *ht, void *key, void *val);
-int dictDelete(dict *ht, const void *key);
-int dictDeleteNoFree(dict *ht, const void *key);
-void dictRelease(dict *ht);
-dictEntry * dictFind(dict *ht, const void *key);
-int dictResize(dict *ht);
-dictIterator *dictGetIterator(dict *ht);
+int dictExpand(dict *d, unsigned long size);
+int dictAdd(dict *d, void *key, void *val);
+int dictReplace(dict *d, void *key, void *val);
+int dictDelete(dict *d, const void *key);
+int dictDeleteNoFree(dict *d, const void *key);
+void dictRelease(dict *d);
+dictEntry * dictFind(dict *d, const void *key);
+int dictResize(dict *d);
+dictIterator *dictGetIterator(dict *d);
  dictEntry *dictNext(dictIterator *iter);
  void dictReleaseIterator(dictIterator *iter);
-dictEntry *dictGetRandomKey(dict *ht);
-void dictPrintStats(dict *ht);
+dictEntry *dictGetRandomKey(dict *d);
+void dictPrintStats(dict *d);
  unsigned int dictGenHashFunction(const unsigned char *buf, int len);
-void dictEmpty(dict *ht);
+void dictEmpty(dict *d);
  void dictEnableResize(void);
  void dictDisableResize(void);
  int dictRehash(dict *d, int n);
+int dictRehashMilliseconds(dict *d, int ms);
  
  /* Hash table types */
  extern dictType dictTypeHeapStringCopyKey;
diff --git a/redis.c b/redis.c

index 55e188010eda9d7f9cbd35e94392da3956d05e28..db46763b8a8db0a81231be3d7a9ffde857634445 100644 (file)
--- a/redis.c
+++ b/redis.c
@@ -91,7 +91,7 @@
  #define REDIS_CONFIGLINE_MAX    1024
  #define REDIS_OBJFREELIST_MAX   1000000 /* Max number of objects to cache */
  #define REDIS_MAX_SYNC_TIME     60      /* Slave can't take more to sync */
-#define REDIS_EXPIRELOOKUPS_PER_CRON    10 /* try to expire 10 keys/loop */
+#define REDIS_EXPIRELOOKUPS_PER_CRON    10 /* lookup 10 expires per loop */
  #define REDIS_MAX_WRITE_PER_EVENT (1024*64)
  #define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */
  
@@ -379,6 +379,7 @@ struct redisServer {
      char *requirepass;
      int shareobjects;
      int rdbcompression;
+    int activerehashing;
      /* Replication related */
      int isslave;
      char *masterauth;
@@ -1208,6 +1209,21 @@ static void tryResizeHashTables(void) {
      }
  }
  
+/* Our hash table implementation performs rehashing incrementally while
+ * we write/read from the hash table. Still if the server is idle, the hash
+ * table will use two tables for a long time. So we try to use 1 millisecond
+ * of CPU time at every serverCron() loop in order to rehash some key. */
+static void incrementallyRehash(void) {
+    int j;
+
+    for (j = 0; j < server.dbnum; j++) {
+        if (dictIsRehashing(server.db[j].dict)) {
+            dictRehashMilliseconds(server.db[j].dict,1);
+            break; /* already used our millisecond for this loop... */
+        }
+    }
+}
+
  /* A background saving child (BGSAVE) terminated its work. Handle this. */
  void backgroundSaveDoneHandler(int statloc) {
      int exitcode = WEXITSTATUS(statloc);
@@ -1337,10 +1353,9 @@ static int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientD
       * if we resize the HT while there is the saving child at work actually
       * a lot of memory movements in the parent will cause a lot of pages
       * copied. */
-    if (server.bgsavechildpid == -1 && server.bgrewritechildpid == -1 &&
-        !(loops % 10))
-    {
-        tryResizeHashTables();
+    if (server.bgsavechildpid == -1 && server.bgrewritechildpid == -1) {
+        if (!(loops % 10)) tryResizeHashTables();
+        if (server.activerehashing) incrementallyRehash();
      }
  
      /* Show information about connected clients */
@@ -1568,6 +1583,7 @@ static void initServerConfig() {
      server.requirepass = NULL;
      server.shareobjects = 0;
      server.rdbcompression = 1;
+    server.activerehashing = 1;
      server.maxclients = 0;
      server.blpop_blocked_clients = 0;
      server.maxmemory = 0;
@@ -1801,6 +1817,10 @@ static void loadServerConfig(char *filename) {
              if ((server.rdbcompression = yesnotoi(argv[1])) == -1) {
                  err = "argument must be 'yes' or 'no'"; goto loaderr;
              }
+        } else if (!strcasecmp(argv[0],"activerehashing") && argc == 2) {
+            if ((server.activerehashing = yesnotoi(argv[1])) == -1) {
+                err = "argument must be 'yes' or 'no'"; goto loaderr;
+            }
          } else if (!strcasecmp(argv[0],"daemonize") && argc == 2) {
              if ((server.daemonize = yesnotoi(argv[1])) == -1) {
                  err = "argument must be 'yes' or 'no'"; goto loaderr;
diff --git a/redis.conf b/redis.conf

index 3b9a8e2249a520673be27746fc55d34e7589a34f..c9ff26cf97b73c7f97d271ef6e147e785ac3f7b1 100644 (file)
--- a/redis.conf
+++ b/redis.conf
@@ -262,6 +262,26 @@ glueoutputbuf yes
  hash-max-zipmap-entries 64
  hash-max-zipmap-value 512
  
+# Active rehashing uses 1 millisecond every 100 milliseconds of CPU time in
+# order to help rehashing the main Redis hash table (the one mapping top-level
+# keys to values). The hash table implementation redis uses (see dict.c)
+# performs a lazy rehashing: the more operation you run into an hash table
+# that is rhashing, the more rehashing "steps" are performed, so if the
+# server is idle the rehashing is never complete and some more memory is used
+# by the hash table.
+# 
+# The default is to use this millisecond 10 times every second in order to
+# active rehashing the main dictionaries, freeing memory when possible.
+#
+# If unsure:
+# use "activerehashing no" if you have hard latency requirements and it is
+# not a good thing in your environment that Redis can reply form time to time
+# to queries with 2 milliseconds delay.
+#
+# use "activerehashing yes" if you don't have such hard requirements but
+# want to free memory asap when possible.
+activerehashing yes
+
  ################################## INCLUDES ###################################
  
  # Include one or more other config files here.  This is useful if you
author	antirez <antirez@gmail.com>
	Thu, 15 Apr 2010 16:07:57 +0000 (18:07 +0200)
committer	antirez <antirez@gmail.com>
	Thu, 15 Apr 2010 16:07:57 +0000 (18:07 +0200)
dict.c		patch \| blob \| blame \| history
dict.h		patch \| blob \| blame \| history
redis.c		patch \| blob \| blame \| history
redis.conf		patch \| blob \| blame \| history