Fix overflow in mstime() in redis-cli and benchmark.

[redis.git] / src / dict.c
diff --git a/src/dict.c b/src/dict.c

index d5010708c37ce6729e3068a170e30029f2fa1d38..1e7d7609d2841726e435b4da9f3bfbce529dab21 100644 (file)
--- a/src/dict.c
+++ b/src/dict.c
@@ -5,7 +5,7 @@
   * tables of power of two in size are used, collisions are handled by
   * chaining. See the source code for more information... :)
   *
- * Copyright (c) 2006-2010, Salvatore Sanfilippo <antirez at gmail dot com>
+ * Copyright (c) 2006-2012, Salvatore Sanfilippo <antirez at gmail dot com>
   * All rights reserved.
   *
   * Redistribution and use in source and binary forms, with or without
@@ -42,6 +42,7 @@
  #include <assert.h>
  #include <limits.h>
  #include <sys/time.h>
+#include <ctype.h>
  
  #include "dict.h"
  #include "zmalloc.h"
@@ -49,35 +50,13 @@
  /* Using dictEnableResize() / dictDisableResize() we make possible to
   * enable/disable resizing of the hash table as needed. This is very important
   * for Redis, as we use copy-on-write and don't want to move too much memory
- * around when there is a child performing saving operations. */
+ * around when there is a child performing saving operations.
+ *
+ * Note that even when dict_can_resize is set to 0, not all resizes are
+ * prevented: a hash table is still allowed to grow if the ratio between
+ * the number of elements and the number of buckets > dict_force_resize_ratio. */
  static int dict_can_resize = 1;
-
-/* ---------------------------- Utility funcitons --------------------------- */
-
-static void _dictPanic(const char *fmt, ...)
-{
-    va_list ap;
-
-    va_start(ap, fmt);
-    fprintf(stderr, "\nDICT LIBRARY PANIC: ");
-    vfprintf(stderr, fmt, ap);
-    fprintf(stderr, "\n\n");
-    va_end(ap);
-}
-
-/* ------------------------- Heap Management Wrappers------------------------ */
-
-static void *_dictAlloc(size_t size)
-{
-    void *p = zmalloc(size);
-    if (p == NULL)
-        _dictPanic("Out of memory");
-    return p;
-}
-
-static void _dictFree(void *ptr) {
-    zfree(ptr);
-}
+static unsigned int dict_force_resize_ratio = 5;
  
  /* -------------------------- private prototypes ---------------------------- */
  
@@ -106,20 +85,83 @@ unsigned int dictIdentityHashFunction(unsigned int key)
      return key;
  }
  
-/* Generic hash function (a popular one from Bernstein).
- * I tested a few and this was the best. */
-unsigned int dictGenHashFunction(const unsigned char *buf, int len) {
-    unsigned int hash = 5381;
+static uint32_t dict_hash_function_seed = 5381;
+
+void dictSetHashFunctionSeed(uint32_t seed) {
+    dict_hash_function_seed = seed;
+}
+
+uint32_t dictGetHashFunctionSeed(void) {
+    return dict_hash_function_seed;
+}
+
+/* MurmurHash2, by Austin Appleby. Hashes the 'len' bytes starting at 'key',
+ * seeded with dict_hash_function_seed, and returns it as an unsigned int.
+ * Note - This code makes a few assumptions about how your machine behaves -
+ * 1. We can read a 4-byte value from any address without crashing
+ * 2. sizeof(int) == 4
+ *
+ * And it has a few limitations -
+ * 1. It will not work incrementally.
+ * 2. It will not produce the same results on little-endian and big-endian
+ *    machines.
+ */
+unsigned int dictGenHashFunction(const void *key, int len) {
+    /* 'm' and 'r' are mixing constants generated offline.
+     They're not really 'magic', they just happen to work well.  */
+    uint32_t seed = dict_hash_function_seed;
+    const uint32_t m = 0x5bd1e995;
+    const int r = 24;
+
+    /* Initialize the hash from the seed and the input length */
+    uint32_t h = seed ^ len;
+
+    /* Mix 4 bytes at a time into the hash */
+    const unsigned char *data = (const unsigned char *)key;
+
+    while(len >= 4) {
+        uint32_t k = *(uint32_t*)data; /* direct 4-byte load: assumption 1 */
+
+        k *= m;
+        k ^= k >> r;
+        k *= m;
+
+        h *= m;
+        h ^= k;
+
+        data += 4;
+        len -= 4;
+    }
+
+    /* Handle the remaining 0-3 bytes; the cases fall through on purpose */
+    switch(len) {
+    case 3: h ^= data[2] << 16; /* fallthrough */
+    case 2: h ^= data[1] << 8; /* fallthrough */
+    case 1: h ^= data[0]; h *= m;
+    };
+
+    /* Do a few final mixes of the hash to ensure the last few
+     * bytes are well-incorporated. */
+    h ^= h >> 13;
+    h *= m;
+    h ^= h >> 15;
+
+    return (unsigned int)h;
+}
+
+/* And a case insensitive hash function (based on djb hash) */
+unsigned int dictGenCaseHashFunction(const unsigned char *buf, int len) {
+    unsigned int hash = (unsigned int)dict_hash_function_seed;
  
      while (len--)
-        hash = ((hash << 5) + hash) + (*buf++); /* hash * 33 + c */
+        hash = ((hash << 5) + hash) + (tolower(*buf++)); /* hash * 33 + c */
      return hash;
  }
  
  /* ----------------------------- API implementation ------------------------- */
  
-/* Reset an hashtable already initialized with ht_init().
- * NOTE: This function should only called by ht_destroy(). */
+/* Reset a hash table already initialized with ht_init().
+ * NOTE: This function should only be called by ht_destroy(). */
  static void _dictReset(dictht *ht)
  {
      ht->table = NULL;
@@ -132,7 +174,7 @@ static void _dictReset(dictht *ht)
  dict *dictCreate(dictType *type,
          void *privDataPtr)
  {
-    dict *d = _dictAlloc(sizeof(*d));
+    dict *d = zmalloc(sizeof(*d));
  
      _dictInit(d,type,privDataPtr);
      return d;
@@ -152,7 +194,7 @@ int _dictInit(dict *d, dictType *type,
  }
  
  /* Resize the table to the minimal size that contains all the elements,
- * but with the invariant of a USER/BUCKETS ration near to <= 1 */
+ * but with the invariant of a USED/BUCKETS ratio near to <= 1 */
  int dictResize(dict *d)
  {
      int minimal;
@@ -164,25 +206,23 @@ int dictResize(dict *d)
      return dictExpand(d, minimal);
  }
  
-/* Expand or create the hashtable */
+/* Expand or create the hash table */
  int dictExpand(dict *d, unsigned long size)
  {
-    dictht n; /* the new hashtable */
+    dictht n; /* the new hash table */
      unsigned long realsize = _dictNextPower(size);
  
      /* the size is invalid if it is smaller than the number of
-     * elements already inside the hashtable */
+     * elements already inside the hash table */
      if (dictIsRehashing(d) || d->ht[0].used > size)
          return DICT_ERR;
  
+    /* Allocate the new hash table and initialize all pointers to NULL */
      n.size = realsize;
      n.sizemask = realsize-1;
-    n.table = _dictAlloc(realsize*sizeof(dictEntry*));
+    n.table = zcalloc(realsize*sizeof(dictEntry*));
      n.used = 0;
  
-    /* Initialize all the pointers to NULL */
-    memset(n.table, 0, realsize*sizeof(dictEntry*));
-
      /* Is this the first initialization? If so it's not really a rehashing
       * we just set the first hash table so that it can accept keys. */
      if (d->ht[0].table == NULL) {
@@ -208,7 +248,7 @@ int dictRehash(dict *d, int n) {
  
          /* Check if we already rehashed the whole table... */
          if (d->ht[0].used == 0) {
-            _dictFree(d->ht[0].table);
+            zfree(d->ht[0].table);
              d->ht[0] = d->ht[1];
              _dictReset(&d->ht[1]);
              d->rehashidx = -1;
@@ -217,6 +257,7 @@ int dictRehash(dict *d, int n) {
  
          /* Note that rehashidx can't overflow as we are sure there are more
           * elements because ht[0].used != 0 */
+        assert(d->ht[0].size > (unsigned)d->rehashidx);
          while(d->ht[0].table[d->rehashidx] == NULL) d->rehashidx++;
          de = d->ht[0].table[d->rehashidx];
          /* Move all the keys in this bucket from the old to the new hash HT */
@@ -258,9 +299,9 @@ int dictRehashMilliseconds(dict *d, int ms) {
  }
  
  /* This function performs just a step of rehashing, and only if there are
- * not iterators bound to our hash table. When we have iterators in the middle
- * of a rehashing we can't mess with the two hash tables otherwise some element
- * can be missed or duplicated.
+ * no safe iterators bound to our hash table. When we have iterators in the
+ * middle of a rehashing we can't mess with the two hash tables otherwise
+ * some element can be missed or duplicated.
   *
   * This function is called by common lookup or update operations in the
   * dictionary so that the hash table automatically migrates from H1 to H2
@@ -271,6 +312,30 @@ static void _dictRehashStep(dict *d) {
  
  /* Add an element to the target hash table */
  int dictAdd(dict *d, void *key, void *val)
+{
+    dictEntry *entry = dictAddRaw(d,key);
+
+    if (!entry) return DICT_ERR;
+    dictSetVal(d, entry, val);
+    return DICT_OK;
+}
+
+/* Low level add. This function adds the entry but instead of setting
+ * a value returns the dictEntry structure to the caller, who is then
+ * responsible for filling the value field as needed.
+ *
+ * This function is also directly exposed to user API to be called
+ * mainly in order to store non-pointers inside the hash value, example:
+ *
+ * entry = dictAddRaw(dict,mykey);
+ * if (entry != NULL) dictSetSignedIntegerVal(entry,1000);
+ *
+ * Return values:
+ *
+ * If key already exists NULL is returned.
+ * If key was added, the hash entry is returned to be manipulated by the caller.
+ */
+dictEntry *dictAddRaw(dict *d, void *key)
  {
      int index;
      dictEntry *entry;
@@ -281,19 +346,18 @@ int dictAdd(dict *d, void *key, void *val)
      /* Get the index of the new element, or -1 if
       * the element already exists. */
      if ((index = _dictKeyIndex(d, key)) == -1)
-        return DICT_ERR;
+        return NULL;
  
-    /* Allocates the memory and stores key */
+    /* Allocate the memory and store the new entry */
      ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0];
-    entry = _dictAlloc(sizeof(*entry));
+    entry = zmalloc(sizeof(*entry));
      entry->next = ht->table[index];
      ht->table[index] = entry;
      ht->used++;
  
      /* Set the hash entry fields. */
-    dictSetHashKey(d, entry, key);
-    dictSetHashVal(d, entry, val);
-    return DICT_OK;
+    dictSetKey(d, entry, key);
+    return entry;
  }
  
  /* Add an element, discarding the old if the key already exists.
@@ -310,18 +374,29 @@ int dictReplace(dict *d, void *key, void *val)
          return 1;
      /* It already exists, get the entry */
      entry = dictFind(d, key);
-    /* Free the old value and set the new one */
      /* Set the new value and free the old one. Note that it is important
       * to do that in this order, as the value may just be exactly the same
       * as the previous one. In this context, think to reference counting,
       * you want to increment (set), and then decrement (free), and not the
       * reverse. */
      auxentry = *entry;
-    dictSetHashVal(d, entry, val);
-    dictFreeEntryVal(d, &auxentry);
+    dictSetVal(d, entry, val);
+    dictFreeVal(d, &auxentry);
      return 0;
  }
  
+/* dictReplaceRaw() is simply a version of dictAddRaw() that always
+ * returns the hash entry of the specified key, even if the key already
+ * exists and can't be added (in that case the entry of the already
+ * existing key is returned).
+ *
+ * See dictAddRaw() for more information. */
+dictEntry *dictReplaceRaw(dict *d, void *key) {
+    dictEntry *entry = dictFind(d,key);
+
+    return entry ? entry : dictAddRaw(d,key); /* add only when not found */
+}
+
  /* Search and remove an element */
  static int dictGenericDelete(dict *d, const void *key, int nofree)
  {
@@ -338,17 +413,17 @@ static int dictGenericDelete(dict *d, const void *key, int nofree)
          he = d->ht[table].table[idx];
          prevHe = NULL;
          while(he) {
-            if (dictCompareHashKeys(d, key, he->key)) {
+            if (dictCompareKeys(d, key, he->key)) {
                  /* Unlink the element from the list */
                  if (prevHe)
                      prevHe->next = he->next;
                  else
                      d->ht[table].table[idx] = he->next;
                  if (!nofree) {
-                    dictFreeEntryKey(d, he);
-                    dictFreeEntryVal(d, he);
+                    dictFreeKey(d, he);
+                    dictFreeVal(d, he);
                  }
-                _dictFree(he);
+                zfree(he);
                  d->ht[table].used--;
                  return DICT_OK;
              }
@@ -380,15 +455,15 @@ int _dictClear(dict *d, dictht *ht)
          if ((he = ht->table[i]) == NULL) continue;
          while(he) {
              nextHe = he->next;
-            dictFreeEntryKey(d, he);
-            dictFreeEntryVal(d, he);
-            _dictFree(he);
+            dictFreeKey(d, he);
+            dictFreeVal(d, he);
+            zfree(he);
              ht->used--;
              he = nextHe;
          }
      }
      /* Free the table and the allocated cache structure */
-    _dictFree(ht->table);
+    zfree(ht->table);
      /* Re-initialize the table */
      _dictReset(ht);
      return DICT_OK; /* never fails */
@@ -399,7 +474,7 @@ void dictRelease(dict *d)
  {
      _dictClear(d,&d->ht[0]);
      _dictClear(d,&d->ht[1]);
-    _dictFree(d);
+    zfree(d);
  }
  
  dictEntry *dictFind(dict *d, const void *key)
@@ -414,7 +489,7 @@ dictEntry *dictFind(dict *d, const void *key)
          idx = h & d->ht[table].sizemask;
          he = d->ht[table].table[idx];
          while(he) {
-            if (dictCompareHashKeys(d, key, he->key))
+            if (dictCompareKeys(d, key, he->key))
                  return he;
              he = he->next;
          }
@@ -427,27 +502,36 @@ void *dictFetchValue(dict *d, const void *key) {
      dictEntry *he;
  
      he = dictFind(d,key);
-    return he ? dictGetEntryVal(he) : NULL;
+    return he ? dictGetVal(he) : NULL;
  }
  
  dictIterator *dictGetIterator(dict *d)
  {
-    dictIterator *iter = _dictAlloc(sizeof(*iter));
+    dictIterator *iter = zmalloc(sizeof(*iter));
  
      iter->d = d;
      iter->table = 0;
      iter->index = -1;
+    iter->safe = 0;
      iter->entry = NULL;
      iter->nextEntry = NULL;
      return iter;
  }
  
+dictIterator *dictGetSafeIterator(dict *d) {
+    dictIterator *i = dictGetIterator(d);
+    /* Same iterator as dictGetIterator(), but with the 'safe' flag set. */
+    i->safe = 1;
+    return i;
+}
+
  dictEntry *dictNext(dictIterator *iter)
  {
      while (1) {
          if (iter->entry == NULL) {
              dictht *ht = &iter->d->ht[iter->table];
-            if (iter->index == -1 && iter->table == 0) iter->d->iterators++;
+            if (iter->safe && iter->index == -1 && iter->table == 0)
+                iter->d->iterators++;
              iter->index++;
              if (iter->index >= (signed) ht->size) {
                  if (dictIsRehashing(iter->d) && iter->table == 0) {
@@ -474,8 +558,9 @@ dictEntry *dictNext(dictIterator *iter)
  
  void dictReleaseIterator(dictIterator *iter)
  {
-    if (!(iter->index == -1 && iter->table == 0)) iter->d->iterators--;
-    _dictFree(iter);
+    if (iter->safe && !(iter->index == -1 && iter->table == 0))
+        iter->d->iterators--;
+    zfree(iter);
  }
  
  /* Return a random entry from the hash table. Useful to
@@ -522,14 +607,23 @@ dictEntry *dictGetRandomKey(dict *d)
  /* Expand the hash table if needed */
  static int _dictExpandIfNeeded(dict *d)
  {
-    /* If the hash table is empty expand it to the intial size,
-     * if the table is "full" dobule its size. */
+    /* Incremental rehashing already in progress. Return. */
      if (dictIsRehashing(d)) return DICT_OK;
-    if (d->ht[0].size == 0)
-        return dictExpand(d, DICT_HT_INITIAL_SIZE);
-    if (d->ht[0].used >= d->ht[0].size && dict_can_resize)
+
+    /* If the hash table is empty expand it to the initial size. */
+    if (d->ht[0].size == 0) return dictExpand(d, DICT_HT_INITIAL_SIZE);
+
+    /* If we reached the 1:1 ratio, and we are allowed to resize the hash
+     * table (global setting) or we should avoid it but the ratio between
+     * elements/buckets is over the "safe" threshold, we resize doubling
+     * the number of buckets. */
+    if (d->ht[0].used >= d->ht[0].size &&
+        (dict_can_resize ||
+         d->ht[0].used/d->ht[0].size > dict_force_resize_ratio))
+    {
          return dictExpand(d, ((d->ht[0].size > d->ht[0].used) ?
                                      d->ht[0].size : d->ht[0].used)*2);
+    }
      return DICT_OK;
  }
  
@@ -557,7 +651,7 @@ static int _dictKeyIndex(dict *d, const void *key)
      unsigned int h, idx, table;
      dictEntry *he;
  
-    /* Expand the hashtable if needed */
+    /* Expand the hash table if needed */
      if (_dictExpandIfNeeded(d) == DICT_ERR)
          return -1;
      /* Compute the key hash value */
@@ -567,7 +661,7 @@ static int _dictKeyIndex(dict *d, const void *key)
          /* Search if this slot does not already contain the given key */
          he = d->ht[table].table[idx];
          while(he) {
-            if (dictCompareHashKeys(d, key, he->key))
+            if (dictCompareKeys(d, key, he->key))
                  return -1;
              he = he->next;
          }
@@ -583,6 +677,21 @@ void dictEmpty(dict *d) {
      d->iterators = 0;
  }
  
+void dictEnableResize(void) {
+    dict_can_resize = 1;
+}
+
+void dictDisableResize(void) {
+    dict_can_resize = 0;
+}
+
+#if 0
+
+/* The following is code that we don't use for Redis currently, but that is
+ * part of the library. */
+
+/* ----------------------- Debugging ------------------------*/
+
  #define DICT_STATS_VECTLEN 50
  static void _dictPrintStatsHt(dictht *ht) {
      unsigned long i, slots = 0, chainlen, maxchainlen = 0;
@@ -636,14 +745,6 @@ void dictPrintStats(dict *d) {
      }
  }
  
-void dictEnableResize(void) {
-    dict_can_resize = 1;
-}
-
-void dictDisableResize(void) {
-    dict_can_resize = 0;
-}
-
  /* ----------------------- StringCopy Hash Table Type ------------------------*/
  
  static unsigned int _dictStringCopyHTHashFunction(const void *key)
@@ -651,10 +752,10 @@ static unsigned int _dictStringCopyHTHashFunction(const void *key)
      return dictGenHashFunction(key, strlen(key));
  }
  
-static void *_dictStringCopyHTKeyDup(void *privdata, const void *key)
+static void *_dictStringDup(void *privdata, const void *key)
  {
      int len = strlen(key);
-    char *copy = _dictAlloc(len+1);
+    char *copy = zmalloc(len+1);
      DICT_NOTUSED(privdata);
  
      memcpy(copy, key, len);
@@ -662,17 +763,6 @@ static void *_dictStringCopyHTKeyDup(void *privdata, const void *key)
      return copy;
  }
  
-static void *_dictStringKeyValCopyHTValDup(void *privdata, const void *val)
-{
-    int len = strlen(val);
-    char *copy = _dictAlloc(len+1);
-    DICT_NOTUSED(privdata);
-
-    memcpy(copy, val, len);
-    copy[len] = '\0';
-    return copy;
-}
-
  static int _dictStringCopyHTKeyCompare(void *privdata, const void *key1,
          const void *key2)
  {
@@ -681,47 +771,41 @@ static int _dictStringCopyHTKeyCompare(void *privdata, const void *key1,
      return strcmp(key1, key2) == 0;
  }
  
-static void _dictStringCopyHTKeyDestructor(void *privdata, void *key)
-{
-    DICT_NOTUSED(privdata);
-
-    _dictFree((void*)key); /* ATTENTION: const cast */
-}
-
-static void _dictStringKeyValCopyHTValDestructor(void *privdata, void *val)
+static void _dictStringDestructor(void *privdata, void *key)
  {
      DICT_NOTUSED(privdata);
  
-    _dictFree((void*)val); /* ATTENTION: const cast */
+    zfree(key);
  }
  
  dictType dictTypeHeapStringCopyKey = {
-    _dictStringCopyHTHashFunction,        /* hash function */
-    _dictStringCopyHTKeyDup,              /* key dup */
-    NULL,                               /* val dup */
-    _dictStringCopyHTKeyCompare,          /* key compare */
-    _dictStringCopyHTKeyDestructor,       /* key destructor */
-    NULL                                /* val destructor */
+    _dictStringCopyHTHashFunction, /* hash function */
+    _dictStringDup,                /* key dup */
+    NULL,                          /* val dup */
+    _dictStringCopyHTKeyCompare,   /* key compare */
+    _dictStringDestructor,         /* key destructor */
+    NULL                           /* val destructor */
  };
  
  /* This is like StringCopy but does not auto-duplicate the key.
   * It's used for intepreter's shared strings. */
  dictType dictTypeHeapStrings = {
-    _dictStringCopyHTHashFunction,        /* hash function */
-    NULL,                               /* key dup */
-    NULL,                               /* val dup */
-    _dictStringCopyHTKeyCompare,          /* key compare */
-    _dictStringCopyHTKeyDestructor,       /* key destructor */
-    NULL                                /* val destructor */
+    _dictStringCopyHTHashFunction, /* hash function */
+    NULL,                          /* key dup */
+    NULL,                          /* val dup */
+    _dictStringCopyHTKeyCompare,   /* key compare */
+    _dictStringDestructor,         /* key destructor */
+    NULL                           /* val destructor */
  };
  
  /* This is like StringCopy but also automatically handle dynamic
   * allocated C strings as values. */
  dictType dictTypeHeapStringCopyKeyValue = {
-    _dictStringCopyHTHashFunction,        /* hash function */
-    _dictStringCopyHTKeyDup,              /* key dup */
-    _dictStringKeyValCopyHTValDup,        /* val dup */
-    _dictStringCopyHTKeyCompare,          /* key compare */
-    _dictStringCopyHTKeyDestructor,       /* key destructor */
-    _dictStringKeyValCopyHTValDestructor, /* val destructor */
+    _dictStringCopyHTHashFunction, /* hash function */
+    _dictStringDup,                /* key dup */
+    _dictStringDup,                /* val dup */
+    _dictStringCopyHTKeyCompare,   /* key compare */
+    _dictStringDestructor,         /* key destructor */
+    _dictStringDestructor,         /* val destructor */
  };
+#endif