#include <assert.h>
#include <limits.h>
#include <sys/time.h>
+#include <ctype.h>
#include "dict.h"
#include "zmalloc.h"
/* Using dictEnableResize() / dictDisableResize() we make possible to
* enable/disable resizing of the hash table as needed. This is very important
* for Redis, as we use copy-on-write and don't want to move too much memory
- * around when there is a child performing saving operations. */
+ * around when there is a child performing saving operations.
+ *
+ * Note that even when dict_can_resize is set to 0, not all resizes are
+ * prevented: an hash table is still allowed to grow if the ratio between
+ * the number of elements and the buckets > dict_force_resize_ratio. */
static int dict_can_resize = 1;
-
-/* ---------------------------- Utility funcitons --------------------------- */
-
-static void _dictPanic(const char *fmt, ...)
-{
- va_list ap;
-
- va_start(ap, fmt);
- fprintf(stderr, "\nDICT LIBRARY PANIC: ");
- vfprintf(stderr, fmt, ap);
- fprintf(stderr, "\n\n");
- va_end(ap);
-}
-
-/* ------------------------- Heap Management Wrappers------------------------ */
-
-static void *_dictAlloc(size_t size)
-{
- void *p = zmalloc(size);
- if (p == NULL)
- _dictPanic("Out of memory");
- return p;
-}
-
-static void _dictFree(void *ptr) {
- zfree(ptr);
-}
+static unsigned int dict_force_resize_ratio = 5;
/* -------------------------- private prototypes ---------------------------- */
return key;
}
-/* Generic hash function (a popular one from Bernstein).
- * I tested a few and this was the best. */
-unsigned int dictGenHashFunction(const unsigned char *buf, int len) {
- unsigned int hash = 5381;
+static uint32_t dict_hash_function_seed = 5381;
+
+void dictSetHashFunctionSeed(uint32_t seed) {
+ dict_hash_function_seed = seed;
+}
+
+uint32_t dictGetHashFunctionSeed(void) {
+ return dict_hash_function_seed;
+}
+
+/* MurmurHash2, by Austin Appleby
+ * Note - This code makes a few assumptions about how your machine behaves -
+ * 1. We can read a 4-byte value from any address without crashing
+ * 2. sizeof(int) == 4
+ *
+ * And it has a few limitations -
+ *
+ * 1. It will not work incrementally.
+ * 2. It will not produce the same results on little-endian and big-endian
+ * machines.
+ */
+unsigned int dictGenHashFunction(const void *key, int len) {
+ /* 'm' and 'r' are mixing constants generated offline.
+ They're not really 'magic', they just happen to work well. */
+ uint32_t seed = dict_hash_function_seed;
+ const uint32_t m = 0x5bd1e995;
+ const int r = 24;
+
+ /* Initialize the hash to a 'random' value */
+ uint32_t h = seed ^ len;
+
+ /* Mix 4 bytes at a time into the hash */
+ const unsigned char *data = (const unsigned char *)key;
+
+ while(len >= 4) {
+ uint32_t k = *(uint32_t*)data;
+
+ k *= m;
+ k ^= k >> r;
+ k *= m;
+
+ h *= m;
+ h ^= k;
+
+ data += 4;
+ len -= 4;
+ }
+
+ /* Handle the last few bytes of the input array */
+ switch(len) {
+ case 3: h ^= data[2] << 16;
+ case 2: h ^= data[1] << 8;
+ case 1: h ^= data[0]; h *= m;
+ };
+
+ /* Do a few final mixes of the hash to ensure the last few
+ * bytes are well-incorporated. */
+ h ^= h >> 13;
+ h *= m;
+ h ^= h >> 15;
+
+ return (unsigned int)h;
+}
+
+/* And a case insensitive hash function (based on djb hash) */
+unsigned int dictGenCaseHashFunction(const unsigned char *buf, int len) {
+ unsigned int hash = (unsigned int)dict_hash_function_seed;
while (len--)
- hash = ((hash << 5) + hash) + (*buf++); /* hash * 33 + c */
+ hash = ((hash << 5) + hash) + (tolower(*buf++)); /* hash * 33 + c */
return hash;
}
/* ----------------------------- API implementation ------------------------- */
-/* Reset an hashtable already initialized with ht_init().
- * NOTE: This function should only called by ht_destroy(). */
+/* Reset a hash table already initialized with ht_init().
+ * NOTE: This function should only be called by ht_destroy(). */
static void _dictReset(dictht *ht)
{
ht->table = NULL;
dict *dictCreate(dictType *type,
void *privDataPtr)
{
- dict *d = _dictAlloc(sizeof(*d));
+ dict *d = zmalloc(sizeof(*d));
_dictInit(d,type,privDataPtr);
return d;
}
/* Resize the table to the minimal size that contains all the elements,
- * but with the invariant of a USER/BUCKETS ration near to <= 1 */
+ * but with the invariant of a USED/BUCKETS ratio near to <= 1 */
int dictResize(dict *d)
{
int minimal;
return dictExpand(d, minimal);
}
-/* Expand or create the hashtable */
+/* Expand or create the hash table */
int dictExpand(dict *d, unsigned long size)
{
- dictht n; /* the new hashtable */
+ dictht n; /* the new hash table */
unsigned long realsize = _dictNextPower(size);
/* the size is invalid if it is smaller than the number of
- * elements already inside the hashtable */
+ * elements already inside the hash table */
if (dictIsRehashing(d) || d->ht[0].used > size)
return DICT_ERR;
+ /* Allocate the new hash table and initialize all pointers to NULL */
n.size = realsize;
n.sizemask = realsize-1;
- n.table = _dictAlloc(realsize*sizeof(dictEntry*));
+ n.table = zcalloc(realsize*sizeof(dictEntry*));
n.used = 0;
- /* Initialize all the pointers to NULL */
- memset(n.table, 0, realsize*sizeof(dictEntry*));
-
/* Is this the first initialization? If so it's not really a rehashing
* we just set the first hash table so that it can accept keys. */
if (d->ht[0].table == NULL) {
/* Check if we already rehashed the whole table... */
if (d->ht[0].used == 0) {
- _dictFree(d->ht[0].table);
+ zfree(d->ht[0].table);
d->ht[0] = d->ht[1];
_dictReset(&d->ht[1]);
d->rehashidx = -1;
/* Note that rehashidx can't overflow as we are sure there are more
* elements because ht[0].used != 0 */
+ assert(d->ht[0].size > (unsigned)d->rehashidx);
while(d->ht[0].table[d->rehashidx] == NULL) d->rehashidx++;
de = d->ht[0].table[d->rehashidx];
/* Move all the keys in this bucket from the old to the new hash HT */
}
/* This function performs just a step of rehashing, and only if there are
- * not iterators bound to our hash table. When we have iterators in the middle
- * of a rehashing we can't mess with the two hash tables otherwise some element
- * can be missed or duplicated.
+ * no safe iterators bound to our hash table. When we have iterators in the
+ * middle of a rehashing we can't mess with the two hash tables otherwise
+ * some element can be missed or duplicated.
*
* This function is called by common lookup or update operations in the
* dictionary so that the hash table automatically migrates from H1 to H2
/* Add an element to the target hash table */
int dictAdd(dict *d, void *key, void *val)
+{
+ dictEntry *entry = dictAddRaw(d,key);
+
+ if (!entry) return DICT_ERR;
+ dictSetVal(d, entry, val);
+ return DICT_OK;
+}
+
+/* Low level add. This function adds the entry but instead of setting
+ * a value returns the dictEntry structure to the user, that will make
+ * sure to fill the value field as he wishes.
+ *
+ * This function is also directly exposed to the user API to be called
+ * mainly in order to store non-pointers inside the hash value, example:
+ *
+ * entry = dictAddRaw(dict,mykey);
+ * if (entry != NULL) dictSetSignedIntegerVal(entry,1000);
+ *
+ * Return values:
+ *
+ * If key already exists NULL is returned.
+ * If key was added, the hash entry is returned to be manipulated by the caller.
+ */
+dictEntry *dictAddRaw(dict *d, void *key)
{
int index;
dictEntry *entry;
/* Get the index of the new element, or -1 if
* the element already exists. */
if ((index = _dictKeyIndex(d, key)) == -1)
- return DICT_ERR;
+ return NULL;
- /* Allocates the memory and stores key */
+ /* Allocate the memory and store the new entry */
ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0];
- entry = _dictAlloc(sizeof(*entry));
+ entry = zmalloc(sizeof(*entry));
entry->next = ht->table[index];
ht->table[index] = entry;
ht->used++;
/* Set the hash entry fields. */
- dictSetHashKey(d, entry, key);
- dictSetHashVal(d, entry, val);
- return DICT_OK;
+ dictSetKey(d, entry, key);
+ return entry;
}
/* Add an element, discarding the old if the key already exists.
return 1;
/* It already exists, get the entry */
entry = dictFind(d, key);
- /* Free the old value and set the new one */
/* Set the new value and free the old one. Note that it is important
* to do that in this order, as the value may just be exactly the same
* as the previous one. In this context, think to reference counting,
* you want to increment (set), and then decrement (free), and not the
* reverse. */
auxentry = *entry;
- dictSetHashVal(d, entry, val);
- dictFreeEntryVal(d, &auxentry);
+ dictSetVal(d, entry, val);
+ dictFreeVal(d, &auxentry);
return 0;
}
+/* dictReplaceRaw() is simply a version of dictAddRaw() that always
+ * returns the hash entry of the specified key, even if the key already
+ * exists and can't be added (in that case the entry of the already
+ * existing key is returned.)
+ *
+ * See dictAddRaw() for more information. */
+dictEntry *dictReplaceRaw(dict *d, void *key) {
+ dictEntry *entry = dictFind(d,key);
+
+ return entry ? entry : dictAddRaw(d,key);
+}
+
/* Search and remove an element */
static int dictGenericDelete(dict *d, const void *key, int nofree)
{
he = d->ht[table].table[idx];
prevHe = NULL;
while(he) {
- if (dictCompareHashKeys(d, key, he->key)) {
+ if (dictCompareKeys(d, key, he->key)) {
/* Unlink the element from the list */
if (prevHe)
prevHe->next = he->next;
else
d->ht[table].table[idx] = he->next;
if (!nofree) {
- dictFreeEntryKey(d, he);
- dictFreeEntryVal(d, he);
+ dictFreeKey(d, he);
+ dictFreeVal(d, he);
}
- _dictFree(he);
+ zfree(he);
d->ht[table].used--;
return DICT_OK;
}
if ((he = ht->table[i]) == NULL) continue;
while(he) {
nextHe = he->next;
- dictFreeEntryKey(d, he);
- dictFreeEntryVal(d, he);
- _dictFree(he);
+ dictFreeKey(d, he);
+ dictFreeVal(d, he);
+ zfree(he);
ht->used--;
he = nextHe;
}
}
/* Free the table and the allocated cache structure */
- _dictFree(ht->table);
+ zfree(ht->table);
/* Re-initialize the table */
_dictReset(ht);
return DICT_OK; /* never fails */
{
_dictClear(d,&d->ht[0]);
_dictClear(d,&d->ht[1]);
- _dictFree(d);
+ zfree(d);
}
dictEntry *dictFind(dict *d, const void *key)
idx = h & d->ht[table].sizemask;
he = d->ht[table].table[idx];
while(he) {
- if (dictCompareHashKeys(d, key, he->key))
+ if (dictCompareKeys(d, key, he->key))
return he;
he = he->next;
}
dictEntry *he;
he = dictFind(d,key);
- return he ? dictGetEntryVal(he) : NULL;
+ return he ? dictGetVal(he) : NULL;
}
dictIterator *dictGetIterator(dict *d)
{
- dictIterator *iter = _dictAlloc(sizeof(*iter));
+ dictIterator *iter = zmalloc(sizeof(*iter));
iter->d = d;
iter->table = 0;
iter->index = -1;
+ iter->safe = 0;
iter->entry = NULL;
iter->nextEntry = NULL;
return iter;
}
+dictIterator *dictGetSafeIterator(dict *d) {
+ dictIterator *i = dictGetIterator(d);
+
+ i->safe = 1;
+ return i;
+}
+
dictEntry *dictNext(dictIterator *iter)
{
while (1) {
if (iter->entry == NULL) {
dictht *ht = &iter->d->ht[iter->table];
- if (iter->index == -1 && iter->table == 0) iter->d->iterators++;
+ if (iter->safe && iter->index == -1 && iter->table == 0)
+ iter->d->iterators++;
iter->index++;
if (iter->index >= (signed) ht->size) {
if (dictIsRehashing(iter->d) && iter->table == 0) {
void dictReleaseIterator(dictIterator *iter)
{
- if (!(iter->index == -1 && iter->table == 0)) iter->d->iterators--;
- _dictFree(iter);
+ if (iter->safe && !(iter->index == -1 && iter->table == 0))
+ iter->d->iterators--;
+ zfree(iter);
}
/* Return a random entry from the hash table. Useful to
/* Expand the hash table if needed */
static int _dictExpandIfNeeded(dict *d)
{
- /* If the hash table is empty expand it to the intial size,
- * if the table is "full" dobule its size. */
+ /* Incremental rehashing already in progress. Return. */
if (dictIsRehashing(d)) return DICT_OK;
- if (d->ht[0].size == 0)
- return dictExpand(d, DICT_HT_INITIAL_SIZE);
- if (d->ht[0].used >= d->ht[0].size && dict_can_resize)
+
+ /* If the hash table is empty expand it to the intial size. */
+ if (d->ht[0].size == 0) return dictExpand(d, DICT_HT_INITIAL_SIZE);
+
+ /* If we reached the 1:1 ratio, and we are allowed to resize the hash
+ * table (global setting) or we should avoid it but the ratio between
+ * elements/buckets is over the "safe" threshold, we resize doubling
+ * the number of buckets. */
+ if (d->ht[0].used >= d->ht[0].size &&
+ (dict_can_resize ||
+ d->ht[0].used/d->ht[0].size > dict_force_resize_ratio))
+ {
return dictExpand(d, ((d->ht[0].size > d->ht[0].used) ?
d->ht[0].size : d->ht[0].used)*2);
+ }
return DICT_OK;
}
unsigned int h, idx, table;
dictEntry *he;
- /* Expand the hashtable if needed */
+ /* Expand the hash table if needed */
if (_dictExpandIfNeeded(d) == DICT_ERR)
return -1;
/* Compute the key hash value */
/* Search if this slot does not already contain the given key */
he = d->ht[table].table[idx];
while(he) {
- if (dictCompareHashKeys(d, key, he->key))
+ if (dictCompareKeys(d, key, he->key))
return -1;
he = he->next;
}
d->iterators = 0;
}
+void dictEnableResize(void) {
+ dict_can_resize = 1;
+}
+
+void dictDisableResize(void) {
+ dict_can_resize = 0;
+}
+
+#if 0
+
+/* The following is code that we don't use for Redis currently, but that is part
+of the library. */
+
+/* ----------------------- Debugging ------------------------*/
+
#define DICT_STATS_VECTLEN 50
static void _dictPrintStatsHt(dictht *ht) {
unsigned long i, slots = 0, chainlen, maxchainlen = 0;
}
}
-void dictEnableResize(void) {
- dict_can_resize = 1;
-}
-
-void dictDisableResize(void) {
- dict_can_resize = 0;
-}
-
/* ----------------------- StringCopy Hash Table Type ------------------------*/
static unsigned int _dictStringCopyHTHashFunction(const void *key)
static void *_dictStringDup(void *privdata, const void *key)
{
int len = strlen(key);
- char *copy = _dictAlloc(len+1);
+ char *copy = zmalloc(len+1);
DICT_NOTUSED(privdata);
memcpy(copy, key, len);
{
DICT_NOTUSED(privdata);
- _dictFree(key);
+ zfree(key);
}
dictType dictTypeHeapStringCopyKey = {
_dictStringDestructor, /* key destructor */
_dictStringDestructor, /* val destructor */
};
+#endif