dict.c

   1 /* Hash Tables Implementation.
   2  *
   3  * This file implements in memory hash tables with insert/del/replace/find/
   4  * get-random-element operations. Hash tables will auto-resize if needed;
   5  * tables with a power-of-two size are used, and collisions are handled by
   6  * chaining. See the source code for more information... :)
   7  *
   8  * Copyright (c) 2006-2010, Salvatore Sanfilippo <antirez at gmail dot com>
   9  * All rights reserved.
  10  *
  11  * Redistribution and use in source and binary forms, with or without
  12  * modification, are permitted provided that the following conditions are met:
  13  *
  14  *   * Redistributions of source code must retain the above copyright notice,
  15  *     this list of conditions and the following disclaimer.
  16  *   * Redistributions in binary form must reproduce the above copyright
  17  *     notice, this list of conditions and the following disclaimer in the
  18  *     documentation and/or other materials provided with the distribution.
  19  *   * Neither the name of Redis nor the names of its contributors may be used
  20  *     to endorse or promote products derived from this software without
  21  *     specific prior written permission.
  22  *
  23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  24  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  26  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  27  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  28  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  29  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  30  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  31  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  32  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  33  * POSSIBILITY OF SUCH DAMAGE.
  34  */
  35
  36 #include "fmacros.h"
  37
  38 #include <stdio.h>
  39 #include <stdlib.h>
  40 #include <string.h>
  41 #include <stdarg.h>
  42 #include <assert.h>
  43 #include <limits.h>
  44
  45 #include "dict.h"
  46 #include "zmalloc.h"
  47
  48 /* Using dictEnableResize() / dictDisableResize() we make it possible to
  49  * enable/disable resizing of the hash table as needed. This is very important
  50  * for Redis, as we use copy-on-write and don't want to move too much memory
  51  * around when there is a child performing saving operations. */
  52 static int dict_can_resize = 1;
  53
  54 /* ---------------------------- Utility functions --------------------------- */
  55
  56 static void _dictPanic(const char *fmt, ...)
  57 {
  58     va_list ap;
  59
  60     va_start(ap, fmt);
  61     fprintf(stderr, "\nDICT LIBRARY PANIC: ");
  62     vfprintf(stderr, fmt, ap);
  63     fprintf(stderr, "\n\n");
  64     va_end(ap);
  65 }
  66
  67 /* ------------------------- Heap Management Wrappers------------------------ */
  68
  69 static void *_dictAlloc(size_t size)
  70 {
  71     void *p = zmalloc(size);
  72     if (p == NULL)
  73         _dictPanic("Out of memory");
  74     return p;
  75 }
  76
  77 static void _dictFree(void *ptr) {
  78     zfree(ptr);
  79 }
  80
  81 /* -------------------------- private prototypes ---------------------------- */
  82
  83 static int _dictExpandIfNeeded(dict *ht);
  84 static unsigned long _dictNextPower(unsigned long size);
  85 static int _dictKeyIndex(dict *ht, const void *key);
  86 static int _dictInit(dict *ht, dictType *type, void *privDataPtr);
  87
  88 /* -------------------------- hash functions -------------------------------- */
  89
  90 /* Thomas Wang's 32 bit Mix Function */
  91 unsigned int dictIntHashFunction(unsigned int key)
  92 {
  93     key += ~(key << 15);
  94     key ^=  (key >> 10);
  95     key +=  (key << 3);
  96     key ^=  (key >> 6);
  97     key += ~(key << 11);
  98     key ^=  (key >> 16);
  99     return key;
 100 }
 101
 102 /* Identity hash function for integer keys */
 103 unsigned int dictIdentityHashFunction(unsigned int key)
 104 {
 105     return key;
 106 }
 107
 108 /* Generic hash function (a popular one from Bernstein).
 109  * I tested a few and this was the best. */
 110 unsigned int dictGenHashFunction(const unsigned char *buf, int len) {
 111     unsigned int hash = 5381;
 112
 113     while (len--)
 114         hash = ((hash << 5) + hash) + (*buf++); /* hash * 33 + c */
 115     return hash;
 116 }
 117
 118 /* ----------------------------- API implementation ------------------------- */
 119
 120 /* Reset an hashtable already initialized with ht_init().
 121  * NOTE: This function should only called by ht_destroy(). */
 122 static void _dictReset(dict *ht)
 123 {
 124     ht->table = NULL;
 125     ht->size = 0;
 126     ht->sizemask = 0;
 127     ht->used = 0;
 128 }
 129
 130 /* Create a new hash table */
 131 dict *dictCreate(dictType *type,
 132         void *privDataPtr)
 133 {
 134     dict *ht = _dictAlloc(sizeof(*ht));
 135
 136     _dictInit(ht,type,privDataPtr);
 137     return ht;
 138 }
 139
 140 /* Initialize the hash table */
 141 int _dictInit(dict *ht, dictType *type,
 142         void *privDataPtr)
 143 {
 144     _dictReset(ht);
 145     ht->type = type;
 146     ht->privdata = privDataPtr;
 147     return DICT_OK;
 148 }
 149
 150 /* Resize the table to the minimal size that contains all the elements,
 151  * but with the invariant of a USER/BUCKETS ration near to <= 1 */
 152 int dictResize(dict *ht)
 153 {
 154     int minimal = ht->used;
 155
 156     if (!dict_can_resize) return DICT_ERR;
 157     if (minimal < DICT_HT_INITIAL_SIZE)
 158         minimal = DICT_HT_INITIAL_SIZE;
 159     return dictExpand(ht, minimal);
 160 }
 161
 162 /* Expand or create the hashtable */
 163 int dictExpand(dict *ht, unsigned long size)
 164 {
 165     dict n; /* the new hashtable */
 166     unsigned long realsize = _dictNextPower(size), i;
 167
 168     /* the size is invalid if it is smaller than the number of
 169      * elements already inside the hashtable */
 170     if (ht->used > size)
 171         return DICT_ERR;
 172
 173     _dictInit(&n, ht->type, ht->privdata);
 174     n.size = realsize;
 175     n.sizemask = realsize-1;
 176     n.table = _dictAlloc(realsize*sizeof(dictEntry*));
 177
 178     /* Initialize all the pointers to NULL */
 179     memset(n.table, 0, realsize*sizeof(dictEntry*));
 180
 181     /* Copy all the elements from the old to the new table:
 182      * note that if the old hash table is empty ht->size is zero,
 183      * so dictExpand just creates an hash table. */
 184     n.used = ht->used;
 185     for (i = 0; i < ht->size && ht->used > 0; i++) {
 186         dictEntry *he, *nextHe;
 187
 188         if (ht->table[i] == NULL) continue;
 189
 190         /* For each hash entry on this slot... */
 191         he = ht->table[i];
 192         while(he) {
 193             unsigned int h;
 194
 195             nextHe = he->next;
 196             /* Get the new element index */
 197             h = dictHashKey(ht, he->key) & n.sizemask;
 198             he->next = n.table[h];
 199             n.table[h] = he;
 200             ht->used--;
 201             /* Pass to the next element */
 202             he = nextHe;
 203         }
 204     }
 205     assert(ht->used == 0);
 206     _dictFree(ht->table);
 207
 208     /* Remap the new hashtable in the old */
 209     *ht = n;
 210     return DICT_OK;
 211 }
 212
 213 /* Add an element to the target hash table */
 214 int dictAdd(dict *ht, void *key, void *val)
 215 {
 216     int index;
 217     dictEntry *entry;
 218
 219     /* Get the index of the new element, or -1 if
 220      * the element already exists. */
 221     if ((index = _dictKeyIndex(ht, key)) == -1)
 222         return DICT_ERR;
 223
 224     /* Allocates the memory and stores key */
 225     entry = _dictAlloc(sizeof(*entry));
 226     entry->next = ht->table[index];
 227     ht->table[index] = entry;
 228
 229     /* Set the hash entry fields. */
 230     dictSetHashKey(ht, entry, key);
 231     dictSetHashVal(ht, entry, val);
 232     ht->used++;
 233     return DICT_OK;
 234 }
 235
 236 /* Add an element, discarding the old if the key already exists.
 237  * Return 1 if the key was added from scratch, 0 if there was already an
 238  * element with such key and dictReplace() just performed a value update
 239  * operation. */
 240 int dictReplace(dict *ht, void *key, void *val)
 241 {
 242     dictEntry *entry, auxentry;
 243
 244     /* Try to add the element. If the key
 245      * does not exists dictAdd will suceed. */
 246     if (dictAdd(ht, key, val) == DICT_OK)
 247         return 1;
 248     /* It already exists, get the entry */
 249     entry = dictFind(ht, key);
 250     /* Free the old value and set the new one */
 251     /* Set the new value and free the old one. Note that it is important
 252      * to do that in this order, as the value may just be exactly the same
 253      * as the previous one. In this context, think to reference counting,
 254      * you want to increment (set), and then decrement (free), and not the
 255      * reverse. */
 256     auxentry = *entry;
 257     dictSetHashVal(ht, entry, val);
 258     dictFreeEntryVal(ht, &auxentry);
 259     return 0;
 260 }
 261
 262 /* Search and remove an element */
 263 static int dictGenericDelete(dict *ht, const void *key, int nofree)
 264 {
 265     unsigned int h;
 266     dictEntry *he, *prevHe;
 267
 268     if (ht->size == 0)
 269         return DICT_ERR;
 270     h = dictHashKey(ht, key) & ht->sizemask;
 271     he = ht->table[h];
 272
 273     prevHe = NULL;
 274     while(he) {
 275         if (dictCompareHashKeys(ht, key, he->key)) {
 276             /* Unlink the element from the list */
 277             if (prevHe)
 278                 prevHe->next = he->next;
 279             else
 280                 ht->table[h] = he->next;
 281             if (!nofree) {
 282                 dictFreeEntryKey(ht, he);
 283                 dictFreeEntryVal(ht, he);
 284             }
 285             _dictFree(he);
 286             ht->used--;
 287             return DICT_OK;
 288         }
 289         prevHe = he;
 290         he = he->next;
 291     }
 292     return DICT_ERR; /* not found */
 293 }
 294
 295 int dictDelete(dict *ht, const void *key) {
 296     return dictGenericDelete(ht,key,0);
 297 }
 298
 299 int dictDeleteNoFree(dict *ht, const void *key) {
 300     return dictGenericDelete(ht,key,1);
 301 }
 302
 303 /* Destroy an entire hash table */
 304 int _dictClear(dict *ht)
 305 {
 306     unsigned long i;
 307
 308     /* Free all the elements */
 309     for (i = 0; i < ht->size && ht->used > 0; i++) {
 310         dictEntry *he, *nextHe;
 311
 312         if ((he = ht->table[i]) == NULL) continue;
 313         while(he) {
 314             nextHe = he->next;
 315             dictFreeEntryKey(ht, he);
 316             dictFreeEntryVal(ht, he);
 317             _dictFree(he);
 318             ht->used--;
 319             he = nextHe;
 320         }
 321     }
 322     /* Free the table and the allocated cache structure */
 323     _dictFree(ht->table);
 324     /* Re-initialize the table */
 325     _dictReset(ht);
 326     return DICT_OK; /* never fails */
 327 }
 328
 329 /* Clear & Release the hash table */
 330 void dictRelease(dict *ht)
 331 {
 332     _dictClear(ht);
 333     _dictFree(ht);
 334 }
 335
 336 dictEntry *dictFind(dict *ht, const void *key)
 337 {
 338     dictEntry *he;
 339     unsigned int h;
 340
 341     if (ht->size == 0) return NULL;
 342     h = dictHashKey(ht, key) & ht->sizemask;
 343     he = ht->table[h];
 344     while(he) {
 345         if (dictCompareHashKeys(ht, key, he->key))
 346             return he;
 347         he = he->next;
 348     }
 349     return NULL;
 350 }
 351
 352 dictIterator *dictGetIterator(dict *ht)
 353 {
 354     dictIterator *iter = _dictAlloc(sizeof(*iter));
 355
 356     iter->ht = ht;
 357     iter->index = -1;
 358     iter->entry = NULL;
 359     iter->nextEntry = NULL;
 360     return iter;
 361 }
 362
 363 dictEntry *dictNext(dictIterator *iter)
 364 {
 365     while (1) {
 366         if (iter->entry == NULL) {
 367             iter->index++;
 368             if (iter->index >=
 369                     (signed)iter->ht->size) break;
 370             iter->entry = iter->ht->table[iter->index];
 371         } else {
 372             iter->entry = iter->nextEntry;
 373         }
 374         if (iter->entry) {
 375             /* We need to save the 'next' here, the iterator user
 376              * may delete the entry we are returning. */
 377             iter->nextEntry = iter->entry->next;
 378             return iter->entry;
 379         }
 380     }
 381     return NULL;
 382 }
 383
 384 void dictReleaseIterator(dictIterator *iter)
 385 {
 386     _dictFree(iter);
 387 }
 388
 389 /* Return a random entry from the hash table. Useful to
 390  * implement randomized algorithms */
 391 dictEntry *dictGetRandomKey(dict *ht)
 392 {
 393     dictEntry *he;
 394     unsigned int h;
 395     int listlen, listele;
 396
 397     if (ht->used == 0) return NULL;
 398     do {
 399         h = random() & ht->sizemask;
 400         he = ht->table[h];
 401     } while(he == NULL);
 402
 403     /* Now we found a non empty bucket, but it is a linked
 404      * list and we need to get a random element from the list.
 405      * The only sane way to do so is to count the element and
 406      * select a random index. */
 407     listlen = 0;
 408     while(he) {
 409         he = he->next;
 410         listlen++;
 411     }
 412     listele = random() % listlen;
 413     he = ht->table[h];
 414     while(listele--) he = he->next;
 415     return he;
 416 }
 417
 418 /* ------------------------- private functions ------------------------------ */
 419
 420 /* Expand the hash table if needed */
 421 static int _dictExpandIfNeeded(dict *ht)
 422 {
 423     /* If the hash table is empty expand it to the intial size,
 424      * if the table is "full" dobule its size. */
 425     if (ht->size == 0)
 426         return dictExpand(ht, DICT_HT_INITIAL_SIZE);
 427     if (ht->used >= ht->size && dict_can_resize)
 428         return dictExpand(ht, ht->size*2);
 429     return DICT_OK;
 430 }
 431
 432 /* Our hash table capability is a power of two */
 433 static unsigned long _dictNextPower(unsigned long size)
 434 {
 435     unsigned long i = DICT_HT_INITIAL_SIZE;
 436
 437     if (size >= LONG_MAX) return LONG_MAX;
 438     while(1) {
 439         if (i >= size)
 440             return i;
 441         i *= 2;
 442     }
 443 }
 444
 445 /* Returns the index of a free slot that can be populated with
 446  * an hash entry for the given 'key'.
 447  * If the key already exists, -1 is returned. */
 448 static int _dictKeyIndex(dict *ht, const void *key)
 449 {
 450     unsigned int h;
 451     dictEntry *he;
 452
 453     /* Expand the hashtable if needed */
 454     if (_dictExpandIfNeeded(ht) == DICT_ERR)
 455         return -1;
 456     /* Compute the key hash value */
 457     h = dictHashKey(ht, key) & ht->sizemask;
 458     /* Search if this slot does not already contain the given key */
 459     he = ht->table[h];
 460     while(he) {
 461         if (dictCompareHashKeys(ht, key, he->key))
 462             return -1;
 463         he = he->next;
 464     }
 465     return h;
 466 }
 467
 468 void dictEmpty(dict *ht) {
 469     _dictClear(ht);
 470 }
 471
 472 #define DICT_STATS_VECTLEN 50
 473 void dictPrintStats(dict *ht) {
 474     unsigned long i, slots = 0, chainlen, maxchainlen = 0;
 475     unsigned long totchainlen = 0;
 476     unsigned long clvector[DICT_STATS_VECTLEN];
 477
 478     if (ht->used == 0) {
 479         printf("No stats available for empty dictionaries\n");
 480         return;
 481     }
 482
 483     for (i = 0; i < DICT_STATS_VECTLEN; i++) clvector[i] = 0;
 484     for (i = 0; i < ht->size; i++) {
 485         dictEntry *he;
 486
 487         if (ht->table[i] == NULL) {
 488             clvector[0]++;
 489             continue;
 490         }
 491         slots++;
 492         /* For each hash entry on this slot... */
 493         chainlen = 0;
 494         he = ht->table[i];
 495         while(he) {
 496             chainlen++;
 497             he = he->next;
 498         }
 499         clvector[(chainlen < DICT_STATS_VECTLEN) ? chainlen : (DICT_STATS_VECTLEN-1)]++;
 500         if (chainlen > maxchainlen) maxchainlen = chainlen;
 501         totchainlen += chainlen;
 502     }
 503     printf("Hash table stats:\n");
 504     printf(" table size: %ld\n", ht->size);
 505     printf(" number of elements: %ld\n", ht->used);
 506     printf(" different slots: %ld\n", slots);
 507     printf(" max chain length: %ld\n", maxchainlen);
 508     printf(" avg chain length (counted): %.02f\n", (float)totchainlen/slots);
 509     printf(" avg chain length (computed): %.02f\n", (float)ht->used/slots);
 510     printf(" Chain length distribution:\n");
 511     for (i = 0; i < DICT_STATS_VECTLEN-1; i++) {
 512         if (clvector[i] == 0) continue;
 513         printf("   %s%ld: %ld (%.02f%%)\n",(i == DICT_STATS_VECTLEN-1)?">= ":"", i, clvector[i], ((float)clvector[i]/ht->size)*100);
 514     }
 515 }
 516
 517 void dictEnableResize(void) {
 518     dict_can_resize = 1;
 519 }
 520
 521 void dictDisableResize(void) {
 522     dict_can_resize = 1;
 523 }
 524
 525 /* ----------------------- StringCopy Hash Table Type ------------------------*/
 526
 527 static unsigned int _dictStringCopyHTHashFunction(const void *key)
 528 {
 529     return dictGenHashFunction(key, strlen(key));
 530 }
 531
 532 static void *_dictStringCopyHTKeyDup(void *privdata, const void *key)
 533 {
 534     int len = strlen(key);
 535     char *copy = _dictAlloc(len+1);
 536     DICT_NOTUSED(privdata);
 537
 538     memcpy(copy, key, len);
 539     copy[len] = '\0';
 540     return copy;
 541 }
 542
 543 static void *_dictStringKeyValCopyHTValDup(void *privdata, const void *val)
 544 {
 545     int len = strlen(val);
 546     char *copy = _dictAlloc(len+1);
 547     DICT_NOTUSED(privdata);
 548
 549     memcpy(copy, val, len);
 550     copy[len] = '\0';
 551     return copy;
 552 }
 553
 554 static int _dictStringCopyHTKeyCompare(void *privdata, const void *key1,
 555         const void *key2)
 556 {
 557     DICT_NOTUSED(privdata);
 558
 559     return strcmp(key1, key2) == 0;
 560 }
 561
 562 static void _dictStringCopyHTKeyDestructor(void *privdata, void *key)
 563 {
 564     DICT_NOTUSED(privdata);
 565
 566     _dictFree((void*)key); /* ATTENTION: const cast */
 567 }
 568
 569 static void _dictStringKeyValCopyHTValDestructor(void *privdata, void *val)
 570 {
 571     DICT_NOTUSED(privdata);
 572
 573     _dictFree((void*)val); /* ATTENTION: const cast */
 574 }
 575
 576 dictType dictTypeHeapStringCopyKey = {
 577     _dictStringCopyHTHashFunction,        /* hash function */
 578     _dictStringCopyHTKeyDup,              /* key dup */
 579     NULL,                               /* val dup */
 580     _dictStringCopyHTKeyCompare,          /* key compare */
 581     _dictStringCopyHTKeyDestructor,       /* key destructor */
 582     NULL                                /* val destructor */
 583 };
 584
 585 /* This is like StringCopy but does not auto-duplicate the key.
 586  * It's used for intepreter's shared strings. */
 587 dictType dictTypeHeapStrings = {
 588     _dictStringCopyHTHashFunction,        /* hash function */
 589     NULL,                               /* key dup */
 590     NULL,                               /* val dup */
 591     _dictStringCopyHTKeyCompare,          /* key compare */
 592     _dictStringCopyHTKeyDestructor,       /* key destructor */
 593     NULL                                /* val destructor */
 594 };
 595
 596 /* This is like StringCopy but also automatically handle dynamic
 597  * allocated C strings as values. */
 598 dictType dictTypeHeapStringCopyKeyValue = {
 599     _dictStringCopyHTHashFunction,        /* hash function */
 600     _dictStringCopyHTKeyDup,              /* key dup */
 601     _dictStringKeyValCopyHTValDup,        /* val dup */
 602     _dictStringCopyHTKeyCompare,          /* key compare */
 603     _dictStringCopyHTKeyDestructor,       /* key destructor */
 604     _dictStringKeyValCopyHTValDestructor, /* val destructor */
 605 };