dict.c

   1 /* Hash Tables Implementation.
   2  *
   3  * This file implements in-memory hash tables with insert/del/replace/find/
   4  * get-random-element operations. Hash tables auto-resize if needed;
   5  * tables with a power-of-two size are used, and collisions are handled by
   6  * chaining. See the source code for more information... :)
   7  *
   8  * Copyright (c) 2006-2010, Salvatore Sanfilippo <antirez at gmail dot com>
   9  * All rights reserved.
  10  *
  11  * Redistribution and use in source and binary forms, with or without
  12  * modification, are permitted provided that the following conditions are met:
  13  *
  14  *   * Redistributions of source code must retain the above copyright notice,
  15  *     this list of conditions and the following disclaimer.
  16  *   * Redistributions in binary form must reproduce the above copyright
  17  *     notice, this list of conditions and the following disclaimer in the
  18  *     documentation and/or other materials provided with the distribution.
  19  *   * Neither the name of Redis nor the names of its contributors may be used
  20  *     to endorse or promote products derived from this software without
  21  *     specific prior written permission.
  22  *
  23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  24  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  26  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  27  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  28  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  29  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  30  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  31  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  32  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  33  * POSSIBILITY OF SUCH DAMAGE.
  34  */
  35
  36 #include "fmacros.h"
  37
  38 #include <stdio.h>
  39 #include <stdlib.h>
  40 #include <string.h>
  41 #include <stdarg.h>
  42 #include <assert.h>
  43 #include <limits.h>
  44
  45 #include "dict.h"
  46 #include "zmalloc.h"
  47
  48 /* ---------------------------- Utility functions --------------------------- */
  49
  /* Print a printf()-style diagnostic on stderr, framed by the
   * "DICT LIBRARY PANIC" banner. The function only reports the problem:
   * it returns normally to the caller afterwards. */
  static void _dictPanic(const char *fmt, ...)
  {
      va_list args;

      fputs("\nDICT LIBRARY PANIC: ", stderr);
      va_start(args, fmt);
      vfprintf(stderr, fmt, args);
      va_end(args);
      fputs("\n\n", stderr);
  }
  60
  61 /* ------------------------- Heap Management Wrappers------------------------ */
  62
  /* Allocate 'size' bytes through zmalloc(). When zmalloc() yields NULL an
   * "Out of memory" panic message is printed, and the NULL pointer is still
   * handed back to the caller. */
  static void *_dictAlloc(size_t size)
  {
      void *mem = zmalloc(size);

      if (!mem) {
          _dictPanic("Out of memory");
      }
      return mem;
  }
  70
  /* Hand 'ptr' back to the allocator through zfree(). */
  static void _dictFree(void *ptr)
  {
      zfree(ptr);
  }
  74
  75 /* -------------------------- private prototypes ---------------------------- */
  76
  77 static int _dictExpandIfNeeded(dict *ht);
  78 static unsigned long _dictNextPower(unsigned long size);
  79 static int _dictKeyIndex(dict *ht, const void *key);
  80 static int _dictInit(dict *ht, dictType *type, void *privDataPtr);
  81
  82 /* -------------------------- hash functions -------------------------------- */
  83
  /* Thomas Wang's 32 bit Mix Function: scrambles the bits of 'key' through a
   * fixed sequence of shift, add, complement and xor steps and returns the
   * mixed value. */
  unsigned int dictIntHashFunction(unsigned int key)
  {
      unsigned int mixed = key;

      mixed = mixed + ~(mixed << 15);
      mixed = mixed ^ (mixed >> 10);
      mixed = mixed + (mixed << 3);
      mixed = mixed ^ (mixed >> 6);
      mixed = mixed + ~(mixed << 11);
      mixed = mixed ^ (mixed >> 16);
      return mixed;
  }
  95
  /* Identity hash function for integer keys: the key is its own hash. */
  unsigned int dictIdentityHashFunction(unsigned int key)
  {
      unsigned int hash = key;

      return hash;
  }
 101
 /* Generic hash function (a popular one from Bernstein, "times 33").
  * I tested a few and this was the best.
  *
  * Hashes the first 'len' bytes of 'buf', starting from the seed 5381.
  * A 'len' that is zero or negative hashes nothing and yields the seed. */
 unsigned int dictGenHashFunction(const unsigned char *buf, int len) {
     unsigned int hash = 5381;

     /* "len-- > 0" rather than "len--": a negative length must not send the
      * loop walking past the end of the buffer. */
     while (len-- > 0)
         hash = ((hash << 5) + hash) + (*buf++); /* hash * 33 + c */
     return hash;
 }
 111
 112 /* ----------------------------- API implementation ------------------------- */
 113
 114 /* Reset an hashtable already initialized with ht_init().
 115  * NOTE: This function should only called by ht_destroy(). */
 116 static void _dictReset(dict *ht)
 117 {
 118     ht->table = NULL;
 119     ht->size = 0;
 120     ht->sizemask = 0;
 121     ht->used = 0;
 122 }
 123
 124 /* Create a new hash table */
 125 dict *dictCreate(dictType *type,
 126         void *privDataPtr)
 127 {
 128     dict *ht = _dictAlloc(sizeof(*ht));
 129
 130     _dictInit(ht,type,privDataPtr);
 131     return ht;
 132 }
 133
 134 /* Initialize the hash table */
 135 int _dictInit(dict *ht, dictType *type,
 136         void *privDataPtr)
 137 {
 138     _dictReset(ht);
 139     ht->type = type;
 140     ht->privdata = privDataPtr;
 141     return DICT_OK;
 142 }
 143
 144 /* Resize the table to the minimal size that contains all the elements,
 145  * but with the invariant of a USER/BUCKETS ration near to <= 1 */
 146 int dictResize(dict *ht)
 147 {
 148     int minimal = ht->used;
 149
 150     if (minimal < DICT_HT_INITIAL_SIZE)
 151         minimal = DICT_HT_INITIAL_SIZE;
 152     return dictExpand(ht, minimal);
 153 }
 154
 155 /* Expand or create the hashtable */
 156 int dictExpand(dict *ht, unsigned long size)
 157 {
 158     dict n; /* the new hashtable */
 159     unsigned long realsize = _dictNextPower(size), i;
 160
 161     /* the size is invalid if it is smaller than the number of
 162      * elements already inside the hashtable */
 163     if (ht->used > size)
 164         return DICT_ERR;
 165
 166     _dictInit(&n, ht->type, ht->privdata);
 167     n.size = realsize;
 168     n.sizemask = realsize-1;
 169     n.table = _dictAlloc(realsize*sizeof(dictEntry*));
 170
 171     /* Initialize all the pointers to NULL */
 172     memset(n.table, 0, realsize*sizeof(dictEntry*));
 173
 174     /* Copy all the elements from the old to the new table:
 175      * note that if the old hash table is empty ht->size is zero,
 176      * so dictExpand just creates an hash table. */
 177     n.used = ht->used;
 178     for (i = 0; i < ht->size && ht->used > 0; i++) {
 179         dictEntry *he, *nextHe;
 180
 181         if (ht->table[i] == NULL) continue;
 182
 183         /* For each hash entry on this slot... */
 184         he = ht->table[i];
 185         while(he) {
 186             unsigned int h;
 187
 188             nextHe = he->next;
 189             /* Get the new element index */
 190             h = dictHashKey(ht, he->key) & n.sizemask;
 191             he->next = n.table[h];
 192             n.table[h] = he;
 193             ht->used--;
 194             /* Pass to the next element */
 195             he = nextHe;
 196         }
 197     }
 198     assert(ht->used == 0);
 199     _dictFree(ht->table);
 200
 201     /* Remap the new hashtable in the old */
 202     *ht = n;
 203     return DICT_OK;
 204 }
 205
 206 /* Add an element to the target hash table */
 207 int dictAdd(dict *ht, void *key, void *val)
 208 {
 209     int index;
 210     dictEntry *entry;
 211
 212     /* Get the index of the new element, or -1 if
 213      * the element already exists. */
 214     if ((index = _dictKeyIndex(ht, key)) == -1)
 215         return DICT_ERR;
 216
 217     /* Allocates the memory and stores key */
 218     entry = _dictAlloc(sizeof(*entry));
 219     entry->next = ht->table[index];
 220     ht->table[index] = entry;
 221
 222     /* Set the hash entry fields. */
 223     dictSetHashKey(ht, entry, key);
 224     dictSetHashVal(ht, entry, val);
 225     ht->used++;
 226     return DICT_OK;
 227 }
 228
 229 /* Add an element, discarding the old if the key already exists.
 230  * Return 1 if the key was added from scratch, 0 if there was already an
 231  * element with such key and dictReplace() just performed a value update
 232  * operation. */
 233 int dictReplace(dict *ht, void *key, void *val)
 234 {
 235     dictEntry *entry;
 236
 237     /* Try to add the element. If the key
 238      * does not exists dictAdd will suceed. */
 239     if (dictAdd(ht, key, val) == DICT_OK)
 240         return 1;
 241     /* It already exists, get the entry */
 242     entry = dictFind(ht, key);
 243     /* Free the old value and set the new one */
 244     dictFreeEntryVal(ht, entry);
 245     dictSetHashVal(ht, entry, val);
 246     return 0;
 247 }
 248
 249 /* Search and remove an element */
 250 static int dictGenericDelete(dict *ht, const void *key, int nofree)
 251 {
 252     unsigned int h;
 253     dictEntry *he, *prevHe;
 254
 255     if (ht->size == 0)
 256         return DICT_ERR;
 257     h = dictHashKey(ht, key) & ht->sizemask;
 258     he = ht->table[h];
 259
 260     prevHe = NULL;
 261     while(he) {
 262         if (dictCompareHashKeys(ht, key, he->key)) {
 263             /* Unlink the element from the list */
 264             if (prevHe)
 265                 prevHe->next = he->next;
 266             else
 267                 ht->table[h] = he->next;
 268             if (!nofree) {
 269                 dictFreeEntryKey(ht, he);
 270                 dictFreeEntryVal(ht, he);
 271             }
 272             _dictFree(he);
 273             ht->used--;
 274             return DICT_OK;
 275         }
 276         prevHe = he;
 277         he = he->next;
 278     }
 279     return DICT_ERR; /* not found */
 280 }
 281
 282 int dictDelete(dict *ht, const void *key) {
 283     return dictGenericDelete(ht,key,0);
 284 }
 285
 286 int dictDeleteNoFree(dict *ht, const void *key) {
 287     return dictGenericDelete(ht,key,1);
 288 }
 289
 290 /* Destroy an entire hash table */
 291 int _dictClear(dict *ht)
 292 {
 293     unsigned long i;
 294
 295     /* Free all the elements */
 296     for (i = 0; i < ht->size && ht->used > 0; i++) {
 297         dictEntry *he, *nextHe;
 298
 299         if ((he = ht->table[i]) == NULL) continue;
 300         while(he) {
 301             nextHe = he->next;
 302             dictFreeEntryKey(ht, he);
 303             dictFreeEntryVal(ht, he);
 304             _dictFree(he);
 305             ht->used--;
 306             he = nextHe;
 307         }
 308     }
 309     /* Free the table and the allocated cache structure */
 310     _dictFree(ht->table);
 311     /* Re-initialize the table */
 312     _dictReset(ht);
 313     return DICT_OK; /* never fails */
 314 }
 315
 316 /* Clear & Release the hash table */
 317 void dictRelease(dict *ht)
 318 {
 319     _dictClear(ht);
 320     _dictFree(ht);
 321 }
 322
 323 dictEntry *dictFind(dict *ht, const void *key)
 324 {
 325     dictEntry *he;
 326     unsigned int h;
 327
 328     if (ht->size == 0) return NULL;
 329     h = dictHashKey(ht, key) & ht->sizemask;
 330     he = ht->table[h];
 331     while(he) {
 332         if (dictCompareHashKeys(ht, key, he->key))
 333             return he;
 334         he = he->next;
 335     }
 336     return NULL;
 337 }
 338
 339 dictIterator *dictGetIterator(dict *ht)
 340 {
 341     dictIterator *iter = _dictAlloc(sizeof(*iter));
 342
 343     iter->ht = ht;
 344     iter->index = -1;
 345     iter->entry = NULL;
 346     iter->nextEntry = NULL;
 347     return iter;
 348 }
 349
 350 dictEntry *dictNext(dictIterator *iter)
 351 {
 352     while (1) {
 353         if (iter->entry == NULL) {
 354             iter->index++;
 355             if (iter->index >=
 356                     (signed)iter->ht->size) break;
 357             iter->entry = iter->ht->table[iter->index];
 358         } else {
 359             iter->entry = iter->nextEntry;
 360         }
 361         if (iter->entry) {
 362             /* We need to save the 'next' here, the iterator user
 363              * may delete the entry we are returning. */
 364             iter->nextEntry = iter->entry->next;
 365             return iter->entry;
 366         }
 367     }
 368     return NULL;
 369 }
 370
 371 void dictReleaseIterator(dictIterator *iter)
 372 {
 373     _dictFree(iter);
 374 }
 375
 376 /* Return a random entry from the hash table. Useful to
 377  * implement randomized algorithms */
 378 dictEntry *dictGetRandomKey(dict *ht)
 379 {
 380     dictEntry *he;
 381     unsigned int h;
 382     int listlen, listele;
 383
 384     if (ht->used == 0) return NULL;
 385     do {
 386         h = random() & ht->sizemask;
 387         he = ht->table[h];
 388     } while(he == NULL);
 389
 390     /* Now we found a non empty bucket, but it is a linked
 391      * list and we need to get a random element from the list.
 392      * The only sane way to do so is to count the element and
 393      * select a random index. */
 394     listlen = 0;
 395     while(he) {
 396         he = he->next;
 397         listlen++;
 398     }
 399     listele = random() % listlen;
 400     he = ht->table[h];
 401     while(listele--) he = he->next;
 402     return he;
 403 }
 404
 405 /* ------------------------- private functions ------------------------------ */
 406
 407 /* Expand the hash table if needed */
 408 static int _dictExpandIfNeeded(dict *ht)
 409 {
 410     /* If the hash table is empty expand it to the intial size,
 411      * if the table is "full" dobule its size. */
 412     if (ht->size == 0)
 413         return dictExpand(ht, DICT_HT_INITIAL_SIZE);
 414     if (ht->used == ht->size)
 415         return dictExpand(ht, ht->size*2);
 416     return DICT_OK;
 417 }
 418
 419 /* Our hash table capability is a power of two */
 420 static unsigned long _dictNextPower(unsigned long size)
 421 {
 422     unsigned long i = DICT_HT_INITIAL_SIZE;
 423
 424     if (size >= LONG_MAX) return LONG_MAX;
 425     while(1) {
 426         if (i >= size)
 427             return i;
 428         i *= 2;
 429     }
 430 }
 431
 432 /* Returns the index of a free slot that can be populated with
 433  * an hash entry for the given 'key'.
 434  * If the key already exists, -1 is returned. */
 435 static int _dictKeyIndex(dict *ht, const void *key)
 436 {
 437     unsigned int h;
 438     dictEntry *he;
 439
 440     /* Expand the hashtable if needed */
 441     if (_dictExpandIfNeeded(ht) == DICT_ERR)
 442         return -1;
 443     /* Compute the key hash value */
 444     h = dictHashKey(ht, key) & ht->sizemask;
 445     /* Search if this slot does not already contain the given key */
 446     he = ht->table[h];
 447     while(he) {
 448         if (dictCompareHashKeys(ht, key, he->key))
 449             return -1;
 450         he = he->next;
 451     }
 452     return h;
 453 }
 454
 455 void dictEmpty(dict *ht) {
 456     _dictClear(ht);
 457 }
 458
 459 #define DICT_STATS_VECTLEN 50
 460 void dictPrintStats(dict *ht) {
 461     unsigned long i, slots = 0, chainlen, maxchainlen = 0;
 462     unsigned long totchainlen = 0;
 463     unsigned long clvector[DICT_STATS_VECTLEN];
 464
 465     if (ht->used == 0) {
 466         printf("No stats available for empty dictionaries\n");
 467         return;
 468     }
 469
 470     for (i = 0; i < DICT_STATS_VECTLEN; i++) clvector[i] = 0;
 471     for (i = 0; i < ht->size; i++) {
 472         dictEntry *he;
 473
 474         if (ht->table[i] == NULL) {
 475             clvector[0]++;
 476             continue;
 477         }
 478         slots++;
 479         /* For each hash entry on this slot... */
 480         chainlen = 0;
 481         he = ht->table[i];
 482         while(he) {
 483             chainlen++;
 484             he = he->next;
 485         }
 486         clvector[(chainlen < DICT_STATS_VECTLEN) ? chainlen : (DICT_STATS_VECTLEN-1)]++;
 487         if (chainlen > maxchainlen) maxchainlen = chainlen;
 488         totchainlen += chainlen;
 489     }
 490     printf("Hash table stats:\n");
 491     printf(" table size: %ld\n", ht->size);
 492     printf(" number of elements: %ld\n", ht->used);
 493     printf(" different slots: %ld\n", slots);
 494     printf(" max chain length: %ld\n", maxchainlen);
 495     printf(" avg chain length (counted): %.02f\n", (float)totchainlen/slots);
 496     printf(" avg chain length (computed): %.02f\n", (float)ht->used/slots);
 497     printf(" Chain length distribution:\n");
 498     for (i = 0; i < DICT_STATS_VECTLEN-1; i++) {
 499         if (clvector[i] == 0) continue;
 500         printf("   %s%ld: %ld (%.02f%%)\n",(i == DICT_STATS_VECTLEN-1)?">= ":"", i, clvector[i], ((float)clvector[i]/ht->size)*100);
 501     }
 502 }
 503
 504 /* ----------------------- StringCopy Hash Table Type ------------------------*/
 505
 /* Hash callback for NUL terminated string keys: the bytes of the string,
  * terminator excluded, are fed to dictGenHashFunction(). */
 static unsigned int _dictStringCopyHTHashFunction(const void *key)
 {
     const unsigned char *bytes = key;

     return dictGenHashFunction(bytes, strlen(key));
 }
 510
 /* Key duplication callback: return a newly allocated copy of the NUL
  * terminated string 'key', or NULL if the copy can't be allocated.
  * 'privdata' is not used. */
 static void *_dictStringCopyHTKeyDup(void *privdata, const void *key)
 {
     /* size_t, as returned by strlen(): an int could overflow on very
      * long strings. */
     size_t len = strlen(key);
     char *copy = _dictAlloc(len+1);
     DICT_NOTUSED(privdata);

     /* _dictAlloc() may return NULL: don't copy into it. */
     if (copy == NULL)
         return NULL;
     memcpy(copy, key, len);
     copy[len] = '\0';
     return copy;
 }
 521
 /* Value duplication callback: return a newly allocated copy of the NUL
  * terminated string 'val', or NULL if the copy can't be allocated.
  * 'privdata' is not used. */
 static void *_dictStringKeyValCopyHTValDup(void *privdata, const void *val)
 {
     /* size_t, as returned by strlen(): an int could overflow on very
      * long strings. */
     size_t len = strlen(val);
     char *copy = _dictAlloc(len+1);
     DICT_NOTUSED(privdata);

     /* _dictAlloc() may return NULL: don't copy into it. */
     if (copy == NULL)
         return NULL;
     memcpy(copy, val, len);
     copy[len] = '\0';
     return copy;
 }
 532
 /* Key comparison callback for NUL terminated strings: returns 1 if the two
  * keys hold the same characters, 0 otherwise. 'privdata' is not used. */
 static int _dictStringCopyHTKeyCompare(void *privdata, const void *key1,
         const void *key2)
 {
     const char *left = key1;
     const char *right = key2;

     DICT_NOTUSED(privdata);

     return strcmp(left, right) == 0;
 }
 540
 /* Key destructor callback: release the memory of 'key' with _dictFree().
  * 'privdata' is not used. */
 static void _dictStringCopyHTKeyDestructor(void *privdata, void *key)
 {
     DICT_NOTUSED(privdata);

     _dictFree(key);
 }
 547
 /* Value destructor callback: release the memory of 'val' with _dictFree().
  * 'privdata' is not used. */
 static void _dictStringKeyValCopyHTValDestructor(void *privdata, void *val)
 {
     DICT_NOTUSED(privdata);

     _dictFree(val);
 }
 554
 555 dictType dictTypeHeapStringCopyKey = {
 556     _dictStringCopyHTHashFunction,        /* hash function */
 557     _dictStringCopyHTKeyDup,              /* key dup */
 558     NULL,                               /* val dup */
 559     _dictStringCopyHTKeyCompare,          /* key compare */
 560     _dictStringCopyHTKeyDestructor,       /* key destructor */
 561     NULL                                /* val destructor */
 562 };
 563
 564 /* This is like StringCopy but does not auto-duplicate the key.
 565  * It's used for intepreter's shared strings. */
 566 dictType dictTypeHeapStrings = {
 567     _dictStringCopyHTHashFunction,        /* hash function */
 568     NULL,                               /* key dup */
 569     NULL,                               /* val dup */
 570     _dictStringCopyHTKeyCompare,          /* key compare */
 571     _dictStringCopyHTKeyDestructor,       /* key destructor */
 572     NULL                                /* val destructor */
 573 };
 574
 575 /* This is like StringCopy but also automatically handle dynamic
 576  * allocated C strings as values. */
 577 dictType dictTypeHeapStringCopyKeyValue = {
 578     _dictStringCopyHTHashFunction,        /* hash function */
 579     _dictStringCopyHTKeyDup,              /* key dup */
 580     _dictStringKeyValCopyHTValDup,        /* val dup */
 581     _dictStringCopyHTKeyCompare,          /* key compare */
 582     _dictStringCopyHTKeyDestructor,       /* key destructor */
 583     _dictStringKeyValCopyHTValDestructor, /* val destructor */
 584 };