1 /* Hash Tables Implementation. 
   3  * This file implements in memory hash tables with insert/del/replace/find/ 
   4  * get-random-element operations. Hash tables will auto resize if needed 
   5  * tables of power of two in size are used, collisions are handled by 
   6  * chaining. See the source code for more information... :) 
   8  * Copyright (c) 2006-2010, Salvatore Sanfilippo <antirez at gmail dot com> 
  11  * Redistribution and use in source and binary forms, with or without 
  12  * modification, are permitted provided that the following conditions are met: 
  14  *   * Redistributions of source code must retain the above copyright notice, 
  15  *     this list of conditions and the following disclaimer. 
  16  *   * Redistributions in binary form must reproduce the above copyright 
  17  *     notice, this list of conditions and the following disclaimer in the 
  18  *     documentation and/or other materials provided with the distribution. 
  19  *   * Neither the name of Redis nor the names of its contributors may be used 
  20  *     to endorse or promote products derived from this software without 
  21  *     specific prior written permission. 
  23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
  24  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
  25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
  26  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
  27  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
  28  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
  29  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
  30  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
  31  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
  32  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
  33  * POSSIBILITY OF SUCH DAMAGE. 
  49 /* Using dictEnableResize() / dictDisableResize() we make possible to 
  50  * enable/disable resizing of the hash table as needed. This is very important 
  51  * for Redis, as we use copy-on-write and don't want to move too much memory 
  52  * around when there is a child performing saving operations. 
  54  * Note that even when dict_can_resize is set to 0, not all resizes are 
  55  * prevented: an hash table is still allowed to grow if the ratio between 
  56  * the number of elements and the buckets > dict_force_resize_ratio. */ 
  57 static int dict_can_resize 
= 1; 
  58 static unsigned int dict_force_resize_ratio 
= 5; 
  60 /* -------------------------- private prototypes ---------------------------- */ 
  62 static int _dictExpandIfNeeded(dict 
*ht
); 
  63 static unsigned long _dictNextPower(unsigned long size
); 
  64 static int _dictKeyIndex(dict 
*ht
, const void *key
); 
  65 static int _dictInit(dict 
*ht
, dictType 
*type
, void *privDataPtr
); 
  67 /* -------------------------- hash functions -------------------------------- */ 
  69 /* Thomas Wang's 32 bit Mix Function */ 
  70 unsigned int dictIntHashFunction(unsigned int key
) 
  81 /* Identity hash function for integer keys */ 
  82 unsigned int dictIdentityHashFunction(unsigned int key
) 
  87 /* Generic hash function (a popular one from Bernstein). 
  88  * I tested a few and this was the best. */ 
  89 unsigned int dictGenHashFunction(const unsigned char *buf
, int len
) { 
  90     unsigned int hash 
= 5381; 
  93         hash 
= ((hash 
<< 5) + hash
) + (*buf
++); /* hash * 33 + c */ 
  97 /* ----------------------------- API implementation ------------------------- */ 
  99 /* Reset an hashtable already initialized with ht_init(). 
 100  * NOTE: This function should only called by ht_destroy(). */ 
 101 static void _dictReset(dictht 
*ht
) 
 109 /* Create a new hash table */ 
 110 dict 
*dictCreate(dictType 
*type
, 
 113     dict 
*d 
= zmalloc(sizeof(*d
)); 
 115     _dictInit(d
,type
,privDataPtr
); 
 119 /* Initialize the hash table */ 
 120 int _dictInit(dict 
*d
, dictType 
*type
, 
 123     _dictReset(&d
->ht
[0]); 
 124     _dictReset(&d
->ht
[1]); 
 126     d
->privdata 
= privDataPtr
; 
 132 /* Resize the table to the minimal size that contains all the elements, 
 133  * but with the invariant of a USER/BUCKETS ratio near to <= 1 */ 
 134 int dictResize(dict 
*d
) 
 138     if (!dict_can_resize 
|| dictIsRehashing(d
)) return DICT_ERR
; 
 139     minimal 
= d
->ht
[0].used
; 
 140     if (minimal 
< DICT_HT_INITIAL_SIZE
) 
 141         minimal 
= DICT_HT_INITIAL_SIZE
; 
 142     return dictExpand(d
, minimal
); 
 145 /* Expand or create the hashtable */ 
 146 int dictExpand(dict 
*d
, unsigned long size
) 
 148     dictht n
; /* the new hashtable */ 
 149     unsigned long realsize 
= _dictNextPower(size
); 
 151     /* the size is invalid if it is smaller than the number of 
 152      * elements already inside the hashtable */ 
 153     if (dictIsRehashing(d
) || d
->ht
[0].used 
> size
) 
 156     /* Allocate the new hashtable and initialize all pointers to NULL */ 
 158     n
.sizemask 
= realsize
-1; 
 159     n
.table 
= zcalloc(realsize
*sizeof(dictEntry
*)); 
 162     /* Is this the first initialization? If so it's not really a rehashing 
 163      * we just set the first hash table so that it can accept keys. */ 
 164     if (d
->ht
[0].table 
== NULL
) { 
 169     /* Prepare a second hash table for incremental rehashing */ 
 175 /* Performs N steps of incremental rehashing. Returns 1 if there are still 
 176  * keys to move from the old to the new hash table, otherwise 0 is returned. 
 177  * Note that a rehashing step consists in moving a bucket (that may have more 
 178  * thank one key as we use chaining) from the old to the new hash table. */ 
 179 int dictRehash(dict 
*d
, int n
) { 
 180     if (!dictIsRehashing(d
)) return 0; 
 183         dictEntry 
*de
, *nextde
; 
 185         /* Check if we already rehashed the whole table... */ 
 186         if (d
->ht
[0].used 
== 0) { 
 187             zfree(d
->ht
[0].table
); 
 189             _dictReset(&d
->ht
[1]); 
 194         /* Note that rehashidx can't overflow as we are sure there are more 
 195          * elements because ht[0].used != 0 */ 
 196         while(d
->ht
[0].table
[d
->rehashidx
] == NULL
) d
->rehashidx
++; 
 197         de 
= d
->ht
[0].table
[d
->rehashidx
]; 
 198         /* Move all the keys in this bucket from the old to the new hash HT */ 
 203             /* Get the index in the new hash table */ 
 204             h 
= dictHashKey(d
, de
->key
) & d
->ht
[1].sizemask
; 
 205             de
->next 
= d
->ht
[1].table
[h
]; 
 206             d
->ht
[1].table
[h
] = de
; 
 211         d
->ht
[0].table
[d
->rehashidx
] = NULL
; 
 217 long long timeInMilliseconds(void) { 
 220     gettimeofday(&tv
,NULL
); 
 221     return (((long long)tv
.tv_sec
)*1000)+(tv
.tv_usec
/1000); 
 224 /* Rehash for an amount of time between ms milliseconds and ms+1 milliseconds */ 
 225 int dictRehashMilliseconds(dict 
*d
, int ms
) { 
 226     long long start 
= timeInMilliseconds(); 
 229     while(dictRehash(d
,100)) { 
 231         if (timeInMilliseconds()-start 
> ms
) break; 
 236 /* This function performs just a step of rehashing, and only if there are 
 237  * not iterators bound to our hash table. When we have iterators in the middle 
 238  * of a rehashing we can't mess with the two hash tables otherwise some element 
 239  * can be missed or duplicated. 
 241  * This function is called by common lookup or update operations in the 
 242  * dictionary so that the hash table automatically migrates from H1 to H2 
 243  * while it is actively used. */ 
 244 static void _dictRehashStep(dict 
*d
) { 
 245     if (d
->iterators 
== 0) dictRehash(d
,1); 
 248 /* Add an element to the target hash table */ 
 249 int dictAdd(dict 
*d
, void *key
, void *val
) 
 255     if (dictIsRehashing(d
)) _dictRehashStep(d
); 
 257     /* Get the index of the new element, or -1 if 
 258      * the element already exists. */ 
 259     if ((index 
= _dictKeyIndex(d
, key
)) == -1) 
 262     /* Allocates the memory and stores key */ 
 263     ht 
= dictIsRehashing(d
) ? &d
->ht
[1] : &d
->ht
[0]; 
 264     entry 
= zmalloc(sizeof(*entry
)); 
 265     entry
->next 
= ht
->table
[index
]; 
 266     ht
->table
[index
] = entry
; 
 269     /* Set the hash entry fields. */ 
 270     dictSetHashKey(d
, entry
, key
); 
 271     dictSetHashVal(d
, entry
, val
); 
 275 /* Add an element, discarding the old if the key already exists. 
 276  * Return 1 if the key was added from scratch, 0 if there was already an 
 277  * element with such key and dictReplace() just performed a value update 
 279 int dictReplace(dict 
*d
, void *key
, void *val
) 
 281     dictEntry 
*entry
, auxentry
; 
 283     /* Try to add the element. If the key 
 284      * does not exists dictAdd will suceed. */ 
 285     if (dictAdd(d
, key
, val
) == DICT_OK
) 
 287     /* It already exists, get the entry */ 
 288     entry 
= dictFind(d
, key
); 
 289     /* Free the old value and set the new one */ 
 290     /* Set the new value and free the old one. Note that it is important 
 291      * to do that in this order, as the value may just be exactly the same 
 292      * as the previous one. In this context, think to reference counting, 
 293      * you want to increment (set), and then decrement (free), and not the 
 296     dictSetHashVal(d
, entry
, val
); 
 297     dictFreeEntryVal(d
, &auxentry
); 
 301 /* Search and remove an element */ 
 302 static int dictGenericDelete(dict 
*d
, const void *key
, int nofree
) 
 305     dictEntry 
*he
, *prevHe
; 
 308     if (d
->ht
[0].size 
== 0) return DICT_ERR
; /* d->ht[0].table is NULL */ 
 309     if (dictIsRehashing(d
)) _dictRehashStep(d
); 
 310     h 
= dictHashKey(d
, key
); 
 312     for (table 
= 0; table 
<= 1; table
++) { 
 313         idx 
= h 
& d
->ht
[table
].sizemask
; 
 314         he 
= d
->ht
[table
].table
[idx
]; 
 317             if (dictCompareHashKeys(d
, key
, he
->key
)) { 
 318                 /* Unlink the element from the list */ 
 320                     prevHe
->next 
= he
->next
; 
 322                     d
->ht
[table
].table
[idx
] = he
->next
; 
 324                     dictFreeEntryKey(d
, he
); 
 325                     dictFreeEntryVal(d
, he
); 
 334         if (!dictIsRehashing(d
)) break; 
 336     return DICT_ERR
; /* not found */ 
 339 int dictDelete(dict 
*ht
, const void *key
) { 
 340     return dictGenericDelete(ht
,key
,0); 
 343 int dictDeleteNoFree(dict 
*ht
, const void *key
) { 
 344     return dictGenericDelete(ht
,key
,1); 
 347 /* Destroy an entire dictionary */ 
 348 int _dictClear(dict 
*d
, dictht 
*ht
) 
 352     /* Free all the elements */ 
 353     for (i 
= 0; i 
< ht
->size 
&& ht
->used 
> 0; i
++) { 
 354         dictEntry 
*he
, *nextHe
; 
 356         if ((he 
= ht
->table
[i
]) == NULL
) continue; 
 359             dictFreeEntryKey(d
, he
); 
 360             dictFreeEntryVal(d
, he
); 
 366     /* Free the table and the allocated cache structure */ 
 368     /* Re-initialize the table */ 
 370     return DICT_OK
; /* never fails */ 
 373 /* Clear & Release the hash table */ 
 374 void dictRelease(dict 
*d
) 
 376     _dictClear(d
,&d
->ht
[0]); 
 377     _dictClear(d
,&d
->ht
[1]); 
 381 dictEntry 
*dictFind(dict 
*d
, const void *key
) 
 384     unsigned int h
, idx
, table
; 
 386     if (d
->ht
[0].size 
== 0) return NULL
; /* We don't have a table at all */ 
 387     if (dictIsRehashing(d
)) _dictRehashStep(d
); 
 388     h 
= dictHashKey(d
, key
); 
 389     for (table 
= 0; table 
<= 1; table
++) { 
 390         idx 
= h 
& d
->ht
[table
].sizemask
; 
 391         he 
= d
->ht
[table
].table
[idx
]; 
 393             if (dictCompareHashKeys(d
, key
, he
->key
)) 
 397         if (!dictIsRehashing(d
)) return NULL
; 
 402 void *dictFetchValue(dict 
*d
, const void *key
) { 
 405     he 
= dictFind(d
,key
); 
 406     return he 
? dictGetEntryVal(he
) : NULL
; 
 409 dictIterator 
*dictGetIterator(dict 
*d
) 
 411     dictIterator 
*iter 
= zmalloc(sizeof(*iter
)); 
 417     iter
->nextEntry 
= NULL
; 
 421 dictEntry 
*dictNext(dictIterator 
*iter
) 
 424         if (iter
->entry 
== NULL
) { 
 425             dictht 
*ht 
= &iter
->d
->ht
[iter
->table
]; 
 426             if (iter
->index 
== -1 && iter
->table 
== 0) iter
->d
->iterators
++; 
 428             if (iter
->index 
>= (signed) ht
->size
) { 
 429                 if (dictIsRehashing(iter
->d
) && iter
->table 
== 0) { 
 432                     ht 
= &iter
->d
->ht
[1]; 
 437             iter
->entry 
= ht
->table
[iter
->index
]; 
 439             iter
->entry 
= iter
->nextEntry
; 
 442             /* We need to save the 'next' here, the iterator user 
 443              * may delete the entry we are returning. */ 
 444             iter
->nextEntry 
= iter
->entry
->next
; 
 451 void dictReleaseIterator(dictIterator 
*iter
) 
 453     if (!(iter
->index 
== -1 && iter
->table 
== 0)) iter
->d
->iterators
--; 
 457 /* Return a random entry from the hash table. Useful to 
 458  * implement randomized algorithms */ 
 459 dictEntry 
*dictGetRandomKey(dict 
*d
) 
 461     dictEntry 
*he
, *orighe
; 
 463     int listlen
, listele
; 
 465     if (dictSize(d
) == 0) return NULL
; 
 466     if (dictIsRehashing(d
)) _dictRehashStep(d
); 
 467     if (dictIsRehashing(d
)) { 
 469             h 
= random() % (d
->ht
[0].size
+d
->ht
[1].size
); 
 470             he 
= (h 
>= d
->ht
[0].size
) ? d
->ht
[1].table
[h 
- d
->ht
[0].size
] : 
 475             h 
= random() & d
->ht
[0].sizemask
; 
 476             he 
= d
->ht
[0].table
[h
]; 
 480     /* Now we found a non empty bucket, but it is a linked 
 481      * list and we need to get a random element from the list. 
 482      * The only sane way to do so is counting the elements and 
 483      * select a random index. */ 
 490     listele 
= random() % listlen
; 
 492     while(listele
--) he 
= he
->next
; 
 496 /* ------------------------- private functions ------------------------------ */ 
 498 /* Expand the hash table if needed */ 
 499 static int _dictExpandIfNeeded(dict 
*d
) 
 501     /* Incremental rehashing already in progress. Return. */ 
 502     if (dictIsRehashing(d
)) return DICT_OK
; 
 504     /* If the hash table is empty expand it to the intial size. */ 
 505     if (d
->ht
[0].size 
== 0) return dictExpand(d
, DICT_HT_INITIAL_SIZE
); 
 507     /* If we reached the 1:1 ratio, and we are allowed to resize the hash 
 508      * table (global setting) or we should avoid it but the ratio between 
 509      * elements/buckets is over the "safe" threshold, we resize doubling 
 510      * the number of buckets. */ 
 511     if (d
->ht
[0].used 
>= d
->ht
[0].size 
&& 
 513          d
->ht
[0].used
/d
->ht
[0].size 
> dict_force_resize_ratio
)) 
 515         return dictExpand(d
, ((d
->ht
[0].size 
> d
->ht
[0].used
) ? 
 516                                     d
->ht
[0].size 
: d
->ht
[0].used
)*2); 
 521 /* Our hash table capability is a power of two */ 
 522 static unsigned long _dictNextPower(unsigned long size
) 
 524     unsigned long i 
= DICT_HT_INITIAL_SIZE
; 
 526     if (size 
>= LONG_MAX
) return LONG_MAX
; 
 534 /* Returns the index of a free slot that can be populated with 
 535  * an hash entry for the given 'key'. 
 536  * If the key already exists, -1 is returned. 
 538  * Note that if we are in the process of rehashing the hash table, the 
 539  * index is always returned in the context of the second (new) hash table. */ 
 540 static int _dictKeyIndex(dict 
*d
, const void *key
) 
 542     unsigned int h
, idx
, table
; 
 545     /* Expand the hashtable if needed */ 
 546     if (_dictExpandIfNeeded(d
) == DICT_ERR
) 
 548     /* Compute the key hash value */ 
 549     h 
= dictHashKey(d
, key
); 
 550     for (table 
= 0; table 
<= 1; table
++) { 
 551         idx 
= h 
& d
->ht
[table
].sizemask
; 
 552         /* Search if this slot does not already contain the given key */ 
 553         he 
= d
->ht
[table
].table
[idx
]; 
 555             if (dictCompareHashKeys(d
, key
, he
->key
)) 
 559         if (!dictIsRehashing(d
)) break; 
 564 void dictEmpty(dict 
*d
) { 
 565     _dictClear(d
,&d
->ht
[0]); 
 566     _dictClear(d
,&d
->ht
[1]); 
 571 #define DICT_STATS_VECTLEN 50 
 572 static void _dictPrintStatsHt(dictht 
*ht
) { 
 573     unsigned long i
, slots 
= 0, chainlen
, maxchainlen 
= 0; 
 574     unsigned long totchainlen 
= 0; 
 575     unsigned long clvector
[DICT_STATS_VECTLEN
]; 
 578         printf("No stats available for empty dictionaries\n"); 
 582     for (i 
= 0; i 
< DICT_STATS_VECTLEN
; i
++) clvector
[i
] = 0; 
 583     for (i 
= 0; i 
< ht
->size
; i
++) { 
 586         if (ht
->table
[i
] == NULL
) { 
 591         /* For each hash entry on this slot... */ 
 598         clvector
[(chainlen 
< DICT_STATS_VECTLEN
) ? chainlen 
: (DICT_STATS_VECTLEN
-1)]++; 
 599         if (chainlen 
> maxchainlen
) maxchainlen 
= chainlen
; 
 600         totchainlen 
+= chainlen
; 
 602     printf("Hash table stats:\n"); 
 603     printf(" table size: %ld\n", ht
->size
); 
 604     printf(" number of elements: %ld\n", ht
->used
); 
 605     printf(" different slots: %ld\n", slots
); 
 606     printf(" max chain length: %ld\n", maxchainlen
); 
 607     printf(" avg chain length (counted): %.02f\n", (float)totchainlen
/slots
); 
 608     printf(" avg chain length (computed): %.02f\n", (float)ht
->used
/slots
); 
 609     printf(" Chain length distribution:\n"); 
 610     for (i 
= 0; i 
< DICT_STATS_VECTLEN
-1; i
++) { 
 611         if (clvector
[i
] == 0) continue; 
 612         printf("   %s%ld: %ld (%.02f%%)\n",(i 
== DICT_STATS_VECTLEN
-1)?">= ":"", i
, clvector
[i
], ((float)clvector
[i
]/ht
->size
)*100); 
 616 void dictPrintStats(dict 
*d
) { 
 617     _dictPrintStatsHt(&d
->ht
[0]); 
 618     if (dictIsRehashing(d
)) { 
 619         printf("-- Rehashing into ht[1]:\n"); 
 620         _dictPrintStatsHt(&d
->ht
[1]); 
 624 void dictEnableResize(void) { 
 628 void dictDisableResize(void) { 
 634 /* The following are just example hash table types implementations. 
 635  * Not useful for Redis so they are commented out. 
 638 /* ----------------------- StringCopy Hash Table Type ------------------------*/ 
 640 static unsigned int _dictStringCopyHTHashFunction(const void *key
) 
 642     return dictGenHashFunction(key
, strlen(key
)); 
 645 static void *_dictStringDup(void *privdata
, const void *key
) 
 647     int len 
= strlen(key
); 
 648     char *copy 
= zmalloc(len
+1); 
 649     DICT_NOTUSED(privdata
); 
 651     memcpy(copy
, key
, len
); 
 656 static int _dictStringCopyHTKeyCompare(void *privdata
, const void *key1
, 
 659     DICT_NOTUSED(privdata
); 
 661     return strcmp(key1
, key2
) == 0; 
 664 static void _dictStringDestructor(void *privdata
, void *key
) 
 666     DICT_NOTUSED(privdata
); 
 671 dictType dictTypeHeapStringCopyKey 
= { 
 672     _dictStringCopyHTHashFunction
, /* hash function */ 
 673     _dictStringDup
,                /* key dup */ 
 675     _dictStringCopyHTKeyCompare
,   /* key compare */ 
 676     _dictStringDestructor
,         /* key destructor */ 
 677     NULL                           
/* val destructor */ 
 680 /* This is like StringCopy but does not auto-duplicate the key. 
 681  * It's used for intepreter's shared strings. */ 
 682 dictType dictTypeHeapStrings 
= { 
 683     _dictStringCopyHTHashFunction
, /* hash function */ 
 686     _dictStringCopyHTKeyCompare
,   /* key compare */ 
 687     _dictStringDestructor
,         /* key destructor */ 
 688     NULL                           
/* val destructor */ 
 691 /* This is like StringCopy but also automatically handle dynamic 
 692  * allocated C strings as values. */ 
 693 dictType dictTypeHeapStringCopyKeyValue 
= { 
 694     _dictStringCopyHTHashFunction
, /* hash function */ 
 695     _dictStringDup
,                /* key dup */ 
 696     _dictStringDup
,                /* val dup */ 
 697     _dictStringCopyHTKeyCompare
,   /* key compare */ 
 698     _dictStringDestructor
,         /* key destructor */ 
 699     _dictStringDestructor
,         /* val destructor */