]> git.saurik.com Git - redis.git/blame_incremental - src/dict.c
Rename zset range functions
[redis.git] / src / dict.c
... / ...
CommitLineData
1/* Hash Tables Implementation.
2 *
3 * This file implements in memory hash tables with insert/del/replace/find/
4 * get-random-element operations. Hash tables will auto resize if needed
5 * tables of power of two in size are used, collisions are handled by
6 * chaining. See the source code for more information... :)
7 *
8 * Copyright (c) 2006-2010, Salvatore Sanfilippo <antirez at gmail dot com>
9 * All rights reserved.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions are met:
13 *
14 * * Redistributions of source code must retain the above copyright notice,
15 * this list of conditions and the following disclaimer.
16 * * Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * * Neither the name of Redis nor the names of its contributors may be used
20 * to endorse or promote products derived from this software without
21 * specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
24 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
27 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33 * POSSIBILITY OF SUCH DAMAGE.
34 */
35
36#include "fmacros.h"
37
38#include <stdio.h>
39#include <stdlib.h>
40#include <string.h>
41#include <stdarg.h>
42#include <assert.h>
43#include <limits.h>
44#include <sys/time.h>
45#include <ctype.h>
46
47#include "dict.h"
48#include "zmalloc.h"
49
/* Using dictEnableResize() / dictDisableResize() we make it possible to
 * enable/disable resizing of the hash table as needed. This is very important
 * for Redis, as we use copy-on-write and don't want to move too much memory
 * around when there is a child performing saving operations.
 *
 * Note that even when dict_can_resize is set to 0, not all resizes are
 * prevented: a hash table is still allowed to grow if the ratio between
 * the number of elements and the buckets > dict_force_resize_ratio. */
58static int dict_can_resize = 1;
59static unsigned int dict_force_resize_ratio = 5;
60
61/* -------------------------- private prototypes ---------------------------- */
62
63static int _dictExpandIfNeeded(dict *ht);
64static unsigned long _dictNextPower(unsigned long size);
65static int _dictKeyIndex(dict *ht, const void *key);
66static int _dictInit(dict *ht, dictType *type, void *privDataPtr);
67
68/* -------------------------- hash functions -------------------------------- */
69
/* Thomas Wang's 32 bit mix function: scrambles the bits of 'key' through a
 * fixed sequence of shift, add and xor rounds and returns the mixed value. */
unsigned int dictIntHashFunction(unsigned int key)
{
    unsigned int mixed = key;

    mixed = mixed + ~(mixed << 15);
    mixed = mixed ^ (mixed >> 10);
    mixed = mixed + (mixed << 3);
    mixed = mixed ^ (mixed >> 6);
    mixed = mixed + ~(mixed << 11);
    mixed = mixed ^ (mixed >> 16);
    return mixed;
}
81
/* Identity hash for integer keys: the key is used, unchanged, as its own
 * hash value. */
unsigned int dictIdentityHashFunction(unsigned int key)
{
    unsigned int hash = key;

    return hash;
}
87
/* Generic hash function (a popular one from Bernstein): starting from 5381,
 * every byte of 'buf' is folded in as hash = hash * 33 + byte.
 *
 * 'len' is the number of bytes to hash. A zero or negative length hashes
 * nothing and returns the seed value 5381; the loop is bounded by 'len'
 * explicitly so a negative length can never walk past the buffer. */
unsigned int dictGenHashFunction(const unsigned char *buf, int len) {
    unsigned int hash = 5381;
    int i;

    for (i = 0; i < len; i++)
        hash = ((hash << 5) + hash) + buf[i]; /* hash * 33 + c */
    return hash;
}
97
/* Case insensitive variant of the Bernstein hash: every byte of 'buf' is
 * passed through tolower() before being folded in as hash = hash * 33 + c,
 * so inputs differing only in letter case hash to the same value.
 *
 * 'len' is the number of bytes to hash. A zero or negative length hashes
 * nothing and returns the seed value 5381; the loop is bounded by 'len'
 * explicitly so a negative length can never walk past the buffer. */
unsigned int dictGenCaseHashFunction(const unsigned char *buf, int len) {
    unsigned int hash = 5381;
    int i;

    for (i = 0; i < len; i++)
        hash = ((hash << 5) + hash) + (tolower(buf[i])); /* hash * 33 + c */
    return hash;
}
106
107/* ----------------------------- API implementation ------------------------- */
108
109/* Reset an hashtable already initialized with ht_init().
110 * NOTE: This function should only called by ht_destroy(). */
111static void _dictReset(dictht *ht)
112{
113 ht->table = NULL;
114 ht->size = 0;
115 ht->sizemask = 0;
116 ht->used = 0;
117}
118
119/* Create a new hash table */
120dict *dictCreate(dictType *type,
121 void *privDataPtr)
122{
123 dict *d = zmalloc(sizeof(*d));
124
125 _dictInit(d,type,privDataPtr);
126 return d;
127}
128
129/* Initialize the hash table */
130int _dictInit(dict *d, dictType *type,
131 void *privDataPtr)
132{
133 _dictReset(&d->ht[0]);
134 _dictReset(&d->ht[1]);
135 d->type = type;
136 d->privdata = privDataPtr;
137 d->rehashidx = -1;
138 d->iterators = 0;
139 return DICT_OK;
140}
141
142/* Resize the table to the minimal size that contains all the elements,
143 * but with the invariant of a USER/BUCKETS ratio near to <= 1 */
144int dictResize(dict *d)
145{
146 int minimal;
147
148 if (!dict_can_resize || dictIsRehashing(d)) return DICT_ERR;
149 minimal = d->ht[0].used;
150 if (minimal < DICT_HT_INITIAL_SIZE)
151 minimal = DICT_HT_INITIAL_SIZE;
152 return dictExpand(d, minimal);
153}
154
155/* Expand or create the hashtable */
156int dictExpand(dict *d, unsigned long size)
157{
158 dictht n; /* the new hashtable */
159 unsigned long realsize = _dictNextPower(size);
160
161 /* the size is invalid if it is smaller than the number of
162 * elements already inside the hashtable */
163 if (dictIsRehashing(d) || d->ht[0].used > size)
164 return DICT_ERR;
165
166 /* Allocate the new hashtable and initialize all pointers to NULL */
167 n.size = realsize;
168 n.sizemask = realsize-1;
169 n.table = zcalloc(realsize*sizeof(dictEntry*));
170 n.used = 0;
171
172 /* Is this the first initialization? If so it's not really a rehashing
173 * we just set the first hash table so that it can accept keys. */
174 if (d->ht[0].table == NULL) {
175 d->ht[0] = n;
176 return DICT_OK;
177 }
178
179 /* Prepare a second hash table for incremental rehashing */
180 d->ht[1] = n;
181 d->rehashidx = 0;
182 return DICT_OK;
183}
184
185/* Performs N steps of incremental rehashing. Returns 1 if there are still
186 * keys to move from the old to the new hash table, otherwise 0 is returned.
187 * Note that a rehashing step consists in moving a bucket (that may have more
188 * thank one key as we use chaining) from the old to the new hash table. */
189int dictRehash(dict *d, int n) {
190 if (!dictIsRehashing(d)) return 0;
191
192 while(n--) {
193 dictEntry *de, *nextde;
194
195 /* Check if we already rehashed the whole table... */
196 if (d->ht[0].used == 0) {
197 zfree(d->ht[0].table);
198 d->ht[0] = d->ht[1];
199 _dictReset(&d->ht[1]);
200 d->rehashidx = -1;
201 return 0;
202 }
203
204 /* Note that rehashidx can't overflow as we are sure there are more
205 * elements because ht[0].used != 0 */
206 assert(d->ht[0].size > (unsigned)d->rehashidx);
207 while(d->ht[0].table[d->rehashidx] == NULL) d->rehashidx++;
208 de = d->ht[0].table[d->rehashidx];
209 /* Move all the keys in this bucket from the old to the new hash HT */
210 while(de) {
211 unsigned int h;
212
213 nextde = de->next;
214 /* Get the index in the new hash table */
215 h = dictHashKey(d, de->key) & d->ht[1].sizemask;
216 de->next = d->ht[1].table[h];
217 d->ht[1].table[h] = de;
218 d->ht[0].used--;
219 d->ht[1].used++;
220 de = nextde;
221 }
222 d->ht[0].table[d->rehashidx] = NULL;
223 d->rehashidx++;
224 }
225 return 1;
226}
227
/* Return the current time reported by gettimeofday(), converted to
 * milliseconds (whole seconds * 1000 plus microseconds / 1000). */
long long timeInMilliseconds(void) {
    struct timeval now;
    long long seconds_ms;

    gettimeofday(&now,NULL);
    seconds_ms = ((long long)now.tv_sec)*1000;
    return seconds_ms+(now.tv_usec/1000);
}
234
235/* Rehash for an amount of time between ms milliseconds and ms+1 milliseconds */
236int dictRehashMilliseconds(dict *d, int ms) {
237 long long start = timeInMilliseconds();
238 int rehashes = 0;
239
240 while(dictRehash(d,100)) {
241 rehashes += 100;
242 if (timeInMilliseconds()-start > ms) break;
243 }
244 return rehashes;
245}
246
247/* This function performs just a step of rehashing, and only if there are
248 * not iterators bound to our hash table. When we have iterators in the middle
249 * of a rehashing we can't mess with the two hash tables otherwise some element
250 * can be missed or duplicated.
251 *
252 * This function is called by common lookup or update operations in the
253 * dictionary so that the hash table automatically migrates from H1 to H2
254 * while it is actively used. */
255static void _dictRehashStep(dict *d) {
256 if (d->iterators == 0) dictRehash(d,1);
257}
258
259/* Add an element to the target hash table */
260int dictAdd(dict *d, void *key, void *val)
261{
262 int index;
263 dictEntry *entry;
264 dictht *ht;
265
266 if (dictIsRehashing(d)) _dictRehashStep(d);
267
268 /* Get the index of the new element, or -1 if
269 * the element already exists. */
270 if ((index = _dictKeyIndex(d, key)) == -1)
271 return DICT_ERR;
272
273 /* Allocates the memory and stores key */
274 ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0];
275 entry = zmalloc(sizeof(*entry));
276 entry->next = ht->table[index];
277 ht->table[index] = entry;
278 ht->used++;
279
280 /* Set the hash entry fields. */
281 dictSetHashKey(d, entry, key);
282 dictSetHashVal(d, entry, val);
283 return DICT_OK;
284}
285
286/* Add an element, discarding the old if the key already exists.
287 * Return 1 if the key was added from scratch, 0 if there was already an
288 * element with such key and dictReplace() just performed a value update
289 * operation. */
290int dictReplace(dict *d, void *key, void *val)
291{
292 dictEntry *entry, auxentry;
293
294 /* Try to add the element. If the key
295 * does not exists dictAdd will suceed. */
296 if (dictAdd(d, key, val) == DICT_OK)
297 return 1;
298 /* It already exists, get the entry */
299 entry = dictFind(d, key);
300 /* Free the old value and set the new one */
301 /* Set the new value and free the old one. Note that it is important
302 * to do that in this order, as the value may just be exactly the same
303 * as the previous one. In this context, think to reference counting,
304 * you want to increment (set), and then decrement (free), and not the
305 * reverse. */
306 auxentry = *entry;
307 dictSetHashVal(d, entry, val);
308 dictFreeEntryVal(d, &auxentry);
309 return 0;
310}
311
312/* Search and remove an element */
313static int dictGenericDelete(dict *d, const void *key, int nofree)
314{
315 unsigned int h, idx;
316 dictEntry *he, *prevHe;
317 int table;
318
319 if (d->ht[0].size == 0) return DICT_ERR; /* d->ht[0].table is NULL */
320 if (dictIsRehashing(d)) _dictRehashStep(d);
321 h = dictHashKey(d, key);
322
323 for (table = 0; table <= 1; table++) {
324 idx = h & d->ht[table].sizemask;
325 he = d->ht[table].table[idx];
326 prevHe = NULL;
327 while(he) {
328 if (dictCompareHashKeys(d, key, he->key)) {
329 /* Unlink the element from the list */
330 if (prevHe)
331 prevHe->next = he->next;
332 else
333 d->ht[table].table[idx] = he->next;
334 if (!nofree) {
335 dictFreeEntryKey(d, he);
336 dictFreeEntryVal(d, he);
337 }
338 zfree(he);
339 d->ht[table].used--;
340 return DICT_OK;
341 }
342 prevHe = he;
343 he = he->next;
344 }
345 if (!dictIsRehashing(d)) break;
346 }
347 return DICT_ERR; /* not found */
348}
349
350int dictDelete(dict *ht, const void *key) {
351 return dictGenericDelete(ht,key,0);
352}
353
354int dictDeleteNoFree(dict *ht, const void *key) {
355 return dictGenericDelete(ht,key,1);
356}
357
358/* Destroy an entire dictionary */
359int _dictClear(dict *d, dictht *ht)
360{
361 unsigned long i;
362
363 /* Free all the elements */
364 for (i = 0; i < ht->size && ht->used > 0; i++) {
365 dictEntry *he, *nextHe;
366
367 if ((he = ht->table[i]) == NULL) continue;
368 while(he) {
369 nextHe = he->next;
370 dictFreeEntryKey(d, he);
371 dictFreeEntryVal(d, he);
372 zfree(he);
373 ht->used--;
374 he = nextHe;
375 }
376 }
377 /* Free the table and the allocated cache structure */
378 zfree(ht->table);
379 /* Re-initialize the table */
380 _dictReset(ht);
381 return DICT_OK; /* never fails */
382}
383
384/* Clear & Release the hash table */
385void dictRelease(dict *d)
386{
387 _dictClear(d,&d->ht[0]);
388 _dictClear(d,&d->ht[1]);
389 zfree(d);
390}
391
392dictEntry *dictFind(dict *d, const void *key)
393{
394 dictEntry *he;
395 unsigned int h, idx, table;
396
397 if (d->ht[0].size == 0) return NULL; /* We don't have a table at all */
398 if (dictIsRehashing(d)) _dictRehashStep(d);
399 h = dictHashKey(d, key);
400 for (table = 0; table <= 1; table++) {
401 idx = h & d->ht[table].sizemask;
402 he = d->ht[table].table[idx];
403 while(he) {
404 if (dictCompareHashKeys(d, key, he->key))
405 return he;
406 he = he->next;
407 }
408 if (!dictIsRehashing(d)) return NULL;
409 }
410 return NULL;
411}
412
413void *dictFetchValue(dict *d, const void *key) {
414 dictEntry *he;
415
416 he = dictFind(d,key);
417 return he ? dictGetEntryVal(he) : NULL;
418}
419
420dictIterator *dictGetIterator(dict *d)
421{
422 dictIterator *iter = zmalloc(sizeof(*iter));
423
424 iter->d = d;
425 iter->table = 0;
426 iter->index = -1;
427 iter->entry = NULL;
428 iter->nextEntry = NULL;
429 return iter;
430}
431
432dictEntry *dictNext(dictIterator *iter)
433{
434 while (1) {
435 if (iter->entry == NULL) {
436 dictht *ht = &iter->d->ht[iter->table];
437 if (iter->index == -1 && iter->table == 0) iter->d->iterators++;
438 iter->index++;
439 if (iter->index >= (signed) ht->size) {
440 if (dictIsRehashing(iter->d) && iter->table == 0) {
441 iter->table++;
442 iter->index = 0;
443 ht = &iter->d->ht[1];
444 } else {
445 break;
446 }
447 }
448 iter->entry = ht->table[iter->index];
449 } else {
450 iter->entry = iter->nextEntry;
451 }
452 if (iter->entry) {
453 /* We need to save the 'next' here, the iterator user
454 * may delete the entry we are returning. */
455 iter->nextEntry = iter->entry->next;
456 return iter->entry;
457 }
458 }
459 return NULL;
460}
461
462void dictReleaseIterator(dictIterator *iter)
463{
464 if (!(iter->index == -1 && iter->table == 0)) iter->d->iterators--;
465 zfree(iter);
466}
467
468/* Return a random entry from the hash table. Useful to
469 * implement randomized algorithms */
470dictEntry *dictGetRandomKey(dict *d)
471{
472 dictEntry *he, *orighe;
473 unsigned int h;
474 int listlen, listele;
475
476 if (dictSize(d) == 0) return NULL;
477 if (dictIsRehashing(d)) _dictRehashStep(d);
478 if (dictIsRehashing(d)) {
479 do {
480 h = random() % (d->ht[0].size+d->ht[1].size);
481 he = (h >= d->ht[0].size) ? d->ht[1].table[h - d->ht[0].size] :
482 d->ht[0].table[h];
483 } while(he == NULL);
484 } else {
485 do {
486 h = random() & d->ht[0].sizemask;
487 he = d->ht[0].table[h];
488 } while(he == NULL);
489 }
490
491 /* Now we found a non empty bucket, but it is a linked
492 * list and we need to get a random element from the list.
493 * The only sane way to do so is counting the elements and
494 * select a random index. */
495 listlen = 0;
496 orighe = he;
497 while(he) {
498 he = he->next;
499 listlen++;
500 }
501 listele = random() % listlen;
502 he = orighe;
503 while(listele--) he = he->next;
504 return he;
505}
506
507/* ------------------------- private functions ------------------------------ */
508
509/* Expand the hash table if needed */
510static int _dictExpandIfNeeded(dict *d)
511{
512 /* Incremental rehashing already in progress. Return. */
513 if (dictIsRehashing(d)) return DICT_OK;
514
515 /* If the hash table is empty expand it to the intial size. */
516 if (d->ht[0].size == 0) return dictExpand(d, DICT_HT_INITIAL_SIZE);
517
518 /* If we reached the 1:1 ratio, and we are allowed to resize the hash
519 * table (global setting) or we should avoid it but the ratio between
520 * elements/buckets is over the "safe" threshold, we resize doubling
521 * the number of buckets. */
522 if (d->ht[0].used >= d->ht[0].size &&
523 (dict_can_resize ||
524 d->ht[0].used/d->ht[0].size > dict_force_resize_ratio))
525 {
526 return dictExpand(d, ((d->ht[0].size > d->ht[0].used) ?
527 d->ht[0].size : d->ht[0].used)*2);
528 }
529 return DICT_OK;
530}
531
532/* Our hash table capability is a power of two */
533static unsigned long _dictNextPower(unsigned long size)
534{
535 unsigned long i = DICT_HT_INITIAL_SIZE;
536
537 if (size >= LONG_MAX) return LONG_MAX;
538 while(1) {
539 if (i >= size)
540 return i;
541 i *= 2;
542 }
543}
544
545/* Returns the index of a free slot that can be populated with
546 * an hash entry for the given 'key'.
547 * If the key already exists, -1 is returned.
548 *
549 * Note that if we are in the process of rehashing the hash table, the
550 * index is always returned in the context of the second (new) hash table. */
551static int _dictKeyIndex(dict *d, const void *key)
552{
553 unsigned int h, idx, table;
554 dictEntry *he;
555
556 /* Expand the hashtable if needed */
557 if (_dictExpandIfNeeded(d) == DICT_ERR)
558 return -1;
559 /* Compute the key hash value */
560 h = dictHashKey(d, key);
561 for (table = 0; table <= 1; table++) {
562 idx = h & d->ht[table].sizemask;
563 /* Search if this slot does not already contain the given key */
564 he = d->ht[table].table[idx];
565 while(he) {
566 if (dictCompareHashKeys(d, key, he->key))
567 return -1;
568 he = he->next;
569 }
570 if (!dictIsRehashing(d)) break;
571 }
572 return idx;
573}
574
575void dictEmpty(dict *d) {
576 _dictClear(d,&d->ht[0]);
577 _dictClear(d,&d->ht[1]);
578 d->rehashidx = -1;
579 d->iterators = 0;
580}
581
582#define DICT_STATS_VECTLEN 50
583static void _dictPrintStatsHt(dictht *ht) {
584 unsigned long i, slots = 0, chainlen, maxchainlen = 0;
585 unsigned long totchainlen = 0;
586 unsigned long clvector[DICT_STATS_VECTLEN];
587
588 if (ht->used == 0) {
589 printf("No stats available for empty dictionaries\n");
590 return;
591 }
592
593 for (i = 0; i < DICT_STATS_VECTLEN; i++) clvector[i] = 0;
594 for (i = 0; i < ht->size; i++) {
595 dictEntry *he;
596
597 if (ht->table[i] == NULL) {
598 clvector[0]++;
599 continue;
600 }
601 slots++;
602 /* For each hash entry on this slot... */
603 chainlen = 0;
604 he = ht->table[i];
605 while(he) {
606 chainlen++;
607 he = he->next;
608 }
609 clvector[(chainlen < DICT_STATS_VECTLEN) ? chainlen : (DICT_STATS_VECTLEN-1)]++;
610 if (chainlen > maxchainlen) maxchainlen = chainlen;
611 totchainlen += chainlen;
612 }
613 printf("Hash table stats:\n");
614 printf(" table size: %ld\n", ht->size);
615 printf(" number of elements: %ld\n", ht->used);
616 printf(" different slots: %ld\n", slots);
617 printf(" max chain length: %ld\n", maxchainlen);
618 printf(" avg chain length (counted): %.02f\n", (float)totchainlen/slots);
619 printf(" avg chain length (computed): %.02f\n", (float)ht->used/slots);
620 printf(" Chain length distribution:\n");
621 for (i = 0; i < DICT_STATS_VECTLEN-1; i++) {
622 if (clvector[i] == 0) continue;
623 printf(" %s%ld: %ld (%.02f%%)\n",(i == DICT_STATS_VECTLEN-1)?">= ":"", i, clvector[i], ((float)clvector[i]/ht->size)*100);
624 }
625}
626
627void dictPrintStats(dict *d) {
628 _dictPrintStatsHt(&d->ht[0]);
629 if (dictIsRehashing(d)) {
630 printf("-- Rehashing into ht[1]:\n");
631 _dictPrintStatsHt(&d->ht[1]);
632 }
633}
634
635void dictEnableResize(void) {
636 dict_can_resize = 1;
637}
638
639void dictDisableResize(void) {
640 dict_can_resize = 0;
641}
642
#if 0

/* Example hash table type implementations, kept only as a reference.
 * They are not useful for Redis, so the whole section is compiled out. */

/* ----------------------- StringCopy Hash Table Type ------------------------*/

/* Hash a NUL terminated C string with dictGenHashFunction(). */
static unsigned int _dictStringCopyHTHashFunction(const void *key)
{
    int len = strlen(key);

    return dictGenHashFunction(key, len);
}

/* Return a zmalloc()ed copy of the NUL terminated string 'key'. */
static void *_dictStringDup(void *privdata, const void *key)
{
    int len = strlen(key);
    char *copy = zmalloc(len+1);
    DICT_NOTUSED(privdata);

    copy[len] = '\0';
    memcpy(copy, key, len);
    return copy;
}

/* Return 1 if the two C strings are equal, 0 otherwise. */
static int _dictStringCopyHTKeyCompare(void *privdata, const void *key1,
        const void *key2)
{
    DICT_NOTUSED(privdata);

    if (strcmp(key1, key2) != 0) return 0;
    return 1;
}

/* Free a string previously allocated with zmalloc(). */
static void _dictStringDestructor(void *privdata, void *key)
{
    DICT_NOTUSED(privdata);

    zfree(key);
}

/* String keys that are duplicated on insert and freed on removal;
 * values are neither duplicated nor freed. */
dictType dictTypeHeapStringCopyKey = {
    _dictStringCopyHTHashFunction, /* hash function */
    _dictStringDup,                /* key dup */
    NULL,                          /* val dup */
    _dictStringCopyHTKeyCompare,   /* key compare */
    _dictStringDestructor,         /* key destructor */
    NULL                           /* val destructor */
};

/* This is like StringCopy but does not auto-duplicate the key.
 * It's used for interpreter's shared strings. */
dictType dictTypeHeapStrings = {
    _dictStringCopyHTHashFunction, /* hash function */
    NULL,                          /* key dup */
    NULL,                          /* val dup */
    _dictStringCopyHTKeyCompare,   /* key compare */
    _dictStringDestructor,         /* key destructor */
    NULL                           /* val destructor */
};

/* This is like StringCopy but also automatically handles dynamically
 * allocated C strings as values. */
dictType dictTypeHeapStringCopyKeyValue = {
    _dictStringCopyHTHashFunction, /* hash function */
    _dictStringDup,                /* key dup */
    _dictStringDup,                /* val dup */
    _dictStringCopyHTKeyCompare,   /* key compare */
    _dictStringDestructor,         /* key destructor */
    _dictStringDestructor,         /* val destructor */
};
#endif