git.saurik.com Git - redis.git/blame_incremental

... / ...

Commit	Line	Data
	1	/* Hash Tables Implementation.
	2	*
	3	* This file implements in memory hash tables with insert/del/replace/find/
	4	* get-random-element operations. Hash tables will auto resize if needed
	5	* tables of power of two in size are used, collisions are handled by
	6	* chaining. See the source code for more information... :)
	7	*
	8	* Copyright (c) 2006-2010, Salvatore Sanfilippo <antirez at gmail dot com>
	9	* All rights reserved.
	10	*
	11	* Redistribution and use in source and binary forms, with or without
	12	* modification, are permitted provided that the following conditions are met:
	13	*
	14	* * Redistributions of source code must retain the above copyright notice,
	15	* this list of conditions and the following disclaimer.
	16	* * Redistributions in binary form must reproduce the above copyright
	17	* notice, this list of conditions and the following disclaimer in the
	18	* documentation and/or other materials provided with the distribution.
	19	* * Neither the name of Redis nor the names of its contributors may be used
	20	* to endorse or promote products derived from this software without
	21	* specific prior written permission.
	22	*
	23	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
	24	* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	25	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	26	* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
	27	* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
	28	* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
	29	* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
	30	* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
	31	* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
	32	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
	33	* POSSIBILITY OF SUCH DAMAGE.
	34	*/
	35
	36	#include "fmacros.h"
	37
	38	#include <stdio.h>
	39	#include <stdlib.h>
	40	#include <string.h>
	41	#include <stdarg.h>
	42	#include <assert.h>
	43	#include <limits.h>
	44
	45	#include "dict.h"
	46	#include "zmalloc.h"
	47
	/* Global resize switch, toggled by dictEnableResize() / dictDisableResize()
	 * and consulted by dictResize() and _dictExpandIfNeeded(). Non-zero means
	 * tables may be resized. This matters for Redis: it relies on copy-on-write
	 * and does not want to move much memory around while a child process is
	 * performing a save. */
	static int dict_can_resize = 1;
	53
	54	/* ---------------------------- Utility functions --------------------------- */
	55
	/* Report a library-level error on stderr.
	 *
	 * The message is framed as "\nDICT LIBRARY PANIC: <text>\n\n", where <text>
	 * is 'fmt' expanded printf-style with the variadic arguments.
	 * Despite its name this function only prints; it returns to the caller. */
	static void _dictPanic(const char *fmt, ...)
	{
	    va_list ap;

	    va_start(ap, fmt);
	    fprintf(stderr, "\nDICT LIBRARY PANIC: ");
	    vfprintf(stderr, fmt, ap);
	    fprintf(stderr, "\n\n");
	    va_end(ap);
	}
	66
	67	/* ------------------------- Heap Management Wrappers------------------------ */
	68
	/* Allocate 'size' bytes through zmalloc().
	 *
	 * Returns the new allocation. If zmalloc() returns NULL the failure is
	 * reported with _dictPanic() and the process is aborted: every caller in
	 * this file dereferences the result without checking it, so handing back
	 * NULL would only turn the failure into undefined behavior. */
	static void *_dictAlloc(size_t size)
	{
	    void *p = zmalloc(size);

	    if (p == NULL) {
	        _dictPanic("Out of memory");
	        abort();
	    }
	    return p;
	}
	76
	/* Release memory obtained from _dictAlloc() by forwarding to zfree(). */
	static void _dictFree(void *ptr)
	{
	    zfree(ptr);
	}
	80
	81	/* -------------------------- private prototypes ---------------------------- */
	82
	83	static int _dictExpandIfNeeded(dict *ht);
	84	static unsigned long _dictNextPower(unsigned long size);
	85	static int _dictKeyIndex(dict ht, const void key);
	86	static int _dictInit(dict ht, dictType type, void *privDataPtr);
	87
	88	/* -------------------------- hash functions -------------------------------- */
	89
	/* Thomas Wang's 32 bit Mix Function.
	 *
	 * Scrambles 'key' with a fixed sequence of shift, add and xor steps and
	 * returns the result. All arithmetic is on unsigned int, so overflow
	 * simply wraps. */
	unsigned int dictIntHashFunction(unsigned int key)
	{
	    key += ~(key << 15);
	    key ^=  (key >> 10);
	    key +=  (key << 3);
	    key ^=  (key >> 6);
	    key += ~(key << 11);
	    key ^=  (key >> 16);
	    return key;
	}
	101
	/* Identity hash function for integer keys: the key is its own hash. */
	unsigned int dictIdentityHashFunction(unsigned int key)
	{
	    return key;
	}
	107
	/* Generic hash function (a popular one from Bernstein).
	 * I tested a few and this was the best.
	 *
	 * Hashes the first 'len' bytes of 'buf': starting from 5381, each byte c
	 * updates the state as hash = hash * 33 + c. A 'len' of zero or less
	 * consumes no bytes and yields 5381. */
	unsigned int dictGenHashFunction(const unsigned char *buf, int len) {
	    unsigned int hash = 5381;

	    /* "> 0" keeps a negative length from running off the buffer. */
	    while (len-- > 0)
	        hash = ((hash << 5) + hash) + (*buf++); /* hash * 33 + c */
	    return hash;
	}
	117
	118	/* ----------------------------- API implementation ------------------------- */
	119
	120	/* Reset an hashtable already initialized with ht_init().
	121	* NOTE: This function should only called by ht_destroy(). */
	122	static void _dictReset(dict *ht)
	123	{
	124	ht->table = NULL;
	125	ht->size = 0;
	126	ht->sizemask = 0;
	127	ht->used = 0;
	128	}
	129
	130	/* Create a new hash table */
	131	dict dictCreate(dictType type,
	132	void *privDataPtr)
	133	{
	134	dict ht = _dictAlloc(sizeof(ht));
	135
	136	_dictInit(ht,type,privDataPtr);
	137	return ht;
	138	}
	139
	140	/* Initialize the hash table */
	141	int _dictInit(dict ht, dictType type,
	142	void *privDataPtr)
	143	{
	144	_dictReset(ht);
	145	ht->type = type;
	146	ht->privdata = privDataPtr;
	147	return DICT_OK;
	148	}
	149
	150	/* Resize the table to the minimal size that contains all the elements,
	151	* but with the invariant of a USER/BUCKETS ration near to <= 1 */
	152	int dictResize(dict *ht)
	153	{
	154	int minimal = ht->used;
	155
	156	if (!dict_can_resize) return DICT_ERR;
	157	if (minimal < DICT_HT_INITIAL_SIZE)
	158	minimal = DICT_HT_INITIAL_SIZE;
	159	return dictExpand(ht, minimal);
	160	}
	161
	162	/* Expand or create the hashtable */
	163	int dictExpand(dict *ht, unsigned long size)
	164	{
	165	dict n; /* the new hashtable */
	166	unsigned long realsize = _dictNextPower(size), i;
	167
	168	/* the size is invalid if it is smaller than the number of
	169	* elements already inside the hashtable */
	170	if (ht->used > size)
	171	return DICT_ERR;
	172
	173	_dictInit(&n, ht->type, ht->privdata);
	174	n.size = realsize;
	175	n.sizemask = realsize-1;
	176	n.table = _dictAlloc(realsizesizeof(dictEntry));
	177
	178	/* Initialize all the pointers to NULL */
	179	memset(n.table, 0, realsizesizeof(dictEntry));
	180
	181	/* Copy all the elements from the old to the new table:
	182	* note that if the old hash table is empty ht->size is zero,
	183	* so dictExpand just creates an hash table. */
	184	n.used = ht->used;
	185	for (i = 0; i < ht->size && ht->used > 0; i++) {
	186	dictEntry he, nextHe;
	187
	188	if (ht->table[i] == NULL) continue;
	189
	190	/* For each hash entry on this slot... */
	191	he = ht->table[i];
	192	while(he) {
	193	unsigned int h;
	194
	195	nextHe = he->next;
	196	/* Get the new element index */
	197	h = dictHashKey(ht, he->key) & n.sizemask;
	198	he->next = n.table[h];
	199	n.table[h] = he;
	200	ht->used--;
	201	/* Pass to the next element */
	202	he = nextHe;
	203	}
	204	}
	205	assert(ht->used == 0);
	206	_dictFree(ht->table);
	207
	208	/* Remap the new hashtable in the old */
	209	*ht = n;
	210	return DICT_OK;
	211	}
	212
	213	/* Add an element to the target hash table */
	214	int dictAdd(dict ht, void key, void *val)
	215	{
	216	int index;
	217	dictEntry *entry;
	218
	219	/* Get the index of the new element, or -1 if
	220	* the element already exists. */
	221	if ((index = _dictKeyIndex(ht, key)) == -1)
	222	return DICT_ERR;
	223
	224	/* Allocates the memory and stores key */
	225	entry = _dictAlloc(sizeof(*entry));
	226	entry->next = ht->table[index];
	227	ht->table[index] = entry;
	228
	229	/* Set the hash entry fields. */
	230	dictSetHashKey(ht, entry, key);
	231	dictSetHashVal(ht, entry, val);
	232	ht->used++;
	233	return DICT_OK;
	234	}
	235
	236	/* Add an element, discarding the old if the key already exists.
	237	* Return 1 if the key was added from scratch, 0 if there was already an
	238	* element with such key and dictReplace() just performed a value update
	239	* operation. */
	240	int dictReplace(dict ht, void key, void *val)
	241	{
	242	dictEntry *entry, auxentry;
	243
	244	/* Try to add the element. If the key
	245	* does not exists dictAdd will suceed. */
	246	if (dictAdd(ht, key, val) == DICT_OK)
	247	return 1;
	248	/* It already exists, get the entry */
	249	entry = dictFind(ht, key);
	250	/* Free the old value and set the new one */
	251	/* Set the new value and free the old one. Note that it is important
	252	* to do that in this order, as the value may just be exactly the same
	253	* as the previous one. In this context, think to reference counting,
	254	* you want to increment (set), and then decrement (free), and not the
	255	* reverse. */
	256	auxentry = *entry;
	257	dictSetHashVal(ht, entry, val);
	258	dictFreeEntryVal(ht, &auxentry);
	259	return 0;
	260	}
	261
	262	/* Search and remove an element */
	263	static int dictGenericDelete(dict ht, const void key, int nofree)
	264	{
	265	unsigned int h;
	266	dictEntry he, prevHe;
	267
	268	if (ht->size == 0)
	269	return DICT_ERR;
	270	h = dictHashKey(ht, key) & ht->sizemask;
	271	he = ht->table[h];
	272
	273	prevHe = NULL;
	274	while(he) {
	275	if (dictCompareHashKeys(ht, key, he->key)) {
	276	/* Unlink the element from the list */
	277	if (prevHe)
	278	prevHe->next = he->next;
	279	else
	280	ht->table[h] = he->next;
	281	if (!nofree) {
	282	dictFreeEntryKey(ht, he);
	283	dictFreeEntryVal(ht, he);
	284	}
	285	_dictFree(he);
	286	ht->used--;
	287	return DICT_OK;
	288	}
	289	prevHe = he;
	290	he = he->next;
	291	}
	292	return DICT_ERR; /* not found */
	293	}
	294
	295	int dictDelete(dict ht, const void key) {
	296	return dictGenericDelete(ht,key,0);
	297	}
	298
	299	int dictDeleteNoFree(dict ht, const void key) {
	300	return dictGenericDelete(ht,key,1);
	301	}
	302
	303	/* Destroy an entire hash table */
	304	int _dictClear(dict *ht)
	305	{
	306	unsigned long i;
	307
	308	/* Free all the elements */
	309	for (i = 0; i < ht->size && ht->used > 0; i++) {
	310	dictEntry he, nextHe;
	311
	312	if ((he = ht->table[i]) == NULL) continue;
	313	while(he) {
	314	nextHe = he->next;
	315	dictFreeEntryKey(ht, he);
	316	dictFreeEntryVal(ht, he);
	317	_dictFree(he);
	318	ht->used--;
	319	he = nextHe;
	320	}
	321	}
	322	/* Free the table and the allocated cache structure */
	323	_dictFree(ht->table);
	324	/* Re-initialize the table */
	325	_dictReset(ht);
	326	return DICT_OK; /* never fails */
	327	}
	328
	329	/* Clear & Release the hash table */
	330	void dictRelease(dict *ht)
	331	{
	332	_dictClear(ht);
	333	_dictFree(ht);
	334	}
	335
	336	dictEntry dictFind(dict ht, const void *key)
	337	{
	338	dictEntry *he;
	339	unsigned int h;
	340
	341	if (ht->size == 0) return NULL;
	342	h = dictHashKey(ht, key) & ht->sizemask;
	343	he = ht->table[h];
	344	while(he) {
	345	if (dictCompareHashKeys(ht, key, he->key))
	346	return he;
	347	he = he->next;
	348	}
	349	return NULL;
	350	}
	351
	352	dictIterator dictGetIterator(dict ht)
	353	{
	354	dictIterator iter = _dictAlloc(sizeof(iter));
	355
	356	iter->ht = ht;
	357	iter->index = -1;
	358	iter->entry = NULL;
	359	iter->nextEntry = NULL;
	360	return iter;
	361	}
	362
	363	dictEntry dictNext(dictIterator iter)
	364	{
	365	while (1) {
	366	if (iter->entry == NULL) {
	367	iter->index++;
	368	if (iter->index >=
	369	(signed)iter->ht->size) break;
	370	iter->entry = iter->ht->table[iter->index];
	371	} else {
	372	iter->entry = iter->nextEntry;
	373	}
	374	if (iter->entry) {
	375	/* We need to save the 'next' here, the iterator user
	376	* may delete the entry we are returning. */
	377	iter->nextEntry = iter->entry->next;
	378	return iter->entry;
	379	}
	380	}
	381	return NULL;
	382	}
	383
	384	void dictReleaseIterator(dictIterator *iter)
	385	{
	386	_dictFree(iter);
	387	}
	388
	389	/* Return a random entry from the hash table. Useful to
	390	* implement randomized algorithms */
	391	dictEntry dictGetRandomKey(dict ht)
	392	{
	393	dictEntry *he;
	394	unsigned int h;
	395	int listlen, listele;
	396
	397	if (ht->used == 0) return NULL;
	398	do {
	399	h = random() & ht->sizemask;
	400	he = ht->table[h];
	401	} while(he == NULL);
	402
	403	/* Now we found a non empty bucket, but it is a linked
	404	* list and we need to get a random element from the list.
	405	* The only sane way to do so is to count the element and
	406	* select a random index. */
	407	listlen = 0;
	408	while(he) {
	409	he = he->next;
	410	listlen++;
	411	}
	412	listele = random() % listlen;
	413	he = ht->table[h];
	414	while(listele--) he = he->next;
	415	return he;
	416	}
	417
	418	/* ------------------------- private functions ------------------------------ */
	419
	420	/* Expand the hash table if needed */
	421	static int _dictExpandIfNeeded(dict *ht)
	422	{
	423	/* If the hash table is empty expand it to the intial size,
	424	* if the table is "full" dobule its size. */
	425	if (ht->size == 0)
	426	return dictExpand(ht, DICT_HT_INITIAL_SIZE);
	427	if (ht->used >= ht->size && dict_can_resize)
	428	return dictExpand(ht, ((ht->size > ht->used) ? ht->size : ht->used)*2);
	429	return DICT_OK;
	430	}
	431
	432	/* Our hash table capability is a power of two */
	433	static unsigned long _dictNextPower(unsigned long size)
	434	{
	435	unsigned long i = DICT_HT_INITIAL_SIZE;
	436
	437	if (size >= LONG_MAX) return LONG_MAX;
	438	while(1) {
	439	if (i >= size)
	440	return i;
	441	i *= 2;
	442	}
	443	}
	444
	445	/* Returns the index of a free slot that can be populated with
	446	* an hash entry for the given 'key'.
	447	* If the key already exists, -1 is returned. */
	448	static int _dictKeyIndex(dict ht, const void key)
	449	{
	450	unsigned int h;
	451	dictEntry *he;
	452
	453	/* Expand the hashtable if needed */
	454	if (_dictExpandIfNeeded(ht) == DICT_ERR)
	455	return -1;
	456	/* Compute the key hash value */
	457	h = dictHashKey(ht, key) & ht->sizemask;
	458	/* Search if this slot does not already contain the given key */
	459	he = ht->table[h];
	460	while(he) {
	461	if (dictCompareHashKeys(ht, key, he->key))
	462	return -1;
	463	he = he->next;
	464	}
	465	return h;
	466	}
	467
	468	void dictEmpty(dict *ht) {
	469	_dictClear(ht);
	470	}
	471
	472	#define DICT_STATS_VECTLEN 50
	473	void dictPrintStats(dict *ht) {
	474	unsigned long i, slots = 0, chainlen, maxchainlen = 0;
	475	unsigned long totchainlen = 0;
	476	unsigned long clvector[DICT_STATS_VECTLEN];
	477
	478	if (ht->used == 0) {
	479	printf("No stats available for empty dictionaries\n");
	480	return;
	481	}
	482
	483	for (i = 0; i < DICT_STATS_VECTLEN; i++) clvector[i] = 0;
	484	for (i = 0; i < ht->size; i++) {
	485	dictEntry *he;
	486
	487	if (ht->table[i] == NULL) {
	488	clvector[0]++;
	489	continue;
	490	}
	491	slots++;
	492	/* For each hash entry on this slot... */
	493	chainlen = 0;
	494	he = ht->table[i];
	495	while(he) {
	496	chainlen++;
	497	he = he->next;
	498	}
	499	clvector[(chainlen < DICT_STATS_VECTLEN) ? chainlen : (DICT_STATS_VECTLEN-1)]++;
	500	if (chainlen > maxchainlen) maxchainlen = chainlen;
	501	totchainlen += chainlen;
	502	}
	503	printf("Hash table stats:\n");
	504	printf(" table size: %ld\n", ht->size);
	505	printf(" number of elements: %ld\n", ht->used);
	506	printf(" different slots: %ld\n", slots);
	507	printf(" max chain length: %ld\n", maxchainlen);
	508	printf(" avg chain length (counted): %.02f\n", (float)totchainlen/slots);
	509	printf(" avg chain length (computed): %.02f\n", (float)ht->used/slots);
	510	printf(" Chain length distribution:\n");
	511	for (i = 0; i < DICT_STATS_VECTLEN-1; i++) {
	512	if (clvector[i] == 0) continue;
	513	printf(" %s%ld: %ld (%.02f%%)\n",(i == DICT_STATS_VECTLEN-1)?">= ":"", i, clvector[i], ((float)clvector[i]/ht->size)*100);
	514	}
	515	}
	516
	517	void dictEnableResize(void) {
	518	dict_can_resize = 1;
	519	}
	520
	521	void dictDisableResize(void) {
	522	dict_can_resize = 0;
	523	}
	524
	525	/* ----------------------- StringCopy Hash Table Type ------------------------*/
	526
	/* Hash callback for NUL-terminated string keys: hashes the bytes of 'key'
	 * up to, but not including, the terminator with dictGenHashFunction(). */
	static unsigned int _dictStringCopyHTHashFunction(const void *key)
	{
	    return dictGenHashFunction(key, strlen(key));
	}
	531
	/* Key-dup callback: return a freshly allocated, NUL-terminated copy of
	 * the C string 'key'. 'privdata' is ignored. The copy is meant to be
	 * released with _dictStringCopyHTKeyDestructor(). */
	static void *_dictStringCopyHTKeyDup(void *privdata, const void *key)
	{
	    size_t len = strlen(key);
	    char *copy = _dictAlloc(len+1);
	    DICT_NOTUSED(privdata);

	    memcpy(copy, key, len);
	    copy[len] = '\0';
	    return copy;
	}
	542
	/* Value-dup callback: return a freshly allocated, NUL-terminated copy of
	 * the C string 'val'. 'privdata' is ignored. The copy is meant to be
	 * released with _dictStringKeyValCopyHTValDestructor(). */
	static void *_dictStringKeyValCopyHTValDup(void *privdata, const void *val)
	{
	    size_t len = strlen(val);
	    char *copy = _dictAlloc(len+1);
	    DICT_NOTUSED(privdata);

	    memcpy(copy, val, len);
	    copy[len] = '\0';
	    return copy;
	}
	553
	/* Key-compare callback for C strings: returns non-zero when 'key1' and
	 * 'key2' hold the same characters, zero otherwise. 'privdata' is
	 * ignored. */
	static int _dictStringCopyHTKeyCompare(void *privdata, const void *key1,
	        const void *key2)
	{
	    DICT_NOTUSED(privdata);

	    return strcmp(key1, key2) == 0;
	}
	561
	/* Key-destructor callback: free a heap-allocated string key.
	 * 'privdata' is ignored. */
	static void _dictStringCopyHTKeyDestructor(void *privdata, void *key)
	{
	    DICT_NOTUSED(privdata);

	    _dictFree((void*)key); /* ATTENTION: const cast */
	}
	568
	/* Value-destructor callback: free a heap-allocated string value.
	 * 'privdata' is ignored. */
	static void _dictStringKeyValCopyHTValDestructor(void *privdata, void *val)
	{
	    DICT_NOTUSED(privdata);

	    _dictFree((void*)val); /* ATTENTION: const cast */
	}
	575
	576	dictType dictTypeHeapStringCopyKey = {
	577	_dictStringCopyHTHashFunction, /* hash function */
	578	_dictStringCopyHTKeyDup, /* key dup */
	579	NULL, /* val dup */
	580	_dictStringCopyHTKeyCompare, /* key compare */
	581	_dictStringCopyHTKeyDestructor, /* key destructor */
	582	NULL /* val destructor */
	583	};
	584
	585	/* This is like StringCopy but does not auto-duplicate the key.
	586	* It's used for intepreter's shared strings. */
	587	dictType dictTypeHeapStrings = {
	588	_dictStringCopyHTHashFunction, /* hash function */
	589	NULL, /* key dup */
	590	NULL, /* val dup */
	591	_dictStringCopyHTKeyCompare, /* key compare */
	592	_dictStringCopyHTKeyDestructor, /* key destructor */
	593	NULL /* val destructor */
	594	};
	595
	596	/* This is like StringCopy but also automatically handle dynamic
	597	* allocated C strings as values. */
	598	dictType dictTypeHeapStringCopyKeyValue = {
	599	_dictStringCopyHTHashFunction, /* hash function */
	600	_dictStringCopyHTKeyDup, /* key dup */
	601	_dictStringKeyValCopyHTValDup, /* val dup */
	602	_dictStringCopyHTKeyCompare, /* key compare */
	603	_dictStringCopyHTKeyDestructor, /* key destructor */
	604	_dictStringKeyValCopyHTValDestructor, /* val destructor */
	605	};