git.saurik.com Git - redis.git/blame_incremental

... / ...

Commit	Line	Data
	1	/* Hash Tables Implementation.
	2	*
	3	* This file implements in memory hash tables with insert/del/replace/find/
	4	* get-random-element operations. Hash tables will auto resize if needed
	5	* tables of power of two in size are used, collisions are handled by
	6	* chaining. See the source code for more information... :)
	7	*
	8	* Copyright (c) 2006-2009, Salvatore Sanfilippo <antirez at gmail dot com>
	9	* All rights reserved.
	10	*
	11	* Redistribution and use in source and binary forms, with or without
	12	* modification, are permitted provided that the following conditions are met:
	13	*
	14	* * Redistributions of source code must retain the above copyright notice,
	15	* this list of conditions and the following disclaimer.
	16	* * Redistributions in binary form must reproduce the above copyright
	17	* notice, this list of conditions and the following disclaimer in the
	18	* documentation and/or other materials provided with the distribution.
	19	* * Neither the name of Redis nor the names of its contributors may be used
	20	* to endorse or promote products derived from this software without
	21	* specific prior written permission.
	22	*
	23	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
	24	* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	25	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	26	* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
	27	* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
	28	* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
	29	* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
	30	* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
	31	* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
	32	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
	33	* POSSIBILITY OF SUCH DAMAGE.
	34	*/
	35
	36	#include <stdio.h>
	37	#include <stdlib.h>
	38	#include <string.h>
	39	#include <stdarg.h>
	40	#include <assert.h>
	41
	42	#include "dict.h"
	43	#include "zmalloc.h"
	44
/* ---------------------------- Utility functions --------------------------- */
	46
	47	static void _dictPanic(const char *fmt, ...)
	48	{
	49	va_list ap;
	50
	51	va_start(ap, fmt);
	52	fprintf(stderr, "\nDICT LIBRARY PANIC: ");
	53	vfprintf(stderr, fmt, ap);
	54	fprintf(stderr, "\n\n");
	55	va_end(ap);
	56	}
	57
	58	/* ------------------------- Heap Management Wrappers------------------------ */
	59
	60	static void *_dictAlloc(int size)
	61	{
	62	void *p = zmalloc(size);
	63	if (p == NULL)
	64	_dictPanic("Out of memory");
	65	return p;
	66	}
	67
	68	static void _dictFree(void *ptr) {
	69	zfree(ptr);
	70	}
	71
	72	/* -------------------------- private prototypes ---------------------------- */
	73
	74	static int _dictExpandIfNeeded(dict *ht);
	75	static unsigned int _dictNextPower(unsigned int size);
	76	static int _dictKeyIndex(dict ht, const void key);
	77	static int _dictInit(dict ht, dictType type, void *privDataPtr);
	78
	79	/* -------------------------- hash functions -------------------------------- */
	80
	81	/* Thomas Wang's 32 bit Mix Function */
	82	unsigned int dictIntHashFunction(unsigned int key)
	83	{
	84	key += ~(key << 15);
	85	key ^= (key >> 10);
	86	key += (key << 3);
	87	key ^= (key >> 6);
	88	key += ~(key << 11);
	89	key ^= (key >> 16);
	90	return key;
	91	}
	92
	93	/* Identity hash function for integer keys */
	94	unsigned int dictIdentityHashFunction(unsigned int key)
	95	{
	96	return key;
	97	}
	98
	99	/* Generic hash function (a popular one from Bernstein).
	100	* I tested a few and this was the best. */
	101	unsigned int dictGenHashFunction(const unsigned char *buf, int len) {
	102	unsigned int hash = 5381;
	103
	104	while (len--)
	105	hash = ((hash << 5) + hash) + (buf++); / hash * 33 + c */
	106	return hash;
	107	}
	108
	109	/* ----------------------------- API implementation ------------------------- */
	110
	111	/* Reset an hashtable already initialized with ht_init().
	112	* NOTE: This function should only called by ht_destroy(). */
	113	static void _dictReset(dict *ht)
	114	{
	115	ht->table = NULL;
	116	ht->size = 0;
	117	ht->sizemask = 0;
	118	ht->used = 0;
	119	}
	120
	121	/* Create a new hash table */
	122	dict dictCreate(dictType type,
	123	void *privDataPtr)
	124	{
	125	dict ht = _dictAlloc(sizeof(ht));
	126
	127	_dictInit(ht,type,privDataPtr);
	128	return ht;
	129	}
	130
	131	/* Initialize the hash table */
	132	int _dictInit(dict ht, dictType type,
	133	void *privDataPtr)
	134	{
	135	_dictReset(ht);
	136	ht->type = type;
	137	ht->privdata = privDataPtr;
	138	return DICT_OK;
	139	}
	140
	141	/* Resize the table to the minimal size that contains all the elements,
	142	* but with the invariant of a USER/BUCKETS ration near to <= 1 */
	143	int dictResize(dict *ht)
	144	{
	145	int minimal = ht->used;
	146
	147	if (minimal < DICT_HT_INITIAL_SIZE)
	148	minimal = DICT_HT_INITIAL_SIZE;
	149	return dictExpand(ht, minimal);
	150	}
	151
	152	/* Expand or create the hashtable */
	153	int dictExpand(dict *ht, unsigned int size)
	154	{
	155	dict n; /* the new hashtable */
	156	unsigned int realsize = _dictNextPower(size), i;
	157
	158	/* the size is invalid if it is smaller than the number of
	159	* elements already inside the hashtable */
	160	if (ht->used > size)
	161	return DICT_ERR;
	162
	163	_dictInit(&n, ht->type, ht->privdata);
	164	n.size = realsize;
	165	n.sizemask = realsize-1;
	166	n.table = _dictAlloc(realsizesizeof(dictEntry));
	167
	168	/* Initialize all the pointers to NULL */
	169	memset(n.table, 0, realsizesizeof(dictEntry));
	170
	171	/* Copy all the elements from the old to the new table:
	172	* note that if the old hash table is empty ht->size is zero,
	173	* so dictExpand just creates an hash table. */
	174	n.used = ht->used;
	175	for (i = 0; i < ht->size && ht->used > 0; i++) {
	176	dictEntry he, nextHe;
	177
	178	if (ht->table[i] == NULL) continue;
	179
	180	/* For each hash entry on this slot... */
	181	he = ht->table[i];
	182	while(he) {
	183	unsigned int h;
	184
	185	nextHe = he->next;
	186	/* Get the new element index */
	187	h = dictHashKey(ht, he->key) & n.sizemask;
	188	he->next = n.table[h];
	189	n.table[h] = he;
	190	ht->used--;
	191	/* Pass to the next element */
	192	he = nextHe;
	193	}
	194	}
	195	assert(ht->used == 0);
	196	_dictFree(ht->table);
	197
	198	/* Remap the new hashtable in the old */
	199	*ht = n;
	200	return DICT_OK;
	201	}
	202
	203	/* Add an element to the target hash table */
	204	int dictAdd(dict ht, void key, void *val)
	205	{
	206	int index;
	207	dictEntry *entry;
	208
	209	/* Get the index of the new element, or -1 if
	210	* the element already exists. */
	211	if ((index = _dictKeyIndex(ht, key)) == -1)
	212	return DICT_ERR;
	213
	214	/* Allocates the memory and stores key */
	215	entry = _dictAlloc(sizeof(*entry));
	216	entry->next = ht->table[index];
	217	ht->table[index] = entry;
	218
	219	/* Set the hash entry fields. */
	220	dictSetHashKey(ht, entry, key);
	221	dictSetHashVal(ht, entry, val);
	222	ht->used++;
	223	return DICT_OK;
	224	}
	225
	226	/* Add an element, discarding the old if the key already exists */
	227	int dictReplace(dict ht, void key, void *val)
	228	{
	229	dictEntry *entry;
	230
	231	/* Try to add the element. If the key
	232	* does not exists dictAdd will suceed. */
	233	if (dictAdd(ht, key, val) == DICT_OK)
	234	return DICT_OK;
	235	/* It already exists, get the entry */
	236	entry = dictFind(ht, key);
	237	/* Free the old value and set the new one */
	238	dictFreeEntryVal(ht, entry);
	239	dictSetHashVal(ht, entry, val);
	240	return DICT_OK;
	241	}
	242
	243	/* Search and remove an element */
	244	static int dictGenericDelete(dict ht, const void key, int nofree)
	245	{
	246	unsigned int h;
	247	dictEntry he, prevHe;
	248
	249	if (ht->size == 0)
	250	return DICT_ERR;
	251	h = dictHashKey(ht, key) & ht->sizemask;
	252	he = ht->table[h];
	253
	254	prevHe = NULL;
	255	while(he) {
	256	if (dictCompareHashKeys(ht, key, he->key)) {
	257	/* Unlink the element from the list */
	258	if (prevHe)
	259	prevHe->next = he->next;
	260	else
	261	ht->table[h] = he->next;
	262	if (!nofree) {
	263	dictFreeEntryKey(ht, he);
	264	dictFreeEntryVal(ht, he);
	265	}
	266	_dictFree(he);
	267	ht->used--;
	268	return DICT_OK;
	269	}
	270	prevHe = he;
	271	he = he->next;
	272	}
	273	return DICT_ERR; /* not found */
	274	}
	275
	276	int dictDelete(dict ht, const void key) {
	277	return dictGenericDelete(ht,key,0);
	278	}
	279
	280	int dictDeleteNoFree(dict ht, const void key) {
	281	return dictGenericDelete(ht,key,1);
	282	}
	283
	284	/* Destroy an entire hash table */
	285	int _dictClear(dict *ht)
	286	{
	287	unsigned int i;
	288
	289	/* Free all the elements */
	290	for (i = 0; i < ht->size && ht->used > 0; i++) {
	291	dictEntry he, nextHe;
	292
	293	if ((he = ht->table[i]) == NULL) continue;
	294	while(he) {
	295	nextHe = he->next;
	296	dictFreeEntryKey(ht, he);
	297	dictFreeEntryVal(ht, he);
	298	_dictFree(he);
	299	ht->used--;
	300	he = nextHe;
	301	}
	302	}
	303	/* Free the table and the allocated cache structure */
	304	_dictFree(ht->table);
	305	/* Re-initialize the table */
	306	_dictReset(ht);
	307	return DICT_OK; /* never fails */
	308	}
	309
	310	/* Clear & Release the hash table */
	311	void dictRelease(dict *ht)
	312	{
	313	_dictClear(ht);
	314	_dictFree(ht);
	315	}
	316
	317	dictEntry dictFind(dict ht, const void *key)
	318	{
	319	dictEntry *he;
	320	unsigned int h;
	321
	322	if (ht->size == 0) return NULL;
	323	h = dictHashKey(ht, key) & ht->sizemask;
	324	he = ht->table[h];
	325	while(he) {
	326	if (dictCompareHashKeys(ht, key, he->key))
	327	return he;
	328	he = he->next;
	329	}
	330	return NULL;
	331	}
	332
	333	dictIterator dictGetIterator(dict ht)
	334	{
	335	dictIterator iter = _dictAlloc(sizeof(iter));
	336
	337	iter->ht = ht;
	338	iter->index = -1;
	339	iter->entry = NULL;
	340	iter->nextEntry = NULL;
	341	return iter;
	342	}
	343
	344	dictEntry dictNext(dictIterator iter)
	345	{
	346	while (1) {
	347	if (iter->entry == NULL) {
	348	iter->index++;
	349	if (iter->index >=
	350	(signed)iter->ht->size) break;
	351	iter->entry = iter->ht->table[iter->index];
	352	} else {
	353	iter->entry = iter->nextEntry;
	354	}
	355	if (iter->entry) {
	356	/* We need to save the 'next' here, the iterator user
	357	* may delete the entry we are returning. */
	358	iter->nextEntry = iter->entry->next;
	359	return iter->entry;
	360	}
	361	}
	362	return NULL;
	363	}
	364
	365	void dictReleaseIterator(dictIterator *iter)
	366	{
	367	_dictFree(iter);
	368	}
	369
	370	/* Return a random entry from the hash table. Useful to
	371	* implement randomized algorithms */
	372	dictEntry dictGetRandomKey(dict ht)
	373	{
	374	dictEntry *he;
	375	unsigned int h;
	376	int listlen, listele;
	377
	378	if (ht->size == 0) return NULL;
	379	do {
	380	h = random() & ht->sizemask;
	381	he = ht->table[h];
	382	} while(he == NULL);
	383
	384	/* Now we found a non empty bucket, but it is a linked
	385	* list and we need to get a random element from the list.
	386	* The only sane way to do so is to count the element and
	387	* select a random index. */
	388	listlen = 0;
	389	while(he) {
	390	he = he->next;
	391	listlen++;
	392	}
	393	listele = random() % listlen;
	394	he = ht->table[h];
	395	while(listele--) he = he->next;
	396	return he;
	397	}
	398
	399	/* ------------------------- private functions ------------------------------ */
	400
	401	/* Expand the hash table if needed */
	402	static int _dictExpandIfNeeded(dict *ht)
	403	{
	404	/* If the hash table is empty expand it to the intial size,
	405	* if the table is "full" dobule its size. */
	406	if (ht->size == 0)
	407	return dictExpand(ht, DICT_HT_INITIAL_SIZE);
	408	if (ht->used == ht->size)
	409	return dictExpand(ht, ht->size*2);
	410	return DICT_OK;
	411	}
	412
	413	/* Our hash table capability is a power of two */
	414	static unsigned int _dictNextPower(unsigned int size)
	415	{
	416	unsigned int i = DICT_HT_INITIAL_SIZE;
	417
	418	if (size >= 2147483648U)
	419	return 2147483648U;
	420	while(1) {
	421	if (i >= size)
	422	return i;
	423	i *= 2;
	424	}
	425	}
	426
	427	/* Returns the index of a free slot that can be populated with
	428	* an hash entry for the given 'key'.
	429	* If the key already exists, -1 is returned. */
	430	static int _dictKeyIndex(dict ht, const void key)
	431	{
	432	unsigned int h;
	433	dictEntry *he;
	434
	435	/* Expand the hashtable if needed */
	436	if (_dictExpandIfNeeded(ht) == DICT_ERR)
	437	return -1;
	438	/* Compute the key hash value */
	439	h = dictHashKey(ht, key) & ht->sizemask;
	440	/* Search if this slot does not already contain the given key */
	441	he = ht->table[h];
	442	while(he) {
	443	if (dictCompareHashKeys(ht, key, he->key))
	444	return -1;
	445	he = he->next;
	446	}
	447	return h;
	448	}
	449
	450	void dictEmpty(dict *ht) {
	451	_dictClear(ht);
	452	}
	453
	454	#define DICT_STATS_VECTLEN 50
	455	void dictPrintStats(dict *ht) {
	456	unsigned int i, slots = 0, chainlen, maxchainlen = 0;
	457	unsigned int totchainlen = 0;
	458	unsigned int clvector[DICT_STATS_VECTLEN];
	459
	460	if (ht->used == 0) {
	461	printf("No stats available for empty dictionaries\n");
	462	return;
	463	}
	464
	465	for (i = 0; i < DICT_STATS_VECTLEN; i++) clvector[i] = 0;
	466	for (i = 0; i < ht->size; i++) {
	467	dictEntry *he;
	468
	469	if (ht->table[i] == NULL) {
	470	clvector[0]++;
	471	continue;
	472	}
	473	slots++;
	474	/* For each hash entry on this slot... */
	475	chainlen = 0;
	476	he = ht->table[i];
	477	while(he) {
	478	chainlen++;
	479	he = he->next;
	480	}
	481	clvector[(chainlen < DICT_STATS_VECTLEN) ? chainlen : (DICT_STATS_VECTLEN-1)]++;
	482	if (chainlen > maxchainlen) maxchainlen = chainlen;
	483	totchainlen += chainlen;
	484	}
	485	printf("Hash table stats:\n");
	486	printf(" table size: %d\n", ht->size);
	487	printf(" number of elements: %d\n", ht->used);
	488	printf(" different slots: %d\n", slots);
	489	printf(" max chain length: %d\n", maxchainlen);
	490	printf(" avg chain length (counted): %.02f\n", (float)totchainlen/slots);
	491	printf(" avg chain length (computed): %.02f\n", (float)ht->used/slots);
	492	printf(" Chain length distribution:\n");
	493	for (i = 0; i < DICT_STATS_VECTLEN-1; i++) {
	494	if (clvector[i] == 0) continue;
	495	printf(" %s%d: %d (%.02f%%)\n",(i == DICT_STATS_VECTLEN-1)?">= ":"", i, clvector[i], ((float)clvector[i]/ht->size)*100);
	496	}
	497	}
	498
	499	/* ----------------------- StringCopy Hash Table Type ------------------------*/
	500
	501	static unsigned int _dictStringCopyHTHashFunction(const void *key)
	502	{
	503	return dictGenHashFunction(key, strlen(key));
	504	}
	505
	506	static void _dictStringCopyHTKeyDup(void privdata, const void *key)
	507	{
	508	int len = strlen(key);
	509	char *copy = _dictAlloc(len+1);
	510	DICT_NOTUSED(privdata);
	511
	512	memcpy(copy, key, len);
	513	copy[len] = '\0';
	514	return copy;
	515	}
	516
	517	static void _dictStringKeyValCopyHTValDup(void privdata, const void *val)
	518	{
	519	int len = strlen(val);
	520	char *copy = _dictAlloc(len+1);
	521	DICT_NOTUSED(privdata);
	522
	523	memcpy(copy, val, len);
	524	copy[len] = '\0';
	525	return copy;
	526	}
	527
	528	static int _dictStringCopyHTKeyCompare(void privdata, const void key1,
	529	const void *key2)
	530	{
	531	DICT_NOTUSED(privdata);
	532
	533	return strcmp(key1, key2) == 0;
	534	}
	535
	536	static void _dictStringCopyHTKeyDestructor(void privdata, void key)
	537	{
	538	DICT_NOTUSED(privdata);
	539
	540	_dictFree((void)key); / ATTENTION: const cast */
	541	}
	542
	543	static void _dictStringKeyValCopyHTValDestructor(void privdata, void val)
	544	{
	545	DICT_NOTUSED(privdata);
	546
	547	_dictFree((void)val); / ATTENTION: const cast */
	548	}
	549
	550	dictType dictTypeHeapStringCopyKey = {
	551	_dictStringCopyHTHashFunction, /* hash function */
	552	_dictStringCopyHTKeyDup, /* key dup */
	553	NULL, /* val dup */
	554	_dictStringCopyHTKeyCompare, /* key compare */
	555	_dictStringCopyHTKeyDestructor, /* key destructor */
	556	NULL /* val destructor */
	557	};
	558
	559	/* This is like StringCopy but does not auto-duplicate the key.
	560	* It's used for intepreter's shared strings. */
	561	dictType dictTypeHeapStrings = {
	562	_dictStringCopyHTHashFunction, /* hash function */
	563	NULL, /* key dup */
	564	NULL, /* val dup */
	565	_dictStringCopyHTKeyCompare, /* key compare */
	566	_dictStringCopyHTKeyDestructor, /* key destructor */
	567	NULL /* val destructor */
	568	};
	569
	570	/* This is like StringCopy but also automatically handle dynamic
	571	* allocated C strings as values. */
	572	dictType dictTypeHeapStringCopyKeyValue = {
	573	_dictStringCopyHTHashFunction, /* hash function */
	574	_dictStringCopyHTKeyDup, /* key dup */
	575	_dictStringKeyValCopyHTValDup, /* val dup */
	576	_dictStringCopyHTKeyCompare, /* key compare */
	577	_dictStringCopyHTKeyDestructor, /* key destructor */
	578	_dictStringKeyValCopyHTValDestructor, /* val destructor */
	579	};