[apple/libc.git] / stdlib / FreeBSD / strhash.c

/*
 *
 *                      Copyright 1990
 *               Terry Jones & Jordan Hubbard
 *
 *		  PCS Computer Systeme, GmbH.
 *	             Munich, West Germany
 *
 *
 *  All rights reserved.
 *
 *  This is unsupported software and is subject to change without notice.
 *  the author makes no representations about the suitability of this software
 *  for any purpose. It is supplied "as is" without express or implied
 *  warranty.
 *
 *  Permission to use, copy, modify, and distribute this software and its
 *  documentation for any purpose and without fee is hereby granted, provided
 *  that the above copyright notice appear in all copies and that both that
 *  copyright notice and this permission notice appear in supporting
 *  documentation, and that the name of the author not be used in
 *  advertising or publicity pertaining to distribution of the software
 *  without specific, written prior permission.
 *
 */

/*
 * This is a fairly simple open hashing (separate chaining) scheme:
 * each bucket holds a singly linked list of nodes.
 * Terry did all the code, I just did the spec.
 * Thanks again, you crazy Aussie..
 *
 */

/*
 * $Log: strhash.c,v $
 * Revision 2.0  90/03/26  01:44:26  jkh
 * pre-beta check-in
 *
 * Revision 1.8  90/03/09  19:22:35  jkh
 * Fixed bogus comment.
 *
 * Revision 1.7  90/03/09  19:01:08  jkh
 * Added comments, GPL.
 *
 * Revision 1.6  90/03/08  17:55:58  terry
 * Rearranged hash_purge to be a tiny bit more efficient.
 * Added verbose option to hash_stats.
 *
 * Revision 1.5  90/03/08  17:19:54  terry
 * Added hash_purge. Added arg to hash_traverse. Changed all
 * void * to Generic.
 *
 * Revision 1.4  90/03/08  12:02:35  terry
 * Fixed problems with allocation that I screwed up last night.
 * Changed bucket lists to be singly linked. Thanks to JKH, my hero.
 *
 * Revision 1.3  90/03/07  21:33:33  terry
 * Cleaned up a few decls to keep gcc -Wall quiet.
 *
 * Revision 1.2  90/03/07  21:14:53  terry
 * Comments. Added HASH_STATS define. Removed hash_find()
 * and new_node().
 *
 * Revision 1.1  90/03/07  20:49:45  terry
 * Initial revision
 *
 *
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: src/lib/libc/stdlib/strhash.c,v 1.10 2002/03/22 21:53:10 obrien Exp $");

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <strhash.h>

/*
 * Typed null pointers used as "nothing here" return and sentinel values.
 * Each expansion is fully parenthesized so that it behaves as a single
 * operand wherever it is used (e.g. after sizeof or next to a postfix
 * operator).
 */
#define HASH_NULL    ((hash_table *)0)
#define NODE_NULL    ((hash_node *)0)
#define GENERIC_NULL ((void *)0)

/* Number of buckets hash_create() uses when it is asked for size 0. */
#define HASH_SZ 97


/* Forward declarations of the file-local helpers defined further down. */
static int _hash(int size, char *key);
static hash_node *list_find(caddr_t key, hash_node *head);
static int assign_key(char *key, hash_node *node);


/*
 * hash_create()
 *
 * Allocate a new hash table with 'size' buckets and return its address.
 * A size of 0 selects the default of HASH_SZ buckets. Every bucket
 * starts out empty (NODE_NULL).
 *
 * Returns HASH_NULL if size is negative or if memory cannot be
 * obtained; nothing stays allocated in either case.
 */
hash_table *
hash_create(int size)
{
    int i;
    hash_table *new;

    /* Reject bad sizes before allocating, so there is nothing to undo. */
    if (size < 0){
	return HASH_NULL;
    }

    if (size == 0){
	size = HASH_SZ;
    }

    if (!(new = (hash_table *)malloc(sizeof(hash_table)))){
	return HASH_NULL;
    }

    if (!(new->buckets = (hash_node **)malloc(size * sizeof(hash_node *)))){
	/* Don't leak the table header when the bucket array can't be had. */
	free(new);
	return HASH_NULL;
    }

    for (i = 0; i < size; i++){
	new->buckets[i] = NODE_NULL;
    }
    new->size = size;

    return new;
}


/*
 * list_find()
 *
 * Walk the chain that starts at 'head' and return the first node whose
 * key compares equal (per strcmp) to 'key'. Returns NODE_NULL when the
 * chain is empty or no node matches.
 */
static hash_node *
list_find(caddr_t key, hash_node *head)
{
    hash_node *cur;

    for (cur = head; cur != NODE_NULL; cur = cur->next){
	if (strcmp(cur->key, key) == 0){
	    return cur;
	}
    }
    return NODE_NULL;
}


/*
 * _hash()
 *
 * Map the NUL-terminated string 'key' to a bucket index in the range
 * 0 .. size-1. Each byte is folded in as an unsigned char, so the
 * result does not depend on whether plain char is signed.
 * 'size' must be non-zero: it is used as the modulus.
 */
static int
_hash(int size, char *key)
{
    unsigned int acc = 0;
    const char *p;

    for (p = key; *p != '\0'; p++){
	acc = (acc << 1) ^ (acc ^ (unsigned char) *p);
    }

    return (int)(acc % size);
}

/*
 * hash_destroy()
 *
 * Find the key and (if it's there) remove it entirely: the node is
 * unlinked from its bucket and freed. Nothing happens when the key is
 * not in the table, or when 'table' or 'key' is null.
 *
 * If nukefunc is non-null it is called as (*nukefunc)(key, data) before
 * the node is freed, and is in charge of disposing of the storage held
 * by the data associated with the node.
 * NOTE(review): the key copy is handed to nukefunc as well and is not
 * freed here in that case -- confirm that callers release it.
 *
 * If nukefunc is null, the node's private copy of the key is freed
 * here, since nothing else would be able to release it. The data
 * pointer is left alone.
 */
void
hash_destroy(hash_table *table, char *key, void (*nukefunc)())
{
    hash_node **link;
    hash_node *to_free;

    if (!table || !key) {
	return;
    }

    /*
     * 'link' always points at the pointer that leads to the node under
     * inspection (the bucket head or some node's 'next'), so the head
     * of the list needs no special case when unlinking.
     */
    link = &table->buckets[_hash(table->size, key)];
    while (*link && strcmp((*link)->key, key)) {
	link = &(*link)->next;
    }

    to_free = *link;
    if (!to_free) {
	return;
    }
    *link = to_free->next;

    if (nukefunc) {
	(*nukefunc)(to_free->key, to_free->data);
    }
    else {
	free(to_free->key);
    }
    free(to_free);
    return;
}


/*
 * hash_search()
 *
 * Search the table for the given key. Then:
 *
 * 1) If you find it and there is no replacement function, just
 *    return the data you found. (This is a simple search).
 * 2) If you find it and there is a replacement function, run
 *    the function on the data you found, and replace the old
 *    data with whatever is passed in datum. Return 0.
 * 3) If you don't find it and there is some datum, insert a
 *    new item into the table. Insertions go at the front of
 *    the bucket. Return a pointer to the new node (not to the
 *    datum), or 0 if memory could not be obtained.
 * 4) Otherwise just return 0.
 *
 * A null table or key also returns 0. The key is copied on
 * insertion; the datum pointer is stored as given.
 */
void *
hash_search(hash_table *table, caddr_t key, void *datum,
	    void (*replace_func)())
{
    int bucket;
    hash_node *found;
    hash_node *new;

    if (!table || !key){
	return GENERIC_NULL;
    }

    bucket = _hash(table->size, key);
    found = list_find(key, table->buckets[bucket]);

    if (found){
	if (!replace_func){
	    return found->data;
	}
	(*replace_func)(found->data);
	found->data = datum;
	return GENERIC_NULL;
    }

    if (!datum){
	return GENERIC_NULL;
    }

    if (!(new = (hash_node *)malloc(sizeof(hash_node)))){
	return GENERIC_NULL;
    }
    if (!assign_key(key, new)){
	/* The key copy failed; don't leak the half-built node. */
	free(new);
	return GENERIC_NULL;
    }
    new->data = datum;
    new->next = table->buckets[bucket];
    table->buckets[bucket] = new;
    return new;
}


/*
 * assign_key()
 *
 * Give 'node' its own heap copy of the string 'key'.
 * Returns 1 on success. Returns 0 if either argument is null, or if
 * malloc fails (in which case node->key has been set to null).
 */
static int
assign_key(char *key, hash_node *node)
{
    size_t nbytes;

    if (key == 0 || node == 0){
	return 0;
    }

    /* Length including the terminating NUL, measured once. */
    nbytes = strlen(key) + 1;

    node->key = (char *)malloc(nbytes);
    if (node->key == 0){
	return 0;
    }

    memcpy(node->key, key, nbytes);
    return 1;
}

/*
 * hash_traverse()
 *
 * Visit every node in the table, bucket by bucket and front to back
 * within a bucket, calling (*func)(key, data, arg) on each one.
 * The walk stops early as soon as func returns 0.
 * A null func makes this a no-op.
 */
void
hash_traverse(hash_table *table, int (*func)(), void *arg)
{
    int nbuckets = table->size;
    int b;
    hash_node *node;

    if (!func) {
	return;
    }

    for (b = 0; b < nbuckets; b++) {
	for (node = table->buckets[b]; node; node = node->next) {
	    if (!(*func)(node->key, node->data, arg)) {
		return;
	    }
	}
    }
}

/*
 * hash_purge()
 *
 * Run through the entire hash table and free every node, leaving all
 * the bucket pointers set to 0. The table itself and its bucket array
 * stay allocated. A null table is ignored.
 *
 * If purge_func is non-null it is called as (*purge_func)(key, data)
 * on each node before that node is freed.
 * NOTE(review): the key copy is handed to purge_func and is not freed
 * here in that case -- confirm that callers release it.
 *
 * If purge_func is null, each node's private copy of the key is freed
 * here, since nothing else would be able to release it. The data
 * pointers are left alone.
 */
void
hash_purge(hash_table *table, void (*purge_func)(char *p1, void *p2))
{
    int i;
    int size;

    if (!table) {
	return;
    }
    size = table->size;

    for (i = 0; i < size; i++) {
	hash_node *n = table->buckets[i];

	while (n) {
	    hash_node *to_free = n;

	    if (purge_func) {
		(*purge_func)(n->key, n->data);
	    }
	    /* Step past the node before its memory goes away. */
	    n = n->next;
	    if (!purge_func) {
		free(to_free->key);
	    }
	    free(to_free);
	}
	table->buckets[i] = NODE_NULL;
    }
}

#undef min
/*
 * Smaller of a and b. The whole expansion is parenthesized so that it
 * acts as a single operand inside a larger expression. Each argument
 * may be evaluated twice, so don't pass expressions with side effects.
 */
#define min(a, b) ((a) < (b) ? (a) : (b))

/*
 * hash_stats()
 *
 * Print statistics about the current table allocation to stdout:
 * the number of stored elements and, if there are any, how many
 * buckets are in use, the longest chain length (and how many buckets
 * reach it), the average chain length over the non-empty buckets, and
 * the average an even spread would give.
 *
 * If verbose is non-zero, each non-empty bucket is also listed with
 * its keys and its element count.
 *
 * A null table is ignored. No memory is allocated.
 */
void
hash_stats(hash_table *table, int verbose)
{
    int i;
    int total_elements = 0;
    int non_empty_buckets = 0;
    int max_count = 0;
    int max_repeats = 0;
    int size;

    if (!table){
	return;
    }
    size = table->size;

    for (i = 0; i < size; i++){
	/* Chain length of bucket i; only the current one is ever needed. */
	int count = 0;
	hash_node *n;

	for (n = table->buckets[i]; n; n = n->next){
	    if (count == 0){
		/* First node seen in this bucket. */
		non_empty_buckets++;
		if (verbose){
		    printf("bucket %2d: ", i);
		}
	    }
	    if (verbose){
		printf(" %s", n->key);
	    }
	    count++;
	}

	total_elements += count;
	if (count > max_count){
	    max_count = count;
	    max_repeats = 1;
	}
	else if (count == max_count){
	    max_repeats++;
	}

	if (count && verbose){
	    printf(" (%d)\n", count);
	}
    }

    printf("\n");
    printf("%d element%s in storage.\n", total_elements, total_elements == 1 ? "" : "s");

    if (total_elements){
	printf("%d of %d (%.2f%%) buckets are in use\n", non_empty_buckets, size,
	       (double)100 * (double)non_empty_buckets / (double)(size));
	printf("the maximum number of elements in a bucket is %d (%d times)\n", max_count, max_repeats);
	printf("average per bucket is %f\n", (double)total_elements / (double)non_empty_buckets);
	printf("optimal would be %f\n", (double)total_elements / (double)(min(size, total_elements)));
    }
    return;
}
Commit	Line	Data
9385eb3d A	1	/*
	2	*
	3	* Copyright 1990
	4	* Terry Jones & Jordan Hubbard
	5	*
	6	* PCS Computer Systeme, GmbH.
	7	* Munich, West Germany
	8	*
	9	*
	10	* All rights reserved.
	11	*
	12	* This is unsupported software and is subject to change without notice.
	13	* the author makes no representations about the suitability of this software
	14	* for any purpose. It is supplied "as is" without express or implied
	15	* warranty.
	16	*
	17	* Permission to use, copy, modify, and distribute this software and its
	18	* documentation for any purpose and without fee is hereby granted, provided
	19	* that the above copyright notice appear in all copies and that both that
	20	* copyright notice and this permission notice appear in supporting
	21	* documentation, and that the name of the author not be used in
	22	* advertising or publicity pertaining to distribution of the software
	23	* without specific, written prior permission.
	24	*
	25	*/
	26
	27	/*
	28	* This is a fairly simple open addressing hash scheme.
	29	* Terry did all the code, I just did the spec.
	30	* Thanks again, you crazy Aussie..
	31	*
	32	*/
	33
	34	/*
	35	* $Log: strhash.c,v $
	36	* Revision 2.0 90/03/26 01:44:26 jkh
	37	* pre-beta check-in
	38	*
	39	* Revision 1.8 90/03/09 19:22:35 jkh
	40	* Fixed bogus comment.
	41	*
	42	* Revision 1.7 90/03/09 19:01:08 jkh
	43	* Added comments, GPL.
	44	*
	45	* Revision 1.6 90/03/08 17:55:58 terry
	46	* Rearranged hash_purge to be a tiny bit more efficient.
	47	* Added verbose option to hash_stats.
	48	*
	49	* Revision 1.5 90/03/08 17:19:54 terry
	50	* Added hash_purge. Added arg to hash_traverse. Changed all
	51	* void * to Generic.
	52	*
	53	* Revision 1.4 90/03/08 12:02:35 terry
	54	* Fixed problems with allocation that I screwed up last night.
	55	* Changed bucket lists to be singly linked. Thanks to JKH, my hero.
	56	*
	57	* Revision 1.3 90/03/07 21:33:33 terry
	58	* Cleaned up a few decls to keep gcc -Wall quiet.
	59	*
	60	* Revision 1.2 90/03/07 21:14:53 terry
	61	* Comments. Added HASH_STATS define. Removed hash_find()
	62	* and new_node().
	63	*
	64	* Revision 1.1 90/03/07 20:49:45 terry
65	* Initial revision
66	*
67	*
68	*/
69
70	#include <sys/cdefs.h>
71	__FBSDID("$FreeBSD: src/lib/libc/stdlib/strhash.c,v 1.10 2002/03/22 21:53:10 obrien Exp $");
72
73	#include <stdio.h>
74	#include <stdlib.h>
75	#include <string.h>
76	#include <sys/types.h>
77	#include <strhash.h>
78
79	#define HASH_NULL (hash_table *)0
80	#define NODE_NULL (hash_node *)0
81	#define GENERIC_NULL (void *)0
82
83	#define HASH_SZ 97
84
85
86	static int _hash(int size, char *key);
87	static hash_node list_find(caddr_t key, hash_node head);
ad3c9f2a	88	static int assign_key(char key, hash_node node);
9385eb3d A	89
	90
	91	/*
	92	* hash_create()
	93	*
	94	* Malloc room for a new hash table and then room for its
	95	* bucket pointers. Then set all the buckets to
	96	* point to 0. Return the address of the new table.
	97	*/
	98	hash_table *
	99	hash_create(int size)
	100	{
	101	int i;
	102	hash_table new = (hash_table )malloc(sizeof(hash_table));
	103
	104	if (!new \|\| size < 0){
	105	return HASH_NULL;
	106	}
	107
	108	if (size == 0){
	109	size = HASH_SZ;
	110	}
	111
	112	if (!(new->buckets = (hash_node *)malloc(size sizeof(hash_node *)))){
	113	return HASH_NULL;
	114	}
	115
	116	for (i = 0; i < size; i++){
	117	new->buckets[i] = NODE_NULL;
	118	}
	119	new->size = size;
	120
	121	return new;
	122	}
	123
	124
	125	/*
	126	* list_find()
	127	*
	128	* Find the key in the linked list pointed to by head.
	129	*/
	130	static hash_node *
	131	list_find(caddr_t key, hash_node *head)
	132	{
	133	while (head){
	134	if (!strcmp(head->key, key)){
	135	return head;
	136	}
	137	head = head->next;
	138	}
	139	return NODE_NULL;
	140	}
	141
	142
	143	/*
	144	* _hash()
	145	*
	146	* Compute the hash value for the given key.
	147	*/
	148	static int
	149	_hash(int size, char *key)
	150	{
	151	unsigned int h = 0x0;
	152
153	while (*key){
154	h = (h << 1) ^ (h ^ (unsigned char) *key++);
155	}
156
157	h %= size;
158	return h;
159	}
160
161	/*
162	* hash_destroy()
163	*
164	* Find the key and (if it's there) remove it entirely.
165	* The function (*nukefunc)() is in charge of disposing
166	* of the storage help by the data associated with the node.
167	*/
168	void
169	hash_destroy(hash_table table, char key, void (*nukefunc)())
170	{
171	int bucket = _hash(table->size, key);
172	hash_node *found = table->buckets[bucket];
173	hash_node *to_free = NODE_NULL;
174
175	if (!found) {
176	return;
177	}
178
179	if (!strcmp(found->key, key)) {
180	/*
181	* It was the head of the list.
182	*/
183	table->buckets[bucket] = found->next;
184	to_free = found;
185	}
186	else {
187	/*
188	* Walk the list, looking one ahead.
189	*/
190	while (found->next) {
191	if (!strcmp(found->next->key, key)) {
192	to_free = found->next;
193	found->next = found->next->next;
194	break;
195	}
196	found = found->next;
197	}
198
199	if (!to_free){
200	return;
201	}
202	}
203
204	if (nukefunc)
205	(*nukefunc)(to_free->key, to_free->data);
206	free(to_free);
207	return;
208	}
209
210
211	/*
212	* hash_search()
213	*
214	* Search the table for the given key. Then:
215	*
216	* 1) If you find it and there is no replacement function, just
217	* return what you found. (This is a simple search).
218	* 2) If you find it and there is a replacement function, run
219	* the function on the data you found, and replace the old
220	* data with whatever is passed in datum. Return 0.
221	* 3) If you don't find it and there is some datum, insert a
222	* new item into the table. Insertions go at the front of
223	* the bucket. Return 0.
224	* 4) Otherwise just return 0.
225	*
226	*/
227	void *
228	hash_search(hash_table table, caddr_t key, void datum,
229	void (*replace_func)())
230	{
231	int bucket = _hash(table->size, key);
232	hash_node *found = list_find(key, table->buckets[bucket]);
233
234	if (found){
235	if (!replace_func){
236	return found->data;
237	}
238	else{
239	(*replace_func)(found->data);
240	found->data = datum;
241	}
242	}
243	else{
244	if (datum){
245
9385eb3d A	246	hash_node new = (hash_node )malloc(sizeof(hash_node));
	247
	248	if (!new \|\| !assign_key(key, new)){
	249	return GENERIC_NULL;
	250	}
	251	new->data = datum;
	252	new->next = table->buckets[bucket];
	253	table->buckets[bucket] = new;
	254	return new;
	255	}
	256	}
	257	return GENERIC_NULL;
	258	}
	259
	260
	261	/*
	262	* assign_key()
	263	*
	264	* Set the key value of a node to be 'key'. Get some space from
	265	* malloc and copy it in etc. Return 1 if all is well, 0 otherwise.
	266	*/
	267	static int
	268	assign_key(char key, hash_node node)
	269	{
	270	if (!node \|\| !key){
	271	return 0;
	272	}
	273
	274	if (!(node->key = (char *)malloc(strlen(key) + 1))){
	275	return 0;
	276	}
	277
	278	node->key[0] = '\0';
	279	strcat(node->key, key);
	280	return 1;
	281	}
	282
	283	/*
	284	* hash_traverse()
	285	*
	286	* Traverse the hash table and run the function func on the
	287	* data found at each node and the argument we're passed for it.
	288	*/
	289	void
	290	hash_traverse(hash_table table, int (func)(), void *arg)
	291	{
	292	int i;
	293	int size = table->size;
	294
	295	if (!func)
	296	return;
	297
	298	for (i = 0; i < size; i++) {
	299	hash_node *n = table->buckets[i];
	300	while (n) {
	301	if ((*func)(n->key, n->data, arg) == 0)
	302	return;
	303	n = n->next;
	304	}
	305	}
	306	return;
	307	}
	308
	309	/*
310	* hash_purge()
311	*
312	* Run through the entire hash table. Call purge_func
313	* on the data found at each node, and then free the node.
314	* Set all the bucket pointers to 0.
315	*/
316	void
317	hash_purge(hash_table table, void (purge_func)(char p1, void p2))
318	{
319	int i;
320	int size = table->size;
321
322	for (i = 0; i < size; i++) {
323	hash_node *n = table->buckets[i];
324	if (n) {
325	do {
326	hash_node *to_free = n;
327	if (purge_func) {
328	(*purge_func)(n->key, n->data);
329	}
330	n = n->next;
331	free(to_free);
332	} while (n);
333	table->buckets[i] = NODE_NULL;
334	}
335	}
336	}
337
338	#undef min
339	#define min(a, b) (a) < (b) ? (a) : (b)
340
341	/*
342	* hash_stats()
343	*
344	* Print statistics about the current table allocation to stdout.
345	*/
346	void
347	hash_stats(hash_table *table, int verbose)
348	{
349	int i;
350	int total_elements = 0;
351	int non_empty_buckets = 0;
352	int max_count = 0;
353	int max_repeats = 0;
354	int *counts;
355	int size = table->size;
356
357	if (!(counts = (int )malloc(size sizeof(int)))){
358	fprintf(stderr, "malloc returns 0\n");
359	exit(1);
360	}
361
362	for (i = 0; i < size; i++){
363	int x = 0;
364	hash_node *n = table->buckets[i];
365	counts[i] = 0;
366	while (n){
367	if (!x){
368	x = 1;
369	non_empty_buckets++;
370	if (verbose){
371	printf("bucket %2d: ", i);
372	}
373	}
374	if (verbose){
375	printf(" %s", n->key);
376	}
377	counts[i]++;
378	n = n->next;
379	}
380
381	total_elements += counts[i];
382	if (counts[i] > max_count){
383	max_count = counts[i];
384	max_repeats = 1;
385	}
386	else if (counts[i] == max_count){
387	max_repeats++;
388	}
389
390	if (counts[i] && verbose){
391	printf(" (%d)\n", counts[i]);
392	}
393	}
394
395	printf("\n");
396	printf("%d element%s in storage.\n", total_elements, total_elements == 1 ? "" : "s");
397
398	if (total_elements){
399	printf("%d of %d (%.2f%%) buckets are in use\n", non_empty_buckets, size,
400	(double)100 * (double)non_empty_buckets / (double)(size));
401	printf("the maximum number of elements in a bucket is %d (%d times)\n", max_count, max_repeats);
402	printf("average per bucket is %f\n", (double)total_elements / (double)non_empty_buckets);
403	printf("optimal would be %f\n", (double)total_elements / (double)(min(size, total_elements)));
404	}
405	return;
406	}