[apple/libc.git] / stdlib / FreeBSD / strhash.c

/*
 *
 *                      Copyright 1990
 *               Terry Jones & Jordan Hubbard
 *
 *		  PCS Computer Systeme, GmbH.
 *	             Munich, West Germany
 *
 *
 *  All rights reserved.
 *
 *  This is unsupported software and is subject to change without notice.
 *  the author makes no representations about the suitability of this software
 *  for any purpose. It is supplied "as is" without express or implied
 *  warranty.
 *
 *  Permission to use, copy, modify, and distribute this software and its
 *  documentation for any purpose and without fee is hereby granted, provided
 *  that the above copyright notice appear in all copies and that both that
 *  copyright notice and this permission notice appear in supporting
 *  documentation, and that the name of the author not be used in
 *  advertising or publicity pertaining to distribution of the software
 *  without specific, written prior permission.
 *
 */

/*
 * This is a fairly simple open hashing (separate chaining) scheme.
 * Terry did all the code, I just did the spec.
 * Thanks again, you crazy Aussie..
 *
 */

/*
 * $Log: strhash.c,v $
 * Revision 2.0  90/03/26  01:44:26  jkh
 * pre-beta check-in
 *
 * Revision 1.8  90/03/09  19:22:35  jkh
 * Fixed bogus comment.
 *
 * Revision 1.7  90/03/09  19:01:08  jkh
 * Added comments, GPL.
 *
 * Revision 1.6  90/03/08  17:55:58  terry
 * Rearranged hash_purge to be a tiny bit more efficient.
 * Added verbose option to hash_stats.
 *
 * Revision 1.5  90/03/08  17:19:54  terry
 * Added hash_purge. Added arg to hash_traverse. Changed all
 * void * to Generic.
 *
 * Revision 1.4  90/03/08  12:02:35  terry
 * Fixed problems with allocation that I screwed up last night.
 * Changed bucket lists to be singly linked. Thanks to JKH, my hero.
 *
 * Revision 1.3  90/03/07  21:33:33  terry
 * Cleaned up a few decls to keep gcc -Wall quiet.
 *
 * Revision 1.2  90/03/07  21:14:53  terry
 * Comments. Added HASH_STATS define. Removed hash_find()
 * and new_node().
 *
 * Revision 1.1  90/03/07  20:49:45  terry
 * Initial revision
 *
 *
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: src/lib/libc/stdlib/strhash.c,v 1.10 2002/03/22 21:53:10 obrien Exp $");

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <strhash.h>

/*
 * Typed null-pointer constants used throughout this file.  Each expansion
 * is fully parenthesized so the cast stays a single operand wherever the
 * macro is used (e.g. `sizeof GENERIC_NULL`).
 */
#define HASH_NULL    ((hash_table *)0)
#define NODE_NULL    ((hash_node *)0)
#define GENERIC_NULL ((void *)0)

/* Bucket count hash_create() falls back to when asked for size 0. */
#define HASH_SZ 97


/* Forward declarations for file-local helpers defined further down. */
static int _hash(int size, char *key);
static hash_node *list_find(caddr_t key, hash_node *head);


/*
 * hash_create()
 *
 * Allocate a new hash table with 'size' buckets, every bucket initially
 * empty.  A size of 0 selects the default of HASH_SZ buckets.
 *
 * Returns the address of the new table, or HASH_NULL if 'size' is
 * negative or either allocation fails.  Nothing is left allocated on
 * the failure paths.
 */
hash_table *
hash_create(int size)
{
    int i;
    hash_table *table;

    /* Validate before allocating so a bad size leaves nothing to undo. */
    if (size < 0){
	return HASH_NULL;
    }
    if (size == 0){
	size = HASH_SZ;
    }

    table = malloc(sizeof *table);
    if (!table){
	return HASH_NULL;
    }

    table->buckets = malloc((size_t)size * sizeof *table->buckets);
    if (!table->buckets){
	/* Release the table header rather than leaking it. */
	free(table);
	return HASH_NULL;
    }

    for (i = 0; i < size; i++){
	table->buckets[i] = NODE_NULL;
    }
    table->size = size;

    return table;
}


/*
 * list_find()
 *
 * Walk the chain that starts at 'head' and return the first node whose
 * key compares equal (via strcmp) to 'key'.  Returns NODE_NULL when the
 * chain is empty or no node matches.
 */
static hash_node *
list_find(caddr_t key, hash_node *head)
{
    hash_node *node;

    for (node = head; node != NODE_NULL; node = node->next){
	if (strcmp(node->key, key) == 0){
	    return node;
	}
    }
    return NODE_NULL;
}


/*
 * _hash()
 *
 * Map the NUL-terminated string 'key' to a bucket index.  Each byte is
 * folded into an unsigned accumulator (shift the accumulator left one
 * bit, then XOR in its previous value and the byte), and the result is
 * reduced modulo 'size'.
 */
static int
_hash(int size, char *key)
{
    unsigned int acc = 0;
    const char *p;

    for (p = key; *p != '\0'; p++){
	unsigned int byte = (unsigned char)*p;

	acc = (acc << 1) ^ acc ^ byte;
    }

    return (int)(acc % (unsigned int)size);
}

/*
 * hash_destroy()
 *
 * Remove the entry for 'key' from 'table', if there is one.
 *
 * When 'nukefunc' is non-null it is called with the node's key and data
 * before the node itself is freed; the key is not freed here in that
 * case, so the callback is in charge of disposing of the storage held
 * by both.  When 'nukefunc' is null the key is freed here, since no one
 * else would get the chance to release it.
 *
 * Does nothing if 'table' or 'key' is null or the key is not present.
 */
void
hash_destroy(hash_table *table, char *key, void (*nukefunc)())
{
    hash_node **link;
    hash_node *victim;

    if (!table || !key){
	return;
    }

    /*
     * 'link' always addresses the pointer that leads to the node being
     * inspected (the bucket slot first, then each node's next field),
     * so unlinking is the same step for head and interior nodes.
     */
    link = &table->buckets[_hash(table->size, key)];
    while (*link && strcmp((*link)->key, key) != 0){
	link = &(*link)->next;
    }

    victim = *link;
    if (!victim){
	return;
    }
    *link = victim->next;

    if (nukefunc){
	(*nukefunc)(victim->key, victim->data);
    }
    else{
	free(victim->key);
    }
    free(victim);
}


/*
 * hash_search()
 *
 * Search the table for the given key. Then:
 *
 * 1) If you find it and there is no replacement function, just
 *    return what you found. (This is a simple search).
 * 2) If you find it and there is a replacement function, run
 *    the function on the data you found, and replace the old
 *    data with whatever is passed in datum. Return 0.
 * 3) If you don't find it and there is some datum, insert a
 *    new item into the table. Insertions go at the front of
 *    the bucket. Return 0.
 * 4) Otherwise just return 0.
 *
 */
void *
hash_search(hash_table *table, caddr_t key, void *datum,
	    void (*replace_func)())
{
    int bucket = _hash(table->size, key);
    hash_node *found = list_find(key, table->buckets[bucket]);

    if (found){
	if (!replace_func){
	    return found->data;
	}
	else{
	    (*replace_func)(found->data);
	    found->data = datum;
	}
    }
    else{
	if (datum){

	    static int assign_key();

	    hash_node *new = (hash_node *)malloc(sizeof(hash_node));

	    if (!new || !assign_key(key, new)){
		return GENERIC_NULL;
	    }
	    new->data = datum;
	    new->next = table->buckets[bucket];
	    table->buckets[bucket] = new;
	    return new;
	}
    }
    return GENERIC_NULL;
}


/*
 * assign_key()
 *
 * Give 'node' its own malloc'd copy of the string 'key'.
 * Returns 1 on success, 0 if either argument is null or the
 * allocation fails (node->key is then left null).
 */
static int
assign_key(char *key, hash_node *node)
{
    size_t bytes;
    char *copy;

    if (!key || !node){
	return 0;
    }

    /* Length including the terminating NUL. */
    bytes = strlen(key) + 1;
    copy = (char *)malloc(bytes);
    node->key = copy;
    if (!copy){
	return 0;
    }

    memcpy(copy, key, bytes);
    return 1;
}

/*
 * hash_traverse()
 *
 * Visit every node, bucket by bucket, calling
 * (*func)(key, data, arg) on each one.  The walk stops as soon as
 * func returns 0.  A null func means there is nothing to do.
 */
void
hash_traverse(hash_table *table, int (*func)(), void *arg)
{
    int nbuckets = table->size;
    int b;
    hash_node *node;

    if (!func){
	return;
    }

    for (b = 0; b < nbuckets; b++){
	for (node = table->buckets[b]; node; node = node->next){
	    if ((*func)(node->key, node->data, arg) == 0){
		return;
	    }
	}
    }
}

/*
 * hash_purge()
 *
 * Empty the whole table: free every node and set every bucket
 * pointer to 0.  The table itself and its bucket array stay allocated.
 *
 * When 'purge_func' is non-null it is called with each node's key and
 * data before the node is freed; the key is not freed here in that case,
 * so the callback is in charge of it.  When 'purge_func' is null the
 * key is freed here, since no one else would get the chance to release it.
 *
 * Does nothing if 'table' is null.
 */
void
hash_purge(hash_table *table, void (*purge_func)(char *p1, void *p2))
{
    int i;
    int size;

    if (!table){
	return;
    }
    size = table->size;

    for (i = 0; i < size; i++){
	hash_node *n = table->buckets[i];

	while (n){
	    /* Grab the successor before the node is released. */
	    hash_node *next = n->next;

	    if (purge_func){
		(*purge_func)(n->key, n->data);
	    }
	    else{
		free(n->key);
	    }
	    free(n);
	    n = next;
	}
	table->buckets[i] = NODE_NULL;
    }
}

/*
 * min(a, b): the smaller of two values.  The whole expansion is
 * parenthesized so it acts as a single operand inside a larger
 * expression.  Each argument may be evaluated twice, so do not pass
 * expressions with side effects.
 */
#undef min
#define min(a, b) ((a) < (b) ? (a) : (b))

/*
 * hash_stats()
 *
 * Print statistics about the current table allocation to stdout:
 * the element total and, when the table is not empty, how many buckets
 * are in use, the longest chain (and how many buckets share that
 * length), the average chain length over non-empty buckets, and the
 * ideal average.  With 'verbose' non-zero, each non-empty bucket is
 * also listed with its keys and element count.
 *
 * Chain lengths are tallied in a local counter, so the routine
 * allocates nothing and cannot fail.
 */
void
hash_stats(hash_table *table, int verbose)
{
    int i;
    int total_elements = 0;
    int non_empty_buckets = 0;
    int max_count = 0;
    int max_repeats = 0;
    int size = table->size;

    for (i = 0; i < size; i++){
	int count = 0;
	hash_node *n;

	for (n = table->buckets[i]; n; n = n->next){
	    if (count == 0){
		/* First node seen in this bucket. */
		non_empty_buckets++;
		if (verbose){
		    printf("bucket %2d: ", i);
		}
	    }
	    if (verbose){
		printf(" %s", n->key);
	    }
	    count++;
	}

	total_elements += count;
	if (count > max_count){
	    max_count = count;
	    max_repeats = 1;
	}
	else if (count == max_count){
	    max_repeats++;
	}

	if (count && verbose){
	    printf(" (%d)\n", count);
	}
    }

    printf("\n");
    printf("%d element%s in storage.\n", total_elements, total_elements == 1 ? "" : "s");

    /* The ratios below divide by counts that are zero for an empty table. */
    if (total_elements){
	printf("%d of %d (%.2f%%) buckets are in use\n", non_empty_buckets, size,
	       (double)100 * (double)non_empty_buckets / (double)(size));
	printf("the maximum number of elements in a bucket is %d (%d times)\n", max_count, max_repeats);
	printf("average per bucket is %f\n", (double)total_elements / (double)non_empty_buckets);
	printf("optimal would be %f\n", (double)total_elements / (double)(min(size, total_elements)));
    }
}
Commit	Line	Data
9385eb3d A	1	/*
	2	*
	3	* Copyright 1990
	4	* Terry Jones & Jordan Hubbard
	5	*
	6	* PCS Computer Systeme, GmbH.
	7	* Munich, West Germany
	8	*
	9	*
	10	* All rights reserved.
	11	*
	12	* This is unsupported software and is subject to change without notice.
	13	* the author makes no representations about the suitability of this software
	14	* for any purpose. It is supplied "as is" without express or implied
	15	* warranty.
	16	*
	17	* Permission to use, copy, modify, and distribute this software and its
	18	* documentation for any purpose and without fee is hereby granted, provided
	19	* that the above copyright notice appear in all copies and that both that
	20	* copyright notice and this permission notice appear in supporting
	21	* documentation, and that the name of the author not be used in
	22	* advertising or publicity pertaining to distribution of the software
	23	* without specific, written prior permission.
	24	*
	25	*/
	26
	27	/*
	28	* This is a fairly simple open addressing hash scheme.
	29	* Terry did all the code, I just did the spec.
	30	* Thanks again, you crazy Aussie..
	31	*
	32	*/
	33
	34	/*
	35	* $Log: strhash.c,v $
	36	* Revision 2.0 90/03/26 01:44:26 jkh
	37	* pre-beta check-in
	38	*
	39	* Revision 1.8 90/03/09 19:22:35 jkh
	40	* Fixed bogus comment.
	41	*
	42	* Revision 1.7 90/03/09 19:01:08 jkh
	43	* Added comments, GPL.
	44	*
	45	* Revision 1.6 90/03/08 17:55:58 terry
	46	* Rearranged hash_purge to be a tiny bit more efficient.
	47	* Added verbose option to hash_stats.
	48	*
	49	* Revision 1.5 90/03/08 17:19:54 terry
	50	* Added hash_purge. Added arg to hash_traverse. Changed all
	51	* void * to Generic.
	52	*
	53	* Revision 1.4 90/03/08 12:02:35 terry
	54	* Fixed problems with allocation that I screwed up last night.
	55	* Changed bucket lists to be singly linked. Thanks to JKH, my hero.
	56	*
	57	* Revision 1.3 90/03/07 21:33:33 terry
	58	* Cleaned up a few decls to keep gcc -Wall quiet.
	59	*
	60	* Revision 1.2 90/03/07 21:14:53 terry
	61	* Comments. Added HASH_STATS define. Removed hash_find()
	62	* and new_node().
	63	*
	64	* Revision 1.1 90/03/07 20:49:45 terry
65	* Initial revision
66	*
67	*
68	*/
69
70	#include <sys/cdefs.h>
71	__FBSDID("$FreeBSD: src/lib/libc/stdlib/strhash.c,v 1.10 2002/03/22 21:53:10 obrien Exp $");
72
73	#include <stdio.h>
74	#include <stdlib.h>
75	#include <string.h>
76	#include <sys/types.h>
77	#include <strhash.h>
78
79	#define HASH_NULL (hash_table *)0
80	#define NODE_NULL (hash_node *)0
81	#define GENERIC_NULL (void *)0
82
83	#define HASH_SZ 97
84
85
86	static int _hash(int size, char *key);
87	static hash_node list_find(caddr_t key, hash_node head);
88
89
90	/*
91	* hash_create()
92	*
93	* Malloc room for a new hash table and then room for its
94	* bucket pointers. Then set all the buckets to
95	* point to 0. Return the address of the new table.
96	*/
97	hash_table *
98	hash_create(int size)
99	{
100	int i;
101	hash_table new = (hash_table )malloc(sizeof(hash_table));
102
103	if (!new \|\| size < 0){
104	return HASH_NULL;
105	}
106
107	if (size == 0){
108	size = HASH_SZ;
109	}
110
111	if (!(new->buckets = (hash_node *)malloc(size sizeof(hash_node *)))){
112	return HASH_NULL;
113	}
114
115	for (i = 0; i < size; i++){
116	new->buckets[i] = NODE_NULL;
117	}
118	new->size = size;
119
120	return new;
121	}
122
123
124	/*
125	* list_find()
126	*
127	* Find the key in the linked list pointed to by head.
128	*/
129	static hash_node *
130	list_find(caddr_t key, hash_node *head)
131	{
132	while (head){
133	if (!strcmp(head->key, key)){
134	return head;
135	}
136	head = head->next;
137	}
138	return NODE_NULL;
139	}
140
141
142	/*
143	* _hash()
144	*
145	* Compute the hash value for the given key.
146	*/
147	static int
148	_hash(int size, char *key)
149	{
150	unsigned int h = 0x0;
151
152	while (*key){
153	h = (h << 1) ^ (h ^ (unsigned char) *key++);
154	}
155
156	h %= size;
157	return h;
158	}
159
160	/*
161	* hash_destroy()
162	*
163	* Find the key and (if it's there) remove it entirely.
164	* The function (*nukefunc)() is in charge of disposing
165	* of the storage help by the data associated with the node.
166	*/
167	void
168	hash_destroy(hash_table table, char key, void (*nukefunc)())
169	{
170	int bucket = _hash(table->size, key);
171	hash_node *found = table->buckets[bucket];
172	hash_node *to_free = NODE_NULL;
173
174	if (!found) {
175	return;
176	}
177
178	if (!strcmp(found->key, key)) {
179	/*
180	* It was the head of the list.
181	*/
182	table->buckets[bucket] = found->next;
183	to_free = found;
184	}
185	else {
186	/*
187	* Walk the list, looking one ahead.
188	*/
189	while (found->next) {
190	if (!strcmp(found->next->key, key)) {
191	to_free = found->next;
192	found->next = found->next->next;
193	break;
194	}
195	found = found->next;
196	}
197
198	if (!to_free){
199	return;
200	}
201	}
202
203	if (nukefunc)
204	(*nukefunc)(to_free->key, to_free->data);
205	free(to_free);
206	return;
207	}
208
209
210	/*
211	* hash_search()
212	*
213	* Search the table for the given key. Then:
214	*
215	* 1) If you find it and there is no replacement function, just
216	* return what you found. (This is a simple search).
217	* 2) If you find it and there is a replacement function, run
218	* the function on the data you found, and replace the old
219	* data with whatever is passed in datum. Return 0.
220	* 3) If you don't find it and there is some datum, insert a
221	* new item into the table. Insertions go at the front of
222	* the bucket. Return 0.
223	* 4) Otherwise just return 0.
224	*
225	*/
226	void *
227	hash_search(hash_table table, caddr_t key, void datum,
228	void (*replace_func)())
229	{
230	int bucket = _hash(table->size, key);
231	hash_node *found = list_find(key, table->buckets[bucket]);
232
233	if (found){
234	if (!replace_func){
235	return found->data;
236	}
237	else{
238	(*replace_func)(found->data);
239	found->data = datum;
240	}
241	}
242	else{
243	if (datum){
244
245	static int assign_key();
246
247	hash_node new = (hash_node )malloc(sizeof(hash_node));
248
249	if (!new \|\| !assign_key(key, new)){
250	return GENERIC_NULL;
251	}
252	new->data = datum;
253	new->next = table->buckets[bucket];
254	table->buckets[bucket] = new;
255	return new;
256	}
257	}
258	return GENERIC_NULL;
259	}
260
261
262	/*
263	* assign_key()
264	*
265	* Set the key value of a node to be 'key'. Get some space from
266	* malloc and copy it in etc. Return 1 if all is well, 0 otherwise.
267	*/
268	static int
269	assign_key(char key, hash_node node)
270	{
271	if (!node \|\| !key){
272	return 0;
273	}
274
275	if (!(node->key = (char *)malloc(strlen(key) + 1))){
276	return 0;
277	}
278
279	node->key[0] = '\0';
280	strcat(node->key, key);
281	return 1;
282	}
283
284	/*
285	* hash_traverse()
286	*
287	* Traverse the hash table and run the function func on the
288	* data found at each node and the argument we're passed for it.
289	*/
290	void
291	hash_traverse(hash_table table, int (func)(), void *arg)
292	{
293	int i;
294	int size = table->size;
295
296	if (!func)
297	return;
298
299	for (i = 0; i < size; i++) {
300	hash_node *n = table->buckets[i];
301	while (n) {
302	if ((*func)(n->key, n->data, arg) == 0)
303	return;
304	n = n->next;
305	}
306	}
307	return;
308	}
309
310	/*
311	* hash_purge()
312	*
313	* Run through the entire hash table. Call purge_func
314	* on the data found at each node, and then free the node.
315	* Set all the bucket pointers to 0.
316	*/
317	void
318	hash_purge(hash_table table, void (purge_func)(char p1, void p2))
319	{
320	int i;
321	int size = table->size;
322
323	for (i = 0; i < size; i++) {
324	hash_node *n = table->buckets[i];
325	if (n) {
326	do {
327	hash_node *to_free = n;
328	if (purge_func) {
329	(*purge_func)(n->key, n->data);
330	}
331	n = n->next;
332	free(to_free);
333	} while (n);
334	table->buckets[i] = NODE_NULL;
335	}
336	}
337	}
338
339	#undef min
340	#define min(a, b) (a) < (b) ? (a) : (b)
341
342	/*
343	* hash_stats()
344	*
345	* Print statistics about the current table allocation to stdout.
346	*/
347	void
348	hash_stats(hash_table *table, int verbose)
349	{
350	int i;
351	int total_elements = 0;
352	int non_empty_buckets = 0;
353	int max_count = 0;
354	int max_repeats = 0;
355	int *counts;
356	int size = table->size;
357
358	if (!(counts = (int )malloc(size sizeof(int)))){
359	fprintf(stderr, "malloc returns 0\n");
360	exit(1);
361	}
362
363	for (i = 0; i < size; i++){
364	int x = 0;
365	hash_node *n = table->buckets[i];
366	counts[i] = 0;
367	while (n){
368	if (!x){
369	x = 1;
370	non_empty_buckets++;
371	if (verbose){
372	printf("bucket %2d: ", i);
373	}
374	}
375	if (verbose){
376	printf(" %s", n->key);
377	}
378	counts[i]++;
379	n = n->next;
380	}
381
382	total_elements += counts[i];
383	if (counts[i] > max_count){
384	max_count = counts[i];
385	max_repeats = 1;
386	}
387	else if (counts[i] == max_count){
388	max_repeats++;
389	}
390
391	if (counts[i] && verbose){
392	printf(" (%d)\n", counts[i]);
393	}
394	}
395
396	printf("\n");
397	printf("%d element%s in storage.\n", total_elements, total_elements == 1 ? "" : "s");
398
399	if (total_elements){
400	printf("%d of %d (%.2f%%) buckets are in use\n", non_empty_buckets, size,
401	(double)100 * (double)non_empty_buckets / (double)(size));
402	printf("the maximum number of elements in a bucket is %d (%d times)\n", max_count, max_repeats);
403	printf("average per bucket is %f\n", (double)total_elements / (double)non_empty_buckets);
404	printf("optimal would be %f\n", (double)total_elements / (double)(min(size, total_elements)));
405	}
406	return;
407	}