[apple/libc.git] / stdlib / FreeBSD / strhash.c

/*
 *
 *                      Copyright 1990
 *               Terry Jones & Jordan Hubbard
 *
 *		  PCS Computer Systeme, GmbH.
 *	             Munich, West Germany
 *
 *
 *  All rights reserved.
 *
 *  This is unsupported software and is subject to change without notice.
 *  the author makes no representations about the suitability of this software
 *  for any purpose. It is supplied "as is" without express or implied
 *  warranty.
 *
 *  Permission to use, copy, modify, and distribute this software and its
 *  documentation for any purpose and without fee is hereby granted, provided
 *  that the above copyright notice appear in all copies and that both that
 *  copyright notice and this permission notice appear in supporting
 *  documentation, and that the name of the author not be used in
 *  advertising or publicity pertaining to distribution of the software
 *  without specific, written prior permission.
 *
 */

/*
 * This is a fairly simple open addressing hash scheme.
 * Terry did all the code, I just did the spec.
 * Thanks again, you crazy Aussie..
 *
 */

/*
 * $Log: strhash.c,v $
 * Revision 2.0  90/03/26  01:44:26  jkh
 * pre-beta check-in
 *
 * Revision 1.8  90/03/09  19:22:35  jkh
 * Fixed bogus comment.
 *
 * Revision 1.7  90/03/09  19:01:08  jkh
 * Added comments, GPL.
 *
 * Revision 1.6  90/03/08  17:55:58  terry
 * Rearranged hash_purge to be a tiny bit more efficient.
 * Added verbose option to hash_stats.
 *
 * Revision 1.5  90/03/08  17:19:54  terry
 * Added hash_purge. Added arg to hash_traverse. Changed all
 * void * to Generic.
 *
 * Revision 1.4  90/03/08  12:02:35  terry
 * Fixed problems with allocation that I screwed up last night.
 * Changed bucket lists to be singly linked. Thanks to JKH, my hero.
 *
 * Revision 1.3  90/03/07  21:33:33  terry
 * Cleaned up a few decls to keep gcc -Wall quiet.
 *
 * Revision 1.2  90/03/07  21:14:53  terry
 * Comments. Added HASH_STATS define. Removed hash_find()
 * and new_node().
 *
 * Revision 1.1  90/03/07  20:49:45  terry
 * Initial revision
 *
 *
 */

#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wstrict-prototypes"

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: src/lib/libc/stdlib/strhash.c,v 1.10 2002/03/22 21:53:10 obrien Exp $");

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <strhash.h>

/*
 * Typed null constants used as "nothing here" / failure values.
 * Each expansion is wrapped in parentheses so that it always behaves
 * as a single operand, e.g. as the operand of sizeof or before "->".
 */
#define HASH_NULL    ((hash_table *)0)
#define NODE_NULL    ((hash_node *)0)
#define GENERIC_NULL ((void *)0)

/* Bucket count hash_create() falls back to when asked for size 0. */
#define HASH_SZ 97


/* Reduce a NUL-terminated key to a bucket index for a table of 'size' buckets. */
static int _hash(int size, char *key);
/* Walk the chain starting at 'head'; return the node whose key equals 'key', or 0. */
static hash_node *list_find(caddr_t key, hash_node *head);
/* Give 'node' its own malloc'ed copy of 'key'; returns 1 on success, 0 on failure. */
static int assign_key(char *key, hash_node *node);


/*
 * hash_create()
 *
 * Allocate a new hash table with 'size' buckets and return its address.
 * A size of 0 selects the default of HASH_SZ buckets. Every bucket
 * starts out empty.
 *
 * Returns 0 if size is negative, if the bucket array size would not
 * fit in a size_t, or if either allocation fails. Nothing is leaked
 * on any of the failure paths.
 */
hash_table *
hash_create(int size)
{
    int i;
    hash_table *table;

    /* Validate before allocating so a bad size cannot leak the table. */
    if (size < 0){
	return HASH_NULL;
    }

    if (size == 0){
	size = HASH_SZ;
    }

    /* Refuse bucket counts whose byte size would wrap around. */
    if ((size_t)size > (size_t)-1 / sizeof(hash_node *)){
	return HASH_NULL;
    }

    table = malloc(sizeof *table);
    if (!table){
	return HASH_NULL;
    }

    table->buckets = malloc((size_t)size * sizeof *table->buckets);
    if (!table->buckets){
	/* The table header is useless without its buckets. */
	free(table);
	return HASH_NULL;
    }

    for (i = 0; i < size; i++){
	table->buckets[i] = NODE_NULL;
    }
    table->size = size;

    return table;
}


/*
 * list_find()
 *
 * Scan the chain that begins at head and return the first node
 * whose key compares equal (strcmp) to key, or 0 if none does.
 */
static hash_node *
list_find(caddr_t key, hash_node *head)
{
    hash_node *cur;

    for (cur = head; cur != NODE_NULL; cur = cur->next){
	if (strcmp(cur->key, key) == 0){
	    return cur;
	}
    }
    return NODE_NULL;
}


/*
 * _hash()
 *
 * Fold the bytes of key (up to its terminating NUL) into an unsigned
 * accumulator with shift-and-xor, then reduce it modulo size.
 * Each byte is read as an unsigned char, so the result does not
 * depend on whether plain char is signed.
 */
static int
_hash(int size, char *key)
{
    unsigned int acc = 0;
    const unsigned char *p;

    for (p = (const unsigned char *)key; *p != '\0'; p++){
	acc = (acc << 1) ^ acc ^ *p;
    }

    return (int)(acc % (unsigned int)size);
}

/*
 * hash_destroy()
 *
 * Remove the entry for key from the table, if there is one.
 * When nukefunc is non-null it is called with the node's key and
 * data before the node itself is freed; it is in charge of disposing
 * of the storage held by the data associated with the node.
 *
 * NOTE(review): only the node is freed here, not node->key.
 * Presumably nukefunc is expected to release the key too - confirm
 * against callers.
 */
void
hash_destroy(hash_table *table, char *key, void (*nukefunc)())
{
    int bucket = _hash(table->size, key);
    hash_node **link = &table->buckets[bucket];
    hash_node *victim;

    /*
     * Advance the link (first the bucket slot, then each node's next
     * field) until it points at the matching node or at the end.
     */
    while (*link != NODE_NULL && strcmp((*link)->key, key) != 0) {
	link = &(*link)->next;
    }

    victim = *link;
    if (victim == NODE_NULL) {
	return;
    }

    /* Splice the node out of its chain. */
    *link = victim->next;

    if (nukefunc) {
	(*nukefunc)(victim->key, victim->data);
    }
    free(victim);
}


/*
 * hash_search()
 *
 * Search the table for the given key. Then:
 *
 * 1) If you find it and there is no replacement function, just
 *    return the data stored for it. (This is a simple search).
 * 2) If you find it and there is a replacement function, run
 *    the function on the data you found, and replace the old
 *    data with whatever is passed in datum. Return 0.
 * 3) If you don't find it and there is some datum, insert a
 *    new item into the table. Insertions go at the front of
 *    the bucket. Return a pointer to the newly created node
 *    (non-null), or 0 if memory for the node or its key copy
 *    could not be allocated; in that case the table is left
 *    unchanged and nothing is leaked.
 * 4) Otherwise just return 0.
 *
 */
void *
hash_search(hash_table *table, caddr_t key, void *datum,
	    void (*replace_func)())
{
    int bucket = _hash(table->size, key);
    hash_node *found = list_find(key, table->buckets[bucket]);
    hash_node *new;

    if (found){
	if (!replace_func){
	    return found->data;
	}
	(*replace_func)(found->data);
	found->data = datum;
	return GENERIC_NULL;
    }

    if (!datum){
	return GENERIC_NULL;
    }

    new = (hash_node *)malloc(sizeof(hash_node));
    if (!new){
	return GENERIC_NULL;
    }
    if (!assign_key(key, new)){
	/* The key copy failed: release the half-built node. */
	free(new);
	return GENERIC_NULL;
    }

    new->data = datum;
    new->next = table->buckets[bucket];
    table->buckets[bucket] = new;
    return new;
}


/*
 * assign_key()
 *
 * Give node a private, malloc'ed copy of the string key.
 * Returns 1 on success. Returns 0 if either argument is null or
 * if malloc fails; on malloc failure node->key is left null.
 */
static int
assign_key(char *key, hash_node *node)
{
    size_t nbytes;
    char *copy;

    if (node == NODE_NULL || key == NULL){
	return 0;
    }

    /* Length including the terminating NUL. */
    nbytes = strlen(key) + 1;
    copy = (char *)malloc(nbytes);
    node->key = copy;
    if (copy == NULL){
	return 0;
    }

    memcpy(copy, key, nbytes);
    return 1;
}

/*
 * hash_traverse()
 *
 * Visit every node in the table, bucket by bucket, calling
 * func(key, data, arg) for each one. The walk stops early as soon
 * as func returns 0. Does nothing if func is null.
 */
void
hash_traverse(hash_table *table, int (*func)(), void *arg)
{
    int nbuckets = table->size;
    int b;
    hash_node *node;

    if (func == NULL) {
	return;
    }

    for (b = 0; b < nbuckets; b++) {
	for (node = table->buckets[b]; node != NODE_NULL; node = node->next) {
	    if ((*func)(node->key, node->data, arg) == 0) {
		return;
	    }
	}
    }
}

/*
 * hash_purge()
 *
 * Empty the whole table. For every node, call purge_func (if it is
 * non-null) with the node's key and data, then free the node.
 * Each bucket that held nodes is reset to 0. The table itself and
 * its bucket array are not freed.
 */
void
hash_purge(hash_table *table, void (*purge_func)(char *p1, void *p2))
{
    int nbuckets = table->size;
    int b;

    for (b = 0; b < nbuckets; b++) {
	hash_node *node = table->buckets[b];

	if (node == NODE_NULL) {
	    continue;
	}

	while (node != NODE_NULL) {
	    hash_node *doomed = node;

	    if (purge_func) {
		(*purge_func)(doomed->key, doomed->data);
	    }
	    /* Pick up the successor before the node goes away. */
	    node = doomed->next;
	    free(doomed);
	}
	table->buckets[b] = NODE_NULL;
    }
}

#undef min
/* Fully parenthesized so the result is a single operand in any context. */
#define min(a, b) ((a) < (b) ? (a) : (b))

/*
 * hash_stats()
 *
 * Print statistics about the current table allocation to stdout.
 * With verbose set, each non-empty bucket is also listed with its
 * keys and its element count.
 *
 * Only a running count for the bucket being examined is needed, so
 * no scratch storage is allocated.
 */
void
hash_stats(hash_table *table, int verbose)
{
    int i;
    int total_elements = 0;
    int non_empty_buckets = 0;
    int max_count = 0;
    int max_repeats = 0;
    int size = table->size;

    for (i = 0; i < size; i++){
	int count = 0;
	hash_node *n;

	for (n = table->buckets[i]; n; n = n->next){
	    if (count == 0){
		/* First node seen in this bucket. */
		non_empty_buckets++;
		if (verbose){
		    printf("bucket %2d: ", i);
		}
	    }
	    if (verbose){
		printf(" %s", n->key);
	    }
	    count++;
	}

	total_elements += count;
	if (count > max_count){
	    max_count = count;
	    max_repeats = 1;
	}
	else if (count == max_count){
	    max_repeats++;
	}

	if (count && verbose){
	    printf(" (%d)\n", count);
	}
    }

    printf("\n");
    printf("%d element%s in storage.\n", total_elements, total_elements == 1 ? "" : "s");

    /* total_elements != 0 implies non_empty_buckets != 0, so the divisions are safe. */
    if (total_elements){
	printf("%d of %d (%.2f%%) buckets are in use\n", non_empty_buckets, size,
	       (double)100 * (double)non_empty_buckets / (double)(size));
	printf("the maximum number of elements in a bucket is %d (%d times)\n", max_count, max_repeats);
	printf("average per bucket is %f\n", (double)total_elements / (double)non_empty_buckets);
	printf("optimal would be %f\n", (double)total_elements / (double)(min(size, total_elements)));
    }
    return;
}
#pragma clang diagnostic pop
Commit	Line	Data
9385eb3d A	1	/*
	2	*
	3	* Copyright 1990
	4	* Terry Jones & Jordan Hubbard
	5	*
	6	* PCS Computer Systeme, GmbH.
	7	* Munich, West Germany
	8	*
	9	*
	10	* All rights reserved.
	11	*
	12	* This is unsupported software and is subject to change without notice.
	13	* the author makes no representations about the suitability of this software
	14	* for any purpose. It is supplied "as is" without express or implied
	15	* warranty.
	16	*
	17	* Permission to use, copy, modify, and distribute this software and its
	18	* documentation for any purpose and without fee is hereby granted, provided
	19	* that the above copyright notice appear in all copies and that both that
	20	* copyright notice and this permission notice appear in supporting
	21	* documentation, and that the name of the author not be used in
	22	* advertising or publicity pertaining to distribution of the software
	23	* without specific, written prior permission.
	24	*
	25	*/
	26
	27	/*
	28	* This is a fairly simple open addressing hash scheme.
	29	* Terry did all the code, I just did the spec.
	30	* Thanks again, you crazy Aussie..
	31	*
	32	*/
	33
	34	/*
	35	* $Log: strhash.c,v $
	36	* Revision 2.0 90/03/26 01:44:26 jkh
	37	* pre-beta check-in
	38	*
	39	* Revision 1.8 90/03/09 19:22:35 jkh
	40	* Fixed bogus comment.
	41	*
	42	* Revision 1.7 90/03/09 19:01:08 jkh
	43	* Added comments, GPL.
	44	*
	45	* Revision 1.6 90/03/08 17:55:58 terry
	46	* Rearranged hash_purge to be a tiny bit more efficient.
	47	* Added verbose option to hash_stats.
	48	*
	49	* Revision 1.5 90/03/08 17:19:54 terry
	50	* Added hash_purge. Added arg to hash_traverse. Changed all
	51	* void * to Generic.
	52	*
	53	* Revision 1.4 90/03/08 12:02:35 terry
	54	* Fixed problems with allocation that I screwed up last night.
	55	* Changed bucket lists to be singly linked. Thanks to JKH, my hero.
	56	*
	57	* Revision 1.3 90/03/07 21:33:33 terry
	58	* Cleaned up a few decls to keep gcc -Wall quiet.
	59	*
	60	* Revision 1.2 90/03/07 21:14:53 terry
	61	* Comments. Added HASH_STATS define. Removed hash_find()
	62	* and new_node().
	63	*
	64	* Revision 1.1 90/03/07 20:49:45 terry
65	* Initial revision
66	*
67	*
68	*/
69
70ad1dc8 A	70	#pragma clang diagnostic push
	71	#pragma clang diagnostic ignored "-Wstrict-prototypes"
	72
9385eb3d A	73	#include <sys/cdefs.h>
	74	__FBSDID("$FreeBSD: src/lib/libc/stdlib/strhash.c,v 1.10 2002/03/22 21:53:10 obrien Exp $");
	75
	76	#include <stdio.h>
	77	#include <stdlib.h>
	78	#include <string.h>
	79	#include <sys/types.h>
	80	#include <strhash.h>
	81
	82	#define HASH_NULL (hash_table *)0
	83	#define NODE_NULL (hash_node *)0
	84	#define GENERIC_NULL (void *)0
	85
	86	#define HASH_SZ 97
	87
	88
	89	static int _hash(int size, char *key);
	90	static hash_node *list_find(caddr_t key, hash_node *head);
ad3c9f2a	91	static int assign_key(char *key, hash_node *node);
9385eb3d A	92
	93
	94	/*
	95	* hash_create()
	96	*
	97	* Malloc room for a new hash table and then room for its
	98	* bucket pointers. Then set all the buckets to
	99	* point to 0. Return the address of the new table.
	100	*/
	101	hash_table *
	102	hash_create(int size)
	103	{
	104	int i;
	105	hash_table *new = (hash_table *)malloc(sizeof(hash_table));
	106
	107	if (!new || size < 0){
	108	return HASH_NULL;
	109	}
	110
	111	if (size == 0){
	112	size = HASH_SZ;
	113	}
	114
	115	if (!(new->buckets = (hash_node **)malloc(size * sizeof(hash_node *)))){
	116	return HASH_NULL;
	117	}
	118
	119	for (i = 0; i < size; i++){
	120	new->buckets[i] = NODE_NULL;
	121	}
	122	new->size = size;
	123
	124	return new;
	125	}
	126
	127
	128	/*
	129	* list_find()
	130	*
	131	* Find the key in the linked list pointed to by head.
	132	*/
	133	static hash_node *
	134	list_find(caddr_t key, hash_node *head)
	135	{
	136	while (head){
	137	if (!strcmp(head->key, key)){
	138	return head;
	139	}
	140	head = head->next;
	141	}
	142	return NODE_NULL;
	143	}
	144
	145
	146	/*
	147	* _hash()
	148	*
	149	* Compute the hash value for the given key.
	150	*/
	151	static int
	152	_hash(int size, char *key)
	153	{
	154	unsigned int h = 0x0;
	155
156	while (*key){
157	h = (h << 1) ^ (h ^ (unsigned char) *key++);
158	}
159
160	h %= size;
161	return h;
162	}
163
164	/*
165	* hash_destroy()
166	*
167	* Find the key and (if it's there) remove it entirely.
168	* The function (*nukefunc)() is in charge of disposing
169	* of the storage help by the data associated with the node.
170	*/
171	void
	172	hash_destroy(hash_table *table, char *key, void (*nukefunc)())
173	{
174	int bucket = _hash(table->size, key);
175	hash_node *found = table->buckets[bucket];
176	hash_node *to_free = NODE_NULL;
177
178	if (!found) {
179	return;
180	}
181
182	if (!strcmp(found->key, key)) {
183	/*
184	* It was the head of the list.
185	*/
186	table->buckets[bucket] = found->next;
187	to_free = found;
188	}
189	else {
190	/*
191	* Walk the list, looking one ahead.
192	*/
193	while (found->next) {
194	if (!strcmp(found->next->key, key)) {
195	to_free = found->next;
196	found->next = found->next->next;
197	break;
198	}
199	found = found->next;
200	}
201
202	if (!to_free){
203	return;
204	}
205	}
206
207	if (nukefunc)
208	(*nukefunc)(to_free->key, to_free->data);
209	free(to_free);
210	return;
211	}
212
213
214	/*
215	* hash_search()
216	*
217	* Search the table for the given key. Then:
218	*
219	* 1) If you find it and there is no replacement function, just
220	* return what you found. (This is a simple search).
221	* 2) If you find it and there is a replacement function, run
222	* the function on the data you found, and replace the old
223	* data with whatever is passed in datum. Return 0.
224	* 3) If you don't find it and there is some datum, insert a
225	* new item into the table. Insertions go at the front of
226	* the bucket. Return 0.
227	* 4) Otherwise just return 0.
228	*
229	*/
230	void *
	231	hash_search(hash_table *table, caddr_t key, void *datum,
232	void (*replace_func)())
233	{
234	int bucket = _hash(table->size, key);
235	hash_node *found = list_find(key, table->buckets[bucket]);
236
237	if (found){
238	if (!replace_func){
239	return found->data;
240	}
241	else{
242	(*replace_func)(found->data);
243	found->data = datum;
244	}
245	}
246	else{
247	if (datum){
248
9385eb3d A	249	hash_node *new = (hash_node *)malloc(sizeof(hash_node));
	250
	251	if (!new || !assign_key(key, new)){
	252	return GENERIC_NULL;
	253	}
	254	new->data = datum;
	255	new->next = table->buckets[bucket];
	256	table->buckets[bucket] = new;
	257	return new;
	258	}
	259	}
	260	return GENERIC_NULL;
	261	}
	262
	263
	264	/*
	265	* assign_key()
	266	*
	267	* Set the key value of a node to be 'key'. Get some space from
	268	* malloc and copy it in etc. Return 1 if all is well, 0 otherwise.
	269	*/
	270	static int
	271	assign_key(char *key, hash_node *node)
	272	{
	273	if (!node || !key){
	274	return 0;
	275	}
	276
	277	if (!(node->key = (char *)malloc(strlen(key) + 1))){
	278	return 0;
	279	}
	280
	281	node->key[0] = '\0';
	282	strcat(node->key, key);
	283	return 1;
	284	}
	285
	286	/*
	287	* hash_traverse()
	288	*
	289	* Traverse the hash table and run the function func on the
	290	* data found at each node and the argument we're passed for it.
	291	*/
	292	void
	293	hash_traverse(hash_table *table, int (*func)(), void *arg)
	294	{
	295	int i;
	296	int size = table->size;
	297
	298	if (!func)
	299	return;
	300
	301	for (i = 0; i < size; i++) {
	302	hash_node *n = table->buckets[i];
	303	while (n) {
	304	if ((*func)(n->key, n->data, arg) == 0)
	305	return;
	306	n = n->next;
	307	}
	308	}
	309	return;
	310	}
	311
	312	/*
313	* hash_purge()
314	*
315	* Run through the entire hash table. Call purge_func
316	* on the data found at each node, and then free the node.
317	* Set all the bucket pointers to 0.
318	*/
319	void
	320	hash_purge(hash_table *table, void (*purge_func)(char *p1, void *p2))
321	{
322	int i;
323	int size = table->size;
324
325	for (i = 0; i < size; i++) {
326	hash_node *n = table->buckets[i];
327	if (n) {
328	do {
329	hash_node *to_free = n;
330	if (purge_func) {
331	(*purge_func)(n->key, n->data);
332	}
333	n = n->next;
334	free(to_free);
335	} while (n);
336	table->buckets[i] = NODE_NULL;
337	}
338	}
339	}
340
341	#undef min
342	#define min(a, b) (a) < (b) ? (a) : (b)
343
344	/*
345	* hash_stats()
346	*
347	* Print statistics about the current table allocation to stdout.
348	*/
349	void
350	hash_stats(hash_table *table, int verbose)
351	{
352	int i;
353	int total_elements = 0;
354	int non_empty_buckets = 0;
355	int max_count = 0;
356	int max_repeats = 0;
357	int *counts;
358	int size = table->size;
359
	360	if (!(counts = (int *)malloc(size * sizeof(int)))){
361	fprintf(stderr, "malloc returns 0\n");
362	exit(1);
363	}
364
365	for (i = 0; i < size; i++){
366	int x = 0;
367	hash_node *n = table->buckets[i];
368	counts[i] = 0;
369	while (n){
370	if (!x){
371	x = 1;
372	non_empty_buckets++;
373	if (verbose){
374	printf("bucket %2d: ", i);
375	}
376	}
377	if (verbose){
378	printf(" %s", n->key);
379	}
380	counts[i]++;
381	n = n->next;
382	}
383
384	total_elements += counts[i];
385	if (counts[i] > max_count){
386	max_count = counts[i];
387	max_repeats = 1;
388	}
389	else if (counts[i] == max_count){
390	max_repeats++;
391	}
392
393	if (counts[i] && verbose){
394	printf(" (%d)\n", counts[i]);
395	}
396	}
397
398	printf("\n");
399	printf("%d element%s in storage.\n", total_elements, total_elements == 1 ? "" : "s");
400
401	if (total_elements){
402	printf("%d of %d (%.2f%%) buckets are in use\n", non_empty_buckets, size,
403	(double)100 * (double)non_empty_buckets / (double)(size));
404	printf("the maximum number of elements in a bucket is %d (%d times)\n", max_count, max_repeats);
405	printf("average per bucket is %f\n", (double)total_elements / (double)non_empty_buckets);
406	printf("optimal would be %f\n", (double)total_elements / (double)(min(size, total_elements)));
407	}
408	return;
409	}
70ad1dc8	410	#pragma clang diagnostic pop