]> git.saurik.com Git - apple/libc.git/blob - stdlib/FreeBSD/strhash.c
Libc-339.tar.gz
[apple/libc.git] / stdlib / FreeBSD / strhash.c
1 /*
2 *
3 * Copyright 1990
4 * Terry Jones & Jordan Hubbard
5 *
6 * PCS Computer Systeme, GmbH.
7 * Munich, West Germany
8 *
9 *
10 * All rights reserved.
11 *
12 * This is unsupported software and is subject to change without notice.
13 * the author makes no representations about the suitability of this software
14 * for any purpose. It is supplied "as is" without express or implied
15 * warranty.
16 *
17 * Permission to use, copy, modify, and distribute this software and its
18 * documentation for any purpose and without fee is hereby granted, provided
19 * that the above copyright notice appear in all copies and that both that
20 * copyright notice and this permission notice appear in supporting
21 * documentation, and that the name of the author not be used in
22 * advertising or publicity pertaining to distribution of the software
23 * without specific, written prior permission.
24 *
25 */
26
27 /*
28 * This is a fairly simple open addressing hash scheme.
29 * Terry did all the code, I just did the spec.
30 * Thanks again, you crazy Aussie..
31 *
32 */
33
34 /*
35 * $Log: strhash.c,v $
36 * Revision 2.0 90/03/26 01:44:26 jkh
37 * pre-beta check-in
38 *
39 * Revision 1.8 90/03/09 19:22:35 jkh
40 * Fixed bogus comment.
41 *
42 * Revision 1.7 90/03/09 19:01:08 jkh
43 * Added comments, GPL.
44 *
45 * Revision 1.6 90/03/08 17:55:58 terry
46 * Rearranged hash_purge to be a tiny bit more efficient.
47 * Added verbose option to hash_stats.
48 *
49 * Revision 1.5 90/03/08 17:19:54 terry
50 * Added hash_purge. Added arg to hash_traverse. Changed all
51 * void * to Generic.
52 *
53 * Revision 1.4 90/03/08 12:02:35 terry
54 * Fixed problems with allocation that I screwed up last night.
55 * Changed bucket lists to be singly linked. Thanks to JKH, my hero.
56 *
57 * Revision 1.3 90/03/07 21:33:33 terry
58 * Cleaned up a few decls to keep gcc -Wall quiet.
59 *
60 * Revision 1.2 90/03/07 21:14:53 terry
61 * Comments. Added HASH_STATS define. Removed hash_find()
62 * and new_node().
63 *
64 * Revision 1.1 90/03/07 20:49:45 terry
65 * Initial revision
66 *
67 *
68 */
69
70 #include <sys/cdefs.h>
71 __FBSDID("$FreeBSD: src/lib/libc/stdlib/strhash.c,v 1.10 2002/03/22 21:53:10 obrien Exp $");
72
73 #include <stdio.h>
74 #include <stdlib.h>
75 #include <string.h>
76 #include <sys/types.h>
77 #include <strhash.h>
78
79 #define HASH_NULL (hash_table *)0
80 #define NODE_NULL (hash_node *)0
81 #define GENERIC_NULL (void *)0
82
83 #define HASH_SZ 97
84
85
86 static int _hash(int size, char *key);
87 static hash_node *list_find(caddr_t key, hash_node *head);
88
89
90 /*
91 * hash_create()
92 *
93 * Malloc room for a new hash table and then room for its
94 * bucket pointers. Then set all the buckets to
95 * point to 0. Return the address of the new table.
96 */
97 hash_table *
98 hash_create(int size)
99 {
100 int i;
101 hash_table *new = (hash_table *)malloc(sizeof(hash_table));
102
103 if (!new || size < 0){
104 return HASH_NULL;
105 }
106
107 if (size == 0){
108 size = HASH_SZ;
109 }
110
111 if (!(new->buckets = (hash_node **)malloc(size * sizeof(hash_node *)))){
112 return HASH_NULL;
113 }
114
115 for (i = 0; i < size; i++){
116 new->buckets[i] = NODE_NULL;
117 }
118 new->size = size;
119
120 return new;
121 }
122
123
124 /*
125 * list_find()
126 *
127 * Find the key in the linked list pointed to by head.
128 */
129 static hash_node *
130 list_find(caddr_t key, hash_node *head)
131 {
132 while (head){
133 if (!strcmp(head->key, key)){
134 return head;
135 }
136 head = head->next;
137 }
138 return NODE_NULL;
139 }
140
141
142 /*
143 * _hash()
144 *
145 * Compute the hash value for the given key.
146 */
147 static int
148 _hash(int size, char *key)
149 {
150 unsigned int h = 0x0;
151
152 while (*key){
153 h = (h << 1) ^ (h ^ (unsigned char) *key++);
154 }
155
156 h %= size;
157 return h;
158 }
159
160 /*
161 * hash_destroy()
162 *
163 * Find the key and (if it's there) remove it entirely.
164 * The function (*nukefunc)() is in charge of disposing
165 * of the storage help by the data associated with the node.
166 */
167 void
168 hash_destroy(hash_table *table, char *key, void (*nukefunc)())
169 {
170 int bucket = _hash(table->size, key);
171 hash_node *found = table->buckets[bucket];
172 hash_node *to_free = NODE_NULL;
173
174 if (!found) {
175 return;
176 }
177
178 if (!strcmp(found->key, key)) {
179 /*
180 * It was the head of the list.
181 */
182 table->buckets[bucket] = found->next;
183 to_free = found;
184 }
185 else {
186 /*
187 * Walk the list, looking one ahead.
188 */
189 while (found->next) {
190 if (!strcmp(found->next->key, key)) {
191 to_free = found->next;
192 found->next = found->next->next;
193 break;
194 }
195 found = found->next;
196 }
197
198 if (!to_free){
199 return;
200 }
201 }
202
203 if (nukefunc)
204 (*nukefunc)(to_free->key, to_free->data);
205 free(to_free);
206 return;
207 }
208
209
210 /*
211 * hash_search()
212 *
213 * Search the table for the given key. Then:
214 *
215 * 1) If you find it and there is no replacement function, just
216 * return what you found. (This is a simple search).
217 * 2) If you find it and there is a replacement function, run
218 * the function on the data you found, and replace the old
219 * data with whatever is passed in datum. Return 0.
220 * 3) If you don't find it and there is some datum, insert a
221 * new item into the table. Insertions go at the front of
222 * the bucket. Return 0.
223 * 4) Otherwise just return 0.
224 *
225 */
226 void *
227 hash_search(hash_table *table, caddr_t key, void *datum,
228 void (*replace_func)())
229 {
230 int bucket = _hash(table->size, key);
231 hash_node *found = list_find(key, table->buckets[bucket]);
232
233 if (found){
234 if (!replace_func){
235 return found->data;
236 }
237 else{
238 (*replace_func)(found->data);
239 found->data = datum;
240 }
241 }
242 else{
243 if (datum){
244
245 static int assign_key();
246
247 hash_node *new = (hash_node *)malloc(sizeof(hash_node));
248
249 if (!new || !assign_key(key, new)){
250 return GENERIC_NULL;
251 }
252 new->data = datum;
253 new->next = table->buckets[bucket];
254 table->buckets[bucket] = new;
255 return new;
256 }
257 }
258 return GENERIC_NULL;
259 }
260
261
262 /*
263 * assign_key()
264 *
265 * Set the key value of a node to be 'key'. Get some space from
266 * malloc and copy it in etc. Return 1 if all is well, 0 otherwise.
267 */
268 static int
269 assign_key(char *key, hash_node *node)
270 {
271 if (!node || !key){
272 return 0;
273 }
274
275 if (!(node->key = (char *)malloc(strlen(key) + 1))){
276 return 0;
277 }
278
279 node->key[0] = '\0';
280 strcat(node->key, key);
281 return 1;
282 }
283
284 /*
285 * hash_traverse()
286 *
287 * Traverse the hash table and run the function func on the
288 * data found at each node and the argument we're passed for it.
289 */
290 void
291 hash_traverse(hash_table *table, int (*func)(), void *arg)
292 {
293 int i;
294 int size = table->size;
295
296 if (!func)
297 return;
298
299 for (i = 0; i < size; i++) {
300 hash_node *n = table->buckets[i];
301 while (n) {
302 if ((*func)(n->key, n->data, arg) == 0)
303 return;
304 n = n->next;
305 }
306 }
307 return;
308 }
309
310 /*
311 * hash_purge()
312 *
313 * Run through the entire hash table. Call purge_func
314 * on the data found at each node, and then free the node.
315 * Set all the bucket pointers to 0.
316 */
317 void
318 hash_purge(hash_table *table, void (*purge_func)(char *p1, void *p2))
319 {
320 int i;
321 int size = table->size;
322
323 for (i = 0; i < size; i++) {
324 hash_node *n = table->buckets[i];
325 if (n) {
326 do {
327 hash_node *to_free = n;
328 if (purge_func) {
329 (*purge_func)(n->key, n->data);
330 }
331 n = n->next;
332 free(to_free);
333 } while (n);
334 table->buckets[i] = NODE_NULL;
335 }
336 }
337 }
338
339 #undef min
340 #define min(a, b) (a) < (b) ? (a) : (b)
341
342 /*
343 * hash_stats()
344 *
345 * Print statistics about the current table allocation to stdout.
346 */
347 void
348 hash_stats(hash_table *table, int verbose)
349 {
350 int i;
351 int total_elements = 0;
352 int non_empty_buckets = 0;
353 int max_count = 0;
354 int max_repeats = 0;
355 int *counts;
356 int size = table->size;
357
358 if (!(counts = (int *)malloc(size * sizeof(int)))){
359 fprintf(stderr, "malloc returns 0\n");
360 exit(1);
361 }
362
363 for (i = 0; i < size; i++){
364 int x = 0;
365 hash_node *n = table->buckets[i];
366 counts[i] = 0;
367 while (n){
368 if (!x){
369 x = 1;
370 non_empty_buckets++;
371 if (verbose){
372 printf("bucket %2d: ", i);
373 }
374 }
375 if (verbose){
376 printf(" %s", n->key);
377 }
378 counts[i]++;
379 n = n->next;
380 }
381
382 total_elements += counts[i];
383 if (counts[i] > max_count){
384 max_count = counts[i];
385 max_repeats = 1;
386 }
387 else if (counts[i] == max_count){
388 max_repeats++;
389 }
390
391 if (counts[i] && verbose){
392 printf(" (%d)\n", counts[i]);
393 }
394 }
395
396 printf("\n");
397 printf("%d element%s in storage.\n", total_elements, total_elements == 1 ? "" : "s");
398
399 if (total_elements){
400 printf("%d of %d (%.2f%%) buckets are in use\n", non_empty_buckets, size,
401 (double)100 * (double)non_empty_buckets / (double)(size));
402 printf("the maximum number of elements in a bucket is %d (%d times)\n", max_count, max_repeats);
403 printf("average per bucket is %f\n", (double)total_elements / (double)non_empty_buckets);
404 printf("optimal would be %f\n", (double)total_elements / (double)(min(size, total_elements)));
405 }
406 return;
407 }