]> git.saurik.com Git - apple/libc.git/blob - stdlib/FreeBSD/strhash.c
Libc-1439.100.3.tar.gz
[apple/libc.git] / stdlib / FreeBSD / strhash.c
1 /*
2 *
3 * Copyright 1990
4 * Terry Jones & Jordan Hubbard
5 *
6 * PCS Computer Systeme, GmbH.
7 * Munich, West Germany
8 *
9 *
10 * All rights reserved.
11 *
12 * This is unsupported software and is subject to change without notice.
13 * the author makes no representations about the suitability of this software
14 * for any purpose. It is supplied "as is" without express or implied
15 * warranty.
16 *
17 * Permission to use, copy, modify, and distribute this software and its
18 * documentation for any purpose and without fee is hereby granted, provided
19 * that the above copyright notice appear in all copies and that both that
20 * copyright notice and this permission notice appear in supporting
21 * documentation, and that the name of the author not be used in
22 * advertising or publicity pertaining to distribution of the software
23 * without specific, written prior permission.
24 *
25 */
26
27 /*
28 * This is a fairly simple open addressing hash scheme.
29 * Terry did all the code, I just did the spec.
30 * Thanks again, you crazy Aussie..
31 *
32 */
33
34 /*
35 * $Log: strhash.c,v $
36 * Revision 2.0 90/03/26 01:44:26 jkh
37 * pre-beta check-in
38 *
39 * Revision 1.8 90/03/09 19:22:35 jkh
40 * Fixed bogus comment.
41 *
42 * Revision 1.7 90/03/09 19:01:08 jkh
43 * Added comments, GPL.
44 *
45 * Revision 1.6 90/03/08 17:55:58 terry
46 * Rearranged hash_purge to be a tiny bit more efficient.
47 * Added verbose option to hash_stats.
48 *
49 * Revision 1.5 90/03/08 17:19:54 terry
50 * Added hash_purge. Added arg to hash_traverse. Changed all
51 * void * to Generic.
52 *
53 * Revision 1.4 90/03/08 12:02:35 terry
54 * Fixed problems with allocation that I screwed up last night.
55 * Changed bucket lists to be singly linked. Thanks to JKH, my hero.
56 *
57 * Revision 1.3 90/03/07 21:33:33 terry
58 * Cleaned up a few decls to keep gcc -Wall quiet.
59 *
60 * Revision 1.2 90/03/07 21:14:53 terry
61 * Comments. Added HASH_STATS define. Removed hash_find()
62 * and new_node().
63 *
64 * Revision 1.1 90/03/07 20:49:45 terry
65 * Initial revision
66 *
67 *
68 */
69
70 #pragma clang diagnostic push
71 #pragma clang diagnostic ignored "-Wstrict-prototypes"
72
73 #include <sys/cdefs.h>
74 __FBSDID("$FreeBSD: src/lib/libc/stdlib/strhash.c,v 1.10 2002/03/22 21:53:10 obrien Exp $");
75
76 #include <stdio.h>
77 #include <stdlib.h>
78 #include <string.h>
79 #include <sys/types.h>
80 #include <strhash.h>
81
82 #define HASH_NULL (hash_table *)0
83 #define NODE_NULL (hash_node *)0
84 #define GENERIC_NULL (void *)0
85
86 #define HASH_SZ 97
87
88
89 static int _hash(int size, char *key);
90 static hash_node *list_find(caddr_t key, hash_node *head);
91 static int assign_key(char *key, hash_node *node);
92
93
94 /*
95 * hash_create()
96 *
97 * Malloc room for a new hash table and then room for its
98 * bucket pointers. Then set all the buckets to
99 * point to 0. Return the address of the new table.
100 */
101 hash_table *
102 hash_create(int size)
103 {
104 int i;
105 hash_table *new = (hash_table *)malloc(sizeof(hash_table));
106
107 if (!new || size < 0){
108 return HASH_NULL;
109 }
110
111 if (size == 0){
112 size = HASH_SZ;
113 }
114
115 if (!(new->buckets = (hash_node **)malloc(size * sizeof(hash_node *)))){
116 return HASH_NULL;
117 }
118
119 for (i = 0; i < size; i++){
120 new->buckets[i] = NODE_NULL;
121 }
122 new->size = size;
123
124 return new;
125 }
126
127
128 /*
129 * list_find()
130 *
131 * Find the key in the linked list pointed to by head.
132 */
133 static hash_node *
134 list_find(caddr_t key, hash_node *head)
135 {
136 while (head){
137 if (!strcmp(head->key, key)){
138 return head;
139 }
140 head = head->next;
141 }
142 return NODE_NULL;
143 }
144
145
146 /*
147 * _hash()
148 *
149 * Compute the hash value for the given key.
150 */
151 static int
152 _hash(int size, char *key)
153 {
154 unsigned int h = 0x0;
155
156 while (*key){
157 h = (h << 1) ^ (h ^ (unsigned char) *key++);
158 }
159
160 h %= size;
161 return h;
162 }
163
164 /*
165 * hash_destroy()
166 *
167 * Find the key and (if it's there) remove it entirely.
168 * The function (*nukefunc)() is in charge of disposing
169 * of the storage help by the data associated with the node.
170 */
171 void
172 hash_destroy(hash_table *table, char *key, void (*nukefunc)())
173 {
174 int bucket = _hash(table->size, key);
175 hash_node *found = table->buckets[bucket];
176 hash_node *to_free = NODE_NULL;
177
178 if (!found) {
179 return;
180 }
181
182 if (!strcmp(found->key, key)) {
183 /*
184 * It was the head of the list.
185 */
186 table->buckets[bucket] = found->next;
187 to_free = found;
188 }
189 else {
190 /*
191 * Walk the list, looking one ahead.
192 */
193 while (found->next) {
194 if (!strcmp(found->next->key, key)) {
195 to_free = found->next;
196 found->next = found->next->next;
197 break;
198 }
199 found = found->next;
200 }
201
202 if (!to_free){
203 return;
204 }
205 }
206
207 if (nukefunc)
208 (*nukefunc)(to_free->key, to_free->data);
209 free(to_free);
210 return;
211 }
212
213
214 /*
215 * hash_search()
216 *
217 * Search the table for the given key. Then:
218 *
219 * 1) If you find it and there is no replacement function, just
220 * return what you found. (This is a simple search).
221 * 2) If you find it and there is a replacement function, run
222 * the function on the data you found, and replace the old
223 * data with whatever is passed in datum. Return 0.
224 * 3) If you don't find it and there is some datum, insert a
225 * new item into the table. Insertions go at the front of
226 * the bucket. Return 0.
227 * 4) Otherwise just return 0.
228 *
229 */
230 void *
231 hash_search(hash_table *table, caddr_t key, void *datum,
232 void (*replace_func)())
233 {
234 int bucket = _hash(table->size, key);
235 hash_node *found = list_find(key, table->buckets[bucket]);
236
237 if (found){
238 if (!replace_func){
239 return found->data;
240 }
241 else{
242 (*replace_func)(found->data);
243 found->data = datum;
244 }
245 }
246 else{
247 if (datum){
248
249 hash_node *new = (hash_node *)malloc(sizeof(hash_node));
250
251 if (!new || !assign_key(key, new)){
252 return GENERIC_NULL;
253 }
254 new->data = datum;
255 new->next = table->buckets[bucket];
256 table->buckets[bucket] = new;
257 return new;
258 }
259 }
260 return GENERIC_NULL;
261 }
262
263
264 /*
265 * assign_key()
266 *
267 * Set the key value of a node to be 'key'. Get some space from
268 * malloc and copy it in etc. Return 1 if all is well, 0 otherwise.
269 */
270 static int
271 assign_key(char *key, hash_node *node)
272 {
273 if (!node || !key){
274 return 0;
275 }
276
277 if (!(node->key = (char *)malloc(strlen(key) + 1))){
278 return 0;
279 }
280
281 node->key[0] = '\0';
282 strcat(node->key, key);
283 return 1;
284 }
285
286 /*
287 * hash_traverse()
288 *
289 * Traverse the hash table and run the function func on the
290 * data found at each node and the argument we're passed for it.
291 */
292 void
293 hash_traverse(hash_table *table, int (*func)(), void *arg)
294 {
295 int i;
296 int size = table->size;
297
298 if (!func)
299 return;
300
301 for (i = 0; i < size; i++) {
302 hash_node *n = table->buckets[i];
303 while (n) {
304 if ((*func)(n->key, n->data, arg) == 0)
305 return;
306 n = n->next;
307 }
308 }
309 return;
310 }
311
312 /*
313 * hash_purge()
314 *
315 * Run through the entire hash table. Call purge_func
316 * on the data found at each node, and then free the node.
317 * Set all the bucket pointers to 0.
318 */
319 void
320 hash_purge(hash_table *table, void (*purge_func)(char *p1, void *p2))
321 {
322 int i;
323 int size = table->size;
324
325 for (i = 0; i < size; i++) {
326 hash_node *n = table->buckets[i];
327 if (n) {
328 do {
329 hash_node *to_free = n;
330 if (purge_func) {
331 (*purge_func)(n->key, n->data);
332 }
333 n = n->next;
334 free(to_free);
335 } while (n);
336 table->buckets[i] = NODE_NULL;
337 }
338 }
339 }
340
341 #undef min
342 #define min(a, b) (a) < (b) ? (a) : (b)
343
344 /*
345 * hash_stats()
346 *
347 * Print statistics about the current table allocation to stdout.
348 */
349 void
350 hash_stats(hash_table *table, int verbose)
351 {
352 int i;
353 int total_elements = 0;
354 int non_empty_buckets = 0;
355 int max_count = 0;
356 int max_repeats = 0;
357 int *counts;
358 int size = table->size;
359
360 if (!(counts = (int *)malloc(size * sizeof(int)))){
361 fprintf(stderr, "malloc returns 0\n");
362 exit(1);
363 }
364
365 for (i = 0; i < size; i++){
366 int x = 0;
367 hash_node *n = table->buckets[i];
368 counts[i] = 0;
369 while (n){
370 if (!x){
371 x = 1;
372 non_empty_buckets++;
373 if (verbose){
374 printf("bucket %2d: ", i);
375 }
376 }
377 if (verbose){
378 printf(" %s", n->key);
379 }
380 counts[i]++;
381 n = n->next;
382 }
383
384 total_elements += counts[i];
385 if (counts[i] > max_count){
386 max_count = counts[i];
387 max_repeats = 1;
388 }
389 else if (counts[i] == max_count){
390 max_repeats++;
391 }
392
393 if (counts[i] && verbose){
394 printf(" (%d)\n", counts[i]);
395 }
396 }
397
398 printf("\n");
399 printf("%d element%s in storage.\n", total_elements, total_elements == 1 ? "" : "s");
400
401 if (total_elements){
402 printf("%d of %d (%.2f%%) buckets are in use\n", non_empty_buckets, size,
403 (double)100 * (double)non_empty_buckets / (double)(size));
404 printf("the maximum number of elements in a bucket is %d (%d times)\n", max_count, max_repeats);
405 printf("average per bucket is %f\n", (double)total_elements / (double)non_empty_buckets);
406 printf("optimal would be %f\n", (double)total_elements / (double)(min(size, total_elements)));
407 }
408 return;
409 }
410 #pragma clang diagnostic pop