]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * | |
3 | * Copyright 1990 | |
4 | * Terry Jones & Jordan Hubbard | |
5 | * | |
6 | * PCS Computer Systeme, GmbH. | |
7 | * Munich, West Germany | |
8 | * | |
9 | * | |
10 | * All rights reserved. | |
11 | * | |
12 | * This is unsupported software and is subject to change without notice. | |
13 | * the author makes no representations about the suitability of this software | |
14 | * for any purpose. It is supplied "as is" without express or implied | |
15 | * warranty. | |
16 | * | |
17 | * Permission to use, copy, modify, and distribute this software and its | |
18 | * documentation for any purpose and without fee is hereby granted, provided | |
19 | * that the above copyright notice appear in all copies and that both that | |
20 | * copyright notice and this permission notice appear in supporting | |
21 | * documentation, and that the name of the author not be used in | |
22 | * advertising or publicity pertaining to distribution of the software | |
23 | * without specific, written prior permission. | |
24 | * | |
25 | */ | |
26 | ||
27 | /* | |
28 | * This is a fairly simple open addressing hash scheme. | |
29 | * Terry did all the code, I just did the spec. | |
30 | * Thanks again, you crazy Aussie.. | |
31 | * | |
32 | */ | |
33 | ||
34 | /* | |
35 | * $Log: strhash.c,v $ | |
36 | * Revision 2.0 90/03/26 01:44:26 jkh | |
37 | * pre-beta check-in | |
38 | * | |
39 | * Revision 1.8 90/03/09 19:22:35 jkh | |
40 | * Fixed bogus comment. | |
41 | * | |
42 | * Revision 1.7 90/03/09 19:01:08 jkh | |
43 | * Added comments, GPL. | |
44 | * | |
45 | * Revision 1.6 90/03/08 17:55:58 terry | |
46 | * Rearranged hash_purge to be a tiny bit more efficient. | |
47 | * Added verbose option to hash_stats. | |
48 | * | |
49 | * Revision 1.5 90/03/08 17:19:54 terry | |
50 | * Added hash_purge. Added arg to hash_traverse. Changed all | |
51 | * void * to Generic. | |
52 | * | |
53 | * Revision 1.4 90/03/08 12:02:35 terry | |
54 | * Fixed problems with allocation that I screwed up last night. | |
55 | * Changed bucket lists to be singly linked. Thanks to JKH, my hero. | |
56 | * | |
57 | * Revision 1.3 90/03/07 21:33:33 terry | |
58 | * Cleaned up a few decls to keep gcc -Wall quiet. | |
59 | * | |
60 | * Revision 1.2 90/03/07 21:14:53 terry | |
61 | * Comments. Added HASH_STATS define. Removed hash_find() | |
62 | * and new_node(). | |
63 | * | |
64 | * Revision 1.1 90/03/07 20:49:45 terry | |
65 | * Initial revision | |
66 | * | |
67 | * | |
68 | */ | |
69 | ||
70 | #include <sys/cdefs.h> | |
71 | __FBSDID("$FreeBSD: src/lib/libc/stdlib/strhash.c,v 1.10 2002/03/22 21:53:10 obrien Exp $"); | |
72 | ||
73 | #include <stdio.h> | |
74 | #include <stdlib.h> | |
75 | #include <string.h> | |
76 | #include <sys/types.h> | |
77 | #include <strhash.h> | |
78 | ||
79 | #define HASH_NULL (hash_table *)0 | |
80 | #define NODE_NULL (hash_node *)0 | |
81 | #define GENERIC_NULL (void *)0 | |
82 | ||
83 | #define HASH_SZ 97 | |
84 | ||
85 | ||
86 | static int _hash(int size, char *key); | |
87 | static hash_node *list_find(caddr_t key, hash_node *head); | |
88 | static int assign_key(char *key, hash_node *node); | |
89 | ||
90 | ||
91 | /* | |
92 | * hash_create() | |
93 | * | |
94 | * Malloc room for a new hash table and then room for its | |
95 | * bucket pointers. Then set all the buckets to | |
96 | * point to 0. Return the address of the new table. | |
97 | */ | |
98 | hash_table * | |
99 | hash_create(int size) | |
100 | { | |
101 | int i; | |
102 | hash_table *new = (hash_table *)malloc(sizeof(hash_table)); | |
103 | ||
104 | if (!new || size < 0){ | |
105 | return HASH_NULL; | |
106 | } | |
107 | ||
108 | if (size == 0){ | |
109 | size = HASH_SZ; | |
110 | } | |
111 | ||
112 | if (!(new->buckets = (hash_node **)malloc(size * sizeof(hash_node *)))){ | |
113 | return HASH_NULL; | |
114 | } | |
115 | ||
116 | for (i = 0; i < size; i++){ | |
117 | new->buckets[i] = NODE_NULL; | |
118 | } | |
119 | new->size = size; | |
120 | ||
121 | return new; | |
122 | } | |
123 | ||
124 | ||
125 | /* | |
126 | * list_find() | |
127 | * | |
128 | * Find the key in the linked list pointed to by head. | |
129 | */ | |
130 | static hash_node * | |
131 | list_find(caddr_t key, hash_node *head) | |
132 | { | |
133 | while (head){ | |
134 | if (!strcmp(head->key, key)){ | |
135 | return head; | |
136 | } | |
137 | head = head->next; | |
138 | } | |
139 | return NODE_NULL; | |
140 | } | |
141 | ||
142 | ||
143 | /* | |
144 | * _hash() | |
145 | * | |
146 | * Compute the hash value for the given key. | |
147 | */ | |
148 | static int | |
149 | _hash(int size, char *key) | |
150 | { | |
151 | unsigned int h = 0x0; | |
152 | ||
153 | while (*key){ | |
154 | h = (h << 1) ^ (h ^ (unsigned char) *key++); | |
155 | } | |
156 | ||
157 | h %= size; | |
158 | return h; | |
159 | } | |
160 | ||
161 | /* | |
162 | * hash_destroy() | |
163 | * | |
164 | * Find the key and (if it's there) remove it entirely. | |
165 | * The function (*nukefunc)() is in charge of disposing | |
166 | * of the storage help by the data associated with the node. | |
167 | */ | |
168 | void | |
169 | hash_destroy(hash_table *table, char *key, void (*nukefunc)()) | |
170 | { | |
171 | int bucket = _hash(table->size, key); | |
172 | hash_node *found = table->buckets[bucket]; | |
173 | hash_node *to_free = NODE_NULL; | |
174 | ||
175 | if (!found) { | |
176 | return; | |
177 | } | |
178 | ||
179 | if (!strcmp(found->key, key)) { | |
180 | /* | |
181 | * It was the head of the list. | |
182 | */ | |
183 | table->buckets[bucket] = found->next; | |
184 | to_free = found; | |
185 | } | |
186 | else { | |
187 | /* | |
188 | * Walk the list, looking one ahead. | |
189 | */ | |
190 | while (found->next) { | |
191 | if (!strcmp(found->next->key, key)) { | |
192 | to_free = found->next; | |
193 | found->next = found->next->next; | |
194 | break; | |
195 | } | |
196 | found = found->next; | |
197 | } | |
198 | ||
199 | if (!to_free){ | |
200 | return; | |
201 | } | |
202 | } | |
203 | ||
204 | if (nukefunc) | |
205 | (*nukefunc)(to_free->key, to_free->data); | |
206 | free(to_free); | |
207 | return; | |
208 | } | |
209 | ||
210 | ||
211 | /* | |
212 | * hash_search() | |
213 | * | |
214 | * Search the table for the given key. Then: | |
215 | * | |
216 | * 1) If you find it and there is no replacement function, just | |
217 | * return what you found. (This is a simple search). | |
218 | * 2) If you find it and there is a replacement function, run | |
219 | * the function on the data you found, and replace the old | |
220 | * data with whatever is passed in datum. Return 0. | |
221 | * 3) If you don't find it and there is some datum, insert a | |
222 | * new item into the table. Insertions go at the front of | |
223 | * the bucket. Return 0. | |
224 | * 4) Otherwise just return 0. | |
225 | * | |
226 | */ | |
227 | void * | |
228 | hash_search(hash_table *table, caddr_t key, void *datum, | |
229 | void (*replace_func)()) | |
230 | { | |
231 | int bucket = _hash(table->size, key); | |
232 | hash_node *found = list_find(key, table->buckets[bucket]); | |
233 | ||
234 | if (found){ | |
235 | if (!replace_func){ | |
236 | return found->data; | |
237 | } | |
238 | else{ | |
239 | (*replace_func)(found->data); | |
240 | found->data = datum; | |
241 | } | |
242 | } | |
243 | else{ | |
244 | if (datum){ | |
245 | ||
246 | hash_node *new = (hash_node *)malloc(sizeof(hash_node)); | |
247 | ||
248 | if (!new || !assign_key(key, new)){ | |
249 | return GENERIC_NULL; | |
250 | } | |
251 | new->data = datum; | |
252 | new->next = table->buckets[bucket]; | |
253 | table->buckets[bucket] = new; | |
254 | return new; | |
255 | } | |
256 | } | |
257 | return GENERIC_NULL; | |
258 | } | |
259 | ||
260 | ||
261 | /* | |
262 | * assign_key() | |
263 | * | |
264 | * Set the key value of a node to be 'key'. Get some space from | |
265 | * malloc and copy it in etc. Return 1 if all is well, 0 otherwise. | |
266 | */ | |
267 | static int | |
268 | assign_key(char *key, hash_node *node) | |
269 | { | |
270 | if (!node || !key){ | |
271 | return 0; | |
272 | } | |
273 | ||
274 | if (!(node->key = (char *)malloc(strlen(key) + 1))){ | |
275 | return 0; | |
276 | } | |
277 | ||
278 | node->key[0] = '\0'; | |
279 | strcat(node->key, key); | |
280 | return 1; | |
281 | } | |
282 | ||
283 | /* | |
284 | * hash_traverse() | |
285 | * | |
286 | * Traverse the hash table and run the function func on the | |
287 | * data found at each node and the argument we're passed for it. | |
288 | */ | |
289 | void | |
290 | hash_traverse(hash_table *table, int (*func)(), void *arg) | |
291 | { | |
292 | int i; | |
293 | int size = table->size; | |
294 | ||
295 | if (!func) | |
296 | return; | |
297 | ||
298 | for (i = 0; i < size; i++) { | |
299 | hash_node *n = table->buckets[i]; | |
300 | while (n) { | |
301 | if ((*func)(n->key, n->data, arg) == 0) | |
302 | return; | |
303 | n = n->next; | |
304 | } | |
305 | } | |
306 | return; | |
307 | } | |
308 | ||
309 | /* | |
310 | * hash_purge() | |
311 | * | |
312 | * Run through the entire hash table. Call purge_func | |
313 | * on the data found at each node, and then free the node. | |
314 | * Set all the bucket pointers to 0. | |
315 | */ | |
316 | void | |
317 | hash_purge(hash_table *table, void (*purge_func)(char *p1, void *p2)) | |
318 | { | |
319 | int i; | |
320 | int size = table->size; | |
321 | ||
322 | for (i = 0; i < size; i++) { | |
323 | hash_node *n = table->buckets[i]; | |
324 | if (n) { | |
325 | do { | |
326 | hash_node *to_free = n; | |
327 | if (purge_func) { | |
328 | (*purge_func)(n->key, n->data); | |
329 | } | |
330 | n = n->next; | |
331 | free(to_free); | |
332 | } while (n); | |
333 | table->buckets[i] = NODE_NULL; | |
334 | } | |
335 | } | |
336 | } | |
337 | ||
338 | #undef min | |
339 | #define min(a, b) (a) < (b) ? (a) : (b) | |
340 | ||
341 | /* | |
342 | * hash_stats() | |
343 | * | |
344 | * Print statistics about the current table allocation to stdout. | |
345 | */ | |
346 | void | |
347 | hash_stats(hash_table *table, int verbose) | |
348 | { | |
349 | int i; | |
350 | int total_elements = 0; | |
351 | int non_empty_buckets = 0; | |
352 | int max_count = 0; | |
353 | int max_repeats = 0; | |
354 | int *counts; | |
355 | int size = table->size; | |
356 | ||
357 | if (!(counts = (int *)malloc(size * sizeof(int)))){ | |
358 | fprintf(stderr, "malloc returns 0\n"); | |
359 | exit(1); | |
360 | } | |
361 | ||
362 | for (i = 0; i < size; i++){ | |
363 | int x = 0; | |
364 | hash_node *n = table->buckets[i]; | |
365 | counts[i] = 0; | |
366 | while (n){ | |
367 | if (!x){ | |
368 | x = 1; | |
369 | non_empty_buckets++; | |
370 | if (verbose){ | |
371 | printf("bucket %2d: ", i); | |
372 | } | |
373 | } | |
374 | if (verbose){ | |
375 | printf(" %s", n->key); | |
376 | } | |
377 | counts[i]++; | |
378 | n = n->next; | |
379 | } | |
380 | ||
381 | total_elements += counts[i]; | |
382 | if (counts[i] > max_count){ | |
383 | max_count = counts[i]; | |
384 | max_repeats = 1; | |
385 | } | |
386 | else if (counts[i] == max_count){ | |
387 | max_repeats++; | |
388 | } | |
389 | ||
390 | if (counts[i] && verbose){ | |
391 | printf(" (%d)\n", counts[i]); | |
392 | } | |
393 | } | |
394 | ||
395 | printf("\n"); | |
396 | printf("%d element%s in storage.\n", total_elements, total_elements == 1 ? "" : "s"); | |
397 | ||
398 | if (total_elements){ | |
399 | printf("%d of %d (%.2f%%) buckets are in use\n", non_empty_buckets, size, | |
400 | (double)100 * (double)non_empty_buckets / (double)(size)); | |
401 | printf("the maximum number of elements in a bucket is %d (%d times)\n", max_count, max_repeats); | |
402 | printf("average per bucket is %f\n", (double)total_elements / (double)non_empty_buckets); | |
403 | printf("optimal would be %f\n", (double)total_elements / (double)(min(size, total_elements))); | |
404 | } | |
405 | return; | |
406 | } |