]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * | |
3 | * Copyright 1990 | |
4 | * Terry Jones & Jordan Hubbard | |
5 | * | |
6 | * PCS Computer Systeme, GmbH. | |
7 | * Munich, West Germany | |
8 | * | |
9 | * | |
10 | * All rights reserved. | |
11 | * | |
12 | * This is unsupported software and is subject to change without notice. | |
13 | * the author makes no representations about the suitability of this software | |
14 | * for any purpose. It is supplied "as is" without express or implied | |
15 | * warranty. | |
16 | * | |
17 | * Permission to use, copy, modify, and distribute this software and its | |
18 | * documentation for any purpose and without fee is hereby granted, provided | |
19 | * that the above copyright notice appear in all copies and that both that | |
20 | * copyright notice and this permission notice appear in supporting | |
21 | * documentation, and that the name of the author not be used in | |
22 | * advertising or publicity pertaining to distribution of the software | |
23 | * without specific, written prior permission. | |
24 | * | |
25 | */ | |
26 | ||
27 | /* | |
28 | * This is a fairly simple open addressing hash scheme. | |
29 | * Terry did all the code, I just did the spec. | |
30 | * Thanks again, you crazy Aussie.. | |
31 | * | |
32 | */ | |
33 | ||
34 | /* | |
35 | * $Log: strhash.c,v $ | |
36 | * Revision 2.0 90/03/26 01:44:26 jkh | |
37 | * pre-beta check-in | |
38 | * | |
39 | * Revision 1.8 90/03/09 19:22:35 jkh | |
40 | * Fixed bogus comment. | |
41 | * | |
42 | * Revision 1.7 90/03/09 19:01:08 jkh | |
43 | * Added comments, GPL. | |
44 | * | |
45 | * Revision 1.6 90/03/08 17:55:58 terry | |
46 | * Rearranged hash_purge to be a tiny bit more efficient. | |
47 | * Added verbose option to hash_stats. | |
48 | * | |
49 | * Revision 1.5 90/03/08 17:19:54 terry | |
50 | * Added hash_purge. Added arg to hash_traverse. Changed all | |
51 | * void * to Generic. | |
52 | * | |
53 | * Revision 1.4 90/03/08 12:02:35 terry | |
54 | * Fixed problems with allocation that I screwed up last night. | |
55 | * Changed bucket lists to be singly linked. Thanks to JKH, my hero. | |
56 | * | |
57 | * Revision 1.3 90/03/07 21:33:33 terry | |
58 | * Cleaned up a few decls to keep gcc -Wall quiet. | |
59 | * | |
60 | * Revision 1.2 90/03/07 21:14:53 terry | |
61 | * Comments. Added HASH_STATS define. Removed hash_find() | |
62 | * and new_node(). | |
63 | * | |
64 | * Revision 1.1 90/03/07 20:49:45 terry | |
65 | * Initial revision | |
66 | * | |
67 | * | |
68 | */ | |
69 | ||
70 | #pragma clang diagnostic push | |
71 | #pragma clang diagnostic ignored "-Wstrict-prototypes" | |
72 | ||
73 | #include <sys/cdefs.h> | |
74 | __FBSDID("$FreeBSD: src/lib/libc/stdlib/strhash.c,v 1.10 2002/03/22 21:53:10 obrien Exp $"); | |
75 | ||
76 | #include <stdio.h> | |
77 | #include <stdlib.h> | |
78 | #include <string.h> | |
79 | #include <sys/types.h> | |
80 | #include <strhash.h> | |
81 | ||
82 | #define HASH_NULL (hash_table *)0 | |
83 | #define NODE_NULL (hash_node *)0 | |
84 | #define GENERIC_NULL (void *)0 | |
85 | ||
86 | #define HASH_SZ 97 | |
87 | ||
88 | ||
89 | static int _hash(int size, char *key); | |
90 | static hash_node *list_find(caddr_t key, hash_node *head); | |
91 | static int assign_key(char *key, hash_node *node); | |
92 | ||
93 | ||
94 | /* | |
95 | * hash_create() | |
96 | * | |
97 | * Malloc room for a new hash table and then room for its | |
98 | * bucket pointers. Then set all the buckets to | |
99 | * point to 0. Return the address of the new table. | |
100 | */ | |
101 | hash_table * | |
102 | hash_create(int size) | |
103 | { | |
104 | int i; | |
105 | hash_table *new = (hash_table *)malloc(sizeof(hash_table)); | |
106 | ||
107 | if (!new || size < 0){ | |
108 | return HASH_NULL; | |
109 | } | |
110 | ||
111 | if (size == 0){ | |
112 | size = HASH_SZ; | |
113 | } | |
114 | ||
115 | if (!(new->buckets = (hash_node **)malloc(size * sizeof(hash_node *)))){ | |
116 | return HASH_NULL; | |
117 | } | |
118 | ||
119 | for (i = 0; i < size; i++){ | |
120 | new->buckets[i] = NODE_NULL; | |
121 | } | |
122 | new->size = size; | |
123 | ||
124 | return new; | |
125 | } | |
126 | ||
127 | ||
128 | /* | |
129 | * list_find() | |
130 | * | |
131 | * Find the key in the linked list pointed to by head. | |
132 | */ | |
133 | static hash_node * | |
134 | list_find(caddr_t key, hash_node *head) | |
135 | { | |
136 | while (head){ | |
137 | if (!strcmp(head->key, key)){ | |
138 | return head; | |
139 | } | |
140 | head = head->next; | |
141 | } | |
142 | return NODE_NULL; | |
143 | } | |
144 | ||
145 | ||
146 | /* | |
147 | * _hash() | |
148 | * | |
149 | * Compute the hash value for the given key. | |
150 | */ | |
151 | static int | |
152 | _hash(int size, char *key) | |
153 | { | |
154 | unsigned int h = 0x0; | |
155 | ||
156 | while (*key){ | |
157 | h = (h << 1) ^ (h ^ (unsigned char) *key++); | |
158 | } | |
159 | ||
160 | h %= size; | |
161 | return h; | |
162 | } | |
163 | ||
164 | /* | |
165 | * hash_destroy() | |
166 | * | |
167 | * Find the key and (if it's there) remove it entirely. | |
168 | * The function (*nukefunc)() is in charge of disposing | |
169 | * of the storage help by the data associated with the node. | |
170 | */ | |
171 | void | |
172 | hash_destroy(hash_table *table, char *key, void (*nukefunc)()) | |
173 | { | |
174 | int bucket = _hash(table->size, key); | |
175 | hash_node *found = table->buckets[bucket]; | |
176 | hash_node *to_free = NODE_NULL; | |
177 | ||
178 | if (!found) { | |
179 | return; | |
180 | } | |
181 | ||
182 | if (!strcmp(found->key, key)) { | |
183 | /* | |
184 | * It was the head of the list. | |
185 | */ | |
186 | table->buckets[bucket] = found->next; | |
187 | to_free = found; | |
188 | } | |
189 | else { | |
190 | /* | |
191 | * Walk the list, looking one ahead. | |
192 | */ | |
193 | while (found->next) { | |
194 | if (!strcmp(found->next->key, key)) { | |
195 | to_free = found->next; | |
196 | found->next = found->next->next; | |
197 | break; | |
198 | } | |
199 | found = found->next; | |
200 | } | |
201 | ||
202 | if (!to_free){ | |
203 | return; | |
204 | } | |
205 | } | |
206 | ||
207 | if (nukefunc) | |
208 | (*nukefunc)(to_free->key, to_free->data); | |
209 | free(to_free); | |
210 | return; | |
211 | } | |
212 | ||
213 | ||
214 | /* | |
215 | * hash_search() | |
216 | * | |
217 | * Search the table for the given key. Then: | |
218 | * | |
219 | * 1) If you find it and there is no replacement function, just | |
220 | * return what you found. (This is a simple search). | |
221 | * 2) If you find it and there is a replacement function, run | |
222 | * the function on the data you found, and replace the old | |
223 | * data with whatever is passed in datum. Return 0. | |
224 | * 3) If you don't find it and there is some datum, insert a | |
225 | * new item into the table. Insertions go at the front of | |
226 | * the bucket. Return 0. | |
227 | * 4) Otherwise just return 0. | |
228 | * | |
229 | */ | |
230 | void * | |
231 | hash_search(hash_table *table, caddr_t key, void *datum, | |
232 | void (*replace_func)()) | |
233 | { | |
234 | int bucket = _hash(table->size, key); | |
235 | hash_node *found = list_find(key, table->buckets[bucket]); | |
236 | ||
237 | if (found){ | |
238 | if (!replace_func){ | |
239 | return found->data; | |
240 | } | |
241 | else{ | |
242 | (*replace_func)(found->data); | |
243 | found->data = datum; | |
244 | } | |
245 | } | |
246 | else{ | |
247 | if (datum){ | |
248 | ||
249 | hash_node *new = (hash_node *)malloc(sizeof(hash_node)); | |
250 | ||
251 | if (!new || !assign_key(key, new)){ | |
252 | return GENERIC_NULL; | |
253 | } | |
254 | new->data = datum; | |
255 | new->next = table->buckets[bucket]; | |
256 | table->buckets[bucket] = new; | |
257 | return new; | |
258 | } | |
259 | } | |
260 | return GENERIC_NULL; | |
261 | } | |
262 | ||
263 | ||
264 | /* | |
265 | * assign_key() | |
266 | * | |
267 | * Set the key value of a node to be 'key'. Get some space from | |
268 | * malloc and copy it in etc. Return 1 if all is well, 0 otherwise. | |
269 | */ | |
270 | static int | |
271 | assign_key(char *key, hash_node *node) | |
272 | { | |
273 | if (!node || !key){ | |
274 | return 0; | |
275 | } | |
276 | ||
277 | if (!(node->key = (char *)malloc(strlen(key) + 1))){ | |
278 | return 0; | |
279 | } | |
280 | ||
281 | node->key[0] = '\0'; | |
282 | strcat(node->key, key); | |
283 | return 1; | |
284 | } | |
285 | ||
286 | /* | |
287 | * hash_traverse() | |
288 | * | |
289 | * Traverse the hash table and run the function func on the | |
290 | * data found at each node and the argument we're passed for it. | |
291 | */ | |
292 | void | |
293 | hash_traverse(hash_table *table, int (*func)(), void *arg) | |
294 | { | |
295 | int i; | |
296 | int size = table->size; | |
297 | ||
298 | if (!func) | |
299 | return; | |
300 | ||
301 | for (i = 0; i < size; i++) { | |
302 | hash_node *n = table->buckets[i]; | |
303 | while (n) { | |
304 | if ((*func)(n->key, n->data, arg) == 0) | |
305 | return; | |
306 | n = n->next; | |
307 | } | |
308 | } | |
309 | return; | |
310 | } | |
311 | ||
312 | /* | |
313 | * hash_purge() | |
314 | * | |
315 | * Run through the entire hash table. Call purge_func | |
316 | * on the data found at each node, and then free the node. | |
317 | * Set all the bucket pointers to 0. | |
318 | */ | |
319 | void | |
320 | hash_purge(hash_table *table, void (*purge_func)(char *p1, void *p2)) | |
321 | { | |
322 | int i; | |
323 | int size = table->size; | |
324 | ||
325 | for (i = 0; i < size; i++) { | |
326 | hash_node *n = table->buckets[i]; | |
327 | if (n) { | |
328 | do { | |
329 | hash_node *to_free = n; | |
330 | if (purge_func) { | |
331 | (*purge_func)(n->key, n->data); | |
332 | } | |
333 | n = n->next; | |
334 | free(to_free); | |
335 | } while (n); | |
336 | table->buckets[i] = NODE_NULL; | |
337 | } | |
338 | } | |
339 | } | |
340 | ||
341 | #undef min | |
342 | #define min(a, b) (a) < (b) ? (a) : (b) | |
343 | ||
344 | /* | |
345 | * hash_stats() | |
346 | * | |
347 | * Print statistics about the current table allocation to stdout. | |
348 | */ | |
349 | void | |
350 | hash_stats(hash_table *table, int verbose) | |
351 | { | |
352 | int i; | |
353 | int total_elements = 0; | |
354 | int non_empty_buckets = 0; | |
355 | int max_count = 0; | |
356 | int max_repeats = 0; | |
357 | int *counts; | |
358 | int size = table->size; | |
359 | ||
360 | if (!(counts = (int *)malloc(size * sizeof(int)))){ | |
361 | fprintf(stderr, "malloc returns 0\n"); | |
362 | exit(1); | |
363 | } | |
364 | ||
365 | for (i = 0; i < size; i++){ | |
366 | int x = 0; | |
367 | hash_node *n = table->buckets[i]; | |
368 | counts[i] = 0; | |
369 | while (n){ | |
370 | if (!x){ | |
371 | x = 1; | |
372 | non_empty_buckets++; | |
373 | if (verbose){ | |
374 | printf("bucket %2d: ", i); | |
375 | } | |
376 | } | |
377 | if (verbose){ | |
378 | printf(" %s", n->key); | |
379 | } | |
380 | counts[i]++; | |
381 | n = n->next; | |
382 | } | |
383 | ||
384 | total_elements += counts[i]; | |
385 | if (counts[i] > max_count){ | |
386 | max_count = counts[i]; | |
387 | max_repeats = 1; | |
388 | } | |
389 | else if (counts[i] == max_count){ | |
390 | max_repeats++; | |
391 | } | |
392 | ||
393 | if (counts[i] && verbose){ | |
394 | printf(" (%d)\n", counts[i]); | |
395 | } | |
396 | } | |
397 | ||
398 | printf("\n"); | |
399 | printf("%d element%s in storage.\n", total_elements, total_elements == 1 ? "" : "s"); | |
400 | ||
401 | if (total_elements){ | |
402 | printf("%d of %d (%.2f%%) buckets are in use\n", non_empty_buckets, size, | |
403 | (double)100 * (double)non_empty_buckets / (double)(size)); | |
404 | printf("the maximum number of elements in a bucket is %d (%d times)\n", max_count, max_repeats); | |
405 | printf("average per bucket is %f\n", (double)total_elements / (double)non_empty_buckets); | |
406 | printf("optimal would be %f\n", (double)total_elements / (double)(min(size, total_elements))); | |
407 | } | |
408 | return; | |
409 | } | |
410 | #pragma clang diagnostic pop |