]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * | |
3 | * Copyright 1990 | |
4 | * Terry Jones & Jordan Hubbard | |
5 | * | |
6 | * PCS Computer Systeme, GmbH. | |
7 | * Munich, West Germany | |
8 | * | |
9 | * | |
10 | * All rights reserved. | |
11 | * | |
12 | * This is unsupported software and is subject to change without notice. | |
13 | * the author makes no representations about the suitability of this software | |
14 | * for any purpose. It is supplied "as is" without express or implied | |
15 | * warranty. | |
16 | * | |
17 | * Permission to use, copy, modify, and distribute this software and its | |
18 | * documentation for any purpose and without fee is hereby granted, provided | |
19 | * that the above copyright notice appear in all copies and that both that | |
20 | * copyright notice and this permission notice appear in supporting | |
21 | * documentation, and that the name of the author not be used in | |
22 | * advertising or publicity pertaining to distribution of the software | |
23 | * without specific, written prior permission. | |
24 | * | |
25 | */ | |
26 | ||
27 | /* | |
28 | * This is a fairly simple open addressing hash scheme. | |
29 | * Terry did all the code, I just did the spec. | |
30 | * Thanks again, you crazy Aussie.. | |
31 | * | |
32 | */ | |
33 | ||
34 | /* | |
35 | * $Log: strhash.c,v $ | |
36 | * Revision 2.0 90/03/26 01:44:26 jkh | |
37 | * pre-beta check-in | |
38 | * | |
39 | * Revision 1.8 90/03/09 19:22:35 jkh | |
40 | * Fixed bogus comment. | |
41 | * | |
42 | * Revision 1.7 90/03/09 19:01:08 jkh | |
43 | * Added comments, GPL. | |
44 | * | |
45 | * Revision 1.6 90/03/08 17:55:58 terry | |
46 | * Rearranged hash_purge to be a tiny bit more efficient. | |
47 | * Added verbose option to hash_stats. | |
48 | * | |
49 | * Revision 1.5 90/03/08 17:19:54 terry | |
50 | * Added hash_purge. Added arg to hash_traverse. Changed all | |
51 | * void * to Generic. | |
52 | * | |
53 | * Revision 1.4 90/03/08 12:02:35 terry | |
54 | * Fixed problems with allocation that I screwed up last night. | |
55 | * Changed bucket lists to be singly linked. Thanks to JKH, my hero. | |
56 | * | |
57 | * Revision 1.3 90/03/07 21:33:33 terry | |
58 | * Cleaned up a few decls to keep gcc -Wall quiet. | |
59 | * | |
60 | * Revision 1.2 90/03/07 21:14:53 terry | |
61 | * Comments. Added HASH_STATS define. Removed hash_find() | |
62 | * and new_node(). | |
63 | * | |
64 | * Revision 1.1 90/03/07 20:49:45 terry | |
65 | * Initial revision | |
66 | * | |
67 | * | |
68 | */ | |
69 | ||
70 | #include <sys/cdefs.h> | |
71 | __FBSDID("$FreeBSD: src/lib/libc/stdlib/strhash.c,v 1.10 2002/03/22 21:53:10 obrien Exp $"); | |
72 | ||
73 | #include <stdio.h> | |
74 | #include <stdlib.h> | |
75 | #include <string.h> | |
76 | #include <sys/types.h> | |
77 | #include <strhash.h> | |
78 | ||
79 | #define HASH_NULL (hash_table *)0 | |
80 | #define NODE_NULL (hash_node *)0 | |
81 | #define GENERIC_NULL (void *)0 | |
82 | ||
83 | #define HASH_SZ 97 | |
84 | ||
85 | ||
86 | static int _hash(int size, char *key); | |
87 | static hash_node *list_find(caddr_t key, hash_node *head); | |
88 | ||
89 | ||
90 | /* | |
91 | * hash_create() | |
92 | * | |
93 | * Malloc room for a new hash table and then room for its | |
94 | * bucket pointers. Then set all the buckets to | |
95 | * point to 0. Return the address of the new table. | |
96 | */ | |
97 | hash_table * | |
98 | hash_create(int size) | |
99 | { | |
100 | int i; | |
101 | hash_table *new = (hash_table *)malloc(sizeof(hash_table)); | |
102 | ||
103 | if (!new || size < 0){ | |
104 | return HASH_NULL; | |
105 | } | |
106 | ||
107 | if (size == 0){ | |
108 | size = HASH_SZ; | |
109 | } | |
110 | ||
111 | if (!(new->buckets = (hash_node **)malloc(size * sizeof(hash_node *)))){ | |
112 | return HASH_NULL; | |
113 | } | |
114 | ||
115 | for (i = 0; i < size; i++){ | |
116 | new->buckets[i] = NODE_NULL; | |
117 | } | |
118 | new->size = size; | |
119 | ||
120 | return new; | |
121 | } | |
122 | ||
123 | ||
124 | /* | |
125 | * list_find() | |
126 | * | |
127 | * Find the key in the linked list pointed to by head. | |
128 | */ | |
129 | static hash_node * | |
130 | list_find(caddr_t key, hash_node *head) | |
131 | { | |
132 | while (head){ | |
133 | if (!strcmp(head->key, key)){ | |
134 | return head; | |
135 | } | |
136 | head = head->next; | |
137 | } | |
138 | return NODE_NULL; | |
139 | } | |
140 | ||
141 | ||
142 | /* | |
143 | * _hash() | |
144 | * | |
145 | * Compute the hash value for the given key. | |
146 | */ | |
147 | static int | |
148 | _hash(int size, char *key) | |
149 | { | |
150 | unsigned int h = 0x0; | |
151 | ||
152 | while (*key){ | |
153 | h = (h << 1) ^ (h ^ (unsigned char) *key++); | |
154 | } | |
155 | ||
156 | h %= size; | |
157 | return h; | |
158 | } | |
159 | ||
160 | /* | |
161 | * hash_destroy() | |
162 | * | |
163 | * Find the key and (if it's there) remove it entirely. | |
164 | * The function (*nukefunc)() is in charge of disposing | |
165 | * of the storage help by the data associated with the node. | |
166 | */ | |
167 | void | |
168 | hash_destroy(hash_table *table, char *key, void (*nukefunc)()) | |
169 | { | |
170 | int bucket = _hash(table->size, key); | |
171 | hash_node *found = table->buckets[bucket]; | |
172 | hash_node *to_free = NODE_NULL; | |
173 | ||
174 | if (!found) { | |
175 | return; | |
176 | } | |
177 | ||
178 | if (!strcmp(found->key, key)) { | |
179 | /* | |
180 | * It was the head of the list. | |
181 | */ | |
182 | table->buckets[bucket] = found->next; | |
183 | to_free = found; | |
184 | } | |
185 | else { | |
186 | /* | |
187 | * Walk the list, looking one ahead. | |
188 | */ | |
189 | while (found->next) { | |
190 | if (!strcmp(found->next->key, key)) { | |
191 | to_free = found->next; | |
192 | found->next = found->next->next; | |
193 | break; | |
194 | } | |
195 | found = found->next; | |
196 | } | |
197 | ||
198 | if (!to_free){ | |
199 | return; | |
200 | } | |
201 | } | |
202 | ||
203 | if (nukefunc) | |
204 | (*nukefunc)(to_free->key, to_free->data); | |
205 | free(to_free); | |
206 | return; | |
207 | } | |
208 | ||
209 | ||
210 | /* | |
211 | * hash_search() | |
212 | * | |
213 | * Search the table for the given key. Then: | |
214 | * | |
215 | * 1) If you find it and there is no replacement function, just | |
216 | * return what you found. (This is a simple search). | |
217 | * 2) If you find it and there is a replacement function, run | |
218 | * the function on the data you found, and replace the old | |
219 | * data with whatever is passed in datum. Return 0. | |
220 | * 3) If you don't find it and there is some datum, insert a | |
221 | * new item into the table. Insertions go at the front of | |
222 | * the bucket. Return 0. | |
223 | * 4) Otherwise just return 0. | |
224 | * | |
225 | */ | |
226 | void * | |
227 | hash_search(hash_table *table, caddr_t key, void *datum, | |
228 | void (*replace_func)()) | |
229 | { | |
230 | int bucket = _hash(table->size, key); | |
231 | hash_node *found = list_find(key, table->buckets[bucket]); | |
232 | ||
233 | if (found){ | |
234 | if (!replace_func){ | |
235 | return found->data; | |
236 | } | |
237 | else{ | |
238 | (*replace_func)(found->data); | |
239 | found->data = datum; | |
240 | } | |
241 | } | |
242 | else{ | |
243 | if (datum){ | |
244 | ||
245 | static int assign_key(); | |
246 | ||
247 | hash_node *new = (hash_node *)malloc(sizeof(hash_node)); | |
248 | ||
249 | if (!new || !assign_key(key, new)){ | |
250 | return GENERIC_NULL; | |
251 | } | |
252 | new->data = datum; | |
253 | new->next = table->buckets[bucket]; | |
254 | table->buckets[bucket] = new; | |
255 | return new; | |
256 | } | |
257 | } | |
258 | return GENERIC_NULL; | |
259 | } | |
260 | ||
261 | ||
262 | /* | |
263 | * assign_key() | |
264 | * | |
265 | * Set the key value of a node to be 'key'. Get some space from | |
266 | * malloc and copy it in etc. Return 1 if all is well, 0 otherwise. | |
267 | */ | |
268 | static int | |
269 | assign_key(char *key, hash_node *node) | |
270 | { | |
271 | if (!node || !key){ | |
272 | return 0; | |
273 | } | |
274 | ||
275 | if (!(node->key = (char *)malloc(strlen(key) + 1))){ | |
276 | return 0; | |
277 | } | |
278 | ||
279 | node->key[0] = '\0'; | |
280 | strcat(node->key, key); | |
281 | return 1; | |
282 | } | |
283 | ||
284 | /* | |
285 | * hash_traverse() | |
286 | * | |
287 | * Traverse the hash table and run the function func on the | |
288 | * data found at each node and the argument we're passed for it. | |
289 | */ | |
290 | void | |
291 | hash_traverse(hash_table *table, int (*func)(), void *arg) | |
292 | { | |
293 | int i; | |
294 | int size = table->size; | |
295 | ||
296 | if (!func) | |
297 | return; | |
298 | ||
299 | for (i = 0; i < size; i++) { | |
300 | hash_node *n = table->buckets[i]; | |
301 | while (n) { | |
302 | if ((*func)(n->key, n->data, arg) == 0) | |
303 | return; | |
304 | n = n->next; | |
305 | } | |
306 | } | |
307 | return; | |
308 | } | |
309 | ||
310 | /* | |
311 | * hash_purge() | |
312 | * | |
313 | * Run through the entire hash table. Call purge_func | |
314 | * on the data found at each node, and then free the node. | |
315 | * Set all the bucket pointers to 0. | |
316 | */ | |
317 | void | |
318 | hash_purge(hash_table *table, void (*purge_func)(char *p1, void *p2)) | |
319 | { | |
320 | int i; | |
321 | int size = table->size; | |
322 | ||
323 | for (i = 0; i < size; i++) { | |
324 | hash_node *n = table->buckets[i]; | |
325 | if (n) { | |
326 | do { | |
327 | hash_node *to_free = n; | |
328 | if (purge_func) { | |
329 | (*purge_func)(n->key, n->data); | |
330 | } | |
331 | n = n->next; | |
332 | free(to_free); | |
333 | } while (n); | |
334 | table->buckets[i] = NODE_NULL; | |
335 | } | |
336 | } | |
337 | } | |
338 | ||
339 | #undef min | |
340 | #define min(a, b) (a) < (b) ? (a) : (b) | |
341 | ||
342 | /* | |
343 | * hash_stats() | |
344 | * | |
345 | * Print statistics about the current table allocation to stdout. | |
346 | */ | |
347 | void | |
348 | hash_stats(hash_table *table, int verbose) | |
349 | { | |
350 | int i; | |
351 | int total_elements = 0; | |
352 | int non_empty_buckets = 0; | |
353 | int max_count = 0; | |
354 | int max_repeats = 0; | |
355 | int *counts; | |
356 | int size = table->size; | |
357 | ||
358 | if (!(counts = (int *)malloc(size * sizeof(int)))){ | |
359 | fprintf(stderr, "malloc returns 0\n"); | |
360 | exit(1); | |
361 | } | |
362 | ||
363 | for (i = 0; i < size; i++){ | |
364 | int x = 0; | |
365 | hash_node *n = table->buckets[i]; | |
366 | counts[i] = 0; | |
367 | while (n){ | |
368 | if (!x){ | |
369 | x = 1; | |
370 | non_empty_buckets++; | |
371 | if (verbose){ | |
372 | printf("bucket %2d: ", i); | |
373 | } | |
374 | } | |
375 | if (verbose){ | |
376 | printf(" %s", n->key); | |
377 | } | |
378 | counts[i]++; | |
379 | n = n->next; | |
380 | } | |
381 | ||
382 | total_elements += counts[i]; | |
383 | if (counts[i] > max_count){ | |
384 | max_count = counts[i]; | |
385 | max_repeats = 1; | |
386 | } | |
387 | else if (counts[i] == max_count){ | |
388 | max_repeats++; | |
389 | } | |
390 | ||
391 | if (counts[i] && verbose){ | |
392 | printf(" (%d)\n", counts[i]); | |
393 | } | |
394 | } | |
395 | ||
396 | printf("\n"); | |
397 | printf("%d element%s in storage.\n", total_elements, total_elements == 1 ? "" : "s"); | |
398 | ||
399 | if (total_elements){ | |
400 | printf("%d of %d (%.2f%%) buckets are in use\n", non_empty_buckets, size, | |
401 | (double)100 * (double)non_empty_buckets / (double)(size)); | |
402 | printf("the maximum number of elements in a bucket is %d (%d times)\n", max_count, max_repeats); | |
403 | printf("average per bucket is %f\n", (double)total_elements / (double)non_empty_buckets); | |
404 | printf("optimal would be %f\n", (double)total_elements / (double)(min(size, total_elements))); | |
405 | } | |
406 | return; | |
407 | } |