stdlib/strhash-fbsd.c

   1 /*
   2  *
   3  *                      Copyright 1990
   4  *               Terry Jones & Jordan Hubbard
   5  *
   6  *                PCS Computer Systeme, GmbH.
   7  *                   Munich, West Germany
   8  *
   9  *
  10  *  All rights reserved.
  11  *
  12  *  This is unsupported software and is subject to change without notice.
  13  *  the author makes no representations about the suitability of this software
  14  *  for any purpose. It is supplied "as is" without express or implied
  15  *  warranty.
  16  *
  17  *  Permission to use, copy, modify, and distribute this software and its
  18  *  documentation for any purpose and without fee is hereby granted, provided
  19  *  that the above copyright notice appear in all copies and that both that
  20  *  copyright notice and this permission notice appear in supporting
  21  *  documentation, and that the name of the author not be used in
  22  *  advertising or publicity pertaining to distribution of the software
  23  *  without specific, written prior permission.
  24  *
  25  */
  26
  27 /*
  28  * This is a fairly simple open addressing hash scheme.
  29  * Terry did all the code, I just did the spec.
  30  * Thanks again, you crazy Aussie..
  31  *
  32  */
  33
  34 /*
  35  * $Log: strhash.c,v $
  36  * Revision 2.0  90/03/26  01:44:26  jkh
  37  * pre-beta check-in
  38  *
  39  * Revision 1.8  90/03/09  19:22:35  jkh
  40  * Fixed bogus comment.
  41  *
  42  * Revision 1.7  90/03/09  19:01:08  jkh
  43  * Added comments, GPL.
  44  *
  45  * Revision 1.6  90/03/08  17:55:58  terry
  46  * Rearranged hash_purge to be a tiny bit more efficient.
  47  * Added verbose option to hash_stats.
  48  *
  49  * Revision 1.5  90/03/08  17:19:54  terry
  50  * Added hash_purge. Added arg to hash_traverse. Changed all
  51  * void * to Generic.
  52  *
  53  * Revision 1.4  90/03/08  12:02:35  terry
  54  * Fixed problems with allocation that I screwed up last night.
  55  * Changed bucket lists to be singly linked. Thanks to JKH, my hero.
  56  *
  57  * Revision 1.3  90/03/07  21:33:33  terry
  58  * Cleaned up a few decls to keep gcc -Wall quiet.
  59  *
  60  * Revision 1.2  90/03/07  21:14:53  terry
  61  * Comments. Added HASH_STATS define. Removed hash_find()
  62  * and new_node().
  63  *
  64  * Revision 1.1  90/03/07  20:49:45  terry
  65  * Initial revision
  66  *
  67  *
  68  */
  69
  70 #include <sys/cdefs.h>
  71 __FBSDID("$FreeBSD: src/lib/libc/stdlib/strhash.c,v 1.10 2002/03/22 21:53:10 obrien Exp $");
  72
  73 #include <stdio.h>
  74 #include <stdlib.h>
  75 #include <string.h>
  76 #include <sys/types.h>
  77 #include <strhash.h>
  78
  79 #define HASH_NULL    (hash_table *)0
  80 #define NODE_NULL    (hash_node *)0
  81 #define GENERIC_NULL (void *)0
  82
  83 #define HASH_SZ 97
  84
  85
  86 static int _hash(int size, char *key);
  87 static hash_node *list_find(caddr_t key, hash_node *head);
  88 static int assign_key(char *key, hash_node *node);
  89
  90
  91 /*
  92  * hash_create()
  93  *
  94  * Malloc room for a new hash table and then room for its
  95  * bucket pointers. Then set all the buckets to
  96  * point to 0. Return the address of the new table.
  97  */
  98 hash_table *
  99 hash_create(int size)
 100 {
 101     int i;
 102     hash_table *new = (hash_table *)malloc(sizeof(hash_table));
 103
 104     if (!new || size < 0){
 105         return HASH_NULL;
 106     }
 107
 108     if (size == 0){
 109         size = HASH_SZ;
 110     }
 111
 112     if (!(new->buckets = (hash_node **)malloc(size * sizeof(hash_node *)))){
 113         return HASH_NULL;
 114     }
 115
 116     for (i = 0; i < size; i++){
 117         new->buckets[i] = NODE_NULL;
 118     }
 119     new->size = size;
 120
 121     return new;
 122 }
 123
 124
 125 /*
 126  * list_find()
 127  *
 128  * Find the key in the linked list pointed to by head.
 129  */
 130 static hash_node *
 131 list_find(caddr_t key, hash_node *head)
 132 {
 133     while (head){
 134         if (!strcmp(head->key, key)){
 135             return head;
 136         }
 137         head = head->next;
 138     }
 139     return NODE_NULL;
 140 }
 141
 142
 143 /*
 144  * _hash()
 145  *
 146  * Compute the hash value for the given key.
 147  */
 148 static int
 149 _hash(int size, char *key)
 150 {
 151     unsigned int h = 0x0;
 152
 153     while (*key){
 154         h = (h << 1) ^ (h ^ (unsigned char) *key++);
 155     }
 156
 157     h %= size;
 158     return h;
 159 }
 160
 161 /*
 162  * hash_destroy()
 163  *
 164  * Find the key and (if it's there) remove it entirely.
 165  * The function (*nukefunc)() is in charge of disposing
 166  * of the storage help by the data associated with the node.
 167  */
 168 void
 169 hash_destroy(hash_table *table, char *key, void (*nukefunc)())
 170 {
 171     int bucket = _hash(table->size, key);
 172     hash_node *found = table->buckets[bucket];
 173     hash_node *to_free = NODE_NULL;
 174
 175     if (!found) {
 176         return;
 177     }
 178
 179     if (!strcmp(found->key, key)) {
 180         /*
 181          * It was the head of the list.
 182          */
 183         table->buckets[bucket] = found->next;
 184         to_free = found;
 185     }
 186     else {
 187         /*
 188          * Walk the list, looking one ahead.
 189          */
 190         while (found->next) {
 191             if (!strcmp(found->next->key, key)) {
 192                 to_free = found->next;
 193                 found->next = found->next->next;
 194                 break;
 195             }
 196             found = found->next;
 197         }
 198
 199         if (!to_free){
 200             return;
 201         }
 202     }
 203
 204     if (nukefunc)
 205         (*nukefunc)(to_free->key, to_free->data);
 206     free(to_free);
 207     return;
 208 }
 209
 210
 211 /*
 212  * hash_search()
 213  *
 214  * Search the table for the given key. Then:
 215  *
 216  * 1) If you find it and there is no replacement function, just
 217  *    return what you found. (This is a simple search).
 218  * 2) If you find it and there is a replacement function, run
 219  *    the function on the data you found, and replace the old
 220  *    data with whatever is passed in datum. Return 0.
 221  * 3) If you don't find it and there is some datum, insert a
 222  *    new item into the table. Insertions go at the front of
 223  *    the bucket. Return 0.
 224  * 4) Otherwise just return 0.
 225  *
 226  */
 227 void *
 228 hash_search(hash_table *table, caddr_t key, void *datum,
 229             void (*replace_func)())
 230 {
 231     int bucket = _hash(table->size, key);
 232     hash_node *found = list_find(key, table->buckets[bucket]);
 233
 234     if (found){
 235         if (!replace_func){
 236             return found->data;
 237         }
 238         else{
 239             (*replace_func)(found->data);
 240             found->data = datum;
 241         }
 242     }
 243     else{
 244         if (datum){
 245
 246             hash_node *new = (hash_node *)malloc(sizeof(hash_node));
 247
 248             if (!new || !assign_key(key, new)){
 249                 return GENERIC_NULL;
 250             }
 251             new->data = datum;
 252             new->next = table->buckets[bucket];
 253             table->buckets[bucket] = new;
 254             return new;
 255         }
 256     }
 257     return GENERIC_NULL;
 258 }
 259
 260
 261 /*
 262  * assign_key()
 263  *
 264  * Set the key value of a node to be 'key'. Get some space from
 265  * malloc and copy it in etc. Return 1 if all is well, 0 otherwise.
 266  */
 267 static int
 268 assign_key(char *key, hash_node *node)
 269 {
 270     if (!node || !key){
 271         return 0;
 272     }
 273
 274     if (!(node->key = (char *)malloc(strlen(key) + 1))){
 275         return 0;
 276     }
 277
 278     node->key[0] = '\0';
 279     strcat(node->key, key);
 280     return 1;
 281 }
 282
 283 /*
 284  * hash_traverse()
 285  *
 286  * Traverse the hash table and run the function func on the
 287  * data found at each node and the argument we're passed for it.
 288  */
 289 void
 290 hash_traverse(hash_table *table, int (*func)(), void *arg)
 291 {
 292     int i;
 293     int size = table->size;
 294
 295     if (!func)
 296         return;
 297
 298     for (i = 0; i < size; i++) {
 299         hash_node *n = table->buckets[i];
 300         while (n) {
 301             if ((*func)(n->key, n->data, arg) == 0)
 302                 return;
 303             n = n->next;
 304         }
 305     }
 306     return;
 307 }
 308
 309 /*
 310  * hash_purge()
 311  *
 312  * Run through the entire hash table. Call purge_func
 313  * on the data found at each node, and then free the node.
 314  * Set all the bucket pointers to 0.
 315  */
 316 void
 317 hash_purge(hash_table *table, void (*purge_func)(char *p1, void *p2))
 318 {
 319     int i;
 320     int size = table->size;
 321
 322     for (i = 0; i < size; i++) {
 323         hash_node *n = table->buckets[i];
 324         if (n) {
 325             do {
 326                 hash_node *to_free = n;
 327                 if (purge_func) {
 328                     (*purge_func)(n->key, n->data);
 329                 }
 330                 n = n->next;
 331                 free(to_free);
 332             } while (n);
 333             table->buckets[i] = NODE_NULL;
 334         }
 335     }
 336 }
 337
 338 #undef min
 339 #define min(a, b) (a) < (b) ? (a) : (b)
 340
 341 /*
 342  * hash_stats()
 343  *
 344  * Print statistics about the current table allocation to stdout.
 345  */
 346 void
 347 hash_stats(hash_table *table, int verbose)
 348 {
 349     int i;
 350     int total_elements = 0;
 351     int non_empty_buckets = 0;
 352     int max_count = 0;
 353     int max_repeats = 0;
 354     int *counts;
 355     int size = table->size;
 356
 357     if (!(counts = (int *)malloc(size * sizeof(int)))){
 358         fprintf(stderr, "malloc returns 0\n");
 359         exit(1);
 360     }
 361
 362     for (i = 0; i < size; i++){
 363         int x = 0;
 364         hash_node *n = table->buckets[i];
 365         counts[i] = 0;
 366         while (n){
 367             if (!x){
 368                 x = 1;
 369                 non_empty_buckets++;
 370                 if (verbose){
 371                     printf("bucket %2d: ", i);
 372                 }
 373             }
 374             if (verbose){
 375                 printf(" %s", n->key);
 376             }
 377             counts[i]++;
 378             n = n->next;
 379         }
 380
 381         total_elements += counts[i];
 382         if (counts[i] > max_count){
 383             max_count = counts[i];
 384             max_repeats = 1;
 385         }
 386         else if (counts[i] == max_count){
 387             max_repeats++;
 388         }
 389
 390         if (counts[i] && verbose){
 391             printf(" (%d)\n", counts[i]);
 392         }
 393     }
 394
 395     printf("\n");
 396     printf("%d element%s in storage.\n", total_elements, total_elements == 1 ? "" : "s");
 397
 398     if (total_elements){
 399         printf("%d of %d (%.2f%%) buckets are in use\n", non_empty_buckets, size,
 400                (double)100 * (double)non_empty_buckets / (double)(size));
 401         printf("the maximum number of elements in a bucket is %d (%d times)\n", max_count, max_repeats);
 402         printf("average per bucket is %f\n", (double)total_elements / (double)non_empty_buckets);
 403         printf("optimal would be %f\n", (double)total_elements / (double)(min(size, total_elements)));
 404     }
 405     return;
 406 }