stdlib/FreeBSD/strhash.c

   1 /*
   2  *
   3  *                      Copyright 1990
   4  *               Terry Jones & Jordan Hubbard
   5  *
   6  *                PCS Computer Systeme, GmbH.
   7  *                   Munich, West Germany
   8  *
   9  *
  10  *  All rights reserved.
  11  *
  12  *  This is unsupported software and is subject to change without notice.
  13  *  the author makes no representations about the suitability of this software
  14  *  for any purpose. It is supplied "as is" without express or implied
  15  *  warranty.
  16  *
  17  *  Permission to use, copy, modify, and distribute this software and its
  18  *  documentation for any purpose and without fee is hereby granted, provided
  19  *  that the above copyright notice appear in all copies and that both that
  20  *  copyright notice and this permission notice appear in supporting
  21  *  documentation, and that the name of the author not be used in
  22  *  advertising or publicity pertaining to distribution of the software
  23  *  without specific, written prior permission.
  24  *
  25  */
  26
  27 /*
  28  * This is a fairly simple open addressing hash scheme.
  29  * Terry did all the code, I just did the spec.
  30  * Thanks again, you crazy Aussie..
  31  *
  32  */
  33
  34 /*
  35  * $Log: strhash.c,v $
  36  * Revision 2.0  90/03/26  01:44:26  jkh
  37  * pre-beta check-in
  38  *
  39  * Revision 1.8  90/03/09  19:22:35  jkh
  40  * Fixed bogus comment.
  41  *
  42  * Revision 1.7  90/03/09  19:01:08  jkh
  43  * Added comments, GPL.
  44  *
  45  * Revision 1.6  90/03/08  17:55:58  terry
  46  * Rearranged hash_purge to be a tiny bit more efficient.
  47  * Added verbose option to hash_stats.
  48  *
  49  * Revision 1.5  90/03/08  17:19:54  terry
  50  * Added hash_purge. Added arg to hash_traverse. Changed all
  51  * void * to Generic.
  52  *
  53  * Revision 1.4  90/03/08  12:02:35  terry
  54  * Fixed problems with allocation that I screwed up last night.
  55  * Changed bucket lists to be singly linked. Thanks to JKH, my hero.
  56  *
  57  * Revision 1.3  90/03/07  21:33:33  terry
  58  * Cleaned up a few decls to keep gcc -Wall quiet.
  59  *
  60  * Revision 1.2  90/03/07  21:14:53  terry
  61  * Comments. Added HASH_STATS define. Removed hash_find()
  62  * and new_node().
  63  *
  64  * Revision 1.1  90/03/07  20:49:45  terry
  65  * Initial revision
  66  *
  67  *
  68  */
  69
  70 #include <sys/cdefs.h>
  71 __FBSDID("$FreeBSD: src/lib/libc/stdlib/strhash.c,v 1.10 2002/03/22 21:53:10 obrien Exp $");
  72
  73 #include <stdio.h>
  74 #include <stdlib.h>
  75 #include <string.h>
  76 #include <sys/types.h>
  77 #include <strhash.h>
  78
  79 #define HASH_NULL    (hash_table *)0
  80 #define NODE_NULL    (hash_node *)0
  81 #define GENERIC_NULL (void *)0
  82
  83 #define HASH_SZ 97
  84
  85
  86 static int _hash(int size, char *key);
  87 static hash_node *list_find(caddr_t key, hash_node *head);
  88
  89
  90 /*
  91  * hash_create()
  92  *
  93  * Malloc room for a new hash table and then room for its
  94  * bucket pointers. Then set all the buckets to
  95  * point to 0. Return the address of the new table.
  96  */
  97 hash_table *
  98 hash_create(int size)
  99 {
 100     int i;
 101     hash_table *new = (hash_table *)malloc(sizeof(hash_table));
 102
 103     if (!new || size < 0){
 104         return HASH_NULL;
 105     }
 106
 107     if (size == 0){
 108         size = HASH_SZ;
 109     }
 110
 111     if (!(new->buckets = (hash_node **)malloc(size * sizeof(hash_node *)))){
 112         return HASH_NULL;
 113     }
 114
 115     for (i = 0; i < size; i++){
 116         new->buckets[i] = NODE_NULL;
 117     }
 118     new->size = size;
 119
 120     return new;
 121 }
 122
 123
 124 /*
 125  * list_find()
 126  *
 127  * Find the key in the linked list pointed to by head.
 128  */
 129 static hash_node *
 130 list_find(caddr_t key, hash_node *head)
 131 {
 132     while (head){
 133         if (!strcmp(head->key, key)){
 134             return head;
 135         }
 136         head = head->next;
 137     }
 138     return NODE_NULL;
 139 }
 140
 141
 142 /*
 143  * _hash()
 144  *
 145  * Compute the hash value for the given key.
 146  */
 147 static int
 148 _hash(int size, char *key)
 149 {
 150     unsigned int h = 0x0;
 151
 152     while (*key){
 153         h = (h << 1) ^ (h ^ (unsigned char) *key++);
 154     }
 155
 156     h %= size;
 157     return h;
 158 }
 159
 160 /*
 161  * hash_destroy()
 162  *
 163  * Find the key and (if it's there) remove it entirely.
 164  * The function (*nukefunc)() is in charge of disposing
 165  * of the storage help by the data associated with the node.
 166  */
 167 void
 168 hash_destroy(hash_table *table, char *key, void (*nukefunc)())
 169 {
 170     int bucket = _hash(table->size, key);
 171     hash_node *found = table->buckets[bucket];
 172     hash_node *to_free = NODE_NULL;
 173
 174     if (!found) {
 175         return;
 176     }
 177
 178     if (!strcmp(found->key, key)) {
 179         /*
 180          * It was the head of the list.
 181          */
 182         table->buckets[bucket] = found->next;
 183         to_free = found;
 184     }
 185     else {
 186         /*
 187          * Walk the list, looking one ahead.
 188          */
 189         while (found->next) {
 190             if (!strcmp(found->next->key, key)) {
 191                 to_free = found->next;
 192                 found->next = found->next->next;
 193                 break;
 194             }
 195             found = found->next;
 196         }
 197
 198         if (!to_free){
 199             return;
 200         }
 201     }
 202
 203     if (nukefunc)
 204         (*nukefunc)(to_free->key, to_free->data);
 205     free(to_free);
 206     return;
 207 }
 208
 209
 210 /*
 211  * hash_search()
 212  *
 213  * Search the table for the given key. Then:
 214  *
 215  * 1) If you find it and there is no replacement function, just
 216  *    return what you found. (This is a simple search).
 217  * 2) If you find it and there is a replacement function, run
 218  *    the function on the data you found, and replace the old
 219  *    data with whatever is passed in datum. Return 0.
 220  * 3) If you don't find it and there is some datum, insert a
 221  *    new item into the table. Insertions go at the front of
 222  *    the bucket. Return 0.
 223  * 4) Otherwise just return 0.
 224  *
 225  */
 226 void *
 227 hash_search(hash_table *table, caddr_t key, void *datum,
 228             void (*replace_func)())
 229 {
 230     int bucket = _hash(table->size, key);
 231     hash_node *found = list_find(key, table->buckets[bucket]);
 232
 233     if (found){
 234         if (!replace_func){
 235             return found->data;
 236         }
 237         else{
 238             (*replace_func)(found->data);
 239             found->data = datum;
 240         }
 241     }
 242     else{
 243         if (datum){
 244
 245             static int assign_key();
 246
 247             hash_node *new = (hash_node *)malloc(sizeof(hash_node));
 248
 249             if (!new || !assign_key(key, new)){
 250                 return GENERIC_NULL;
 251             }
 252             new->data = datum;
 253             new->next = table->buckets[bucket];
 254             table->buckets[bucket] = new;
 255             return new;
 256         }
 257     }
 258     return GENERIC_NULL;
 259 }
 260
 261
 262 /*
 263  * assign_key()
 264  *
 265  * Set the key value of a node to be 'key'. Get some space from
 266  * malloc and copy it in etc. Return 1 if all is well, 0 otherwise.
 267  */
 268 static int
 269 assign_key(char *key, hash_node *node)
 270 {
 271     if (!node || !key){
 272         return 0;
 273     }
 274
 275     if (!(node->key = (char *)malloc(strlen(key) + 1))){
 276         return 0;
 277     }
 278
 279     node->key[0] = '\0';
 280     strcat(node->key, key);
 281     return 1;
 282 }
 283
 284 /*
 285  * hash_traverse()
 286  *
 287  * Traverse the hash table and run the function func on the
 288  * data found at each node and the argument we're passed for it.
 289  */
 290 void
 291 hash_traverse(hash_table *table, int (*func)(), void *arg)
 292 {
 293     int i;
 294     int size = table->size;
 295
 296     if (!func)
 297         return;
 298
 299     for (i = 0; i < size; i++) {
 300         hash_node *n = table->buckets[i];
 301         while (n) {
 302             if ((*func)(n->key, n->data, arg) == 0)
 303                 return;
 304             n = n->next;
 305         }
 306     }
 307     return;
 308 }
 309
 310 /*
 311  * hash_purge()
 312  *
 313  * Run through the entire hash table. Call purge_func
 314  * on the data found at each node, and then free the node.
 315  * Set all the bucket pointers to 0.
 316  */
 317 void
 318 hash_purge(hash_table *table, void (*purge_func)(char *p1, void *p2))
 319 {
 320     int i;
 321     int size = table->size;
 322
 323     for (i = 0; i < size; i++) {
 324         hash_node *n = table->buckets[i];
 325         if (n) {
 326             do {
 327                 hash_node *to_free = n;
 328                 if (purge_func) {
 329                     (*purge_func)(n->key, n->data);
 330                 }
 331                 n = n->next;
 332                 free(to_free);
 333             } while (n);
 334             table->buckets[i] = NODE_NULL;
 335         }
 336     }
 337 }
 338
 339 #undef min
 340 #define min(a, b) (a) < (b) ? (a) : (b)
 341
 342 /*
 343  * hash_stats()
 344  *
 345  * Print statistics about the current table allocation to stdout.
 346  */
 347 void
 348 hash_stats(hash_table *table, int verbose)
 349 {
 350     int i;
 351     int total_elements = 0;
 352     int non_empty_buckets = 0;
 353     int max_count = 0;
 354     int max_repeats = 0;
 355     int *counts;
 356     int size = table->size;
 357
 358     if (!(counts = (int *)malloc(size * sizeof(int)))){
 359         fprintf(stderr, "malloc returns 0\n");
 360         exit(1);
 361     }
 362
 363     for (i = 0; i < size; i++){
 364         int x = 0;
 365         hash_node *n = table->buckets[i];
 366         counts[i] = 0;
 367         while (n){
 368             if (!x){
 369                 x = 1;
 370                 non_empty_buckets++;
 371                 if (verbose){
 372                     printf("bucket %2d: ", i);
 373                 }
 374             }
 375             if (verbose){
 376                 printf(" %s", n->key);
 377             }
 378             counts[i]++;
 379             n = n->next;
 380         }
 381
 382         total_elements += counts[i];
 383         if (counts[i] > max_count){
 384             max_count = counts[i];
 385             max_repeats = 1;
 386         }
 387         else if (counts[i] == max_count){
 388             max_repeats++;
 389         }
 390
 391         if (counts[i] && verbose){
 392             printf(" (%d)\n", counts[i]);
 393         }
 394     }
 395
 396     printf("\n");
 397     printf("%d element%s in storage.\n", total_elements, total_elements == 1 ? "" : "s");
 398
 399     if (total_elements){
 400         printf("%d of %d (%.2f%%) buckets are in use\n", non_empty_buckets, size,
 401                (double)100 * (double)non_empty_buckets / (double)(size));
 402         printf("the maximum number of elements in a bucket is %d (%d times)\n", max_count, max_repeats);
 403         printf("average per bucket is %f\n", (double)total_elements / (double)non_empty_buckets);
 404         printf("optimal would be %f\n", (double)total_elements / (double)(min(size, total_elements)));
 405     }
 406     return;
 407 }