lib/hash.c

   1 /* hash - hashing table processing.
   2
   3    Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003 Free Software
   4    Foundation, Inc.
   5
   6    Written by Jim Meyering, 1992.
   7
   8    This program is free software; you can redistribute it and/or modify
   9    it under the terms of the GNU General Public License as published by
  10    the Free Software Foundation; either version 2, or (at your option)
  11    any later version.
  12
  13    This program is distributed in the hope that it will be useful,
  14    but WITHOUT ANY WARRANTY; without even the implied warranty of
  15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16    GNU General Public License for more details.
  17
  18    You should have received a copy of the GNU General Public License
  19    along with this program; if not, write to the Free Software Foundation,
  20    Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  21
  22 /* A generic hash table package.  */
  23
  24 /* Define USE_OBSTACK to 1 if you want the allocator to use obstacks instead
  25    of malloc.  If you change USE_OBSTACK, you have to recompile!  */
  26
  27 #if HAVE_CONFIG_H
  28 # include <config.h>
  29 #endif
  30 #if HAVE_STDLIB_H
  31 # include <stdlib.h>
  32 #endif
  33
  34 #if HAVE_STDBOOL_H
  35 # include <stdbool.h>
  36 #else
  37 # ifndef __bool_true_false_are_defined
  38 #  if ! HAVE__BOOL
  39 #   ifdef __cplusplus
  40 typedef bool _Bool;
  41 #   else
  42 typedef unsigned char _Bool;
  43 #   endif
  44 #  endif
  45 #  define bool _Bool
  46 #  define false 0
  47 #  define true 1
  48 #  define __bool_true_false_are_defined 1
  49 # endif
  50 #endif
  51
  52 #include <stdio.h>
  53
  54 #ifndef HAVE_DECL_FREE
  55 "this configure-time declaration test was not run"
  56 #endif
  57 #if !HAVE_DECL_FREE
  58 void free ();
  59 #endif
  60
  61 #ifndef HAVE_DECL_MALLOC
  62 "this configure-time declaration test was not run"
  63 #endif
  64 #if !HAVE_DECL_MALLOC
  65 char *malloc ();
  66 #endif
  67
  68 #if USE_OBSTACK
  69 # include "obstack.h"
  70 # ifndef obstack_chunk_alloc
  71 #  define obstack_chunk_alloc malloc
  72 # endif
  73 # ifndef obstack_chunk_free
  74 #  define obstack_chunk_free free
  75 # endif
  76 #endif
  77
  78 #include "hash.h"
  79
  80 struct hash_table
  81   {
  82     /* The array of buckets starts at BUCKET and extends to BUCKET_LIMIT-1,
  83        for a possibility of N_BUCKETS.  Among those, N_BUCKETS_USED buckets
  84        are not empty, there are N_ENTRIES active entries in the table.  */
  85     struct hash_entry *bucket;
  86     struct hash_entry *bucket_limit;
  87     unsigned n_buckets;
  88     unsigned n_buckets_used;
  89     unsigned n_entries;
  90
  91     /* Tuning arguments, kept in a physicaly separate structure.  */
  92     const Hash_tuning *tuning;
  93
  94     /* Three functions are given to `hash_initialize', see the documentation
  95        block for this function.  In a word, HASHER randomizes a user entry
  96        into a number up from 0 up to some maximum minus 1; COMPARATOR returns
  97        true if two user entries compare equally; and DATA_FREER is the cleanup
  98        function for a user entry.  */
  99     Hash_hasher hasher;
 100     Hash_comparator comparator;
 101     Hash_data_freer data_freer;
 102
 103     /* A linked list of freed struct hash_entry structs.  */
 104     struct hash_entry *free_entry_list;
 105
 106 #if USE_OBSTACK
 107     /* Whenever obstacks are used, it is possible to allocate all overflowed
 108        entries into a single stack, so they all can be freed in a single
 109        operation.  It is not clear if the speedup is worth the trouble.  */
 110     struct obstack entry_stack;
 111 #endif
 112   };
 113
 114 /* A hash table contains many internal entries, each holding a pointer to
 115    some user provided data (also called a user entry).  An entry indistinctly
 116    refers to both the internal entry and its associated user entry.  A user
 117    entry contents may be hashed by a randomization function (the hashing
 118    function, or just `hasher' for short) into a number (or `slot') between 0
 119    and the current table size.  At each slot position in the hash table,
 120    starts a linked chain of entries for which the user data all hash to this
 121    slot.  A bucket is the collection of all entries hashing to the same slot.
 122
 123    A good `hasher' function will distribute entries rather evenly in buckets.
 124    In the ideal case, the length of each bucket is roughly the number of
 125    entries divided by the table size.  Finding the slot for a data is usually
 126    done in constant time by the `hasher', and the later finding of a precise
 127    entry is linear in time with the size of the bucket.  Consequently, a
 128    larger hash table size (that is, a larger number of buckets) is prone to
 129    yielding shorter chains, *given* the `hasher' function behaves properly.
 130
 131    Long buckets slow down the lookup algorithm.  One might use big hash table
 132    sizes in hope to reduce the average length of buckets, but this might
 133    become inordinate, as unused slots in the hash table take some space.  The
 134    best bet is to make sure you are using a good `hasher' function (beware
 135    that those are not that easy to write! :-), and to use a table size
 136    larger than the actual number of entries.  */
 137
 138 /* If an insertion makes the ratio of nonempty buckets to table size larger
 139    than the growth threshold (a number between 0.0 and 1.0), then increase
 140    the table size by multiplying by the growth factor (a number greater than
 141    1.0).  The growth threshold defaults to 0.8, and the growth factor
 142    defaults to 1.414, meaning that the table will have doubled its size
 143    every second time 80% of the buckets get used.  */
 144 #define DEFAULT_GROWTH_THRESHOLD 0.8
 145 #define DEFAULT_GROWTH_FACTOR 1.414
 146
 147 /* If a deletion empties a bucket and causes the ratio of used buckets to
 148    table size to become smaller than the shrink threshold (a number between
 149    0.0 and 1.0), then shrink the table by multiplying by the shrink factor (a
 150    number greater than the shrink threshold but smaller than 1.0).  The shrink
 151    threshold and factor default to 0.0 and 1.0, meaning that the table never
 152    shrinks.  */
 153 #define DEFAULT_SHRINK_THRESHOLD 0.0
 154 #define DEFAULT_SHRINK_FACTOR 1.0
 155
 156 /* Use this to initialize or reset a TUNING structure to
 157    some sensible values. */
 158 static const Hash_tuning default_tuning =
 159   {
 160     DEFAULT_SHRINK_THRESHOLD,
 161     DEFAULT_SHRINK_FACTOR,
 162     DEFAULT_GROWTH_THRESHOLD,
 163     DEFAULT_GROWTH_FACTOR,
 164     false
 165   };
 166
 167 /* Information and lookup.  */
 168
 169 /* The following few functions provide information about the overall hash
 170    table organization: the number of entries, number of buckets and maximum
 171    length of buckets.  */
 172
 173 /* Return the number of buckets in the hash table.  The table size, the total
 174    number of buckets (used plus unused), or the maximum number of slots, are
 175    the same quantity.  */
 176
 177 unsigned
 178 hash_get_n_buckets (const Hash_table *table)
 179 {
 180   return table->n_buckets;
 181 }
 182
 183 /* Return the number of slots in use (non-empty buckets).  */
 184
 185 unsigned
 186 hash_get_n_buckets_used (const Hash_table *table)
 187 {
 188   return table->n_buckets_used;
 189 }
 190
 191 /* Return the number of active entries.  */
 192
 193 unsigned
 194 hash_get_n_entries (const Hash_table *table)
 195 {
 196   return table->n_entries;
 197 }
 198
 199 /* Return the length of the longest chain (bucket).  */
 200
 201 unsigned
 202 hash_get_max_bucket_length (const Hash_table *table)
 203 {
 204   struct hash_entry *bucket;
 205   unsigned max_bucket_length = 0;
 206
 207   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 208     {
 209       if (bucket->data)
 210         {
 211           struct hash_entry *cursor = bucket;
 212           unsigned bucket_length = 1;
 213
 214           while (cursor = cursor->next, cursor)
 215             bucket_length++;
 216
 217           if (bucket_length > max_bucket_length)
 218             max_bucket_length = bucket_length;
 219         }
 220     }
 221
 222   return max_bucket_length;
 223 }
 224
 225 /* Do a mild validation of a hash table, by traversing it and checking two
 226    statistics.  */
 227
 228 bool
 229 hash_table_ok (const Hash_table *table)
 230 {
 231   struct hash_entry *bucket;
 232   unsigned n_buckets_used = 0;
 233   unsigned n_entries = 0;
 234
 235   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 236     {
 237       if (bucket->data)
 238         {
 239           struct hash_entry *cursor = bucket;
 240
 241           /* Count bucket head.  */
 242           n_buckets_used++;
 243           n_entries++;
 244
 245           /* Count bucket overflow.  */
 246           while (cursor = cursor->next, cursor)
 247             n_entries++;
 248         }
 249     }
 250
 251   if (n_buckets_used == table->n_buckets_used && n_entries == table->n_entries)
 252     return true;
 253
 254   return false;
 255 }
 256
 257 void
 258 hash_print_statistics (const Hash_table *table, FILE *stream)
 259 {
 260   unsigned n_entries = hash_get_n_entries (table);
 261   unsigned n_buckets = hash_get_n_buckets (table);
 262   unsigned n_buckets_used = hash_get_n_buckets_used (table);
 263   unsigned max_bucket_length = hash_get_max_bucket_length (table);
 264
 265   fprintf (stream, "# entries:         %u\n", n_entries);
 266   fprintf (stream, "# buckets:         %u\n", n_buckets);
 267   fprintf (stream, "# buckets used:    %u (%.2f%%)\n", n_buckets_used,
 268            (100.0 * n_buckets_used) / n_buckets);
 269   fprintf (stream, "max bucket length: %u\n", max_bucket_length);
 270 }
 271
 272 /* If ENTRY matches an entry already in the hash table, return the
 273    entry from the table.  Otherwise, return NULL.  */
 274
 275 void *
 276 hash_lookup (const Hash_table *table, const void *entry)
 277 {
 278   struct hash_entry *bucket
 279     = table->bucket + table->hasher (entry, table->n_buckets);
 280   struct hash_entry *cursor;
 281
 282   if (! (bucket < table->bucket_limit))
 283     abort ();
 284
 285   if (bucket->data == NULL)
 286     return NULL;
 287
 288   for (cursor = bucket; cursor; cursor = cursor->next)
 289     if (table->comparator (entry, cursor->data))
 290       return cursor->data;
 291
 292   return NULL;
 293 }
 294
 295 /* Walking.  */
 296
 297 /* The functions in this page traverse the hash table and process the
 298    contained entries.  For the traversal to work properly, the hash table
 299    should not be resized nor modified while any particular entry is being
 300    processed.  In particular, entries should not be added or removed.  */
 301
 302 /* Return the first data in the table, or NULL if the table is empty.  */
 303
 304 void *
 305 hash_get_first (const Hash_table *table)
 306 {
 307   struct hash_entry *bucket;
 308
 309   if (table->n_entries == 0)
 310     return NULL;
 311
 312   for (bucket = table->bucket; ; bucket++)
 313     if (! (bucket < table->bucket_limit))
 314       abort ();
 315     else if (bucket->data)
 316       return bucket->data;
 317 }
 318
 319 /* Return the user data for the entry following ENTRY, where ENTRY has been
 320    returned by a previous call to either `hash_get_first' or `hash_get_next'.
 321    Return NULL if there are no more entries.  */
 322
 323 void *
 324 hash_get_next (const Hash_table *table, const void *entry)
 325 {
 326   struct hash_entry *bucket
 327     = table->bucket + table->hasher (entry, table->n_buckets);
 328   struct hash_entry *cursor;
 329
 330   if (! (bucket < table->bucket_limit))
 331     abort ();
 332
 333   /* Find next entry in the same bucket.  */
 334   for (cursor = bucket; cursor; cursor = cursor->next)
 335     if (cursor->data == entry && cursor->next)
 336       return cursor->next->data;
 337
 338   /* Find first entry in any subsequent bucket.  */
 339   while (++bucket < table->bucket_limit)
 340     if (bucket->data)
 341       return bucket->data;
 342
 343   /* None found.  */
 344   return NULL;
 345 }
 346
 347 /* Fill BUFFER with pointers to active user entries in the hash table, then
 348    return the number of pointers copied.  Do not copy more than BUFFER_SIZE
 349    pointers.  */
 350
 351 unsigned
 352 hash_get_entries (const Hash_table *table, void **buffer,
 353                   unsigned buffer_size)
 354 {
 355   unsigned counter = 0;
 356   struct hash_entry *bucket;
 357   struct hash_entry *cursor;
 358
 359   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 360     {
 361       if (bucket->data)
 362         {
 363           for (cursor = bucket; cursor; cursor = cursor->next)
 364             {
 365               if (counter >= buffer_size)
 366                 return counter;
 367               buffer[counter++] = cursor->data;
 368             }
 369         }
 370     }
 371
 372   return counter;
 373 }
 374
 375 /* Call a PROCESSOR function for each entry of a hash table, and return the
 376    number of entries for which the processor function returned success.  A
 377    pointer to some PROCESSOR_DATA which will be made available to each call to
 378    the processor function.  The PROCESSOR accepts two arguments: the first is
 379    the user entry being walked into, the second is the value of PROCESSOR_DATA
 380    as received.  The walking continue for as long as the PROCESSOR function
 381    returns nonzero.  When it returns zero, the walking is interrupted.  */
 382
 383 unsigned
 384 hash_do_for_each (const Hash_table *table, Hash_processor processor,
 385                   void *processor_data)
 386 {
 387   unsigned counter = 0;
 388   struct hash_entry *bucket;
 389   struct hash_entry *cursor;
 390
 391   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 392     {
 393       if (bucket->data)
 394         {
 395           for (cursor = bucket; cursor; cursor = cursor->next)
 396             {
 397               if (!(*processor) (cursor->data, processor_data))
 398                 return counter;
 399               counter++;
 400             }
 401         }
 402     }
 403
 404   return counter;
 405 }
 406
 407 /* Allocation and clean-up.  */
 408
 409 /* Return a hash index for a NUL-terminated STRING between 0 and N_BUCKETS-1.
 410    This is a convenience routine for constructing other hashing functions.  */
 411
 412 #if USE_DIFF_HASH
 413
 414 /* About hashings, Paul Eggert writes to me (FP), on 1994-01-01: "Please see
 415    B. J. McKenzie, R. Harries & T. Bell, Selecting a hashing algorithm,
 416    Software--practice & experience 20, 2 (Feb 1990), 209-224.  Good hash
 417    algorithms tend to be domain-specific, so what's good for [diffutils'] io.c
 418    may not be good for your application."  */
 419
 420 unsigned
 421 hash_string (const char *string, unsigned n_buckets)
 422 {
 423 # ifndef CHAR_BIT
 424 #  define CHAR_BIT 8
 425 # endif
 426 # define ROTATE_LEFT(Value, Shift) \
 427   ((Value) << (Shift) | (Value) >> ((sizeof (unsigned) * CHAR_BIT) - (Shift)))
 428 # define HASH_ONE_CHAR(Value, Byte) \
 429   ((Byte) + ROTATE_LEFT (Value, 7))
 430
 431   unsigned value = 0;
 432
 433   for (; *string; string++)
 434     value = HASH_ONE_CHAR (value, *(const unsigned char *) string);
 435   return value % n_buckets;
 436
 437 # undef ROTATE_LEFT
 438 # undef HASH_ONE_CHAR
 439 }
 440
 441 #else /* not USE_DIFF_HASH */
 442
 443 /* This one comes from `recode', and performs a bit better than the above as
 444    per a few experiments.  It is inspired from a hashing routine found in the
 445    very old Cyber `snoop', itself written in typical Greg Mansfield style.
 446    (By the way, what happened to this excellent man?  Is he still alive?)  */
 447
 448 unsigned
 449 hash_string (const char *string, unsigned n_buckets)
 450 {
 451   unsigned value = 0;
 452
 453   while (*string)
 454     value = ((value * 31 + (int) *(const unsigned char *) string++)
 455              % n_buckets);
 456   return value;
 457 }
 458
 459 #endif /* not USE_DIFF_HASH */
 460
 461 /* Return true if CANDIDATE is a prime number.  CANDIDATE should be an odd
 462    number at least equal to 11.  */
 463
 464 static bool
 465 is_prime (unsigned long candidate)
 466 {
 467   unsigned long divisor = 3;
 468   unsigned long square = divisor * divisor;
 469
 470   while (square < candidate && (candidate % divisor))
 471     {
 472       divisor++;
 473       square += 4 * divisor;
 474       divisor++;
 475     }
 476
 477   return (candidate % divisor ? true : false);
 478 }
 479
 480 /* Round a given CANDIDATE number up to the nearest prime, and return that
 481    prime.  Primes lower than 10 are merely skipped.  */
 482
 483 static unsigned long
 484 next_prime (unsigned long candidate)
 485 {
 486   /* Skip small primes.  */
 487   if (candidate < 10)
 488     candidate = 10;
 489
 490   /* Make it definitely odd.  */
 491   candidate |= 1;
 492
 493   while (!is_prime (candidate))
 494     candidate += 2;
 495
 496   return candidate;
 497 }
 498
 499 void
 500 hash_reset_tuning (Hash_tuning *tuning)
 501 {
 502   *tuning = default_tuning;
 503 }
 504
 505 /* For the given hash TABLE, check the user supplied tuning structure for
 506    reasonable values, and return true if there is no gross error with it.
 507    Otherwise, definitively reset the TUNING field to some acceptable default
 508    in the hash table (that is, the user loses the right of further modifying
 509    tuning arguments), and return false.  */
 510
 511 static bool
 512 check_tuning (Hash_table *table)
 513 {
 514   const Hash_tuning *tuning = table->tuning;
 515
 516   if (tuning->growth_threshold > 0.0
 517       && tuning->growth_threshold < 1.0
 518       && tuning->growth_factor > 1.0
 519       && tuning->shrink_threshold >= 0.0
 520       && tuning->shrink_threshold < 1.0
 521       && tuning->shrink_factor > tuning->shrink_threshold
 522       && tuning->shrink_factor <= 1.0
 523       && tuning->shrink_threshold < tuning->growth_threshold)
 524     return true;
 525
 526   table->tuning = &default_tuning;
 527   return false;
 528 }
 529
 530 /* Allocate and return a new hash table, or NULL upon failure.  The initial
 531    number of buckets is automatically selected so as to _guarantee_ that you
 532    may insert at least CANDIDATE different user entries before any growth of
 533    the hash table size occurs.  So, if have a reasonably tight a-priori upper
 534    bound on the number of entries you intend to insert in the hash table, you
 535    may save some table memory and insertion time, by specifying it here.  If
 536    the IS_N_BUCKETS field of the TUNING structure is true, the CANDIDATE
 537    argument has its meaning changed to the wanted number of buckets.
 538
 539    TUNING points to a structure of user-supplied values, in case some fine
 540    tuning is wanted over the default behavior of the hasher.  If TUNING is
 541    NULL, the default tuning parameters are used instead.
 542
 543    The user-supplied HASHER function should be provided.  It accepts two
 544    arguments ENTRY and TABLE_SIZE.  It computes, by hashing ENTRY contents, a
 545    slot number for that entry which should be in the range 0..TABLE_SIZE-1.
 546    This slot number is then returned.
 547
 548    The user-supplied COMPARATOR function should be provided.  It accepts two
 549    arguments pointing to user data, it then returns true for a pair of entries
 550    that compare equal, or false otherwise.  This function is internally called
 551    on entries which are already known to hash to the same bucket index.
 552
 553    The user-supplied DATA_FREER function, when not NULL, may be later called
 554    with the user data as an argument, just before the entry containing the
 555    data gets freed.  This happens from within `hash_free' or `hash_clear'.
 556    You should specify this function only if you want these functions to free
 557    all of your `data' data.  This is typically the case when your data is
 558    simply an auxiliary struct that you have malloc'd to aggregate several
 559    values.  */
 560
 561 Hash_table *
 562 hash_initialize (unsigned candidate, const Hash_tuning *tuning,
 563                  Hash_hasher hasher, Hash_comparator comparator,
 564                  Hash_data_freer data_freer)
 565 {
 566   Hash_table *table;
 567   struct hash_entry *bucket;
 568
 569   if (hasher == NULL || comparator == NULL)
 570     return NULL;
 571
 572   table = (Hash_table *) malloc (sizeof (Hash_table));
 573   if (table == NULL)
 574     return NULL;
 575
 576   if (!tuning)
 577     tuning = &default_tuning;
 578   table->tuning = tuning;
 579   if (!check_tuning (table))
 580     {
 581       /* Fail if the tuning options are invalid.  This is the only occasion
 582          when the user gets some feedback about it.  Once the table is created,
 583          if the user provides invalid tuning options, we silently revert to
 584          using the defaults, and ignore further request to change the tuning
 585          options.  */
 586       free (table);
 587       return NULL;
 588     }
 589
 590   table->n_buckets
 591     = next_prime (tuning->is_n_buckets ? candidate
 592                   : (unsigned) (candidate / tuning->growth_threshold));
 593
 594   table->bucket = (struct hash_entry *)
 595     malloc (table->n_buckets * sizeof (struct hash_entry));
 596   if (table->bucket == NULL)
 597     {
 598       free (table);
 599       return NULL;
 600     }
 601   table->bucket_limit = table->bucket + table->n_buckets;
 602
 603   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 604     {
 605       bucket->data = NULL;
 606       bucket->next = NULL;
 607     }
 608   table->n_buckets_used = 0;
 609   table->n_entries = 0;
 610
 611   table->hasher = hasher;
 612   table->comparator = comparator;
 613   table->data_freer = data_freer;
 614
 615   table->free_entry_list = NULL;
 616 #if USE_OBSTACK
 617   obstack_init (&table->entry_stack);
 618 #endif
 619   return table;
 620 }
 621
 622 /* Make all buckets empty, placing any chained entries on the free list.
 623    Apply the user-specified function data_freer (if any) to the datas of any
 624    affected entries.  */
 625
 626 void
 627 hash_clear (Hash_table *table)
 628 {
 629   struct hash_entry *bucket;
 630
 631   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 632     {
 633       if (bucket->data)
 634         {
 635           struct hash_entry *cursor;
 636           struct hash_entry *next;
 637
 638           /* Free the bucket overflow.  */
 639           for (cursor = bucket->next; cursor; cursor = next)
 640             {
 641               if (table->data_freer)
 642                 (*table->data_freer) (cursor->data);
 643               cursor->data = NULL;
 644
 645               next = cursor->next;
 646               /* Relinking is done one entry at a time, as it is to be expected
 647                  that overflows are either rare or short.  */
 648               cursor->next = table->free_entry_list;
 649               table->free_entry_list = cursor;
 650             }
 651
 652           /* Free the bucket head.  */
 653           if (table->data_freer)
 654             (*table->data_freer) (bucket->data);
 655           bucket->data = NULL;
 656           bucket->next = NULL;
 657         }
 658     }
 659
 660   table->n_buckets_used = 0;
 661   table->n_entries = 0;
 662 }
 663
 664 /* Reclaim all storage associated with a hash table.  If a data_freer
 665    function has been supplied by the user when the hash table was created,
 666    this function applies it to the data of each entry before freeing that
 667    entry.  */
 668
 669 void
 670 hash_free (Hash_table *table)
 671 {
 672   struct hash_entry *bucket;
 673   struct hash_entry *cursor;
 674   struct hash_entry *next;
 675
 676   /* Call the user data_freer function.  */
 677   if (table->data_freer && table->n_entries)
 678     {
 679       for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 680         {
 681           if (bucket->data)
 682             {
 683               for (cursor = bucket; cursor; cursor = cursor->next)
 684                 {
 685                   (*table->data_freer) (cursor->data);
 686                 }
 687             }
 688         }
 689     }
 690
 691 #if USE_OBSTACK
 692
 693   obstack_free (&table->entry_stack, NULL);
 694
 695 #else
 696
 697   /* Free all bucket overflowed entries.  */
 698   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 699     {
 700       for (cursor = bucket->next; cursor; cursor = next)
 701         {
 702           next = cursor->next;
 703           free (cursor);
 704         }
 705     }
 706
 707   /* Also reclaim the internal list of previously freed entries.  */
 708   for (cursor = table->free_entry_list; cursor; cursor = next)
 709     {
 710       next = cursor->next;
 711       free (cursor);
 712     }
 713
 714 #endif
 715
 716   /* Free the remainder of the hash table structure.  */
 717   free (table->bucket);
 718   free (table);
 719 }
 720
 721 /* Insertion and deletion.  */
 722
 723 /* Get a new hash entry for a bucket overflow, possibly by reclying a
 724    previously freed one.  If this is not possible, allocate a new one.  */
 725
 726 static struct hash_entry *
 727 allocate_entry (Hash_table *table)
 728 {
 729   struct hash_entry *new;
 730
 731   if (table->free_entry_list)
 732     {
 733       new = table->free_entry_list;
 734       table->free_entry_list = new->next;
 735     }
 736   else
 737     {
 738 #if USE_OBSTACK
 739       new = (struct hash_entry *)
 740         obstack_alloc (&table->entry_stack, sizeof (struct hash_entry));
 741 #else
 742       new = (struct hash_entry *) malloc (sizeof (struct hash_entry));
 743 #endif
 744     }
 745
 746   return new;
 747 }
 748
 749 /* Free a hash entry which was part of some bucket overflow,
 750    saving it for later recycling.  */
 751
 752 static void
 753 free_entry (Hash_table *table, struct hash_entry *entry)
 754 {
 755   entry->data = NULL;
 756   entry->next = table->free_entry_list;
 757   table->free_entry_list = entry;
 758 }
 759
 760 /* This private function is used to help with insertion and deletion.  When
 761    ENTRY matches an entry in the table, return a pointer to the corresponding
 762    user data and set *BUCKET_HEAD to the head of the selected bucket.
 763    Otherwise, return NULL.  When DELETE is true and ENTRY matches an entry in
 764    the table, unlink the matching entry.  */
 765
 766 static void *
 767 hash_find_entry (Hash_table *table, const void *entry,
 768                  struct hash_entry **bucket_head, bool delete)
 769 {
 770   struct hash_entry *bucket
 771     = table->bucket + table->hasher (entry, table->n_buckets);
 772   struct hash_entry *cursor;
 773
 774   if (! (bucket < table->bucket_limit))
 775     abort ();
 776
 777   *bucket_head = bucket;
 778
 779   /* Test for empty bucket.  */
 780   if (bucket->data == NULL)
 781     return NULL;
 782
 783   /* See if the entry is the first in the bucket.  */
 784   if ((*table->comparator) (entry, bucket->data))
 785     {
 786       void *data = bucket->data;
 787
 788       if (delete)
 789         {
 790           if (bucket->next)
 791             {
 792               struct hash_entry *next = bucket->next;
 793
 794               /* Bump the first overflow entry into the bucket head, then save
 795                  the previous first overflow entry for later recycling.  */
 796               *bucket = *next;
 797               free_entry (table, next);
 798             }
 799           else
 800             {
 801               bucket->data = NULL;
 802             }
 803         }
 804
 805       return data;
 806     }
 807
 808   /* Scan the bucket overflow.  */
 809   for (cursor = bucket; cursor->next; cursor = cursor->next)
 810     {
 811       if ((*table->comparator) (entry, cursor->next->data))
 812         {
 813           void *data = cursor->next->data;
 814
 815           if (delete)
 816             {
 817               struct hash_entry *next = cursor->next;
 818
 819               /* Unlink the entry to delete, then save the freed entry for later
 820                  recycling.  */
 821               cursor->next = next->next;
 822               free_entry (table, next);
 823             }
 824
 825           return data;
 826         }
 827     }
 828
 829   /* No entry found.  */
 830   return NULL;
 831 }
 832
 833 /* For an already existing hash table, change the number of buckets through
 834    specifying CANDIDATE.  The contents of the hash table are preserved.  The
 835    new number of buckets is automatically selected so as to _guarantee_ that
 836    the table may receive at least CANDIDATE different user entries, including
 837    those already in the table, before any other growth of the hash table size
 838    occurs.  If TUNING->IS_N_BUCKETS is true, then CANDIDATE specifies the
 839    exact number of buckets desired.  */
 840
 841 bool
 842 hash_rehash (Hash_table *table, unsigned candidate)
 843 {
 844   Hash_table *new_table;
 845   struct hash_entry *bucket;
 846   struct hash_entry *cursor;
 847   struct hash_entry *next;
 848
 849   new_table = hash_initialize (candidate, table->tuning, table->hasher,
 850                                table->comparator, table->data_freer);
 851   if (new_table == NULL)
 852     return false;
 853
 854   /* Merely reuse the extra old space into the new table.  */
 855 #if USE_OBSTACK
 856   obstack_free (&new_table->entry_stack, NULL);
 857   new_table->entry_stack = table->entry_stack;
 858 #endif
 859   new_table->free_entry_list = table->free_entry_list;
 860
 861   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 862     if (bucket->data)
 863       for (cursor = bucket; cursor; cursor = next)
 864         {
 865           void *data = cursor->data;
 866           struct hash_entry *new_bucket
 867             = (new_table->bucket
 868                + new_table->hasher (data, new_table->n_buckets));
 869
 870           if (! (new_bucket < new_table->bucket_limit))
 871             abort ();
 872
 873           next = cursor->next;
 874
 875           if (new_bucket->data)
 876             {
 877               if (cursor == bucket)
 878                 {
 879                   /* Allocate or recycle an entry, when moving from a bucket
 880                      header into a bucket overflow.  */
 881                   struct hash_entry *new_entry = allocate_entry (new_table);
 882
 883                   if (new_entry == NULL)
 884                     return false;
 885
 886                   new_entry->data = data;
 887                   new_entry->next = new_bucket->next;
 888                   new_bucket->next = new_entry;
 889                 }
 890               else
 891                 {
 892                   /* Merely relink an existing entry, when moving from a
 893                      bucket overflow into a bucket overflow.  */
 894                   cursor->next = new_bucket->next;
 895                   new_bucket->next = cursor;
 896                 }
 897             }
 898           else
 899             {
 900               /* Free an existing entry, when moving from a bucket
 901                  overflow into a bucket header.  Also take care of the
 902                  simple case of moving from a bucket header into a bucket
 903                  header.  */
 904               new_bucket->data = data;
 905               new_table->n_buckets_used++;
 906               if (cursor != bucket)
 907                 free_entry (new_table, cursor);
 908             }
 909         }
 910
 911   free (table->bucket);
 912   table->bucket = new_table->bucket;
 913   table->bucket_limit = new_table->bucket_limit;
 914   table->n_buckets = new_table->n_buckets;
 915   table->n_buckets_used = new_table->n_buckets_used;
 916   table->free_entry_list = new_table->free_entry_list;
 917   /* table->n_entries already holds its value.  */
 918 #if USE_OBSTACK
 919   table->entry_stack = new_table->entry_stack;
 920 #endif
 921   free (new_table);
 922
 923   return true;
 924 }
 925
 926 /* If ENTRY matches an entry already in the hash table, return the pointer
 927    to the entry from the table.  Otherwise, insert ENTRY and return ENTRY.
 928    Return NULL if the storage required for insertion cannot be allocated.  */
 929
 930 void *
 931 hash_insert (Hash_table *table, const void *entry)
 932 {
 933   void *data;
 934   struct hash_entry *bucket;
 935
 936   /* The caller cannot insert a NULL entry.  */
 937   if (! entry)
 938     abort ();
 939
 940   /* If there's a matching entry already in the table, return that.  */
 941   if ((data = hash_find_entry (table, entry, &bucket, false)) != NULL)
 942     return data;
 943
 944   /* ENTRY is not matched, it should be inserted.  */
 945
 946   if (bucket->data)
 947     {
 948       struct hash_entry *new_entry = allocate_entry (table);
 949
 950       if (new_entry == NULL)
 951         return NULL;
 952
 953       /* Add ENTRY in the overflow of the bucket.  */
 954
 955       new_entry->data = (void *) entry;
 956       new_entry->next = bucket->next;
 957       bucket->next = new_entry;
 958       table->n_entries++;
 959       return (void *) entry;
 960     }
 961
 962   /* Add ENTRY right in the bucket head.  */
 963
 964   bucket->data = (void *) entry;
 965   table->n_entries++;
 966   table->n_buckets_used++;
 967
 968   /* If the growth threshold of the buckets in use has been reached, increase
 969      the table size and rehash.  There's no point in checking the number of
 970      entries:  if the hashing function is ill-conditioned, rehashing is not
 971      likely to improve it.  */
 972
 973   if (table->n_buckets_used
 974       > table->tuning->growth_threshold * table->n_buckets)
 975     {
 976       /* Check more fully, before starting real work.  If tuning arguments
 977          became invalid, the second check will rely on proper defaults.  */
 978       check_tuning (table);
 979       if (table->n_buckets_used
 980           > table->tuning->growth_threshold * table->n_buckets)
 981         {
 982           const Hash_tuning *tuning = table->tuning;
 983           unsigned candidate
 984             = (unsigned) (tuning->is_n_buckets
 985                           ? (table->n_buckets * tuning->growth_factor)
 986                           : (table->n_buckets * tuning->growth_factor
 987                              * tuning->growth_threshold));
 988
 989           /* If the rehash fails, arrange to return NULL.  */
 990           if (!hash_rehash (table, candidate))
 991             entry = NULL;
 992         }
 993     }
 994
 995   return (void *) entry;
 996 }
 997
 998 /* If ENTRY is already in the table, remove it and return the just-deleted
 999    data (the user may want to deallocate its storage).  If ENTRY is not in the
1000    table, don't modify the table and return NULL.  */
1001
1002 void *
1003 hash_delete (Hash_table *table, const void *entry)
1004 {
1005   void *data;
1006   struct hash_entry *bucket;
1007
1008   data = hash_find_entry (table, entry, &bucket, true);
1009   if (!data)
1010     return NULL;
1011
1012   table->n_entries--;
1013   if (!bucket->data)
1014     {
1015       table->n_buckets_used--;
1016
1017       /* If the shrink threshold of the buckets in use has been reached,
1018          rehash into a smaller table.  */
1019
1020       if (table->n_buckets_used
1021           < table->tuning->shrink_threshold * table->n_buckets)
1022         {
1023           /* Check more fully, before starting real work.  If tuning arguments
1024              became invalid, the second check will rely on proper defaults.  */
1025           check_tuning (table);
1026           if (table->n_buckets_used
1027               < table->tuning->shrink_threshold * table->n_buckets)
1028             {
1029               const Hash_tuning *tuning = table->tuning;
1030               unsigned candidate
1031                 = (unsigned) (tuning->is_n_buckets
1032                               ? table->n_buckets * tuning->shrink_factor
1033                               : (table->n_buckets * tuning->shrink_factor
1034                                  * tuning->growth_threshold));
1035
1036               hash_rehash (table, candidate);
1037             }
1038         }
1039     }
1040
1041   return data;
1042 }
1043
1044 /* Testing.  */
1045
1046 #if TESTING
1047
1048 void
1049 hash_print (const Hash_table *table)
1050 {
1051   struct hash_entry *bucket;
1052
1053   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
1054     {
1055       struct hash_entry *cursor;
1056
1057       if (bucket)
1058         printf ("%d:\n", bucket - table->bucket);
1059
1060       for (cursor = bucket; cursor; cursor = cursor->next)
1061         {
1062           char *s = (char *) cursor->data;
1063           /* FIXME */
1064           if (s)
1065             printf ("  %s\n", s);
1066         }
1067     }
1068 }
1069
1070 #endif /* TESTING */