/* hash - hashing table processing.
   Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
   Written by Jim Meyering, 1992.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */

/* A generic hash table package.  */

/* Define USE_OBSTACK to 1 if you want the allocator to use obstacks instead
   of malloc.  If you change USE_OBSTACK, you have to recompile!  */

#if HAVE_CONFIG_H
# include <config.h>
#endif
#if HAVE_STDLIB_H
# include <stdlib.h>
#endif
#if HAVE_STDBOOL_H
# include <stdbool.h>
#else
typedef enum {false = 0, true = 1} bool;
#endif
#include <stdio.h>
#include <assert.h>

#ifndef HAVE_DECL_FREE
"this configure-time declaration test was not run"
#endif
#if !HAVE_DECL_FREE
void free ();
#endif

#ifndef HAVE_DECL_MALLOC
"this configure-time declaration test was not run"
#endif
#if !HAVE_DECL_MALLOC
char *malloc ();
#endif

#if USE_OBSTACK
# include "obstack.h"
# ifndef obstack_chunk_alloc
#  define obstack_chunk_alloc malloc
# endif
# ifndef obstack_chunk_free
#  define obstack_chunk_free free
# endif
#endif

#include "hash.h"

struct hash_table
  {
    /* The array of buckets starts at BUCKET and extends to BUCKET_LIMIT-1,
       for a possibility of N_BUCKETS buckets.  Among those, N_BUCKETS_USED
       buckets are not empty; there are N_ENTRIES active entries in the
       table.  */
    struct hash_entry *bucket;
    struct hash_entry *bucket_limit;
    unsigned n_buckets;
    unsigned n_buckets_used;
    unsigned n_entries;

    /* Tuning arguments, kept in a physically separate structure.  */
    const Hash_tuning *tuning;

    /* Three functions are given to `hash_initialize', see the documentation
       block for that function.  In a word, HASHER randomizes a user entry
       into a number from 0 up to some maximum minus 1; COMPARATOR returns
       true if two user entries compare equally; and DATA_FREER is the
       cleanup function for a user entry.  */
    Hash_hasher hasher;
    Hash_comparator comparator;
    Hash_data_freer data_freer;

    /* A linked list of freed struct hash_entry structs.  */
    struct hash_entry *free_entry_list;

#if USE_OBSTACK
    /* Whenever obstacks are used, it is possible to allocate all overflowed
       entries into a single stack, so they all can be freed in a single
       operation.  It is not clear if the speedup is worth the trouble.  */
    struct obstack entry_stack;
#endif
  };

/* A hash table contains many internal entries, each holding a pointer to
   some user-provided data (also called a user entry).  The term `entry'
   refers indistinctly to both the internal entry and its associated user
   entry.  A user entry's contents may be hashed by a randomization function
   (the hashing function, or just `hasher' for short) into a number (or
   `slot') between 0 and the current table size minus 1.  At each slot
   position in the hash table starts a linked chain of entries whose user
   data all hash to this slot.  A bucket is the collection of all entries
   hashing to the same slot.

   A good `hasher' function will distribute entries rather evenly in
   buckets.  In the ideal case, the length of each bucket is roughly the
   number of entries divided by the table size.  Finding the slot for an
   entry is usually done in constant time by the `hasher', and the later
   finding of a precise entry is linear in the size of the bucket.
   Consequently, a larger hash table size (that is, a larger number of
   buckets) is prone to yielding shorter chains, *given* the `hasher'
   function behaves properly.

   Long buckets slow down the lookup algorithm.  One might use big hash
   table sizes in the hope of reducing the average length of buckets, but
   this might become wasteful, as unused slots in the hash table take some
   space.  The best bet is to make sure you are using a good `hasher'
   function (beware that those are not that easy to write! :-), and to use
   a table size larger than the actual number of entries.  */

/* If an insertion makes the ratio of nonempty buckets to table size larger
   than the growth threshold (a number between 0.0 and 1.0), then increase
   the table size by multiplying by the growth factor (a number greater than
   1.0).  The growth threshold defaults to 0.8, and the growth factor
   defaults to 1.414, so that two consecutive growths roughly double the
   table size, since 1.414 * 1.414 is about 2.  */
#define DEFAULT_GROWTH_THRESHOLD 0.8
#define DEFAULT_GROWTH_FACTOR 1.414

/* If a deletion empties a bucket and causes the ratio of used buckets to
   table size to become smaller than the shrink threshold (a number between
   0.0 and 1.0), then shrink the table by multiplying by the shrink factor (a
   number greater than the shrink threshold but smaller than 1.0).  The shrink
   threshold and factor default to 0.0 and 1.0, meaning that the table never
   shrinks.  */
#define DEFAULT_SHRINK_THRESHOLD 0.0
#define DEFAULT_SHRINK_FACTOR 1.0

/* Use this to initialize or reset a TUNING structure to
   some sensible values.  */
static const Hash_tuning default_tuning =
  {
    DEFAULT_SHRINK_THRESHOLD,
    DEFAULT_SHRINK_FACTOR,
    DEFAULT_GROWTH_THRESHOLD,
    DEFAULT_GROWTH_FACTOR,
    false
  };
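
/* For illustration, a caller who wants a table that also shrinks might
   override the defaults along these lines (a sketch only; the numbers are
   arbitrary examples, not recommendations):

     Hash_tuning my_tuning;

     hash_reset_tuning (&my_tuning);
     my_tuning.shrink_threshold = 0.2;
     my_tuning.shrink_factor = 0.5;

   and then pass &my_tuning to `hash_initialize', which validates the
   structure via `check_tuning' below.  */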

/* Information and lookup.  */

/* The following few functions provide information about the overall hash
   table organization: the number of entries, number of buckets and maximum
   length of buckets.  */

/* Return the number of buckets in the hash table.  The table size, the
   total number of buckets (used plus unused), and the maximum number of
   slots are all the same quantity.  */

unsigned
hash_get_n_buckets (const Hash_table *table)
{
  return table->n_buckets;
}

/* Return the number of slots in use (non-empty buckets).  */

unsigned
hash_get_n_buckets_used (const Hash_table *table)
{
  return table->n_buckets_used;
}

/* Return the number of active entries.  */

unsigned
hash_get_n_entries (const Hash_table *table)
{
  return table->n_entries;
}

/* Return the length of the longest chain (bucket).  */

unsigned
hash_get_max_bucket_length (const Hash_table *table)
{
  struct hash_entry *bucket;
  unsigned max_bucket_length = 0;

  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      if (bucket->data)
        {
          struct hash_entry *cursor = bucket;
          unsigned bucket_length = 1;

          while (cursor = cursor->next, cursor)
            bucket_length++;

          if (bucket_length > max_bucket_length)
            max_bucket_length = bucket_length;
        }
    }

  return max_bucket_length;
}

/* Do a mild validation of a hash table, by traversing it and checking two
   statistics.  */

bool
hash_table_ok (const Hash_table *table)
{
  struct hash_entry *bucket;
  unsigned n_buckets_used = 0;
  unsigned n_entries = 0;

  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      if (bucket->data)
        {
          struct hash_entry *cursor = bucket;

          /* Count bucket head.  */
          n_buckets_used++;
          n_entries++;

          /* Count bucket overflow.  */
          while (cursor = cursor->next, cursor)
            n_entries++;
        }
    }

  if (n_buckets_used == table->n_buckets_used && n_entries == table->n_entries)
    return true;

  return false;
}

void
hash_print_statistics (const Hash_table *table, FILE *stream)
{
  unsigned n_entries = hash_get_n_entries (table);
  unsigned n_buckets = hash_get_n_buckets (table);
  unsigned n_buckets_used = hash_get_n_buckets_used (table);
  unsigned max_bucket_length = hash_get_max_bucket_length (table);

  fprintf (stream, "# entries:         %u\n", n_entries);
  fprintf (stream, "# buckets:         %u\n", n_buckets);
  fprintf (stream, "# buckets used:    %u (%.2f%%)\n", n_buckets_used,
           (100.0 * n_buckets_used) / n_buckets);
  fprintf (stream, "max bucket length: %u\n", max_bucket_length);
}

/* If ENTRY matches an entry already in the hash table, return the
   entry from the table.  Otherwise, return NULL.  */

void *
hash_lookup (const Hash_table *table, const void *entry)
{
  struct hash_entry *bucket
    = table->bucket + table->hasher (entry, table->n_buckets);
  struct hash_entry *cursor;

  assert (bucket < table->bucket_limit);

  if (bucket->data == NULL)
    return NULL;

  for (cursor = bucket; cursor; cursor = cursor->next)
    if (table->comparator (entry, cursor->data))
      return cursor->data;

  return NULL;
}
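
/* As a usage sketch (assuming a table whose hasher and comparator work on
   NUL-terminated strings):

     char *match = hash_lookup (table, "key");

   MATCH is then either NULL or the pointer stored in the table, which need
   not be the very pointer passed in, only one comparing equal to it.  */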

/* Walking.  */

/* The functions in this page traverse the hash table and process the
   contained entries.  For the traversal to work properly, the hash table
   should not be resized nor modified while any particular entry is being
   processed.  In particular, entries should not be added or removed.  */

/* Return the first data in the table, or NULL if the table is empty.  */

void *
hash_get_first (const Hash_table *table)
{
  struct hash_entry *bucket;

  if (table->n_entries == 0)
    return NULL;

  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    if (bucket->data)
      return bucket->data;

  assert (0);
  return NULL;
}

/* Return the user data for the entry following ENTRY, where ENTRY has been
   returned by a previous call to either `hash_get_first' or `hash_get_next'.
   Return NULL if there are no more entries.  */

void *
hash_get_next (const Hash_table *table, const void *entry)
{
  struct hash_entry *bucket
    = table->bucket + table->hasher (entry, table->n_buckets);
  struct hash_entry *cursor;

  assert (bucket < table->bucket_limit);

  /* Find next entry in the same bucket.  */
  for (cursor = bucket; cursor; cursor = cursor->next)
    if (cursor->data == entry && cursor->next)
      return cursor->next->data;

  /* Find first entry in any subsequent bucket.  */
  while (++bucket < table->bucket_limit)
    if (bucket->data)
      return bucket->data;

  /* None found.  */
  return NULL;
}
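
/* Together, `hash_get_first' and `hash_get_next' support the usual scanning
   idiom (a sketch; `process' stands for whatever per-entry work the caller
   needs, and no entry may be added or removed during the scan):

     void *data;

     for (data = hash_get_first (table);
          data != NULL;
          data = hash_get_next (table, data))
       process (data);
*/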

/* Fill BUFFER with pointers to active user entries in the hash table, then
   return the number of pointers copied.  Do not copy more than BUFFER_SIZE
   pointers.  */

unsigned
hash_get_entries (const Hash_table *table, void **buffer,
                  unsigned buffer_size)
{
  unsigned counter = 0;
  struct hash_entry *bucket;
  struct hash_entry *cursor;

  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      if (bucket->data)
        {
          for (cursor = bucket; cursor; cursor = cursor->next)
            {
              if (counter >= buffer_size)
                return counter;
              buffer[counter++] = cursor->data;
            }
        }
    }

  return counter;
}

/* Call a PROCESSOR function for each entry of a hash table, and return the
   number of entries for which the processor function returned success.  A
   pointer to some PROCESSOR_DATA is passed, unchanged, to each call of the
   processor function.  The PROCESSOR accepts two arguments: the first is
   the user entry being walked, the second is the value of PROCESSOR_DATA
   as received.  The walking continues for as long as the PROCESSOR function
   returns nonzero.  When it returns zero, the walking is interrupted.  */

unsigned
hash_do_for_each (const Hash_table *table, Hash_processor processor,
                  void *processor_data)
{
  unsigned counter = 0;
  struct hash_entry *bucket;
  struct hash_entry *cursor;

  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      if (bucket->data)
        {
          for (cursor = bucket; cursor; cursor = cursor->next)
            {
              if (!(*processor) (cursor->data, processor_data))
                return counter;
              counter++;
            }
        }
    }

  return counter;
}
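
/* For example, a processor that counts entries bearing a given prefix might
   look like this (a sketch; `struct prefix_count' and `count_prefixed' are
   invented for the illustration, entries are assumed to be strings, and the
   processor signature is taken to be bool (*) (void *, void *) as declared
   in hash.h):

     #include <string.h>

     struct prefix_count { const char *prefix; unsigned count; };

     static bool
     count_prefixed (void *data, void *processor_data)
     {
       struct prefix_count *pc = processor_data;

       if (strncmp (data, pc->prefix, strlen (pc->prefix)) == 0)
         pc->count++;
       return true;
     }

   Returning true keeps the walk going; returning false would stop it early,
   as described above.  */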

/* Allocation and clean-up.  */

/* Return a hash index for a NUL-terminated STRING between 0 and N_BUCKETS-1.
   This is a convenience routine for constructing other hashing functions.  */

#if USE_DIFF_HASH

/* About hashing, Paul Eggert writes to me (FP), on 1994-01-01: "Please see
   B. J. McKenzie, R. Harries & T. Bell, Selecting a hashing algorithm,
   Software--practice & experience 20, 2 (Feb 1990), 209-224.  Good hash
   algorithms tend to be domain-specific, so what's good for [diffutils'] io.c
   may not be good for your application."  */

unsigned
hash_string (const char *string, unsigned n_buckets)
{
# ifndef CHAR_BIT
#  define CHAR_BIT 8
# endif
# define ROTATE_LEFT(Value, Shift) \
  ((Value) << (Shift) | (Value) >> ((sizeof (unsigned) * CHAR_BIT) - (Shift)))
# define HASH_ONE_CHAR(Value, Byte) \
  ((Byte) + ROTATE_LEFT (Value, 7))

  unsigned value = 0;

  for (; *string; string++)
    value = HASH_ONE_CHAR (value, *(const unsigned char *) string);
  return value % n_buckets;

# undef ROTATE_LEFT
# undef HASH_ONE_CHAR
}

#else /* not USE_DIFF_HASH */

/* This one comes from `recode', and performs a bit better than the above as
   per a few experiments.  It is inspired by a hashing routine found in the
   very old Cyber `snoop', itself written in typical Greg Mansfield style.
   (By the way, what happened to this excellent man?  Is he still alive?)  */

unsigned
hash_string (const char *string, unsigned n_buckets)
{
  unsigned value = 0;

  while (*string)
    value = ((value * 31 + (int) *(const unsigned char *) string++)
             % n_buckets);
  return value;
}

#endif /* not USE_DIFF_HASH */
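
/* Either version of `hash_string' makes writing a conforming `hasher' for
   string keys trivial.  A sketch (`string_hasher' is an invented name):

     static unsigned
     string_hasher (const void *entry, unsigned table_size)
     {
       return hash_string (entry, table_size);
     }

   The matching comparator would simply test `strcmp (...) == 0'.  */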

/* Return true if CANDIDATE is a prime number.  CANDIDATE should be an odd
   number at least equal to 11.  */

static bool
is_prime (unsigned long candidate)
{
  unsigned long divisor = 3;
  unsigned long square = divisor * divisor;

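  /* The loop below tries only odd divisors, maintaining the invariant
     SQUARE == DIVISOR * DIVISOR via the identity
     (d + 2) * (d + 2) == d * d + 4 * (d + 1), so no multiplication needs
     redoing.  It stops once SQUARE >= CANDIDATE, that is, once all
     potential divisors up to the square root have been tried, or once a
     divisor is found.  */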
  while (square < candidate && (candidate % divisor))
    {
      divisor++;
      square += 4 * divisor;
      divisor++;
    }

  return (candidate % divisor ? true : false);
}

/* Round a given CANDIDATE number up to the nearest prime, and return that
   prime.  Primes lower than 10 are merely skipped.  */

static unsigned long
next_prime (unsigned long candidate)
{
  /* Skip small primes.  */
  if (candidate < 10)
    candidate = 10;

  /* Make it definitely odd.  */
  candidate |= 1;

  while (!is_prime (candidate))
    candidate += 2;

  return candidate;
}

void
hash_reset_tuning (Hash_tuning *tuning)
{
  *tuning = default_tuning;
}

/* For the given hash TABLE, check the user-supplied tuning structure for
   reasonable values, and return true if there is no gross error with it.
   Otherwise, definitively reset the TUNING field to some acceptable default
   in the hash table (that is, the user loses the right to further modify
   tuning arguments), and return false.  */

static bool
check_tuning (Hash_table *table)
{
  const Hash_tuning *tuning = table->tuning;

  if (tuning->growth_threshold > 0.0
      && tuning->growth_threshold < 1.0
      && tuning->growth_factor > 1.0
      && tuning->shrink_threshold >= 0.0
      && tuning->shrink_threshold < 1.0
      && tuning->shrink_factor > tuning->shrink_threshold
      && tuning->shrink_factor <= 1.0
      && tuning->shrink_threshold < tuning->growth_threshold)
    return true;

  table->tuning = &default_tuning;
  return false;
}

/* Allocate and return a new hash table, or NULL upon failure.  The initial
   number of buckets is automatically selected so as to _guarantee_ that you
   may insert at least CANDIDATE different user entries before any growth of
   the hash table size occurs.  So, if you have a reasonably tight a-priori
   upper bound on the number of entries you intend to insert in the hash
   table, you may save some table memory and insertion time by specifying it
   here.  If the IS_N_BUCKETS field of the TUNING structure is true, the
   CANDIDATE argument has its meaning changed to the wanted number of
   buckets.

   TUNING points to a structure of user-supplied values, in case some fine
   tuning is wanted over the default behavior of the hasher.  If TUNING is
   NULL, the default tuning parameters are used instead.

   A user-supplied HASHER function must be provided.  It accepts two
   arguments ENTRY and TABLE_SIZE.  It computes, by hashing ENTRY contents, a
   slot number for that entry which should be in the range 0..TABLE_SIZE-1.
   This slot number is then returned.

   A user-supplied COMPARATOR function must also be provided.  It accepts two
   arguments pointing to user data; it then returns true for a pair of entries
   that compare equal, or false otherwise.  This function is internally called
   on entries which are already known to hash to the same bucket index.

   The user-supplied DATA_FREER function, when not NULL, may be later called
   with the user data as an argument, just before the entry containing the
   data gets freed.  This happens from within `hash_free' or `hash_clear'.
   You should specify this function only if you want these functions to free
   all of your `data' data.  This is typically the case when your data is
   simply an auxiliary struct that you have malloc'd to aggregate several
   values.  */

Hash_table *
hash_initialize (unsigned candidate, const Hash_tuning *tuning,
                 Hash_hasher hasher, Hash_comparator comparator,
                 Hash_data_freer data_freer)
{
  Hash_table *table;
  struct hash_entry *bucket;

  if (hasher == NULL || comparator == NULL)
    return NULL;

  table = (Hash_table *) malloc (sizeof (Hash_table));
  if (table == NULL)
    return NULL;

  if (!tuning)
    tuning = &default_tuning;
  table->tuning = tuning;
  if (!check_tuning (table))
    {
      /* Fail if the tuning options are invalid.  This is the only occasion
         when the user gets some feedback about it.  Once the table is
         created, if the user provides invalid tuning options, we silently
         revert to using the defaults, and ignore further requests to change
         the tuning options.  */
      free (table);
      return NULL;
    }

  table->n_buckets
    = next_prime (tuning->is_n_buckets ? candidate
                  : (unsigned) (candidate / tuning->growth_threshold));

  table->bucket = (struct hash_entry *)
    malloc (table->n_buckets * sizeof (struct hash_entry));
  if (table->bucket == NULL)
    {
      free (table);
      return NULL;
    }
  table->bucket_limit = table->bucket + table->n_buckets;

  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      bucket->data = NULL;
      bucket->next = NULL;
    }
  table->n_buckets_used = 0;
  table->n_entries = 0;

  table->hasher = hasher;
  table->comparator = comparator;
  table->data_freer = data_freer;

  table->free_entry_list = NULL;
#if USE_OBSTACK
  obstack_init (&table->entry_stack);
#endif
  return table;
}
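
/* Putting the pieces together, a typical client looks like this (a sketch
   only; `string_hasher' wraps `hash_string' as suggested above, and
   `string_comparator' is an invented comparator wrapping `strcmp'):

     Hash_table *table = hash_initialize (31, NULL, string_hasher,
                                          string_comparator, free);
     char *copy;

     if (table == NULL)
       abort ();
     copy = strdup ("example");
     if (copy == NULL || hash_insert (table, copy) == NULL)
       abort ();
     ... use hash_lookup, hash_delete, etc. ...
     hash_free (table);

   Passing `free' as DATA_FREER lets `hash_free' (below) release the
   strdup'd strings along with the table itself.  */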

/* Make all buckets empty, placing any chained entries on the free list.
   Apply the user-specified function data_freer (if any) to the data of any
   affected entries.  */

void
hash_clear (Hash_table *table)
{
  struct hash_entry *bucket;

  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      if (bucket->data)
        {
          struct hash_entry *cursor;
          struct hash_entry *next;

          /* Free the bucket overflow.  */
          for (cursor = bucket->next; cursor; cursor = next)
            {
              if (table->data_freer)
                (*table->data_freer) (cursor->data);
              cursor->data = NULL;

              next = cursor->next;
              /* Relinking is done one entry at a time, as it is to be
                 expected that overflows are either rare or short.  */
              cursor->next = table->free_entry_list;
              table->free_entry_list = cursor;
            }

          /* Free the bucket head.  */
          if (table->data_freer)
            (*table->data_freer) (bucket->data);
          bucket->data = NULL;
          bucket->next = NULL;
        }
    }

  table->n_buckets_used = 0;
  table->n_entries = 0;
}

/* Reclaim all storage associated with a hash table.  If a data_freer
   function has been supplied by the user when the hash table was created,
   this function applies it to the data of each entry before freeing that
   entry.  */

void
hash_free (Hash_table *table)
{
  struct hash_entry *bucket;
  struct hash_entry *cursor;
  struct hash_entry *next;

  /* Call the user data_freer function.  */
  if (table->data_freer && table->n_entries)
    {
      for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
        {
          if (bucket->data)
            {
              for (cursor = bucket; cursor; cursor = cursor->next)
                (*table->data_freer) (cursor->data);
            }
        }
    }

#if USE_OBSTACK

  obstack_free (&table->entry_stack, NULL);

#else

  /* Free all bucket overflowed entries.  */
  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      for (cursor = bucket->next; cursor; cursor = next)
        {
          next = cursor->next;
          free (cursor);
        }
    }

  /* Also reclaim the internal list of previously freed entries.  */
  for (cursor = table->free_entry_list; cursor; cursor = next)
    {
      next = cursor->next;
      free (cursor);
    }

#endif

  /* Free the remainder of the hash table structure.  */
  free (table->bucket);
  free (table);
}

/* Insertion and deletion.  */

/* Get a new hash entry for a bucket overflow, possibly by recycling a
   previously freed one.  If this is not possible, allocate a new one.  */

static struct hash_entry *
allocate_entry (Hash_table *table)
{
  struct hash_entry *new;

  if (table->free_entry_list)
    {
      new = table->free_entry_list;
      table->free_entry_list = new->next;
    }
  else
    {
#if USE_OBSTACK
      new = (struct hash_entry *)
        obstack_alloc (&table->entry_stack, sizeof (struct hash_entry));
#else
      new = (struct hash_entry *) malloc (sizeof (struct hash_entry));
#endif
    }

  return new;
}

/* Free a hash entry which was part of some bucket overflow,
   saving it for later recycling.  */

static void
free_entry (Hash_table *table, struct hash_entry *entry)
{
  entry->data = NULL;
  entry->next = table->free_entry_list;
  table->free_entry_list = entry;
}

/* This private function is used to help with insertion and deletion.  When
   ENTRY matches an entry in the table, return a pointer to the corresponding
   user data and set *BUCKET_HEAD to the head of the selected bucket.
   Otherwise, return NULL.  When DELETE is true and ENTRY matches an entry in
   the table, unlink the matching entry.  */

static void *
hash_find_entry (Hash_table *table, const void *entry,
                 struct hash_entry **bucket_head, bool delete)
{
  struct hash_entry *bucket
    = table->bucket + table->hasher (entry, table->n_buckets);
  struct hash_entry *cursor;

  assert (bucket < table->bucket_limit);
  *bucket_head = bucket;

  /* Test for empty bucket.  */
  if (bucket->data == NULL)
    return NULL;

  /* See if the entry is the first in the bucket.  */
  if ((*table->comparator) (entry, bucket->data))
    {
      void *data = bucket->data;

      if (delete)
        {
          if (bucket->next)
            {
              struct hash_entry *next = bucket->next;

              /* Bump the first overflow entry into the bucket head, then
                 save the previous first overflow entry for later
                 recycling.  */
              *bucket = *next;
              free_entry (table, next);
            }
          else
            {
              bucket->data = NULL;
            }
        }

      return data;
    }

  /* Scan the bucket overflow.  */
  for (cursor = bucket; cursor->next; cursor = cursor->next)
    {
      if ((*table->comparator) (entry, cursor->next->data))
        {
          void *data = cursor->next->data;

          if (delete)
            {
              struct hash_entry *next = cursor->next;

              /* Unlink the entry to delete, then save the freed entry for
                 later recycling.  */
              cursor->next = next->next;
              free_entry (table, next);
            }

          return data;
        }
    }

  /* No entry found.  */
  return NULL;
}

/* For an already existing hash table, change the number of buckets through
   specifying CANDIDATE.  The contents of the hash table are preserved.  The
   new number of buckets is automatically selected so as to _guarantee_ that
   the table may receive at least CANDIDATE different user entries, including
   those already in the table, before any other growth of the hash table size
   occurs.  If TUNING->IS_N_BUCKETS is true, then CANDIDATE specifies the
   exact number of buckets desired.  */

bool
hash_rehash (Hash_table *table, unsigned candidate)
{
  Hash_table *new_table;
  struct hash_entry *bucket;
  struct hash_entry *cursor;
  struct hash_entry *next;

  new_table = hash_initialize (candidate, table->tuning, table->hasher,
                               table->comparator, table->data_freer);
  if (new_table == NULL)
    return false;

  /* Merely carry the extra old space over to the new table.  */
#if USE_OBSTACK
  obstack_free (&new_table->entry_stack, NULL);
  new_table->entry_stack = table->entry_stack;
#endif
  new_table->free_entry_list = table->free_entry_list;

  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    if (bucket->data)
      for (cursor = bucket; cursor; cursor = next)
        {
          void *data = cursor->data;
          struct hash_entry *new_bucket
            = (new_table->bucket
               + new_table->hasher (data, new_table->n_buckets));

          assert (new_bucket < new_table->bucket_limit);
          next = cursor->next;

          if (new_bucket->data)
            {
              if (cursor == bucket)
                {
                  /* Allocate or recycle an entry, when moving from a bucket
                     header into a bucket overflow.  */
                  struct hash_entry *new_entry = allocate_entry (new_table);

                  if (new_entry == NULL)
                    return false;

                  new_entry->data = data;
                  new_entry->next = new_bucket->next;
                  new_bucket->next = new_entry;
                }
              else
                {
                  /* Merely relink an existing entry, when moving from a
                     bucket overflow into a bucket overflow.  */
                  cursor->next = new_bucket->next;
                  new_bucket->next = cursor;
                }
            }
          else
            {
              /* Free an existing entry, when moving from a bucket
                 overflow into a bucket header.  Also take care of the
                 simple case of moving from a bucket header into a bucket
                 header.  */
              new_bucket->data = data;
              new_table->n_buckets_used++;
              if (cursor != bucket)
                free_entry (new_table, cursor);
            }
        }

  free (table->bucket);
  table->bucket = new_table->bucket;
  table->bucket_limit = new_table->bucket_limit;
  table->n_buckets = new_table->n_buckets;
  table->n_buckets_used = new_table->n_buckets_used;
  table->free_entry_list = new_table->free_entry_list;
  /* table->n_entries already holds its value.  */
#if USE_OBSTACK
  table->entry_stack = new_table->entry_stack;
#endif
  free (new_table);

  return true;
}
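
/* A caller who knows a large batch of insertions is coming can pre-grow the
   table explicitly (a sketch; N_NEW is a hypothetical count of entries
   about to be added):

     if (!hash_rehash (table, hash_get_n_entries (table) + n_new))
       ... handle the allocation failure ...

   which avoids several intermediate automatic rehashes during the batch.  */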

/* If ENTRY matches an entry already in the hash table, return the pointer
   to the entry from the table.  Otherwise, insert ENTRY and return ENTRY.
   Return NULL if the storage required for insertion cannot be allocated.  */

void *
hash_insert (Hash_table *table, const void *entry)
{
  void *data;
  struct hash_entry *bucket;

  assert (entry);		/* cannot insert a NULL entry */

  /* If there's a matching entry already in the table, return that.  */
  if ((data = hash_find_entry (table, entry, &bucket, false)) != NULL)
    return data;

  /* ENTRY is not matched, it should be inserted.  */

  if (bucket->data)
    {
      struct hash_entry *new_entry = allocate_entry (table);

      if (new_entry == NULL)
        return NULL;

      /* Add ENTRY in the overflow of the bucket.  */

      new_entry->data = (void *) entry;
      new_entry->next = bucket->next;
      bucket->next = new_entry;
      table->n_entries++;
      return (void *) entry;
    }

  /* Add ENTRY right in the bucket head.  */

  bucket->data = (void *) entry;
  table->n_entries++;
  table->n_buckets_used++;

  /* If the growth threshold of the buckets in use has been reached, increase
     the table size and rehash.  There's no point in checking the number of
     entries: if the hashing function is ill-conditioned, rehashing is not
     likely to improve it.  */

  if (table->n_buckets_used
      > table->tuning->growth_threshold * table->n_buckets)
    {
      /* Check more fully, before starting real work.  If tuning arguments
         became invalid, the second check will rely on proper defaults.  */
      check_tuning (table);
      if (table->n_buckets_used
          > table->tuning->growth_threshold * table->n_buckets)
        {
          const Hash_tuning *tuning = table->tuning;
          unsigned candidate
            = (unsigned) (tuning->is_n_buckets
                          ? (table->n_buckets * tuning->growth_factor)
                          : (table->n_buckets * tuning->growth_factor
                             * tuning->growth_threshold));

          /* If the rehash fails, arrange to return NULL.  */
          if (!hash_rehash (table, candidate))
            entry = NULL;
        }
    }

  return (void *) entry;
}
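
/* Note the three possible outcomes of `hash_insert' (a usage sketch, with
   ITEM standing for some caller-owned entry):

     void *res = hash_insert (table, item);

     if (res == NULL)
       ... the storage required for insertion could not be allocated ...
     else if (res != item)
       ... an equal entry was already present; ITEM was not inserted ...
     else
       ... ITEM is now in the table ...
*/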

/* If ENTRY is already in the table, remove it and return the just-deleted
   data (the user may want to deallocate its storage).  If ENTRY is not in the
   table, don't modify the table and return NULL.  */

void *
hash_delete (Hash_table *table, const void *entry)
{
  void *data;
  struct hash_entry *bucket;

  data = hash_find_entry (table, entry, &bucket, true);
  if (!data)
    return NULL;

  table->n_entries--;
  if (!bucket->data)
    {
      table->n_buckets_used--;

      /* If the shrink threshold of the buckets in use has been reached,
         rehash into a smaller table.  */

      if (table->n_buckets_used
          < table->tuning->shrink_threshold * table->n_buckets)
        {
          /* Check more fully, before starting real work.  If tuning arguments
             became invalid, the second check will rely on proper defaults.  */
          check_tuning (table);
          if (table->n_buckets_used
              < table->tuning->shrink_threshold * table->n_buckets)
            {
              const Hash_tuning *tuning = table->tuning;
              unsigned candidate
                = (unsigned) (tuning->is_n_buckets
                              ? table->n_buckets * tuning->shrink_factor
                              : (table->n_buckets * tuning->shrink_factor
                                 * tuning->growth_threshold));

              hash_rehash (table, candidate);
            }
        }
    }

  return data;
}
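
/* Since `hash_delete' does not invoke DATA_FREER (that happens only from
   within `hash_free' or `hash_clear'), the caller disposes of the returned
   data when appropriate.  A sketch, for a table of malloc'd entries:

     void *data = hash_delete (table, item);

     if (data)
       free (data);
*/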

/* Testing.  */

#if TESTING

void
hash_print (const Hash_table *table)
{
  struct hash_entry *bucket;

  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      struct hash_entry *cursor;

      if (bucket->data)
        printf ("%d:\n", (int) (bucket - table->bucket));

      for (cursor = bucket; cursor; cursor = cursor->next)
        {
          char *s = (char *) cursor->data;
          /* FIXME */
          if (s)
            printf ("  %s\n", s);
        }
    }
}

#endif /* TESTING */