]> git.saurik.com Git - bison.git/blob - lib/hash.c
da73d1560db070f8d8db78a2ea21e092f7d07375
[bison.git] / lib / hash.c
1 /* hash - hashing table processing.
2 Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
3 Written by Jim Meyering, 1992.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software Foundation,
17 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18
19 /* A generic hash table package. */
20
21 /* Define USE_OBSTACK to 1 if you want the allocator to use obstacks instead
22 of malloc. If you change USE_OBSTACK, you have to recompile! */
23
24 #if HAVE_CONFIG_H
25 # include <config.h>
26 #endif
27 #if HAVE_STDLIB_H
28 # include <stdlib.h>
29 #endif
30 #if HAVE_STDBOOL_H
31 # include <stdbool.h>
32 #else
33 typedef enum {false = 0, true = 1} bool;
34 #endif
35 #include <stdio.h>
36 #include <assert.h>
37
38 #ifndef HAVE_DECL_FREE
39 # error "this configure-time declaration test was not run"
40 #endif
41 #if !HAVE_DECL_FREE
42 void free ();
43 #endif
44
45 #ifndef HAVE_DECL_MALLOC
46 # error "this configure-time declaration test was not run"
47 #endif
48 #if !HAVE_DECL_MALLOC
49 char *malloc ();
50 #endif
51
52 #if USE_OBSTACK
53 # include "obstack.h"
54 # ifndef obstack_chunk_alloc
55 # define obstack_chunk_alloc malloc
56 # endif
57 # ifndef obstack_chunk_free
58 # define obstack_chunk_free free
59 # endif
60 #endif
61
62 #include "hash.h"
63
64 /* A hash table contains many internal entries, each holding a pointer to
65 some user provided data (also called a user entry). An entry indistinctly
66 refers to both the internal entry and its associated user entry. A user
67 entry contents may be hashed by a randomization function (the hashing
68 function, or just `hasher' for short) into a number (or `slot') between 0
69 and the current table size. At each slot position in the hash table,
70 starts a linked chain of entries for which the user data all hash to this
71 slot. A bucket is the collection of all entries hashing to the same slot.
72
73 A good `hasher' function will distribute entries rather evenly in buckets.
74 In the ideal case, the length of each bucket is roughly the number of
75 entries divided by the table size. Finding the slot for a data is usually
76 done in constant time by the `hasher', and the later finding of a precise
77 entry is linear in time with the size of the bucket. Consequently, a
78 larger hash table size (that is, a larger number of buckets) is prone to
79 yielding shorter chains, *given* the `hasher' function behaves properly.
80
81 Long buckets slow down the lookup algorithm. One might use big hash table
82 sizes in hope to reduce the average length of buckets, but this might
83 become inordinate, as unused slots in the hash table take some space. The
84 best bet is to make sure you are using a good `hasher' function (beware
85 that those are not that easy to write! :-), and to use a table size
86 larger than the actual number of entries. */
87
88 /* If an insertion makes the ratio of nonempty buckets to table size larger
89 than the growth threshold (a number between 0.0 and 1.0), then increase
90 the table size by multiplying by the growth factor (a number greater than
91 1.0). The growth threshold defaults to 0.8, and the growth factor
92 defaults to 1.414, meaning that the table will have doubled its size
93 every second time 80% of the buckets get used. */
94 #define DEFAULT_GROWTH_THRESHOLD 0.8
95 #define DEFAULT_GROWTH_FACTOR 1.414
96
97 /* If a deletion empties a bucket and causes the ratio of used buckets to
98 table size to become smaller than the shrink threshold (a number between
99 0.0 and 1.0), then shrink the table by multiplying by the shrink factor (a
100 number greater than the shrink threshold but smaller than 1.0). The shrink
101 threshold and factor default to 0.0 and 1.0, meaning that the table never
102 shrinks. */
103 #define DEFAULT_SHRINK_THRESHOLD 0.0
104 #define DEFAULT_SHRINK_FACTOR 1.0
105
106 /* Use this to initialize or reset a TUNING structure to
107 some sensible values. */
108 static const Hash_tuning default_tuning =
109 {
110 DEFAULT_SHRINK_THRESHOLD,
111 DEFAULT_SHRINK_FACTOR,
112 DEFAULT_GROWTH_THRESHOLD,
113 DEFAULT_GROWTH_FACTOR,
114 false
115 };
116
117 /* Information and lookup. */
118
119 /* The following few functions provide information about the overall hash
120 table organization: the number of entries, number of buckets and maximum
121 length of buckets. */
122
123 /* Return the number of buckets in the hash table. The table size, the total
124 number of buckets (used plus unused), or the maximum number of slots, are
125 the same quantity. */
126
127 unsigned
128 hash_get_n_buckets (const Hash_table *table)
129 {
130 return table->n_buckets;
131 }
132
133 /* Return the number of slots in use (non-empty buckets). */
134
135 unsigned
136 hash_get_n_buckets_used (const Hash_table *table)
137 {
138 return table->n_buckets_used;
139 }
140
141 /* Return the number of active entries. */
142
143 unsigned
144 hash_get_n_entries (const Hash_table *table)
145 {
146 return table->n_entries;
147 }
148
149 /* Return the length of the longest chain (bucket). */
150
151 unsigned
152 hash_get_max_bucket_length (const Hash_table *table)
153 {
154 struct hash_entry *bucket;
155 unsigned max_bucket_length = 0;
156
157 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
158 {
159 if (bucket->data)
160 {
161 struct hash_entry *cursor = bucket;
162 unsigned bucket_length = 1;
163
164 while (cursor = cursor->next, cursor)
165 bucket_length++;
166
167 if (bucket_length > max_bucket_length)
168 max_bucket_length = bucket_length;
169 }
170 }
171
172 return max_bucket_length;
173 }
174
175 /* Do a mild validation of a hash table, by traversing it and checking two
176 statistics. */
177
178 bool
179 hash_table_ok (const Hash_table *table)
180 {
181 struct hash_entry *bucket;
182 unsigned n_buckets_used = 0;
183 unsigned n_entries = 0;
184
185 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
186 {
187 if (bucket->data)
188 {
189 struct hash_entry *cursor = bucket;
190
191 /* Count bucket head. */
192 n_buckets_used++;
193 n_entries++;
194
195 /* Count bucket overflow. */
196 while (cursor = cursor->next, cursor)
197 n_entries++;
198 }
199 }
200
201 if (n_buckets_used == table->n_buckets_used && n_entries == table->n_entries)
202 return true;
203
204 return false;
205 }
206
207 void
208 hash_print_statistics (const Hash_table *table, FILE *stream)
209 {
210 unsigned n_entries = hash_get_n_entries (table);
211 unsigned n_buckets = hash_get_n_buckets (table);
212 unsigned n_buckets_used = hash_get_n_buckets_used (table);
213 unsigned max_bucket_length = hash_get_max_bucket_length (table);
214
215 fprintf (stream, "# entries: %u\n", n_entries);
216 fprintf (stream, "# buckets: %u\n", n_buckets);
217 fprintf (stream, "# buckets used: %u (%.2f%%)\n", n_buckets_used,
218 (100.0 * n_buckets_used) / n_buckets);
219 fprintf (stream, "max bucket length: %u\n", max_bucket_length);
220 }
221
222 /* If ENTRY matches an entry already in the hash table, return the
223 entry from the table. Otherwise, return NULL. */
224
225 void *
226 hash_lookup (const Hash_table *table, const void *entry)
227 {
228 struct hash_entry *bucket
229 = table->bucket + table->hasher (entry, table->n_buckets);
230 struct hash_entry *cursor;
231
232 assert (bucket < table->bucket_limit);
233
234 if (bucket->data == NULL)
235 return NULL;
236
237 for (cursor = bucket; cursor; cursor = cursor->next)
238 if (table->comparator (entry, cursor->data))
239 return cursor->data;
240
241 return NULL;
242 }
243
244 /* Walking. */
245
246 /* The functions in this page traverse the hash table and process the
247 contained entries. For the traversal to work properly, the hash table
248 should not be resized nor modified while any particular entry is being
249 processed. In particular, entries should not be added or removed. */
250
251 /* Return the first data in the table, or NULL if the table is empty. */
252
253 void *
254 hash_get_first (const Hash_table *table)
255 {
256 struct hash_entry *bucket;
257
258 if (table->n_entries == 0)
259 return NULL;
260
261 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
262 if (bucket->data)
263 return bucket->data;
264
265 assert (0);
266 return NULL;
267 }
268
269 /* Return the user data for the entry following ENTRY, where ENTRY has been
270 returned by a previous call to either `hash_get_first' or `hash_get_next'.
271 Return NULL if there are no more entries. */
272
273 void *
274 hash_get_next (const Hash_table *table, const void *entry)
275 {
276 struct hash_entry *bucket
277 = table->bucket + table->hasher (entry, table->n_buckets);
278 struct hash_entry *cursor;
279
280 assert (bucket < table->bucket_limit);
281
282 /* Find next entry in the same bucket. */
283 for (cursor = bucket; cursor; cursor = cursor->next)
284 if (cursor->data == entry && cursor->next)
285 return cursor->next->data;
286
287 /* Find first entry in any subsequent bucket. */
288 while (++bucket < table->bucket_limit)
289 if (bucket->data)
290 return bucket->data;
291
292 /* None found. */
293 return NULL;
294 }
295
296 /* Fill BUFFER with pointers to active user entries in the hash table, then
297 return the number of pointers copied. Do not copy more than BUFFER_SIZE
298 pointers. */
299
300 unsigned
301 hash_get_entries (const Hash_table *table, void **buffer,
302 unsigned buffer_size)
303 {
304 unsigned counter = 0;
305 struct hash_entry *bucket;
306 struct hash_entry *cursor;
307
308 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
309 {
310 if (bucket->data)
311 {
312 for (cursor = bucket; cursor; cursor = cursor->next)
313 {
314 if (counter >= buffer_size)
315 return counter;
316 buffer[counter++] = cursor->data;
317 }
318 }
319 }
320
321 return counter;
322 }
323
324 /* Call a PROCESSOR function for each entry of a hash table, and return the
325 number of entries for which the processor function returned success. A
326 pointer to some PROCESSOR_DATA which will be made available to each call to
327 the processor function. The PROCESSOR accepts two arguments: the first is
328 the user entry being walked into, the second is the value of PROCESSOR_DATA
329 as received. The walking continue for as long as the PROCESSOR function
330 returns nonzero. When it returns zero, the walking is interrupted. */
331
332 unsigned
333 hash_do_for_each (const Hash_table *table, Hash_processor processor,
334 void *processor_data)
335 {
336 unsigned counter = 0;
337 struct hash_entry *bucket;
338 struct hash_entry *cursor;
339
340 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
341 {
342 if (bucket->data)
343 {
344 for (cursor = bucket; cursor; cursor = cursor->next)
345 {
346 if (!(*processor) (cursor->data, processor_data))
347 return counter;
348 counter++;
349 }
350 }
351 }
352
353 return counter;
354 }
355
356 /* Allocation and clean-up. */
357
358 /* Return a hash index for a NUL-terminated STRING between 0 and N_BUCKETS-1.
359 This is a convenience routine for constructing other hashing functions. */
360
361 #if USE_DIFF_HASH
362
363 /* About hashings, Paul Eggert writes to me (FP), on 1994-01-01: "Please see
364 B. J. McKenzie, R. Harries & T. Bell, Selecting a hashing algorithm,
365 Software--practice & experience 20, 2 (Feb 1990), 209-224. Good hash
366 algorithms tend to be domain-specific, so what's good for [diffutils'] io.c
367 may not be good for your application." */
368
369 unsigned
370 hash_string (const char *string, unsigned n_buckets)
371 {
372 # ifndef CHAR_BIT
373 # define CHAR_BIT 8
374 # endif
375 # define ROTATE_LEFT(Value, Shift) \
376 ((Value) << (Shift) | (Value) >> ((sizeof (unsigned) * CHAR_BIT) - (Shift)))
377 # define HASH_ONE_CHAR(Value, Byte) \
378 ((Byte) + ROTATE_LEFT (Value, 7))
379
380 unsigned value = 0;
381
382 for (; *string; string++)
383 value = HASH_ONE_CHAR (value, *(const unsigned char *) string);
384 return value % n_buckets;
385
386 # undef ROTATE_LEFT
387 # undef HASH_ONE_CHAR
388 }
389
390 #else /* not USE_DIFF_HASH */
391
392 /* This one comes from `recode', and performs a bit better than the above as
393 per a few experiments. It is inspired from a hashing routine found in the
394 very old Cyber `snoop', itself written in typical Greg Mansfield style.
395 (By the way, what happened to this excellent man? Is he still alive?) */
396
397 unsigned
398 hash_string (const char *string, unsigned n_buckets)
399 {
400 unsigned value = 0;
401
402 while (*string)
403 value = ((value * 31 + (int) *(const unsigned char *) string++)
404 % n_buckets);
405 return value;
406 }
407
408 #endif /* not USE_DIFF_HASH */
409
410 /* Return true if CANDIDATE is a prime number. CANDIDATE should be an odd
411 number at least equal to 11. */
412
413 static bool
414 is_prime (unsigned long candidate)
415 {
416 unsigned long divisor = 3;
417 unsigned long square = divisor * divisor;
418
419 while (square < candidate && (candidate % divisor))
420 {
421 divisor++;
422 square += 4 * divisor;
423 divisor++;
424 }
425
426 return (candidate % divisor ? true : false);
427 }
428
429 /* Round a given CANDIDATE number up to the nearest prime, and return that
430 prime. Primes lower than 10 are merely skipped. */
431
432 static unsigned long
433 next_prime (unsigned long candidate)
434 {
435 /* Skip small primes. */
436 if (candidate < 10)
437 candidate = 10;
438
439 /* Make it definitely odd. */
440 candidate |= 1;
441
442 while (!is_prime (candidate))
443 candidate += 2;
444
445 return candidate;
446 }
447
448 void
449 hash_reset_tuning (Hash_tuning *tuning)
450 {
451 *tuning = default_tuning;
452 }
453
454 /* For the given hash TABLE, check the user supplied tuning structure for
455 reasonable values, and return true if there is no gross error with it.
456 Otherwise, definitively reset the TUNING field to some acceptable default
457 in the hash table (that is, the user loses the right of further modifying
458 tuning arguments), and return false. */
459
460 static bool
461 check_tuning (Hash_table *table)
462 {
463 const Hash_tuning *tuning = table->tuning;
464
465 if (tuning->growth_threshold > 0.0
466 && tuning->growth_threshold < 1.0
467 && tuning->growth_factor > 1.0
468 && tuning->shrink_threshold >= 0.0
469 && tuning->shrink_threshold < 1.0
470 && tuning->shrink_factor > tuning->shrink_threshold
471 && tuning->shrink_factor <= 1.0
472 && tuning->shrink_threshold < tuning->growth_threshold)
473 return true;
474
475 table->tuning = &default_tuning;
476 return false;
477 }
478
479 /* Allocate and return a new hash table, or NULL upon failure. The initial
480 number of buckets is automatically selected so as to _guarantee_ that you
481 may insert at least CANDIDATE different user entries before any growth of
482 the hash table size occurs. So, if have a reasonably tight a-priori upper
483 bound on the number of entries you intend to insert in the hash table, you
484 may save some table memory and insertion time, by specifying it here. If
485 the IS_N_BUCKETS field of the TUNING structure is true, the CANDIDATE
486 argument has its meaning changed to the wanted number of buckets.
487
488 TUNING points to a structure of user-supplied values, in case some fine
489 tuning is wanted over the default behavior of the hasher. If TUNING is
490 NULL, the default tuning parameters are used instead.
491
492 The user-supplied HASHER function should be provided. It accepts two
493 arguments ENTRY and TABLE_SIZE. It computes, by hashing ENTRY contents, a
494 slot number for that entry which should be in the range 0..TABLE_SIZE-1.
495 This slot number is then returned.
496
497 The user-supplied COMPARATOR function should be provided. It accepts two
498 arguments pointing to user data, it then returns true for a pair of entries
499 that compare equal, or false otherwise. This function is internally called
500 on entries which are already known to hash to the same bucket index.
501
502 The user-supplied DATA_FREER function, when not NULL, may be later called
503 with the user data as an argument, just before the entry containing the
504 data gets freed. This happens from within `hash_free' or `hash_clear'.
505 You should specify this function only if you want these functions to free
506 all of your `data' data. This is typically the case when your data is
507 simply an auxiliary struct that you have malloc'd to aggregate several
508 values. */
509
510 Hash_table *
511 hash_initialize (unsigned candidate, const Hash_tuning *tuning,
512 Hash_hasher hasher, Hash_comparator comparator,
513 Hash_data_freer data_freer)
514 {
515 Hash_table *table;
516 struct hash_entry *bucket;
517
518 if (hasher == NULL || comparator == NULL)
519 return NULL;
520
521 table = (Hash_table *) malloc (sizeof (Hash_table));
522 if (table == NULL)
523 return NULL;
524
525 if (!tuning)
526 tuning = &default_tuning;
527 table->tuning = tuning;
528 if (!check_tuning (table))
529 {
530 /* Fail if the tuning options are invalid. This is the only occasion
531 when the user gets some feedback about it. Once the table is created,
532 if the user provides invalid tuning options, we silently revert to
533 using the defaults, and ignore further request to change the tuning
534 options. */
535 free (table);
536 return NULL;
537 }
538
539 table->n_buckets
540 = next_prime (tuning->is_n_buckets ? candidate
541 : (unsigned) (candidate / tuning->growth_threshold));
542
543 table->bucket = (struct hash_entry *)
544 malloc (table->n_buckets * sizeof (struct hash_entry));
545 if (table->bucket == NULL)
546 {
547 free (table);
548 return NULL;
549 }
550 table->bucket_limit = table->bucket + table->n_buckets;
551
552 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
553 {
554 bucket->data = NULL;
555 bucket->next = NULL;
556 }
557 table->n_buckets_used = 0;
558 table->n_entries = 0;
559
560 table->hasher = hasher;
561 table->comparator = comparator;
562 table->data_freer = data_freer;
563
564 table->free_entry_list = NULL;
565 #if USE_OBSTACK
566 obstack_init (&table->entry_stack);
567 #endif
568 return table;
569 }
570
571 /* Make all buckets empty, placing any chained entries on the free list.
572 Apply the user-specified function data_freer (if any) to the datas of any
573 affected entries. */
574
575 void
576 hash_clear (Hash_table *table)
577 {
578 struct hash_entry *bucket;
579 struct hash_entry *cursor;
580
581 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
582 {
583 if (bucket->data)
584 {
585 /* Free the bucket overflow. */
586 for (cursor = bucket->next; cursor; cursor = cursor->next)
587 {
588 if (table->data_freer)
589 (*table->data_freer) (cursor->data);
590 cursor->data = NULL;
591
592 /* Relinking is done one entry at a time, as it is to be expected
593 that overflows are either rare or short. */
594 cursor->next = table->free_entry_list;
595 table->free_entry_list = cursor;
596 }
597
598 /* Free the bucket head. */
599 if (table->data_freer)
600 (*table->data_freer) (bucket->data);
601 bucket->data = NULL;
602 bucket->next = NULL;
603 }
604 }
605
606 table->n_buckets_used = 0;
607 table->n_entries = 0;
608 }
609
610 /* Reclaim all storage associated with a hash table. If a data_freer
611 function has been supplied by the user when the hash table was created,
612 this function applies it to the data of each entry before freeing that
613 entry. */
614
615 void
616 hash_free (Hash_table *table)
617 {
618 struct hash_entry *bucket;
619 struct hash_entry *cursor;
620 struct hash_entry *next;
621
622 /* Call the user data_freer function. */
623 if (table->data_freer && table->n_entries)
624 {
625 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
626 {
627 if (bucket->data)
628 {
629 for (cursor = bucket; cursor; cursor = cursor->next)
630 {
631 (*table->data_freer) (cursor->data);
632 }
633 }
634 }
635 }
636
637 #if USE_OBSTACK
638
639 obstack_free (&table->entry_stack, NULL);
640
641 #else
642
643 /* Free all bucket overflowed entries. */
644 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
645 {
646 for (cursor = bucket->next; cursor; cursor = next)
647 {
648 next = cursor->next;
649 free (cursor);
650 }
651 }
652
653 /* Also reclaim the internal list of previously freed entries. */
654 for (cursor = table->free_entry_list; cursor; cursor = next)
655 {
656 next = cursor->next;
657 free (cursor);
658 }
659
660 #endif
661
662 /* Free the remainder of the hash table structure. */
663 free (table->bucket);
664 free (table);
665 }
666
667 /* Insertion and deletion. */
668
669 /* Get a new hash entry for a bucket overflow, possibly by reclying a
670 previously freed one. If this is not possible, allocate a new one. */
671
672 static struct hash_entry *
673 allocate_entry (Hash_table *table)
674 {
675 struct hash_entry *new;
676
677 if (table->free_entry_list)
678 {
679 new = table->free_entry_list;
680 table->free_entry_list = new->next;
681 }
682 else
683 {
684 #if USE_OBSTACK
685 new = (struct hash_entry *)
686 obstack_alloc (&table->entry_stack, sizeof (struct hash_entry));
687 #else
688 new = (struct hash_entry *) malloc (sizeof (struct hash_entry));
689 #endif
690 }
691
692 return new;
693 }
694
695 /* Free a hash entry which was part of some bucket overflow,
696 saving it for later recycling. */
697
698 static void
699 free_entry (Hash_table *table, struct hash_entry *entry)
700 {
701 entry->data = NULL;
702 entry->next = table->free_entry_list;
703 table->free_entry_list = entry;
704 }
705
706 /* This private function is used to help with insertion and deletion. When
707 ENTRY matches an entry in the table, return a pointer to the corresponding
708 user data and set *BUCKET_HEAD to the head of the selected bucket.
709 Otherwise, return NULL. When DELETE is true and ENTRY matches an entry in
710 the table, unlink the matching entry. */
711
712 static void *
713 hash_find_entry (Hash_table *table, const void *entry,
714 struct hash_entry **bucket_head, bool delete)
715 {
716 struct hash_entry *bucket
717 = table->bucket + table->hasher (entry, table->n_buckets);
718 struct hash_entry *cursor;
719
720 assert (bucket < table->bucket_limit);
721 *bucket_head = bucket;
722
723 /* Test for empty bucket. */
724 if (bucket->data == NULL)
725 return NULL;
726
727 /* See if the entry is the first in the bucket. */
728 if ((*table->comparator) (entry, bucket->data))
729 {
730 void *data = bucket->data;
731
732 if (delete)
733 {
734 if (bucket->next)
735 {
736 struct hash_entry *next = bucket->next;
737
738 /* Bump the first overflow entry into the bucket head, then save
739 the previous first overflow entry for later recycling. */
740 *bucket = *next;
741 free_entry (table, next);
742 }
743 else
744 {
745 bucket->data = NULL;
746 }
747 }
748
749 return data;
750 }
751
752 /* Scan the bucket overflow. */
753 for (cursor = bucket; cursor->next; cursor = cursor->next)
754 {
755 if ((*table->comparator) (entry, cursor->next->data))
756 {
757 void *data = cursor->next->data;
758
759 if (delete)
760 {
761 struct hash_entry *next = cursor->next;
762
763 /* Unlink the entry to delete, then save the freed entry for later
764 recycling. */
765 cursor->next = next->next;
766 free_entry (table, next);
767 }
768
769 return data;
770 }
771 }
772
773 /* No entry found. */
774 return NULL;
775 }
776
777 /* For an already existing hash table, change the number of buckets through
778 specifying CANDIDATE. The contents of the hash table are preserved. The
779 new number of buckets is automatically selected so as to _guarantee_ that
780 the table may receive at least CANDIDATE different user entries, including
781 those already in the table, before any other growth of the hash table size
782 occurs. If TUNING->IS_N_BUCKETS is true, then CANDIDATE specifies the
783 exact number of buckets desired. */
784
785 bool
786 hash_rehash (Hash_table *table, unsigned candidate)
787 {
788 Hash_table *new_table;
789 struct hash_entry *bucket;
790 struct hash_entry *cursor;
791 struct hash_entry *next;
792
793 new_table = hash_initialize (candidate, table->tuning, table->hasher,
794 table->comparator, table->data_freer);
795 if (new_table == NULL)
796 return false;
797
798 /* Merely reuse the extra old space into the new table. */
799 #if USE_OBSTACK
800 obstack_free (&new_table->entry_stack, NULL);
801 new_table->entry_stack = table->entry_stack;
802 #endif
803 new_table->free_entry_list = table->free_entry_list;
804
805 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
806 if (bucket->data)
807 for (cursor = bucket; cursor; cursor = next)
808 {
809 void *data = cursor->data;
810 struct hash_entry *new_bucket
811 = (new_table->bucket
812 + new_table->hasher (data, new_table->n_buckets));
813
814 assert (new_bucket < new_table->bucket_limit);
815 next = cursor->next;
816
817 if (new_bucket->data)
818 {
819 if (cursor == bucket)
820 {
821 /* Allocate or recycle an entry, when moving from a bucket
822 header into a bucket overflow. */
823 struct hash_entry *new_entry = allocate_entry (new_table);
824
825 if (new_entry == NULL)
826 return false;
827
828 new_entry->data = data;
829 new_entry->next = new_bucket->next;
830 new_bucket->next = new_entry;
831 }
832 else
833 {
834 /* Merely relink an existing entry, when moving from a
835 bucket overflow into a bucket overflow. */
836 cursor->next = new_bucket->next;
837 new_bucket->next = cursor;
838 }
839 }
840 else
841 {
842 /* Free an existing entry, when moving from a bucket
843 overflow into a bucket header. Also take care of the
844 simple case of moving from a bucket header into a bucket
845 header. */
846 new_bucket->data = data;
847 new_table->n_buckets_used++;
848 if (cursor != bucket)
849 free_entry (new_table, cursor);
850 }
851 }
852
853 free (table->bucket);
854 table->bucket = new_table->bucket;
855 table->bucket_limit = new_table->bucket_limit;
856 table->n_buckets = new_table->n_buckets;
857 table->n_buckets_used = new_table->n_buckets_used;
858 table->free_entry_list = new_table->free_entry_list;
859 /* table->n_entries already holds its value. */
860 #if USE_OBSTACK
861 table->entry_stack = new_table->entry_stack;
862 #endif
863 free (new_table);
864
865 return true;
866 }
867
868 /* If ENTRY matches an entry already in the hash table, return the pointer
869 to the entry from the table. Otherwise, insert ENTRY and return ENTRY.
870 Return NULL if the storage required for insertion cannot be allocated. */
871
872 void *
873 hash_insert (Hash_table *table, const void *entry)
874 {
875 void *data;
876 struct hash_entry *bucket;
877
878 assert (entry); /* cannot insert a NULL entry */
879
880 /* If there's a matching entry already in the table, return that. */
881 if ((data = hash_find_entry (table, entry, &bucket, false)) != NULL)
882 return data;
883
884 /* ENTRY is not matched, it should be inserted. */
885
886 if (bucket->data)
887 {
888 struct hash_entry *new_entry = allocate_entry (table);
889
890 if (new_entry == NULL)
891 return NULL;
892
893 /* Add ENTRY in the overflow of the bucket. */
894
895 new_entry->data = (void *) entry;
896 new_entry->next = bucket->next;
897 bucket->next = new_entry;
898 table->n_entries++;
899 return (void *) entry;
900 }
901
902 /* Add ENTRY right in the bucket head. */
903
904 bucket->data = (void *) entry;
905 table->n_entries++;
906 table->n_buckets_used++;
907
908 /* If the growth threshold of the buckets in use has been reached, increase
909 the table size and rehash. There's no point in checking the number of
910 entries: if the hashing function is ill-conditioned, rehashing is not
911 likely to improve it. */
912
913 if (table->n_buckets_used
914 > table->tuning->growth_threshold * table->n_buckets)
915 {
916 /* Check more fully, before starting real work. If tuning arguments
917 became invalid, the second check will rely on proper defaults. */
918 check_tuning (table);
919 if (table->n_buckets_used
920 > table->tuning->growth_threshold * table->n_buckets)
921 {
922 const Hash_tuning *tuning = table->tuning;
923 unsigned candidate
924 = (unsigned) (tuning->is_n_buckets
925 ? (table->n_buckets * tuning->growth_factor)
926 : (table->n_buckets * tuning->growth_factor
927 * tuning->growth_threshold));
928
929 /* If the rehash fails, arrange to return NULL. */
930 if (!hash_rehash (table, candidate))
931 entry = NULL;
932 }
933 }
934
935 return (void *) entry;
936 }
937
938 /* If ENTRY is already in the table, remove it and return the just-deleted
939 data (the user may want to deallocate its storage). If ENTRY is not in the
940 table, don't modify the table and return NULL. */
941
942 void *
943 hash_delete (Hash_table *table, const void *entry)
944 {
945 void *data;
946 struct hash_entry *bucket;
947
948 data = hash_find_entry (table, entry, &bucket, true);
949 if (!data)
950 return NULL;
951
952 table->n_entries--;
953 if (!bucket->data)
954 {
955 table->n_buckets_used--;
956
957 /* If the shrink threshold of the buckets in use has been reached,
958 rehash into a smaller table. */
959
960 if (table->n_buckets_used
961 < table->tuning->shrink_threshold * table->n_buckets)
962 {
963 /* Check more fully, before starting real work. If tuning arguments
964 became invalid, the second check will rely on proper defaults. */
965 check_tuning (table);
966 if (table->n_buckets_used
967 < table->tuning->shrink_threshold * table->n_buckets)
968 {
969 const Hash_tuning *tuning = table->tuning;
970 unsigned candidate
971 = (unsigned) (tuning->is_n_buckets
972 ? table->n_buckets * tuning->shrink_factor
973 : (table->n_buckets * tuning->shrink_factor
974 * tuning->growth_threshold));
975
976 hash_rehash (table, candidate);
977 }
978 }
979 }
980
981 return data;
982 }
983
984 /* Testing. */
985
986 #if TESTING
987
988 void
989 hash_print (const Hash_table *table)
990 {
991 struct hash_entry *bucket;
992
993 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
994 {
995 struct hash_entry *cursor;
996
997 if (bucket)
998 printf ("%d:\n", slot);
999
1000 for (cursor = bucket; cursor; cursor = cursor->next)
1001 {
1002 char *s = (char *) cursor->data;
1003 /* FIXME */
1004 printf (" %s\n", s);
1005 }
1006 }
1007 }
1008
1009 #endif /* TESTING */