]> git.saurik.com Git - bison.git/blob - lib/hash.c
%expect-violations are now just warnings, reverting to old behavior.
[bison.git] / lib / hash.c
1 /* hash - hashing table processing.
2
3 Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003 Free Software
4 Foundation, Inc.
5
6 Written by Jim Meyering, 1992.
7
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software Foundation,
20 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
21
22 /* A generic hash table package. */
23
24 /* Define USE_OBSTACK to 1 if you want the allocator to use obstacks instead
25 of malloc. If you change USE_OBSTACK, you have to recompile! */
26
27 #if HAVE_CONFIG_H
28 # include <config.h>
29 #endif
30 #if HAVE_STDLIB_H
31 # include <stdlib.h>
32 #endif
33
34 #if HAVE_STDBOOL_H
35 # include <stdbool.h>
36 #else
37 # ifndef __bool_true_false_are_defined
38 # if ! HAVE__BOOL
39 # ifdef __cplusplus
40 typedef bool _Bool;
41 # else
42 typedef unsigned char _Bool;
43 # endif
44 # endif
45 # define bool _Bool
46 # define false 0
47 # define true 1
48 # define __bool_true_false_are_defined 1
49 # endif
50 #endif
51
52 #include <stdio.h>
53
54 #ifndef HAVE_DECL_FREE
55 "this configure-time declaration test was not run"
56 #endif
57 #if !HAVE_DECL_FREE
58 void free ();
59 #endif
60
61 #ifndef HAVE_DECL_MALLOC
62 "this configure-time declaration test was not run"
63 #endif
64 #if !HAVE_DECL_MALLOC
65 char *malloc ();
66 #endif
67
68 #if USE_OBSTACK
69 # include "obstack.h"
70 # ifndef obstack_chunk_alloc
71 # define obstack_chunk_alloc malloc
72 # endif
73 # ifndef obstack_chunk_free
74 # define obstack_chunk_free free
75 # endif
76 #endif
77
78 #include "hash.h"
79
80 struct hash_table
81 {
82 /* The array of buckets starts at BUCKET and extends to BUCKET_LIMIT-1,
83 for a possibility of N_BUCKETS. Among those, N_BUCKETS_USED buckets
84 are not empty, there are N_ENTRIES active entries in the table. */
85 struct hash_entry *bucket;
86 struct hash_entry *bucket_limit;
87 unsigned n_buckets;
88 unsigned n_buckets_used;
89 unsigned n_entries;
90
91 /* Tuning arguments, kept in a physicaly separate structure. */
92 const Hash_tuning *tuning;
93
94 /* Three functions are given to `hash_initialize', see the documentation
95 block for this function. In a word, HASHER randomizes a user entry
96 into a number up from 0 up to some maximum minus 1; COMPARATOR returns
97 true if two user entries compare equally; and DATA_FREER is the cleanup
98 function for a user entry. */
99 Hash_hasher hasher;
100 Hash_comparator comparator;
101 Hash_data_freer data_freer;
102
103 /* A linked list of freed struct hash_entry structs. */
104 struct hash_entry *free_entry_list;
105
106 #if USE_OBSTACK
107 /* Whenever obstacks are used, it is possible to allocate all overflowed
108 entries into a single stack, so they all can be freed in a single
109 operation. It is not clear if the speedup is worth the trouble. */
110 struct obstack entry_stack;
111 #endif
112 };
113
114 /* A hash table contains many internal entries, each holding a pointer to
115 some user provided data (also called a user entry). An entry indistinctly
116 refers to both the internal entry and its associated user entry. A user
117 entry contents may be hashed by a randomization function (the hashing
118 function, or just `hasher' for short) into a number (or `slot') between 0
119 and the current table size. At each slot position in the hash table,
120 starts a linked chain of entries for which the user data all hash to this
121 slot. A bucket is the collection of all entries hashing to the same slot.
122
123 A good `hasher' function will distribute entries rather evenly in buckets.
124 In the ideal case, the length of each bucket is roughly the number of
125 entries divided by the table size. Finding the slot for a data is usually
126 done in constant time by the `hasher', and the later finding of a precise
127 entry is linear in time with the size of the bucket. Consequently, a
128 larger hash table size (that is, a larger number of buckets) is prone to
129 yielding shorter chains, *given* the `hasher' function behaves properly.
130
131 Long buckets slow down the lookup algorithm. One might use big hash table
132 sizes in hope to reduce the average length of buckets, but this might
133 become inordinate, as unused slots in the hash table take some space. The
134 best bet is to make sure you are using a good `hasher' function (beware
135 that those are not that easy to write! :-), and to use a table size
136 larger than the actual number of entries. */
137
138 /* If an insertion makes the ratio of nonempty buckets to table size larger
139 than the growth threshold (a number between 0.0 and 1.0), then increase
140 the table size by multiplying by the growth factor (a number greater than
141 1.0). The growth threshold defaults to 0.8, and the growth factor
142 defaults to 1.414, meaning that the table will have doubled its size
143 every second time 80% of the buckets get used. */
144 #define DEFAULT_GROWTH_THRESHOLD 0.8
145 #define DEFAULT_GROWTH_FACTOR 1.414
146
147 /* If a deletion empties a bucket and causes the ratio of used buckets to
148 table size to become smaller than the shrink threshold (a number between
149 0.0 and 1.0), then shrink the table by multiplying by the shrink factor (a
150 number greater than the shrink threshold but smaller than 1.0). The shrink
151 threshold and factor default to 0.0 and 1.0, meaning that the table never
152 shrinks. */
153 #define DEFAULT_SHRINK_THRESHOLD 0.0
154 #define DEFAULT_SHRINK_FACTOR 1.0
155
156 /* Use this to initialize or reset a TUNING structure to
157 some sensible values. */
158 static const Hash_tuning default_tuning =
159 {
160 DEFAULT_SHRINK_THRESHOLD,
161 DEFAULT_SHRINK_FACTOR,
162 DEFAULT_GROWTH_THRESHOLD,
163 DEFAULT_GROWTH_FACTOR,
164 false
165 };
166
167 /* Information and lookup. */
168
169 /* The following few functions provide information about the overall hash
170 table organization: the number of entries, number of buckets and maximum
171 length of buckets. */
172
173 /* Return the number of buckets in the hash table. The table size, the total
174 number of buckets (used plus unused), or the maximum number of slots, are
175 the same quantity. */
176
177 unsigned
178 hash_get_n_buckets (const Hash_table *table)
179 {
180 return table->n_buckets;
181 }
182
183 /* Return the number of slots in use (non-empty buckets). */
184
185 unsigned
186 hash_get_n_buckets_used (const Hash_table *table)
187 {
188 return table->n_buckets_used;
189 }
190
191 /* Return the number of active entries. */
192
193 unsigned
194 hash_get_n_entries (const Hash_table *table)
195 {
196 return table->n_entries;
197 }
198
199 /* Return the length of the longest chain (bucket). */
200
201 unsigned
202 hash_get_max_bucket_length (const Hash_table *table)
203 {
204 struct hash_entry *bucket;
205 unsigned max_bucket_length = 0;
206
207 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
208 {
209 if (bucket->data)
210 {
211 struct hash_entry *cursor = bucket;
212 unsigned bucket_length = 1;
213
214 while (cursor = cursor->next, cursor)
215 bucket_length++;
216
217 if (bucket_length > max_bucket_length)
218 max_bucket_length = bucket_length;
219 }
220 }
221
222 return max_bucket_length;
223 }
224
225 /* Do a mild validation of a hash table, by traversing it and checking two
226 statistics. */
227
228 bool
229 hash_table_ok (const Hash_table *table)
230 {
231 struct hash_entry *bucket;
232 unsigned n_buckets_used = 0;
233 unsigned n_entries = 0;
234
235 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
236 {
237 if (bucket->data)
238 {
239 struct hash_entry *cursor = bucket;
240
241 /* Count bucket head. */
242 n_buckets_used++;
243 n_entries++;
244
245 /* Count bucket overflow. */
246 while (cursor = cursor->next, cursor)
247 n_entries++;
248 }
249 }
250
251 if (n_buckets_used == table->n_buckets_used && n_entries == table->n_entries)
252 return true;
253
254 return false;
255 }
256
257 void
258 hash_print_statistics (const Hash_table *table, FILE *stream)
259 {
260 unsigned n_entries = hash_get_n_entries (table);
261 unsigned n_buckets = hash_get_n_buckets (table);
262 unsigned n_buckets_used = hash_get_n_buckets_used (table);
263 unsigned max_bucket_length = hash_get_max_bucket_length (table);
264
265 fprintf (stream, "# entries: %u\n", n_entries);
266 fprintf (stream, "# buckets: %u\n", n_buckets);
267 fprintf (stream, "# buckets used: %u (%.2f%%)\n", n_buckets_used,
268 (100.0 * n_buckets_used) / n_buckets);
269 fprintf (stream, "max bucket length: %u\n", max_bucket_length);
270 }
271
272 /* If ENTRY matches an entry already in the hash table, return the
273 entry from the table. Otherwise, return NULL. */
274
275 void *
276 hash_lookup (const Hash_table *table, const void *entry)
277 {
278 struct hash_entry *bucket
279 = table->bucket + table->hasher (entry, table->n_buckets);
280 struct hash_entry *cursor;
281
282 if (! (bucket < table->bucket_limit))
283 abort ();
284
285 if (bucket->data == NULL)
286 return NULL;
287
288 for (cursor = bucket; cursor; cursor = cursor->next)
289 if (table->comparator (entry, cursor->data))
290 return cursor->data;
291
292 return NULL;
293 }
294
295 /* Walking. */
296
297 /* The functions in this page traverse the hash table and process the
298 contained entries. For the traversal to work properly, the hash table
299 should not be resized nor modified while any particular entry is being
300 processed. In particular, entries should not be added or removed. */
301
302 /* Return the first data in the table, or NULL if the table is empty. */
303
304 void *
305 hash_get_first (const Hash_table *table)
306 {
307 struct hash_entry *bucket;
308
309 if (table->n_entries == 0)
310 return NULL;
311
312 for (bucket = table->bucket; ; bucket++)
313 if (! (bucket < table->bucket_limit))
314 abort ();
315 else if (bucket->data)
316 return bucket->data;
317 }
318
319 /* Return the user data for the entry following ENTRY, where ENTRY has been
320 returned by a previous call to either `hash_get_first' or `hash_get_next'.
321 Return NULL if there are no more entries. */
322
323 void *
324 hash_get_next (const Hash_table *table, const void *entry)
325 {
326 struct hash_entry *bucket
327 = table->bucket + table->hasher (entry, table->n_buckets);
328 struct hash_entry *cursor;
329
330 if (! (bucket < table->bucket_limit))
331 abort ();
332
333 /* Find next entry in the same bucket. */
334 for (cursor = bucket; cursor; cursor = cursor->next)
335 if (cursor->data == entry && cursor->next)
336 return cursor->next->data;
337
338 /* Find first entry in any subsequent bucket. */
339 while (++bucket < table->bucket_limit)
340 if (bucket->data)
341 return bucket->data;
342
343 /* None found. */
344 return NULL;
345 }
346
347 /* Fill BUFFER with pointers to active user entries in the hash table, then
348 return the number of pointers copied. Do not copy more than BUFFER_SIZE
349 pointers. */
350
351 unsigned
352 hash_get_entries (const Hash_table *table, void **buffer,
353 unsigned buffer_size)
354 {
355 unsigned counter = 0;
356 struct hash_entry *bucket;
357 struct hash_entry *cursor;
358
359 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
360 {
361 if (bucket->data)
362 {
363 for (cursor = bucket; cursor; cursor = cursor->next)
364 {
365 if (counter >= buffer_size)
366 return counter;
367 buffer[counter++] = cursor->data;
368 }
369 }
370 }
371
372 return counter;
373 }
374
375 /* Call a PROCESSOR function for each entry of a hash table, and return the
376 number of entries for which the processor function returned success. A
377 pointer to some PROCESSOR_DATA which will be made available to each call to
378 the processor function. The PROCESSOR accepts two arguments: the first is
379 the user entry being walked into, the second is the value of PROCESSOR_DATA
380 as received. The walking continue for as long as the PROCESSOR function
381 returns nonzero. When it returns zero, the walking is interrupted. */
382
383 unsigned
384 hash_do_for_each (const Hash_table *table, Hash_processor processor,
385 void *processor_data)
386 {
387 unsigned counter = 0;
388 struct hash_entry *bucket;
389 struct hash_entry *cursor;
390
391 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
392 {
393 if (bucket->data)
394 {
395 for (cursor = bucket; cursor; cursor = cursor->next)
396 {
397 if (!(*processor) (cursor->data, processor_data))
398 return counter;
399 counter++;
400 }
401 }
402 }
403
404 return counter;
405 }
406
407 /* Allocation and clean-up. */
408
/* Hash the NUL-terminated STRING into a slot number between 0 and
   N_BUCKETS-1.  This is a convenience routine meant as a building block for
   other hashing functions.  */

#if USE_DIFF_HASH

/* About hashings, Paul Eggert writes to me (FP), on 1994-01-01: "Please see
   B. J. McKenzie, R. Harries & T. Bell, Selecting a hashing algorithm,
   Software--practice & experience 20, 2 (Feb 1990), 209-224.  Good hash
   algorithms tend to be domain-specific, so what's good for [diffutils'] io.c
   may not be good for your application."  */

unsigned
hash_string (const char *string, unsigned n_buckets)
{
# ifndef CHAR_BIT
#  define CHAR_BIT 8
# endif

  const unsigned char *byte = (const unsigned char *) string;
  unsigned value = 0;

  /* For each byte: rotate the accumulator left by 7 bits, then add the
     byte.  The reduction modulo N_BUCKETS happens only once, at the end.  */
  while (*byte)
    {
      value = (*byte
               + (value << 7
                  | value >> ((sizeof (unsigned) * CHAR_BIT) - 7)));
      byte++;
    }

  return value % n_buckets;
}

#else /* not USE_DIFF_HASH */

/* This one comes from `recode', and performs a bit better than the above as
   per a few experiments.  It is inspired from a hashing routine found in the
   very old Cyber `snoop', itself written in typical Greg Mansfield style.
   (By the way, what happened to this excellent man?  Is he still alive?)  */

unsigned
hash_string (const char *string, unsigned n_buckets)
{
  const unsigned char *byte;
  unsigned value = 0;

  /* Multiply by 31, add the byte, and reduce modulo N_BUCKETS at every
     step, so VALUE always stays below N_BUCKETS.  */
  for (byte = (const unsigned char *) string; *byte; byte++)
    value = (value * 31 + (int) *byte) % n_buckets;

  return value;
}

#endif /* not USE_DIFF_HASH */
460
/* Return true if CANDIDATE is a prime number.  CANDIDATE should be an odd
   number at least equal to 11; only odd trial divisors, starting at 3, are
   tried.

   The search stops once DIVISOR exceeds the square root of CANDIDATE.  That
   bound is tested as `divisor <= candidate / divisor' rather than by
   maintaining DIVISOR squared, because the square wraps around for
   candidates close to ULONG_MAX and would then yield wrong answers.  */

static bool
is_prime (unsigned long candidate)
{
  unsigned long divisor = 3;

  while (divisor <= candidate / divisor && candidate % divisor != 0)
    divisor += 2;

  /* Either DIVISOR divides CANDIDATE (composite), or the search ran past
     the square root without finding a divisor (prime).  */
  return candidate % divisor != 0;
}
479
/* Return the smallest prime that is not below CANDIDATE.  Primes lower than
   10 are never returned: any smaller CANDIDATE is first raised to 10.  */

static unsigned long
next_prime (unsigned long candidate)
{
  unsigned long trial = candidate < 10 ? 10 : candidate;

  /* Force an odd starting point, then step over odd numbers only.  */
  for (trial |= 1; !is_prime (trial); trial += 2)
    continue;

  return trial;
}
498
499 void
500 hash_reset_tuning (Hash_tuning *tuning)
501 {
502 *tuning = default_tuning;
503 }
504
505 /* For the given hash TABLE, check the user supplied tuning structure for
506 reasonable values, and return true if there is no gross error with it.
507 Otherwise, definitively reset the TUNING field to some acceptable default
508 in the hash table (that is, the user loses the right of further modifying
509 tuning arguments), and return false. */
510
511 static bool
512 check_tuning (Hash_table *table)
513 {
514 const Hash_tuning *tuning = table->tuning;
515
516 if (tuning->growth_threshold > 0.0
517 && tuning->growth_threshold < 1.0
518 && tuning->growth_factor > 1.0
519 && tuning->shrink_threshold >= 0.0
520 && tuning->shrink_threshold < 1.0
521 && tuning->shrink_factor > tuning->shrink_threshold
522 && tuning->shrink_factor <= 1.0
523 && tuning->shrink_threshold < tuning->growth_threshold)
524 return true;
525
526 table->tuning = &default_tuning;
527 return false;
528 }
529
530 /* Allocate and return a new hash table, or NULL upon failure. The initial
531 number of buckets is automatically selected so as to _guarantee_ that you
532 may insert at least CANDIDATE different user entries before any growth of
533 the hash table size occurs. So, if have a reasonably tight a-priori upper
534 bound on the number of entries you intend to insert in the hash table, you
535 may save some table memory and insertion time, by specifying it here. If
536 the IS_N_BUCKETS field of the TUNING structure is true, the CANDIDATE
537 argument has its meaning changed to the wanted number of buckets.
538
539 TUNING points to a structure of user-supplied values, in case some fine
540 tuning is wanted over the default behavior of the hasher. If TUNING is
541 NULL, the default tuning parameters are used instead.
542
543 The user-supplied HASHER function should be provided. It accepts two
544 arguments ENTRY and TABLE_SIZE. It computes, by hashing ENTRY contents, a
545 slot number for that entry which should be in the range 0..TABLE_SIZE-1.
546 This slot number is then returned.
547
548 The user-supplied COMPARATOR function should be provided. It accepts two
549 arguments pointing to user data, it then returns true for a pair of entries
550 that compare equal, or false otherwise. This function is internally called
551 on entries which are already known to hash to the same bucket index.
552
553 The user-supplied DATA_FREER function, when not NULL, may be later called
554 with the user data as an argument, just before the entry containing the
555 data gets freed. This happens from within `hash_free' or `hash_clear'.
556 You should specify this function only if you want these functions to free
557 all of your `data' data. This is typically the case when your data is
558 simply an auxiliary struct that you have malloc'd to aggregate several
559 values. */
560
561 Hash_table *
562 hash_initialize (unsigned candidate, const Hash_tuning *tuning,
563 Hash_hasher hasher, Hash_comparator comparator,
564 Hash_data_freer data_freer)
565 {
566 Hash_table *table;
567 struct hash_entry *bucket;
568
569 if (hasher == NULL || comparator == NULL)
570 return NULL;
571
572 table = (Hash_table *) malloc (sizeof (Hash_table));
573 if (table == NULL)
574 return NULL;
575
576 if (!tuning)
577 tuning = &default_tuning;
578 table->tuning = tuning;
579 if (!check_tuning (table))
580 {
581 /* Fail if the tuning options are invalid. This is the only occasion
582 when the user gets some feedback about it. Once the table is created,
583 if the user provides invalid tuning options, we silently revert to
584 using the defaults, and ignore further request to change the tuning
585 options. */
586 free (table);
587 return NULL;
588 }
589
590 table->n_buckets
591 = next_prime (tuning->is_n_buckets ? candidate
592 : (unsigned) (candidate / tuning->growth_threshold));
593
594 table->bucket = (struct hash_entry *)
595 malloc (table->n_buckets * sizeof (struct hash_entry));
596 if (table->bucket == NULL)
597 {
598 free (table);
599 return NULL;
600 }
601 table->bucket_limit = table->bucket + table->n_buckets;
602
603 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
604 {
605 bucket->data = NULL;
606 bucket->next = NULL;
607 }
608 table->n_buckets_used = 0;
609 table->n_entries = 0;
610
611 table->hasher = hasher;
612 table->comparator = comparator;
613 table->data_freer = data_freer;
614
615 table->free_entry_list = NULL;
616 #if USE_OBSTACK
617 obstack_init (&table->entry_stack);
618 #endif
619 return table;
620 }
621
622 /* Make all buckets empty, placing any chained entries on the free list.
623 Apply the user-specified function data_freer (if any) to the datas of any
624 affected entries. */
625
626 void
627 hash_clear (Hash_table *table)
628 {
629 struct hash_entry *bucket;
630
631 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
632 {
633 if (bucket->data)
634 {
635 struct hash_entry *cursor;
636 struct hash_entry *next;
637
638 /* Free the bucket overflow. */
639 for (cursor = bucket->next; cursor; cursor = next)
640 {
641 if (table->data_freer)
642 (*table->data_freer) (cursor->data);
643 cursor->data = NULL;
644
645 next = cursor->next;
646 /* Relinking is done one entry at a time, as it is to be expected
647 that overflows are either rare or short. */
648 cursor->next = table->free_entry_list;
649 table->free_entry_list = cursor;
650 }
651
652 /* Free the bucket head. */
653 if (table->data_freer)
654 (*table->data_freer) (bucket->data);
655 bucket->data = NULL;
656 bucket->next = NULL;
657 }
658 }
659
660 table->n_buckets_used = 0;
661 table->n_entries = 0;
662 }
663
664 /* Reclaim all storage associated with a hash table. If a data_freer
665 function has been supplied by the user when the hash table was created,
666 this function applies it to the data of each entry before freeing that
667 entry. */
668
669 void
670 hash_free (Hash_table *table)
671 {
672 struct hash_entry *bucket;
673 struct hash_entry *cursor;
674 struct hash_entry *next;
675
676 /* Call the user data_freer function. */
677 if (table->data_freer && table->n_entries)
678 {
679 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
680 {
681 if (bucket->data)
682 {
683 for (cursor = bucket; cursor; cursor = cursor->next)
684 {
685 (*table->data_freer) (cursor->data);
686 }
687 }
688 }
689 }
690
691 #if USE_OBSTACK
692
693 obstack_free (&table->entry_stack, NULL);
694
695 #else
696
697 /* Free all bucket overflowed entries. */
698 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
699 {
700 for (cursor = bucket->next; cursor; cursor = next)
701 {
702 next = cursor->next;
703 free (cursor);
704 }
705 }
706
707 /* Also reclaim the internal list of previously freed entries. */
708 for (cursor = table->free_entry_list; cursor; cursor = next)
709 {
710 next = cursor->next;
711 free (cursor);
712 }
713
714 #endif
715
716 /* Free the remainder of the hash table structure. */
717 free (table->bucket);
718 free (table);
719 }
720
721 /* Insertion and deletion. */
722
723 /* Get a new hash entry for a bucket overflow, possibly by reclying a
724 previously freed one. If this is not possible, allocate a new one. */
725
726 static struct hash_entry *
727 allocate_entry (Hash_table *table)
728 {
729 struct hash_entry *new;
730
731 if (table->free_entry_list)
732 {
733 new = table->free_entry_list;
734 table->free_entry_list = new->next;
735 }
736 else
737 {
738 #if USE_OBSTACK
739 new = (struct hash_entry *)
740 obstack_alloc (&table->entry_stack, sizeof (struct hash_entry));
741 #else
742 new = (struct hash_entry *) malloc (sizeof (struct hash_entry));
743 #endif
744 }
745
746 return new;
747 }
748
749 /* Free a hash entry which was part of some bucket overflow,
750 saving it for later recycling. */
751
752 static void
753 free_entry (Hash_table *table, struct hash_entry *entry)
754 {
755 entry->data = NULL;
756 entry->next = table->free_entry_list;
757 table->free_entry_list = entry;
758 }
759
760 /* This private function is used to help with insertion and deletion. When
761 ENTRY matches an entry in the table, return a pointer to the corresponding
762 user data and set *BUCKET_HEAD to the head of the selected bucket.
763 Otherwise, return NULL. When DELETE is true and ENTRY matches an entry in
764 the table, unlink the matching entry. */
765
766 static void *
767 hash_find_entry (Hash_table *table, const void *entry,
768 struct hash_entry **bucket_head, bool delete)
769 {
770 struct hash_entry *bucket
771 = table->bucket + table->hasher (entry, table->n_buckets);
772 struct hash_entry *cursor;
773
774 if (! (bucket < table->bucket_limit))
775 abort ();
776
777 *bucket_head = bucket;
778
779 /* Test for empty bucket. */
780 if (bucket->data == NULL)
781 return NULL;
782
783 /* See if the entry is the first in the bucket. */
784 if ((*table->comparator) (entry, bucket->data))
785 {
786 void *data = bucket->data;
787
788 if (delete)
789 {
790 if (bucket->next)
791 {
792 struct hash_entry *next = bucket->next;
793
794 /* Bump the first overflow entry into the bucket head, then save
795 the previous first overflow entry for later recycling. */
796 *bucket = *next;
797 free_entry (table, next);
798 }
799 else
800 {
801 bucket->data = NULL;
802 }
803 }
804
805 return data;
806 }
807
808 /* Scan the bucket overflow. */
809 for (cursor = bucket; cursor->next; cursor = cursor->next)
810 {
811 if ((*table->comparator) (entry, cursor->next->data))
812 {
813 void *data = cursor->next->data;
814
815 if (delete)
816 {
817 struct hash_entry *next = cursor->next;
818
819 /* Unlink the entry to delete, then save the freed entry for later
820 recycling. */
821 cursor->next = next->next;
822 free_entry (table, next);
823 }
824
825 return data;
826 }
827 }
828
829 /* No entry found. */
830 return NULL;
831 }
832
833 /* For an already existing hash table, change the number of buckets through
834 specifying CANDIDATE. The contents of the hash table are preserved. The
835 new number of buckets is automatically selected so as to _guarantee_ that
836 the table may receive at least CANDIDATE different user entries, including
837 those already in the table, before any other growth of the hash table size
838 occurs. If TUNING->IS_N_BUCKETS is true, then CANDIDATE specifies the
839 exact number of buckets desired. */
840
841 bool
842 hash_rehash (Hash_table *table, unsigned candidate)
843 {
844 Hash_table *new_table;
845 struct hash_entry *bucket;
846 struct hash_entry *cursor;
847 struct hash_entry *next;
848
849 new_table = hash_initialize (candidate, table->tuning, table->hasher,
850 table->comparator, table->data_freer);
851 if (new_table == NULL)
852 return false;
853
854 /* Merely reuse the extra old space into the new table. */
855 #if USE_OBSTACK
856 obstack_free (&new_table->entry_stack, NULL);
857 new_table->entry_stack = table->entry_stack;
858 #endif
859 new_table->free_entry_list = table->free_entry_list;
860
861 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
862 if (bucket->data)
863 for (cursor = bucket; cursor; cursor = next)
864 {
865 void *data = cursor->data;
866 struct hash_entry *new_bucket
867 = (new_table->bucket
868 + new_table->hasher (data, new_table->n_buckets));
869
870 if (! (new_bucket < new_table->bucket_limit))
871 abort ();
872
873 next = cursor->next;
874
875 if (new_bucket->data)
876 {
877 if (cursor == bucket)
878 {
879 /* Allocate or recycle an entry, when moving from a bucket
880 header into a bucket overflow. */
881 struct hash_entry *new_entry = allocate_entry (new_table);
882
883 if (new_entry == NULL)
884 return false;
885
886 new_entry->data = data;
887 new_entry->next = new_bucket->next;
888 new_bucket->next = new_entry;
889 }
890 else
891 {
892 /* Merely relink an existing entry, when moving from a
893 bucket overflow into a bucket overflow. */
894 cursor->next = new_bucket->next;
895 new_bucket->next = cursor;
896 }
897 }
898 else
899 {
900 /* Free an existing entry, when moving from a bucket
901 overflow into a bucket header. Also take care of the
902 simple case of moving from a bucket header into a bucket
903 header. */
904 new_bucket->data = data;
905 new_table->n_buckets_used++;
906 if (cursor != bucket)
907 free_entry (new_table, cursor);
908 }
909 }
910
911 free (table->bucket);
912 table->bucket = new_table->bucket;
913 table->bucket_limit = new_table->bucket_limit;
914 table->n_buckets = new_table->n_buckets;
915 table->n_buckets_used = new_table->n_buckets_used;
916 table->free_entry_list = new_table->free_entry_list;
917 /* table->n_entries already holds its value. */
918 #if USE_OBSTACK
919 table->entry_stack = new_table->entry_stack;
920 #endif
921 free (new_table);
922
923 return true;
924 }
925
926 /* If ENTRY matches an entry already in the hash table, return the pointer
927 to the entry from the table. Otherwise, insert ENTRY and return ENTRY.
928 Return NULL if the storage required for insertion cannot be allocated. */
929
930 void *
931 hash_insert (Hash_table *table, const void *entry)
932 {
933 void *data;
934 struct hash_entry *bucket;
935
936 /* The caller cannot insert a NULL entry. */
937 if (! entry)
938 abort ();
939
940 /* If there's a matching entry already in the table, return that. */
941 if ((data = hash_find_entry (table, entry, &bucket, false)) != NULL)
942 return data;
943
944 /* ENTRY is not matched, it should be inserted. */
945
946 if (bucket->data)
947 {
948 struct hash_entry *new_entry = allocate_entry (table);
949
950 if (new_entry == NULL)
951 return NULL;
952
953 /* Add ENTRY in the overflow of the bucket. */
954
955 new_entry->data = (void *) entry;
956 new_entry->next = bucket->next;
957 bucket->next = new_entry;
958 table->n_entries++;
959 return (void *) entry;
960 }
961
962 /* Add ENTRY right in the bucket head. */
963
964 bucket->data = (void *) entry;
965 table->n_entries++;
966 table->n_buckets_used++;
967
968 /* If the growth threshold of the buckets in use has been reached, increase
969 the table size and rehash. There's no point in checking the number of
970 entries: if the hashing function is ill-conditioned, rehashing is not
971 likely to improve it. */
972
973 if (table->n_buckets_used
974 > table->tuning->growth_threshold * table->n_buckets)
975 {
976 /* Check more fully, before starting real work. If tuning arguments
977 became invalid, the second check will rely on proper defaults. */
978 check_tuning (table);
979 if (table->n_buckets_used
980 > table->tuning->growth_threshold * table->n_buckets)
981 {
982 const Hash_tuning *tuning = table->tuning;
983 unsigned candidate
984 = (unsigned) (tuning->is_n_buckets
985 ? (table->n_buckets * tuning->growth_factor)
986 : (table->n_buckets * tuning->growth_factor
987 * tuning->growth_threshold));
988
989 /* If the rehash fails, arrange to return NULL. */
990 if (!hash_rehash (table, candidate))
991 entry = NULL;
992 }
993 }
994
995 return (void *) entry;
996 }
997
998 /* If ENTRY is already in the table, remove it and return the just-deleted
999 data (the user may want to deallocate its storage). If ENTRY is not in the
1000 table, don't modify the table and return NULL. */
1001
1002 void *
1003 hash_delete (Hash_table *table, const void *entry)
1004 {
1005 void *data;
1006 struct hash_entry *bucket;
1007
1008 data = hash_find_entry (table, entry, &bucket, true);
1009 if (!data)
1010 return NULL;
1011
1012 table->n_entries--;
1013 if (!bucket->data)
1014 {
1015 table->n_buckets_used--;
1016
1017 /* If the shrink threshold of the buckets in use has been reached,
1018 rehash into a smaller table. */
1019
1020 if (table->n_buckets_used
1021 < table->tuning->shrink_threshold * table->n_buckets)
1022 {
1023 /* Check more fully, before starting real work. If tuning arguments
1024 became invalid, the second check will rely on proper defaults. */
1025 check_tuning (table);
1026 if (table->n_buckets_used
1027 < table->tuning->shrink_threshold * table->n_buckets)
1028 {
1029 const Hash_tuning *tuning = table->tuning;
1030 unsigned candidate
1031 = (unsigned) (tuning->is_n_buckets
1032 ? table->n_buckets * tuning->shrink_factor
1033 : (table->n_buckets * tuning->shrink_factor
1034 * tuning->growth_threshold));
1035
1036 hash_rehash (table, candidate);
1037 }
1038 }
1039 }
1040
1041 return data;
1042 }
1043
1044 /* Testing. */
1045
1046 #if TESTING
1047
1048 void
1049 hash_print (const Hash_table *table)
1050 {
1051 struct hash_entry *bucket;
1052
1053 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
1054 {
1055 struct hash_entry *cursor;
1056
1057 if (bucket)
1058 printf ("%d:\n", bucket - table->bucket);
1059
1060 for (cursor = bucket; cursor; cursor = cursor->next)
1061 {
1062 char *s = (char *) cursor->data;
1063 /* FIXME */
1064 if (s)
1065 printf (" %s\n", s);
1066 }
1067 }
1068 }
1069
1070 #endif /* TESTING */