]>
Commit | Line | Data |
---|---|---|
beda758b | 1 | /* hash - hashing table processing. |
1029ea6d PE |
2 | |
3 | Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003 Free Software | |
4 | Foundation, Inc. | |
5 | ||
beda758b | 6 | Written by Jim Meyering, 1992. |
68bd3b6b RA |
7 | |
8 | This program is free software; you can redistribute it and/or modify | |
9 | it under the terms of the GNU General Public License as published by | |
10 | the Free Software Foundation; either version 2, or (at your option) | |
11 | any later version. | |
12 | ||
13 | This program is distributed in the hope that it will be useful, | |
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | GNU General Public License for more details. | |
17 | ||
18 | You should have received a copy of the GNU General Public License | |
beda758b AD |
19 | along with this program; if not, write to the Free Software Foundation, |
20 | Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ | |
21 | ||
22 | /* A generic hash table package. */ | |
23 | ||
24 | /* Define USE_OBSTACK to 1 if you want the allocator to use obstacks instead | |
25 | of malloc. If you change USE_OBSTACK, you have to recompile! */ | |
26 | ||
27 | #if HAVE_CONFIG_H | |
28 | # include <config.h> | |
29 | #endif | |
30 | #if HAVE_STDLIB_H | |
31 | # include <stdlib.h> | |
32 | #endif | |
1029ea6d | 33 | |
f61aad93 | 34 | #include <stdbool.h> |
68bd3b6b | 35 | #include <stdio.h> |
beda758b AD |
36 | |
37 | #ifndef HAVE_DECL_FREE | |
eb956856 | 38 | "this configure-time declaration test was not run" |
beda758b AD |
39 | #endif |
40 | #if !HAVE_DECL_FREE | |
41 | void free (); | |
42 | #endif | |
43 | ||
44 | #ifndef HAVE_DECL_MALLOC | |
eb956856 | 45 | "this configure-time declaration test was not run" |
beda758b AD |
46 | #endif |
47 | #if !HAVE_DECL_MALLOC | |
48 | char *malloc (); | |
49 | #endif | |
50 | ||
51 | #if USE_OBSTACK | |
52 | # include "obstack.h" | |
53 | # ifndef obstack_chunk_alloc | |
54 | # define obstack_chunk_alloc malloc | |
55 | # endif | |
56 | # ifndef obstack_chunk_free | |
57 | # define obstack_chunk_free free | |
58 | # endif | |
59 | #endif | |
60 | ||
68bd3b6b | 61 | #include "hash.h" |
68bd3b6b | 62 | |
eb956856 PE |
struct hash_table
  {
    /* The array of buckets starts at BUCKET and extends to BUCKET_LIMIT-1,
       for a possibility of N_BUCKETS.  Among those, N_BUCKETS_USED buckets
       are not empty, and there are N_ENTRIES active entries in the table.
       A bucket head whose DATA pointer is NULL marks an empty bucket.  */
    struct hash_entry *bucket;
    struct hash_entry *bucket_limit;
    unsigned n_buckets;
    unsigned n_buckets_used;
    unsigned n_entries;

    /* Tuning arguments, kept in a physically separate structure.  */
    const Hash_tuning *tuning;

    /* Three functions are given to `hash_initialize', see the documentation
       block for this function.  In a word, HASHER randomizes a user entry
       into a number from 0 up to some maximum minus 1; COMPARATOR returns
       true if two user entries compare equally; and DATA_FREER is the cleanup
       function for a user entry.  */
    Hash_hasher hasher;
    Hash_comparator comparator;
    Hash_data_freer data_freer;

    /* A linked list of freed struct hash_entry structs, chained through
       their NEXT members and kept for later recycling.  */
    struct hash_entry *free_entry_list;

#if USE_OBSTACK
    /* Whenever obstacks are used, it is possible to allocate all overflowed
       entries into a single stack, so they all can be freed in a single
       operation.  It is not clear if the speedup is worth the trouble.  */
    struct obstack entry_stack;
#endif
  };
96 | ||
beda758b AD |
97 | /* A hash table contains many internal entries, each holding a pointer to |
98 | some user provided data (also called a user entry). An entry indistinctly | |
99 | refers to both the internal entry and its associated user entry. A user | |
100 | entry contents may be hashed by a randomization function (the hashing | |
101 | function, or just `hasher' for short) into a number (or `slot') between 0 | |
102 | and the current table size. At each slot position in the hash table, | |
103 | starts a linked chain of entries for which the user data all hash to this | |
104 | slot. A bucket is the collection of all entries hashing to the same slot. | |
105 | ||
106 | A good `hasher' function will distribute entries rather evenly in buckets. | |
107 | In the ideal case, the length of each bucket is roughly the number of | |
108 | entries divided by the table size. Finding the slot for a data is usually | |
109 | done in constant time by the `hasher', and the later finding of a precise | |
110 | entry is linear in time with the size of the bucket. Consequently, a | |
111 | larger hash table size (that is, a larger number of buckets) is prone to | |
112 | yielding shorter chains, *given* the `hasher' function behaves properly. | |
113 | ||
114 | Long buckets slow down the lookup algorithm. One might use big hash table | |
115 | sizes in hope to reduce the average length of buckets, but this might | |
116 | become inordinate, as unused slots in the hash table take some space. The | |
117 | best bet is to make sure you are using a good `hasher' function (beware | |
118 | that those are not that easy to write! :-), and to use a table size | |
119 | larger than the actual number of entries. */ | |
120 | ||
121 | /* If an insertion makes the ratio of nonempty buckets to table size larger | |
122 | than the growth threshold (a number between 0.0 and 1.0), then increase | |
123 | the table size by multiplying by the growth factor (a number greater than | |
124 | 1.0). The growth threshold defaults to 0.8, and the growth factor | |
125 | defaults to 1.414, meaning that the table will have doubled its size | |
126 | every second time 80% of the buckets get used. */ | |
127 | #define DEFAULT_GROWTH_THRESHOLD 0.8 | |
128 | #define DEFAULT_GROWTH_FACTOR 1.414 | |
129 | ||
130 | /* If a deletion empties a bucket and causes the ratio of used buckets to | |
131 | table size to become smaller than the shrink threshold (a number between | |
132 | 0.0 and 1.0), then shrink the table by multiplying by the shrink factor (a | |
133 | number greater than the shrink threshold but smaller than 1.0). The shrink | |
134 | threshold and factor default to 0.0 and 1.0, meaning that the table never | |
135 | shrinks. */ | |
136 | #define DEFAULT_SHRINK_THRESHOLD 0.0 | |
137 | #define DEFAULT_SHRINK_FACTOR 1.0 | |
138 | ||
/* Default tuning parameters.  `hash_initialize' uses this structure when it
   is given a NULL tuning pointer, `hash_reset_tuning' copies it into a
   caller-supplied structure, and `check_tuning' falls back to it when a
   table's own tuning is rejected.

   The initializer is positional, so the values must stay in the member
   order of Hash_tuning (declared in hash.h, which is not visible here).
   NOTE(review): the trailing `false' is presumably the IS_N_BUCKETS flag;
   confirm against hash.h.  */
static const Hash_tuning default_tuning =
  {
    DEFAULT_SHRINK_THRESHOLD,
    DEFAULT_SHRINK_FACTOR,
    DEFAULT_GROWTH_THRESHOLD,
    DEFAULT_GROWTH_FACTOR,
    false
  };
149 | ||
150 | /* Information and lookup. */ | |
151 | ||
152 | /* The following few functions provide information about the overall hash | |
153 | table organization: the number of entries, number of buckets and maximum | |
154 | length of buckets. */ | |
155 | ||
156 | /* Return the number of buckets in the hash table. The table size, the total | |
157 | number of buckets (used plus unused), or the maximum number of slots, are | |
158 | the same quantity. */ | |
159 | ||
160 | unsigned | |
161 | hash_get_n_buckets (const Hash_table *table) | |
162 | { | |
163 | return table->n_buckets; | |
164 | } | |
68bd3b6b | 165 | |
beda758b | 166 | /* Return the number of slots in use (non-empty buckets). */ |
68bd3b6b | 167 | |
beda758b AD |
168 | unsigned |
169 | hash_get_n_buckets_used (const Hash_table *table) | |
170 | { | |
171 | return table->n_buckets_used; | |
172 | } | |
68bd3b6b | 173 | |
beda758b | 174 | /* Return the number of active entries. */ |
68bd3b6b | 175 | |
beda758b AD |
176 | unsigned |
177 | hash_get_n_entries (const Hash_table *table) | |
68bd3b6b | 178 | { |
beda758b | 179 | return table->n_entries; |
68bd3b6b RA |
180 | } |
181 | ||
beda758b | 182 | /* Return the length of the longest chain (bucket). */ |
68bd3b6b | 183 | |
beda758b AD |
184 | unsigned |
185 | hash_get_max_bucket_length (const Hash_table *table) | |
68bd3b6b | 186 | { |
beda758b AD |
187 | struct hash_entry *bucket; |
188 | unsigned max_bucket_length = 0; | |
189 | ||
190 | for (bucket = table->bucket; bucket < table->bucket_limit; bucket++) | |
68bd3b6b | 191 | { |
beda758b AD |
192 | if (bucket->data) |
193 | { | |
194 | struct hash_entry *cursor = bucket; | |
195 | unsigned bucket_length = 1; | |
196 | ||
197 | while (cursor = cursor->next, cursor) | |
198 | bucket_length++; | |
199 | ||
200 | if (bucket_length > max_bucket_length) | |
201 | max_bucket_length = bucket_length; | |
202 | } | |
68bd3b6b | 203 | } |
beda758b AD |
204 | |
205 | return max_bucket_length; | |
68bd3b6b RA |
206 | } |
207 | ||
beda758b AD |
208 | /* Do a mild validation of a hash table, by traversing it and checking two |
209 | statistics. */ | |
68bd3b6b | 210 | |
beda758b AD |
211 | bool |
212 | hash_table_ok (const Hash_table *table) | |
68bd3b6b | 213 | { |
beda758b AD |
214 | struct hash_entry *bucket; |
215 | unsigned n_buckets_used = 0; | |
216 | unsigned n_entries = 0; | |
68bd3b6b | 217 | |
beda758b | 218 | for (bucket = table->bucket; bucket < table->bucket_limit; bucket++) |
68bd3b6b | 219 | { |
beda758b | 220 | if (bucket->data) |
68bd3b6b | 221 | { |
beda758b AD |
222 | struct hash_entry *cursor = bucket; |
223 | ||
224 | /* Count bucket head. */ | |
225 | n_buckets_used++; | |
226 | n_entries++; | |
227 | ||
228 | /* Count bucket overflow. */ | |
229 | while (cursor = cursor->next, cursor) | |
230 | n_entries++; | |
68bd3b6b | 231 | } |
68bd3b6b | 232 | } |
beda758b AD |
233 | |
234 | if (n_buckets_used == table->n_buckets_used && n_entries == table->n_entries) | |
235 | return true; | |
236 | ||
237 | return false; | |
238 | } | |
239 | ||
240 | void | |
241 | hash_print_statistics (const Hash_table *table, FILE *stream) | |
242 | { | |
243 | unsigned n_entries = hash_get_n_entries (table); | |
244 | unsigned n_buckets = hash_get_n_buckets (table); | |
245 | unsigned n_buckets_used = hash_get_n_buckets_used (table); | |
246 | unsigned max_bucket_length = hash_get_max_bucket_length (table); | |
247 | ||
248 | fprintf (stream, "# entries: %u\n", n_entries); | |
249 | fprintf (stream, "# buckets: %u\n", n_buckets); | |
250 | fprintf (stream, "# buckets used: %u (%.2f%%)\n", n_buckets_used, | |
251 | (100.0 * n_buckets_used) / n_buckets); | |
252 | fprintf (stream, "max bucket length: %u\n", max_bucket_length); | |
68bd3b6b RA |
253 | } |
254 | ||
beda758b AD |
255 | /* If ENTRY matches an entry already in the hash table, return the |
256 | entry from the table. Otherwise, return NULL. */ | |
257 | ||
68bd3b6b | 258 | void * |
beda758b | 259 | hash_lookup (const Hash_table *table, const void *entry) |
68bd3b6b | 260 | { |
beda758b AD |
261 | struct hash_entry *bucket |
262 | = table->bucket + table->hasher (entry, table->n_buckets); | |
263 | struct hash_entry *cursor; | |
264 | ||
4f395381 | 265 | if (! (bucket < table->bucket_limit)) |
937fd864 | 266 | abort (); |
beda758b AD |
267 | |
268 | if (bucket->data == NULL) | |
269 | return NULL; | |
270 | ||
271 | for (cursor = bucket; cursor; cursor = cursor->next) | |
272 | if (table->comparator (entry, cursor->data)) | |
273 | return cursor->data; | |
274 | ||
275 | return NULL; | |
68bd3b6b RA |
276 | } |
277 | ||
beda758b AD |
278 | /* Walking. */ |
279 | ||
280 | /* The functions in this page traverse the hash table and process the | |
281 | contained entries. For the traversal to work properly, the hash table | |
282 | should not be resized nor modified while any particular entry is being | |
283 | processed. In particular, entries should not be added or removed. */ | |
284 | ||
285 | /* Return the first data in the table, or NULL if the table is empty. */ | |
286 | ||
287 | void * | |
288 | hash_get_first (const Hash_table *table) | |
68bd3b6b | 289 | { |
beda758b AD |
290 | struct hash_entry *bucket; |
291 | ||
292 | if (table->n_entries == 0) | |
293 | return NULL; | |
294 | ||
937fd864 | 295 | for (bucket = table->bucket; ; bucket++) |
4f395381 | 296 | if (! (bucket < table->bucket_limit)) |
937fd864 PE |
297 | abort (); |
298 | else if (bucket->data) | |
beda758b | 299 | return bucket->data; |
68bd3b6b RA |
300 | } |
301 | ||
beda758b AD |
302 | /* Return the user data for the entry following ENTRY, where ENTRY has been |
303 | returned by a previous call to either `hash_get_first' or `hash_get_next'. | |
304 | Return NULL if there are no more entries. */ | |
305 | ||
306 | void * | |
307 | hash_get_next (const Hash_table *table, const void *entry) | |
308 | { | |
309 | struct hash_entry *bucket | |
310 | = table->bucket + table->hasher (entry, table->n_buckets); | |
311 | struct hash_entry *cursor; | |
312 | ||
4f395381 | 313 | if (! (bucket < table->bucket_limit)) |
937fd864 | 314 | abort (); |
beda758b AD |
315 | |
316 | /* Find next entry in the same bucket. */ | |
317 | for (cursor = bucket; cursor; cursor = cursor->next) | |
318 | if (cursor->data == entry && cursor->next) | |
319 | return cursor->next->data; | |
320 | ||
321 | /* Find first entry in any subsequent bucket. */ | |
322 | while (++bucket < table->bucket_limit) | |
323 | if (bucket->data) | |
324 | return bucket->data; | |
325 | ||
326 | /* None found. */ | |
327 | return NULL; | |
328 | } | |
329 | ||
330 | /* Fill BUFFER with pointers to active user entries in the hash table, then | |
331 | return the number of pointers copied. Do not copy more than BUFFER_SIZE | |
332 | pointers. */ | |
333 | ||
334 | unsigned | |
335 | hash_get_entries (const Hash_table *table, void **buffer, | |
336 | unsigned buffer_size) | |
68bd3b6b | 337 | { |
beda758b AD |
338 | unsigned counter = 0; |
339 | struct hash_entry *bucket; | |
340 | struct hash_entry *cursor; | |
341 | ||
342 | for (bucket = table->bucket; bucket < table->bucket_limit; bucket++) | |
68bd3b6b | 343 | { |
beda758b AD |
344 | if (bucket->data) |
345 | { | |
346 | for (cursor = bucket; cursor; cursor = cursor->next) | |
347 | { | |
348 | if (counter >= buffer_size) | |
349 | return counter; | |
350 | buffer[counter++] = cursor->data; | |
351 | } | |
352 | } | |
68bd3b6b | 353 | } |
beda758b AD |
354 | |
355 | return counter; | |
68bd3b6b RA |
356 | } |
357 | ||
beda758b AD |
358 | /* Call a PROCESSOR function for each entry of a hash table, and return the |
359 | number of entries for which the processor function returned success. A | |
360 | pointer to some PROCESSOR_DATA which will be made available to each call to | |
361 | the processor function. The PROCESSOR accepts two arguments: the first is | |
362 | the user entry being walked into, the second is the value of PROCESSOR_DATA | |
363 | as received. The walking continue for as long as the PROCESSOR function | |
364 | returns nonzero. When it returns zero, the walking is interrupted. */ | |
365 | ||
366 | unsigned | |
367 | hash_do_for_each (const Hash_table *table, Hash_processor processor, | |
368 | void *processor_data) | |
68bd3b6b | 369 | { |
beda758b AD |
370 | unsigned counter = 0; |
371 | struct hash_entry *bucket; | |
372 | struct hash_entry *cursor; | |
373 | ||
374 | for (bucket = table->bucket; bucket < table->bucket_limit; bucket++) | |
375 | { | |
376 | if (bucket->data) | |
377 | { | |
378 | for (cursor = bucket; cursor; cursor = cursor->next) | |
379 | { | |
380 | if (!(*processor) (cursor->data, processor_data)) | |
381 | return counter; | |
382 | counter++; | |
383 | } | |
384 | } | |
385 | } | |
386 | ||
387 | return counter; | |
68bd3b6b RA |
388 | } |
389 | ||
beda758b AD |
390 | /* Allocation and clean-up. */ |
391 | ||
/* Hash the NUL-terminated STRING into an index between 0 and N_BUCKETS-1.
   This is a convenience routine for constructing other hashing functions.
   N_BUCKETS must be nonzero, since the result is taken modulo N_BUCKETS.  */

#if USE_DIFF_HASH

/* About hashings, Paul Eggert writes to me (FP), on 1994-01-01: "Please see
   B. J. McKenzie, R. Harries & T. Bell, Selecting a hashing algorithm,
   Software--practice & experience 20, 2 (Feb 1990), 209-224.  Good hash
   algorithms tend to be domain-specific, so what's good for [diffutils'] io.c
   may not be good for your application."  */

# ifndef CHAR_BIT
#  define CHAR_BIT 8
# endif

unsigned
hash_string (const char *string, unsigned n_buckets)
{
  const unsigned char *p;
  unsigned value = 0;

  /* For each byte: rotate the accumulator left by 7 bits, then add the
     byte.  The reduction modulo N_BUCKETS happens only once, at the end.  */
  for (p = (const unsigned char *) string; *p != '\0'; p++)
    value = *p + ((value << 7)
                  | (value >> ((sizeof (unsigned) * CHAR_BIT) - 7)));

  return value % n_buckets;
}

#else /* not USE_DIFF_HASH */

/* This one comes from `recode', and performs a bit better than the above as
   per a few experiments.  It is inspired from a hashing routine found in the
   very old Cyber `snoop', itself written in typical Greg Mansfield style.
   (By the way, what happened to this excellent man?  Is he still alive?)  */

unsigned
hash_string (const char *string, unsigned n_buckets)
{
  const unsigned char *p;
  unsigned value = 0;

  /* Multiply-by-31 accumulation, reduced modulo N_BUCKETS at every step.  */
  for (p = (const unsigned char *) string; *p != '\0'; p++)
    value = (value * 31 + *p) % n_buckets;

  return value;
}

#endif /* not USE_DIFF_HASH */
443 | ||
/* Tell whether CANDIDATE is prime by trial division with the odd divisors
   3, 5, 7, ...  CANDIDATE should be an odd number at least equal to 11;
   even numbers are never tested against 2.  */

static bool
is_prime (unsigned long candidate)
{
  unsigned long divisor = 3;
  unsigned long square = 9;     /* Always divisor * divisor.  */

  for (;;)
    {
      /* Stop once divisor squared reaches CANDIDATE, or when a divisor
         has been found.  */
      if (!(square < candidate))
        break;
      if (candidate % divisor == 0)
        break;

      /* Step to the next odd divisor, updating the square incrementally:
         (d + 2)^2 = d^2 + 4 * (d + 1).  */
      square += 4 * (divisor + 1);
      divisor += 2;
    }

  return candidate % divisor != 0;
}
462 | ||
/* Return the smallest prime that is not less than CANDIDATE.  Primes lower
   than 10 are merely skipped, so the result is always at least 11.  */

static unsigned long
next_prime (unsigned long candidate)
{
  /* Clamp small requests to 10, then force the starting point odd.  */
  unsigned long probe = (candidate < 10 ? 10 : candidate) | 1;

  /* Only odd numbers need to be tried from here on.  */
  for (; !is_prime (probe); probe += 2)
    continue;

  return probe;
}
481 | ||
482 | void | |
beda758b AD |
483 | hash_reset_tuning (Hash_tuning *tuning) |
484 | { | |
485 | *tuning = default_tuning; | |
486 | } | |
487 | ||
488 | /* For the given hash TABLE, check the user supplied tuning structure for | |
489 | reasonable values, and return true if there is no gross error with it. | |
490 | Otherwise, definitively reset the TUNING field to some acceptable default | |
491 | in the hash table (that is, the user loses the right of further modifying | |
492 | tuning arguments), and return false. */ | |
493 | ||
494 | static bool | |
495 | check_tuning (Hash_table *table) | |
68bd3b6b | 496 | { |
beda758b AD |
497 | const Hash_tuning *tuning = table->tuning; |
498 | ||
499 | if (tuning->growth_threshold > 0.0 | |
500 | && tuning->growth_threshold < 1.0 | |
501 | && tuning->growth_factor > 1.0 | |
502 | && tuning->shrink_threshold >= 0.0 | |
503 | && tuning->shrink_threshold < 1.0 | |
504 | && tuning->shrink_factor > tuning->shrink_threshold | |
505 | && tuning->shrink_factor <= 1.0 | |
506 | && tuning->shrink_threshold < tuning->growth_threshold) | |
507 | return true; | |
508 | ||
509 | table->tuning = &default_tuning; | |
510 | return false; | |
511 | } | |
512 | ||
513 | /* Allocate and return a new hash table, or NULL upon failure. The initial | |
514 | number of buckets is automatically selected so as to _guarantee_ that you | |
515 | may insert at least CANDIDATE different user entries before any growth of | |
516 | the hash table size occurs. So, if have a reasonably tight a-priori upper | |
517 | bound on the number of entries you intend to insert in the hash table, you | |
518 | may save some table memory and insertion time, by specifying it here. If | |
519 | the IS_N_BUCKETS field of the TUNING structure is true, the CANDIDATE | |
520 | argument has its meaning changed to the wanted number of buckets. | |
521 | ||
522 | TUNING points to a structure of user-supplied values, in case some fine | |
523 | tuning is wanted over the default behavior of the hasher. If TUNING is | |
524 | NULL, the default tuning parameters are used instead. | |
525 | ||
526 | The user-supplied HASHER function should be provided. It accepts two | |
527 | arguments ENTRY and TABLE_SIZE. It computes, by hashing ENTRY contents, a | |
528 | slot number for that entry which should be in the range 0..TABLE_SIZE-1. | |
529 | This slot number is then returned. | |
530 | ||
531 | The user-supplied COMPARATOR function should be provided. It accepts two | |
532 | arguments pointing to user data, it then returns true for a pair of entries | |
533 | that compare equal, or false otherwise. This function is internally called | |
534 | on entries which are already known to hash to the same bucket index. | |
535 | ||
536 | The user-supplied DATA_FREER function, when not NULL, may be later called | |
537 | with the user data as an argument, just before the entry containing the | |
538 | data gets freed. This happens from within `hash_free' or `hash_clear'. | |
539 | You should specify this function only if you want these functions to free | |
540 | all of your `data' data. This is typically the case when your data is | |
541 | simply an auxiliary struct that you have malloc'd to aggregate several | |
542 | values. */ | |
543 | ||
544 | Hash_table * | |
545 | hash_initialize (unsigned candidate, const Hash_tuning *tuning, | |
546 | Hash_hasher hasher, Hash_comparator comparator, | |
547 | Hash_data_freer data_freer) | |
548 | { | |
549 | Hash_table *table; | |
550 | struct hash_entry *bucket; | |
551 | ||
552 | if (hasher == NULL || comparator == NULL) | |
553 | return NULL; | |
554 | ||
555 | table = (Hash_table *) malloc (sizeof (Hash_table)); | |
556 | if (table == NULL) | |
557 | return NULL; | |
558 | ||
559 | if (!tuning) | |
560 | tuning = &default_tuning; | |
561 | table->tuning = tuning; | |
562 | if (!check_tuning (table)) | |
68bd3b6b | 563 | { |
beda758b AD |
564 | /* Fail if the tuning options are invalid. This is the only occasion |
565 | when the user gets some feedback about it. Once the table is created, | |
566 | if the user provides invalid tuning options, we silently revert to | |
567 | using the defaults, and ignore further request to change the tuning | |
568 | options. */ | |
569 | free (table); | |
570 | return NULL; | |
68bd3b6b | 571 | } |
beda758b AD |
572 | |
573 | table->n_buckets | |
574 | = next_prime (tuning->is_n_buckets ? candidate | |
575 | : (unsigned) (candidate / tuning->growth_threshold)); | |
576 | ||
577 | table->bucket = (struct hash_entry *) | |
578 | malloc (table->n_buckets * sizeof (struct hash_entry)); | |
579 | if (table->bucket == NULL) | |
580 | { | |
581 | free (table); | |
582 | return NULL; | |
583 | } | |
584 | table->bucket_limit = table->bucket + table->n_buckets; | |
585 | ||
586 | for (bucket = table->bucket; bucket < table->bucket_limit; bucket++) | |
587 | { | |
588 | bucket->data = NULL; | |
589 | bucket->next = NULL; | |
590 | } | |
591 | table->n_buckets_used = 0; | |
592 | table->n_entries = 0; | |
593 | ||
594 | table->hasher = hasher; | |
595 | table->comparator = comparator; | |
596 | table->data_freer = data_freer; | |
597 | ||
598 | table->free_entry_list = NULL; | |
599 | #if USE_OBSTACK | |
600 | obstack_init (&table->entry_stack); | |
601 | #endif | |
602 | return table; | |
68bd3b6b RA |
603 | } |
604 | ||
beda758b AD |
605 | /* Make all buckets empty, placing any chained entries on the free list. |
606 | Apply the user-specified function data_freer (if any) to the datas of any | |
607 | affected entries. */ | |
608 | ||
68bd3b6b | 609 | void |
beda758b | 610 | hash_clear (Hash_table *table) |
68bd3b6b | 611 | { |
beda758b | 612 | struct hash_entry *bucket; |
beda758b AD |
613 | |
614 | for (bucket = table->bucket; bucket < table->bucket_limit; bucket++) | |
615 | { | |
616 | if (bucket->data) | |
617 | { | |
eb956856 PE |
618 | struct hash_entry *cursor; |
619 | struct hash_entry *next; | |
620 | ||
beda758b | 621 | /* Free the bucket overflow. */ |
eb956856 | 622 | for (cursor = bucket->next; cursor; cursor = next) |
beda758b AD |
623 | { |
624 | if (table->data_freer) | |
625 | (*table->data_freer) (cursor->data); | |
626 | cursor->data = NULL; | |
627 | ||
eb956856 | 628 | next = cursor->next; |
beda758b AD |
629 | /* Relinking is done one entry at a time, as it is to be expected |
630 | that overflows are either rare or short. */ | |
631 | cursor->next = table->free_entry_list; | |
632 | table->free_entry_list = cursor; | |
633 | } | |
634 | ||
635 | /* Free the bucket head. */ | |
636 | if (table->data_freer) | |
637 | (*table->data_freer) (bucket->data); | |
638 | bucket->data = NULL; | |
639 | bucket->next = NULL; | |
640 | } | |
641 | } | |
642 | ||
643 | table->n_buckets_used = 0; | |
644 | table->n_entries = 0; | |
68bd3b6b RA |
645 | } |
646 | ||
beda758b AD |
647 | /* Reclaim all storage associated with a hash table. If a data_freer |
648 | function has been supplied by the user when the hash table was created, | |
649 | this function applies it to the data of each entry before freeing that | |
650 | entry. */ | |
651 | ||
68bd3b6b | 652 | void |
beda758b | 653 | hash_free (Hash_table *table) |
68bd3b6b | 654 | { |
beda758b AD |
655 | struct hash_entry *bucket; |
656 | struct hash_entry *cursor; | |
657 | struct hash_entry *next; | |
658 | ||
659 | /* Call the user data_freer function. */ | |
660 | if (table->data_freer && table->n_entries) | |
661 | { | |
662 | for (bucket = table->bucket; bucket < table->bucket_limit; bucket++) | |
663 | { | |
664 | if (bucket->data) | |
665 | { | |
666 | for (cursor = bucket; cursor; cursor = cursor->next) | |
667 | { | |
668 | (*table->data_freer) (cursor->data); | |
669 | } | |
670 | } | |
671 | } | |
672 | } | |
673 | ||
674 | #if USE_OBSTACK | |
675 | ||
676 | obstack_free (&table->entry_stack, NULL); | |
677 | ||
678 | #else | |
679 | ||
680 | /* Free all bucket overflowed entries. */ | |
681 | for (bucket = table->bucket; bucket < table->bucket_limit; bucket++) | |
682 | { | |
683 | for (cursor = bucket->next; cursor; cursor = next) | |
684 | { | |
685 | next = cursor->next; | |
686 | free (cursor); | |
687 | } | |
688 | } | |
689 | ||
690 | /* Also reclaim the internal list of previously freed entries. */ | |
691 | for (cursor = table->free_entry_list; cursor; cursor = next) | |
692 | { | |
693 | next = cursor->next; | |
694 | free (cursor); | |
695 | } | |
696 | ||
697 | #endif | |
698 | ||
699 | /* Free the remainder of the hash table structure. */ | |
700 | free (table->bucket); | |
701 | free (table); | |
68bd3b6b RA |
702 | } |
703 | ||
beda758b AD |
704 | /* Insertion and deletion. */ |
705 | ||
706 | /* Get a new hash entry for a bucket overflow, possibly by reclying a | |
707 | previously freed one. If this is not possible, allocate a new one. */ | |
708 | ||
709 | static struct hash_entry * | |
710 | allocate_entry (Hash_table *table) | |
68bd3b6b | 711 | { |
beda758b | 712 | struct hash_entry *new; |
68bd3b6b | 713 | |
beda758b AD |
714 | if (table->free_entry_list) |
715 | { | |
716 | new = table->free_entry_list; | |
717 | table->free_entry_list = new->next; | |
718 | } | |
719 | else | |
68bd3b6b | 720 | { |
beda758b AD |
721 | #if USE_OBSTACK |
722 | new = (struct hash_entry *) | |
723 | obstack_alloc (&table->entry_stack, sizeof (struct hash_entry)); | |
724 | #else | |
725 | new = (struct hash_entry *) malloc (sizeof (struct hash_entry)); | |
726 | #endif | |
68bd3b6b | 727 | } |
beda758b AD |
728 | |
729 | return new; | |
68bd3b6b RA |
730 | } |
731 | ||
beda758b AD |
732 | /* Free a hash entry which was part of some bucket overflow, |
733 | saving it for later recycling. */ | |
68bd3b6b RA |
734 | |
735 | static void | |
beda758b AD |
736 | free_entry (Hash_table *table, struct hash_entry *entry) |
737 | { | |
738 | entry->data = NULL; | |
739 | entry->next = table->free_entry_list; | |
740 | table->free_entry_list = entry; | |
741 | } | |
742 | ||
743 | /* This private function is used to help with insertion and deletion. When | |
744 | ENTRY matches an entry in the table, return a pointer to the corresponding | |
745 | user data and set *BUCKET_HEAD to the head of the selected bucket. | |
746 | Otherwise, return NULL. When DELETE is true and ENTRY matches an entry in | |
747 | the table, unlink the matching entry. */ | |
748 | ||
749 | static void * | |
750 | hash_find_entry (Hash_table *table, const void *entry, | |
751 | struct hash_entry **bucket_head, bool delete) | |
68bd3b6b | 752 | { |
beda758b AD |
753 | struct hash_entry *bucket |
754 | = table->bucket + table->hasher (entry, table->n_buckets); | |
755 | struct hash_entry *cursor; | |
756 | ||
4f395381 | 757 | if (! (bucket < table->bucket_limit)) |
937fd864 PE |
758 | abort (); |
759 | ||
beda758b AD |
760 | *bucket_head = bucket; |
761 | ||
762 | /* Test for empty bucket. */ | |
763 | if (bucket->data == NULL) | |
764 | return NULL; | |
765 | ||
766 | /* See if the entry is the first in the bucket. */ | |
767 | if ((*table->comparator) (entry, bucket->data)) | |
768 | { | |
769 | void *data = bucket->data; | |
770 | ||
771 | if (delete) | |
772 | { | |
773 | if (bucket->next) | |
774 | { | |
775 | struct hash_entry *next = bucket->next; | |
776 | ||
777 | /* Bump the first overflow entry into the bucket head, then save | |
778 | the previous first overflow entry for later recycling. */ | |
779 | *bucket = *next; | |
780 | free_entry (table, next); | |
781 | } | |
782 | else | |
783 | { | |
784 | bucket->data = NULL; | |
785 | } | |
786 | } | |
68bd3b6b | 787 | |
beda758b AD |
788 | return data; |
789 | } | |
68bd3b6b | 790 | |
beda758b AD |
791 | /* Scan the bucket overflow. */ |
792 | for (cursor = bucket; cursor->next; cursor = cursor->next) | |
68bd3b6b | 793 | { |
beda758b AD |
794 | if ((*table->comparator) (entry, cursor->next->data)) |
795 | { | |
796 | void *data = cursor->next->data; | |
797 | ||
798 | if (delete) | |
799 | { | |
800 | struct hash_entry *next = cursor->next; | |
801 | ||
802 | /* Unlink the entry to delete, then save the freed entry for later | |
803 | recycling. */ | |
804 | cursor->next = next->next; | |
805 | free_entry (table, next); | |
806 | } | |
807 | ||
808 | return data; | |
809 | } | |
68bd3b6b | 810 | } |
beda758b AD |
811 | |
812 | /* No entry found. */ | |
813 | return NULL; | |
68bd3b6b RA |
814 | } |
815 | ||
beda758b AD |
816 | /* For an already existing hash table, change the number of buckets through |
817 | specifying CANDIDATE. The contents of the hash table are preserved. The | |
818 | new number of buckets is automatically selected so as to _guarantee_ that | |
819 | the table may receive at least CANDIDATE different user entries, including | |
820 | those already in the table, before any other growth of the hash table size | |
821 | occurs. If TUNING->IS_N_BUCKETS is true, then CANDIDATE specifies the | |
822 | exact number of buckets desired. */ | |
823 | ||
824 | bool | |
825 | hash_rehash (Hash_table *table, unsigned candidate) | |
68bd3b6b | 826 | { |
beda758b AD |
827 | Hash_table *new_table; |
828 | struct hash_entry *bucket; | |
829 | struct hash_entry *cursor; | |
830 | struct hash_entry *next; | |
831 | ||
832 | new_table = hash_initialize (candidate, table->tuning, table->hasher, | |
833 | table->comparator, table->data_freer); | |
834 | if (new_table == NULL) | |
835 | return false; | |
836 | ||
837 | /* Merely reuse the extra old space into the new table. */ | |
838 | #if USE_OBSTACK | |
839 | obstack_free (&new_table->entry_stack, NULL); | |
840 | new_table->entry_stack = table->entry_stack; | |
841 | #endif | |
842 | new_table->free_entry_list = table->free_entry_list; | |
843 | ||
844 | for (bucket = table->bucket; bucket < table->bucket_limit; bucket++) | |
845 | if (bucket->data) | |
846 | for (cursor = bucket; cursor; cursor = next) | |
847 | { | |
848 | void *data = cursor->data; | |
849 | struct hash_entry *new_bucket | |
850 | = (new_table->bucket | |
851 | + new_table->hasher (data, new_table->n_buckets)); | |
852 | ||
4f395381 | 853 | if (! (new_bucket < new_table->bucket_limit)) |
937fd864 PE |
854 | abort (); |
855 | ||
beda758b AD |
856 | next = cursor->next; |
857 | ||
858 | if (new_bucket->data) | |
859 | { | |
860 | if (cursor == bucket) | |
861 | { | |
862 | /* Allocate or recycle an entry, when moving from a bucket | |
863 | header into a bucket overflow. */ | |
864 | struct hash_entry *new_entry = allocate_entry (new_table); | |
865 | ||
866 | if (new_entry == NULL) | |
867 | return false; | |
868 | ||
869 | new_entry->data = data; | |
870 | new_entry->next = new_bucket->next; | |
871 | new_bucket->next = new_entry; | |
872 | } | |
873 | else | |
874 | { | |
875 | /* Merely relink an existing entry, when moving from a | |
876 | bucket overflow into a bucket overflow. */ | |
877 | cursor->next = new_bucket->next; | |
878 | new_bucket->next = cursor; | |
879 | } | |
880 | } | |
881 | else | |
882 | { | |
883 | /* Free an existing entry, when moving from a bucket | |
884 | overflow into a bucket header. Also take care of the | |
885 | simple case of moving from a bucket header into a bucket | |
886 | header. */ | |
887 | new_bucket->data = data; | |
888 | new_table->n_buckets_used++; | |
889 | if (cursor != bucket) | |
890 | free_entry (new_table, cursor); | |
891 | } | |
892 | } | |
893 | ||
894 | free (table->bucket); | |
895 | table->bucket = new_table->bucket; | |
896 | table->bucket_limit = new_table->bucket_limit; | |
897 | table->n_buckets = new_table->n_buckets; | |
898 | table->n_buckets_used = new_table->n_buckets_used; | |
899 | table->free_entry_list = new_table->free_entry_list; | |
900 | /* table->n_entries already holds its value. */ | |
901 | #if USE_OBSTACK | |
902 | table->entry_stack = new_table->entry_stack; | |
903 | #endif | |
904 | free (new_table); | |
905 | ||
906 | return true; | |
68bd3b6b RA |
907 | } |
908 | ||
beda758b AD |
909 | /* If ENTRY matches an entry already in the hash table, return the pointer |
910 | to the entry from the table. Otherwise, insert ENTRY and return ENTRY. | |
911 | Return NULL if the storage required for insertion cannot be allocated. */ | |
68bd3b6b | 912 | |
beda758b AD |
913 | void * |
914 | hash_insert (Hash_table *table, const void *entry) | |
68bd3b6b | 915 | { |
beda758b AD |
916 | void *data; |
917 | struct hash_entry *bucket; | |
918 | ||
937fd864 PE |
919 | /* The caller cannot insert a NULL entry. */ |
920 | if (! entry) | |
921 | abort (); | |
beda758b AD |
922 | |
923 | /* If there's a matching entry already in the table, return that. */ | |
924 | if ((data = hash_find_entry (table, entry, &bucket, false)) != NULL) | |
925 | return data; | |
926 | ||
927 | /* ENTRY is not matched, it should be inserted. */ | |
928 | ||
929 | if (bucket->data) | |
930 | { | |
931 | struct hash_entry *new_entry = allocate_entry (table); | |
932 | ||
933 | if (new_entry == NULL) | |
934 | return NULL; | |
935 | ||
936 | /* Add ENTRY in the overflow of the bucket. */ | |
937 | ||
938 | new_entry->data = (void *) entry; | |
939 | new_entry->next = bucket->next; | |
940 | bucket->next = new_entry; | |
941 | table->n_entries++; | |
942 | return (void *) entry; | |
943 | } | |
944 | ||
945 | /* Add ENTRY right in the bucket head. */ | |
946 | ||
947 | bucket->data = (void *) entry; | |
948 | table->n_entries++; | |
949 | table->n_buckets_used++; | |
950 | ||
951 | /* If the growth threshold of the buckets in use has been reached, increase | |
952 | the table size and rehash. There's no point in checking the number of | |
953 | entries: if the hashing function is ill-conditioned, rehashing is not | |
954 | likely to improve it. */ | |
955 | ||
956 | if (table->n_buckets_used | |
957 | > table->tuning->growth_threshold * table->n_buckets) | |
958 | { | |
959 | /* Check more fully, before starting real work. If tuning arguments | |
960 | became invalid, the second check will rely on proper defaults. */ | |
961 | check_tuning (table); | |
962 | if (table->n_buckets_used | |
963 | > table->tuning->growth_threshold * table->n_buckets) | |
964 | { | |
965 | const Hash_tuning *tuning = table->tuning; | |
966 | unsigned candidate | |
967 | = (unsigned) (tuning->is_n_buckets | |
968 | ? (table->n_buckets * tuning->growth_factor) | |
969 | : (table->n_buckets * tuning->growth_factor | |
970 | * tuning->growth_threshold)); | |
971 | ||
972 | /* If the rehash fails, arrange to return NULL. */ | |
973 | if (!hash_rehash (table, candidate)) | |
974 | entry = NULL; | |
975 | } | |
976 | } | |
977 | ||
978 | return (void *) entry; | |
68bd3b6b RA |
979 | } |
980 | ||
beda758b AD |
981 | /* If ENTRY is already in the table, remove it and return the just-deleted |
982 | data (the user may want to deallocate its storage). If ENTRY is not in the | |
983 | table, don't modify the table and return NULL. */ | |
68bd3b6b | 984 | |
beda758b AD |
985 | void * |
986 | hash_delete (Hash_table *table, const void *entry) | |
987 | { | |
988 | void *data; | |
989 | struct hash_entry *bucket; | |
990 | ||
991 | data = hash_find_entry (table, entry, &bucket, true); | |
992 | if (!data) | |
993 | return NULL; | |
994 | ||
995 | table->n_entries--; | |
996 | if (!bucket->data) | |
997 | { | |
998 | table->n_buckets_used--; | |
999 | ||
1000 | /* If the shrink threshold of the buckets in use has been reached, | |
1001 | rehash into a smaller table. */ | |
1002 | ||
1003 | if (table->n_buckets_used | |
1004 | < table->tuning->shrink_threshold * table->n_buckets) | |
1005 | { | |
1006 | /* Check more fully, before starting real work. If tuning arguments | |
1007 | became invalid, the second check will rely on proper defaults. */ | |
1008 | check_tuning (table); | |
1009 | if (table->n_buckets_used | |
1010 | < table->tuning->shrink_threshold * table->n_buckets) | |
1011 | { | |
1012 | const Hash_tuning *tuning = table->tuning; | |
1013 | unsigned candidate | |
1014 | = (unsigned) (tuning->is_n_buckets | |
1015 | ? table->n_buckets * tuning->shrink_factor | |
1016 | : (table->n_buckets * tuning->shrink_factor | |
1017 | * tuning->growth_threshold)); | |
1018 | ||
1019 | hash_rehash (table, candidate); | |
1020 | } | |
1021 | } | |
1022 | } | |
1023 | ||
1024 | return data; | |
1025 | } | |
1026 | ||
1027 | /* Testing. */ | |
1028 | ||
1029 | #if TESTING | |
1030 | ||
1031 | void | |
1032 | hash_print (const Hash_table *table) | |
68bd3b6b | 1033 | { |
beda758b | 1034 | struct hash_entry *bucket; |
68bd3b6b | 1035 | |
beda758b | 1036 | for (bucket = table->bucket; bucket < table->bucket_limit; bucket++) |
68bd3b6b | 1037 | { |
beda758b AD |
1038 | struct hash_entry *cursor; |
1039 | ||
1040 | if (bucket) | |
eb956856 | 1041 | printf ("%d:\n", bucket - table->bucket); |
beda758b AD |
1042 | |
1043 | for (cursor = bucket; cursor; cursor = cursor->next) | |
1044 | { | |
1045 | char *s = (char *) cursor->data; | |
1046 | /* FIXME */ | |
eb956856 PE |
1047 | if (s) |
1048 | printf (" %s\n", s); | |
beda758b | 1049 | } |
68bd3b6b | 1050 | } |
68bd3b6b | 1051 | } |
beda758b AD |
1052 | |
1053 | #endif /* TESTING */ |