/* hash - hashing table processing.
   Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
   Written by Jim Meyering, 1992.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */

/* A generic hash table package.  */

/* Define USE_OBSTACK to 1 if you want the allocator to use obstacks instead
   of malloc.  If you change USE_OBSTACK, you have to recompile!  */

#if HAVE_CONFIG_H
# include <config.h>
#endif
#if HAVE_STDLIB_H
# include <stdlib.h>
#endif
#if HAVE_STDBOOL_H
# include <stdbool.h>
#else
typedef enum {false = 0, true = 1} bool;
#endif
#include <stdio.h>
#include <assert.h>

#ifndef HAVE_DECL_FREE
"this configure-time declaration test was not run"
#endif
#if !HAVE_DECL_FREE
void free ();
#endif

#ifndef HAVE_DECL_MALLOC
"this configure-time declaration test was not run"
#endif
#if !HAVE_DECL_MALLOC
char *malloc ();
#endif

#if USE_OBSTACK
# include "obstack.h"
# ifndef obstack_chunk_alloc
#  define obstack_chunk_alloc malloc
# endif
# ifndef obstack_chunk_free
#  define obstack_chunk_free free
# endif
#endif

#include "hash.h"

struct hash_table
  {
    /* The array of buckets starts at BUCKET and extends to BUCKET_LIMIT-1,
       for a possibility of N_BUCKETS buckets.  Among those, N_BUCKETS_USED
       buckets are not empty; there are N_ENTRIES active entries in the
       table.  */
    struct hash_entry *bucket;
    struct hash_entry *bucket_limit;
    unsigned n_buckets;
    unsigned n_buckets_used;
    unsigned n_entries;

    /* Tuning arguments, kept in a physically separate structure.  */
    const Hash_tuning *tuning;

    /* Three functions are given to `hash_initialize', see the documentation
       block for that function.  In a word, HASHER randomizes a user entry
       into a number from 0 up to some maximum minus 1; COMPARATOR returns
       true if two user entries compare equally; and DATA_FREER is the
       cleanup function for a user entry.  */
    Hash_hasher hasher;
    Hash_comparator comparator;
    Hash_data_freer data_freer;

    /* A linked list of freed struct hash_entry structs.  */
    struct hash_entry *free_entry_list;

#if USE_OBSTACK
    /* Whenever obstacks are used, it is possible to allocate all overflowed
       entries into a single stack, so they all can be freed in a single
       operation.  It is not clear if the speedup is worth the trouble.  */
    struct obstack entry_stack;
#endif
  };

/* A hash table contains many internal entries, each holding a pointer to
   some user-provided data (also called a user entry).  The term `entry'
   refers indistinctly to both the internal entry and its associated user
   entry.  A user entry's contents may be hashed by a randomization function
   (the hashing function, or just `hasher' for short) into a number (or
   `slot') between 0 and the current table size minus 1.  At each slot
   position in the hash table starts a linked chain of entries whose user
   data all hash to this slot.  A bucket is the collection of all entries
   hashing to the same slot.

   A good `hasher' function will distribute entries rather evenly in
   buckets.  In the ideal case, the length of each bucket is roughly the
   number of entries divided by the table size.  Finding the slot for an
   entry is usually done in constant time by the `hasher', and the later
   finding of a precise entry is linear in the size of the bucket.
   Consequently, a larger hash table size (that is, a larger number of
   buckets) is prone to yielding shorter chains, *given* the `hasher'
   function behaves properly.

   Long buckets slow down the lookup algorithm.  One might use big hash
   table sizes in the hope of reducing the average length of buckets, but
   this might become wasteful, as unused slots in the hash table take some
   space.  The best bet is to make sure you are using a good `hasher'
   function (beware that those are not that easy to write! :-), and to use
   a table size larger than the actual number of entries.  */

/* If an insertion makes the ratio of nonempty buckets to table size larger
   than the growth threshold (a number between 0.0 and 1.0), then increase
   the table size by multiplying by the growth factor (a number greater than
   1.0).  The growth threshold defaults to 0.8, and the growth factor
   defaults to 1.414, so that two consecutive growths roughly double the
   table size, since 1.414 * 1.414 is about 2.  */
#define DEFAULT_GROWTH_THRESHOLD 0.8
#define DEFAULT_GROWTH_FACTOR 1.414

/* If a deletion empties a bucket and causes the ratio of used buckets to
   table size to become smaller than the shrink threshold (a number between
   0.0 and 1.0), then shrink the table by multiplying by the shrink factor (a
   number greater than the shrink threshold but smaller than 1.0).  The shrink
   threshold and factor default to 0.0 and 1.0, meaning that the table never
   shrinks.  */
#define DEFAULT_SHRINK_THRESHOLD 0.0
#define DEFAULT_SHRINK_FACTOR 1.0

/* Use this to initialize or reset a TUNING structure to
   some sensible values.  */
static const Hash_tuning default_tuning =
  {
    DEFAULT_SHRINK_THRESHOLD,
    DEFAULT_SHRINK_FACTOR,
    DEFAULT_GROWTH_THRESHOLD,
    DEFAULT_GROWTH_FACTOR,
    false
  };
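
/* For illustration, a caller who wants a table that also shrinks might
   override the defaults along these lines (a sketch only; the numbers are
   arbitrary examples, not recommendations):

     Hash_tuning my_tuning;

     hash_reset_tuning (&my_tuning);
     my_tuning.shrink_threshold = 0.2;
     my_tuning.shrink_factor = 0.5;

   and then pass &my_tuning to `hash_initialize', which validates the
   structure via `check_tuning' below.  */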

/* Information and lookup.  */

/* The following few functions provide information about the overall hash
   table organization: the number of entries, number of buckets and maximum
   length of buckets.  */

/* Return the number of buckets in the hash table.  The table size, the
   total number of buckets (used plus unused), and the maximum number of
   slots are all the same quantity.  */

unsigned
hash_get_n_buckets (const Hash_table *table)
{
  return table->n_buckets;
}

/* Return the number of slots in use (non-empty buckets).  */

unsigned
hash_get_n_buckets_used (const Hash_table *table)
{
  return table->n_buckets_used;
}

/* Return the number of active entries.  */

unsigned
hash_get_n_entries (const Hash_table *table)
{
  return table->n_entries;
}

/* Return the length of the longest chain (bucket).  */

unsigned
hash_get_max_bucket_length (const Hash_table *table)
{
  struct hash_entry *bucket;
  unsigned max_bucket_length = 0;

  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      if (bucket->data)
        {
          struct hash_entry *cursor = bucket;
          unsigned bucket_length = 1;

          while (cursor = cursor->next, cursor)
            bucket_length++;

          if (bucket_length > max_bucket_length)
            max_bucket_length = bucket_length;
        }
    }

  return max_bucket_length;
}

/* Do a mild validation of a hash table, by traversing it and checking two
   statistics.  */

bool
hash_table_ok (const Hash_table *table)
{
  struct hash_entry *bucket;
  unsigned n_buckets_used = 0;
  unsigned n_entries = 0;

  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      if (bucket->data)
        {
          struct hash_entry *cursor = bucket;

          /* Count bucket head.  */
          n_buckets_used++;
          n_entries++;

          /* Count bucket overflow.  */
          while (cursor = cursor->next, cursor)
            n_entries++;
        }
    }

  if (n_buckets_used == table->n_buckets_used && n_entries == table->n_entries)
    return true;

  return false;
}

void
hash_print_statistics (const Hash_table *table, FILE *stream)
{
  unsigned n_entries = hash_get_n_entries (table);
  unsigned n_buckets = hash_get_n_buckets (table);
  unsigned n_buckets_used = hash_get_n_buckets_used (table);
  unsigned max_bucket_length = hash_get_max_bucket_length (table);

  fprintf (stream, "# entries:         %u\n", n_entries);
  fprintf (stream, "# buckets:         %u\n", n_buckets);
  fprintf (stream, "# buckets used:    %u (%.2f%%)\n", n_buckets_used,
           (100.0 * n_buckets_used) / n_buckets);
  fprintf (stream, "max bucket length: %u\n", max_bucket_length);
}

/* If ENTRY matches an entry already in the hash table, return the
   entry from the table.  Otherwise, return NULL.  */

void *
hash_lookup (const Hash_table *table, const void *entry)
{
  struct hash_entry *bucket
    = table->bucket + table->hasher (entry, table->n_buckets);
  struct hash_entry *cursor;

  assert (bucket < table->bucket_limit);

  if (bucket->data == NULL)
    return NULL;

  for (cursor = bucket; cursor; cursor = cursor->next)
    if (table->comparator (entry, cursor->data))
      return cursor->data;

  return NULL;
}
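
/* As a usage sketch (assuming a table whose hasher and comparator work on
   NUL-terminated strings):

     char *match = hash_lookup (table, "key");

   MATCH is then either NULL or the pointer stored in the table, which need
   not be the very pointer passed in, only one comparing equal to it.  */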

/* Walking.  */

/* The functions in this page traverse the hash table and process the
   contained entries.  For the traversal to work properly, the hash table
   should not be resized nor modified while any particular entry is being
   processed.  In particular, entries should not be added or removed.  */

/* Return the first data in the table, or NULL if the table is empty.  */

void *
hash_get_first (const Hash_table *table)
{
  struct hash_entry *bucket;

  if (table->n_entries == 0)
    return NULL;

  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    if (bucket->data)
      return bucket->data;

  assert (0);
  return NULL;
}

/* Return the user data for the entry following ENTRY, where ENTRY has been
   returned by a previous call to either `hash_get_first' or `hash_get_next'.
   Return NULL if there are no more entries.  */

void *
hash_get_next (const Hash_table *table, const void *entry)
{
  struct hash_entry *bucket
    = table->bucket + table->hasher (entry, table->n_buckets);
  struct hash_entry *cursor;

  assert (bucket < table->bucket_limit);

  /* Find next entry in the same bucket.  */
  for (cursor = bucket; cursor; cursor = cursor->next)
    if (cursor->data == entry && cursor->next)
      return cursor->next->data;

  /* Find first entry in any subsequent bucket.  */
  while (++bucket < table->bucket_limit)
    if (bucket->data)
      return bucket->data;

  /* None found.  */
  return NULL;
}
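
/* Together, `hash_get_first' and `hash_get_next' support the usual scanning
   idiom (a sketch; `process' stands for whatever per-entry work the caller
   needs, and no entry may be added or removed during the scan):

     void *data;

     for (data = hash_get_first (table);
          data != NULL;
          data = hash_get_next (table, data))
       process (data);
*/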

/* Fill BUFFER with pointers to active user entries in the hash table, then
   return the number of pointers copied.  Do not copy more than BUFFER_SIZE
   pointers.  */

unsigned
hash_get_entries (const Hash_table *table, void **buffer,
                  unsigned buffer_size)
{
  unsigned counter = 0;
  struct hash_entry *bucket;
  struct hash_entry *cursor;

  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      if (bucket->data)
        {
          for (cursor = bucket; cursor; cursor = cursor->next)
            {
              if (counter >= buffer_size)
                return counter;
              buffer[counter++] = cursor->data;
            }
        }
    }

  return counter;
}

/* Call a PROCESSOR function for each entry of a hash table, and return the
   number of entries for which the processor function returned success.  A
   pointer to some PROCESSOR_DATA is passed, unchanged, to each call of the
   processor function.  The PROCESSOR accepts two arguments: the first is
   the user entry being walked, the second is the value of PROCESSOR_DATA
   as received.  The walking continues for as long as the PROCESSOR function
   returns nonzero.  When it returns zero, the walking is interrupted.  */

unsigned
hash_do_for_each (const Hash_table *table, Hash_processor processor,
                  void *processor_data)
{
  unsigned counter = 0;
  struct hash_entry *bucket;
  struct hash_entry *cursor;

  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      if (bucket->data)
        {
          for (cursor = bucket; cursor; cursor = cursor->next)
            {
              if (!(*processor) (cursor->data, processor_data))
                return counter;
              counter++;
            }
        }
    }

  return counter;
}
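
/* For example, a processor that counts entries bearing a given prefix might
   look like this (a sketch; `struct prefix_count' and `count_prefixed' are
   invented for the illustration, entries are assumed to be strings, and the
   processor signature is taken to be bool (*) (void *, void *) as declared
   in hash.h):

     #include <string.h>

     struct prefix_count { const char *prefix; unsigned count; };

     static bool
     count_prefixed (void *data, void *processor_data)
     {
       struct prefix_count *pc = processor_data;

       if (strncmp (data, pc->prefix, strlen (pc->prefix)) == 0)
         pc->count++;
       return true;
     }

   Returning true keeps the walk going; returning false would stop it early,
   as described above.  */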

/* Allocation and clean-up.  */

/* Return a hash index for a NUL-terminated STRING between 0 and N_BUCKETS-1.
   This is a convenience routine for constructing other hashing functions.  */

#if USE_DIFF_HASH

/* About hashing, Paul Eggert writes to me (FP), on 1994-01-01: "Please see
   B. J. McKenzie, R. Harries & T. Bell, Selecting a hashing algorithm,
   Software--practice & experience 20, 2 (Feb 1990), 209-224.  Good hash
   algorithms tend to be domain-specific, so what's good for [diffutils'] io.c
   may not be good for your application."  */

unsigned
hash_string (const char *string, unsigned n_buckets)
{
# ifndef CHAR_BIT
#  define CHAR_BIT 8
# endif
# define ROTATE_LEFT(Value, Shift) \
  ((Value) << (Shift) | (Value) >> ((sizeof (unsigned) * CHAR_BIT) - (Shift)))
# define HASH_ONE_CHAR(Value, Byte) \
  ((Byte) + ROTATE_LEFT (Value, 7))

  unsigned value = 0;

  for (; *string; string++)
    value = HASH_ONE_CHAR (value, *(const unsigned char *) string);
  return value % n_buckets;

# undef ROTATE_LEFT
# undef HASH_ONE_CHAR
}

#else /* not USE_DIFF_HASH */

/* This one comes from `recode', and performs a bit better than the above as
   per a few experiments.  It is inspired by a hashing routine found in the
   very old Cyber `snoop', itself written in typical Greg Mansfield style.
   (By the way, what happened to this excellent man?  Is he still alive?)  */

unsigned
hash_string (const char *string, unsigned n_buckets)
{
  unsigned value = 0;

  while (*string)
    value = ((value * 31 + (int) *(const unsigned char *) string++)
             % n_buckets);
  return value;
}

#endif /* not USE_DIFF_HASH */
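
/* Either version of `hash_string' makes writing a conforming `hasher' for
   string keys trivial.  A sketch (`string_hasher' is an invented name):

     static unsigned
     string_hasher (const void *entry, unsigned table_size)
     {
       return hash_string (entry, table_size);
     }

   The matching comparator would simply test `strcmp (...) == 0'.  */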

/* Return true if CANDIDATE is a prime number.  CANDIDATE should be an odd
   number at least equal to 11.  */

static bool
is_prime (unsigned long candidate)
{
  unsigned long divisor = 3;
  unsigned long square = divisor * divisor;

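  /* The loop below tries only odd divisors, maintaining the invariant
     SQUARE == DIVISOR * DIVISOR via the identity
     (d + 2) * (d + 2) == d * d + 4 * (d + 1), so no multiplication needs
     redoing.  It stops once SQUARE >= CANDIDATE, that is, once all
     potential divisors up to the square root have been tried, or once a
     divisor is found.  */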
  while (square < candidate && (candidate % divisor))
    {
      divisor++;
      square += 4 * divisor;
      divisor++;
    }

  return (candidate % divisor ? true : false);
}

/* Round a given CANDIDATE number up to the nearest prime, and return that
   prime.  Primes lower than 10 are merely skipped.  */

static unsigned long
next_prime (unsigned long candidate)
{
  /* Skip small primes.  */
  if (candidate < 10)
    candidate = 10;

  /* Make it definitely odd.  */
  candidate |= 1;

  while (!is_prime (candidate))
    candidate += 2;

  return candidate;
}

void
hash_reset_tuning (Hash_tuning *tuning)
{
  *tuning = default_tuning;
}

/* For the given hash TABLE, check the user-supplied tuning structure for
   reasonable values, and return true if there is no gross error with it.
   Otherwise, definitively reset the TUNING field to some acceptable default
   in the hash table (that is, the user loses the right to further modify
   tuning arguments), and return false.  */

static bool
check_tuning (Hash_table *table)
{
  const Hash_tuning *tuning = table->tuning;

  if (tuning->growth_threshold > 0.0
      && tuning->growth_threshold < 1.0
      && tuning->growth_factor > 1.0
      && tuning->shrink_threshold >= 0.0
      && tuning->shrink_threshold < 1.0
      && tuning->shrink_factor > tuning->shrink_threshold
      && tuning->shrink_factor <= 1.0
      && tuning->shrink_threshold < tuning->growth_threshold)
    return true;

  table->tuning = &default_tuning;
  return false;
}

/* Allocate and return a new hash table, or NULL upon failure.  The initial
   number of buckets is automatically selected so as to _guarantee_ that you
   may insert at least CANDIDATE different user entries before any growth of
   the hash table size occurs.  So, if you have a reasonably tight a-priori
   upper bound on the number of entries you intend to insert in the hash
   table, you may save some table memory and insertion time by specifying it
   here.  If the IS_N_BUCKETS field of the TUNING structure is true, the
   CANDIDATE argument has its meaning changed to the wanted number of
   buckets.

   TUNING points to a structure of user-supplied values, in case some fine
   tuning is wanted over the default behavior of the hasher.  If TUNING is
   NULL, the default tuning parameters are used instead.

   A user-supplied HASHER function must be provided.  It accepts two
   arguments ENTRY and TABLE_SIZE.  It computes, by hashing ENTRY contents, a
   slot number for that entry which should be in the range 0..TABLE_SIZE-1.
   This slot number is then returned.

   A user-supplied COMPARATOR function must also be provided.  It accepts two
   arguments pointing to user data; it then returns true for a pair of entries
   that compare equal, or false otherwise.  This function is internally called
   on entries which are already known to hash to the same bucket index.

   The user-supplied DATA_FREER function, when not NULL, may be later called
   with the user data as an argument, just before the entry containing the
   data gets freed.  This happens from within `hash_free' or `hash_clear'.
   You should specify this function only if you want these functions to free
   all of your `data' data.  This is typically the case when your data is
   simply an auxiliary struct that you have malloc'd to aggregate several
   values.  */

Hash_table *
hash_initialize (unsigned candidate, const Hash_tuning *tuning,
                 Hash_hasher hasher, Hash_comparator comparator,
                 Hash_data_freer data_freer)
{
  Hash_table *table;
  struct hash_entry *bucket;

  if (hasher == NULL || comparator == NULL)
    return NULL;

  table = (Hash_table *) malloc (sizeof (Hash_table));
  if (table == NULL)
    return NULL;

  if (!tuning)
    tuning = &default_tuning;
  table->tuning = tuning;
  if (!check_tuning (table))
    {
      /* Fail if the tuning options are invalid.  This is the only occasion
         when the user gets some feedback about it.  Once the table is
         created, if the user provides invalid tuning options, we silently
         revert to using the defaults, and ignore further requests to change
         the tuning options.  */
      free (table);
      return NULL;
    }

  table->n_buckets
    = next_prime (tuning->is_n_buckets ? candidate
                  : (unsigned) (candidate / tuning->growth_threshold));

  table->bucket = (struct hash_entry *)
    malloc (table->n_buckets * sizeof (struct hash_entry));
  if (table->bucket == NULL)
    {
      free (table);
      return NULL;
    }
  table->bucket_limit = table->bucket + table->n_buckets;

  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      bucket->data = NULL;
      bucket->next = NULL;
    }
  table->n_buckets_used = 0;
  table->n_entries = 0;

  table->hasher = hasher;
  table->comparator = comparator;
  table->data_freer = data_freer;

  table->free_entry_list = NULL;
#if USE_OBSTACK
  obstack_init (&table->entry_stack);
#endif
  return table;
}
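
/* Putting the pieces together, a typical client looks like this (a sketch
   only; `string_hasher' wraps `hash_string' as suggested above, and
   `string_comparator' is an invented comparator wrapping `strcmp'):

     Hash_table *table = hash_initialize (31, NULL, string_hasher,
                                          string_comparator, free);
     char *copy;

     if (table == NULL)
       abort ();
     copy = strdup ("example");
     if (copy == NULL || hash_insert (table, copy) == NULL)
       abort ();
     ... use hash_lookup, hash_delete, etc. ...
     hash_free (table);

   Passing `free' as DATA_FREER lets `hash_free' (below) release the
   strdup'd strings along with the table itself.  */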

/* Make all buckets empty, placing any chained entries on the free list.
   Apply the user-specified function data_freer (if any) to the data of any
   affected entries.  */

void
hash_clear (Hash_table *table)
{
  struct hash_entry *bucket;

  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      if (bucket->data)
        {
          struct hash_entry *cursor;
          struct hash_entry *next;

          /* Free the bucket overflow.  */
          for (cursor = bucket->next; cursor; cursor = next)
            {
              if (table->data_freer)
                (*table->data_freer) (cursor->data);
              cursor->data = NULL;

              next = cursor->next;
              /* Relinking is done one entry at a time, as it is to be
                 expected that overflows are either rare or short.  */
              cursor->next = table->free_entry_list;
              table->free_entry_list = cursor;
            }

          /* Free the bucket head.  */
          if (table->data_freer)
            (*table->data_freer) (bucket->data);
          bucket->data = NULL;
          bucket->next = NULL;
        }
    }

  table->n_buckets_used = 0;
  table->n_entries = 0;
}

/* Reclaim all storage associated with a hash table.  If a data_freer
   function has been supplied by the user when the hash table was created,
   this function applies it to the data of each entry before freeing that
   entry.  */

void
hash_free (Hash_table *table)
{
  struct hash_entry *bucket;
  struct hash_entry *cursor;
  struct hash_entry *next;

  /* Call the user data_freer function.  */
  if (table->data_freer && table->n_entries)
    {
      for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
        {
          if (bucket->data)
            {
              for (cursor = bucket; cursor; cursor = cursor->next)
                (*table->data_freer) (cursor->data);
            }
        }
    }

#if USE_OBSTACK

  obstack_free (&table->entry_stack, NULL);

#else

  /* Free all bucket overflowed entries.  */
  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      for (cursor = bucket->next; cursor; cursor = next)
        {
          next = cursor->next;
          free (cursor);
        }
    }

  /* Also reclaim the internal list of previously freed entries.  */
  for (cursor = table->free_entry_list; cursor; cursor = next)
    {
      next = cursor->next;
      free (cursor);
    }

#endif

  /* Free the remainder of the hash table structure.  */
  free (table->bucket);
  free (table);
}

/* Insertion and deletion.  */

/* Get a new hash entry for a bucket overflow, possibly by recycling a
   previously freed one.  If this is not possible, allocate a new one.  */

static struct hash_entry *
allocate_entry (Hash_table *table)
{
  struct hash_entry *new;

  if (table->free_entry_list)
    {
      new = table->free_entry_list;
      table->free_entry_list = new->next;
    }
  else
    {
#if USE_OBSTACK
      new = (struct hash_entry *)
        obstack_alloc (&table->entry_stack, sizeof (struct hash_entry));
#else
      new = (struct hash_entry *) malloc (sizeof (struct hash_entry));
#endif
    }

  return new;
}

/* Free a hash entry which was part of some bucket overflow,
   saving it for later recycling.  */

static void
free_entry (Hash_table *table, struct hash_entry *entry)
{
  entry->data = NULL;
  entry->next = table->free_entry_list;
  table->free_entry_list = entry;
}

/* This private function is used to help with insertion and deletion.  When
   ENTRY matches an entry in the table, return a pointer to the corresponding
   user data and set *BUCKET_HEAD to the head of the selected bucket.
   Otherwise, return NULL.  When DELETE is true and ENTRY matches an entry in
   the table, unlink the matching entry.  */

static void *
hash_find_entry (Hash_table *table, const void *entry,
                 struct hash_entry **bucket_head, bool delete)
{
  struct hash_entry *bucket
    = table->bucket + table->hasher (entry, table->n_buckets);
  struct hash_entry *cursor;

  assert (bucket < table->bucket_limit);
  *bucket_head = bucket;

  /* Test for empty bucket.  */
  if (bucket->data == NULL)
    return NULL;

  /* See if the entry is the first in the bucket.  */
  if ((*table->comparator) (entry, bucket->data))
    {
      void *data = bucket->data;

      if (delete)
        {
          if (bucket->next)
            {
              struct hash_entry *next = bucket->next;

              /* Bump the first overflow entry into the bucket head, then
                 save the previous first overflow entry for later
                 recycling.  */
              *bucket = *next;
              free_entry (table, next);
            }
          else
            {
              bucket->data = NULL;
            }
        }

      return data;
    }

  /* Scan the bucket overflow.  */
  for (cursor = bucket; cursor->next; cursor = cursor->next)
    {
      if ((*table->comparator) (entry, cursor->next->data))
        {
          void *data = cursor->next->data;

          if (delete)
            {
              struct hash_entry *next = cursor->next;

              /* Unlink the entry to delete, then save the freed entry for
                 later recycling.  */
              cursor->next = next->next;
              free_entry (table, next);
            }

          return data;
        }
    }

  /* No entry found.  */
  return NULL;
}

/* For an already existing hash table, change the number of buckets through
   specifying CANDIDATE.  The contents of the hash table are preserved.  The
   new number of buckets is automatically selected so as to _guarantee_ that
   the table may receive at least CANDIDATE different user entries, including
   those already in the table, before any other growth of the hash table size
   occurs.  If TUNING->IS_N_BUCKETS is true, then CANDIDATE specifies the
   exact number of buckets desired.  */

bool
hash_rehash (Hash_table *table, unsigned candidate)
{
  Hash_table *new_table;
  struct hash_entry *bucket;
  struct hash_entry *cursor;
  struct hash_entry *next;

  new_table = hash_initialize (candidate, table->tuning, table->hasher,
                               table->comparator, table->data_freer);
  if (new_table == NULL)
    return false;

  /* Merely carry the extra old space over to the new table.  */
#if USE_OBSTACK
  obstack_free (&new_table->entry_stack, NULL);
  new_table->entry_stack = table->entry_stack;
#endif
  new_table->free_entry_list = table->free_entry_list;

  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    if (bucket->data)
      for (cursor = bucket; cursor; cursor = next)
        {
          void *data = cursor->data;
          struct hash_entry *new_bucket
            = (new_table->bucket
               + new_table->hasher (data, new_table->n_buckets));

          assert (new_bucket < new_table->bucket_limit);
          next = cursor->next;

          if (new_bucket->data)
            {
              if (cursor == bucket)
                {
                  /* Allocate or recycle an entry, when moving from a bucket
                     header into a bucket overflow.  */
                  struct hash_entry *new_entry = allocate_entry (new_table);

                  if (new_entry == NULL)
                    return false;

                  new_entry->data = data;
                  new_entry->next = new_bucket->next;
                  new_bucket->next = new_entry;
                }
              else
                {
                  /* Merely relink an existing entry, when moving from a
                     bucket overflow into a bucket overflow.  */
                  cursor->next = new_bucket->next;
                  new_bucket->next = cursor;
                }
            }
          else
            {
              /* Free an existing entry, when moving from a bucket
                 overflow into a bucket header.  Also take care of the
                 simple case of moving from a bucket header into a bucket
                 header.  */
              new_bucket->data = data;
              new_table->n_buckets_used++;
              if (cursor != bucket)
                free_entry (new_table, cursor);
            }
        }

  free (table->bucket);
  table->bucket = new_table->bucket;
  table->bucket_limit = new_table->bucket_limit;
  table->n_buckets = new_table->n_buckets;
  table->n_buckets_used = new_table->n_buckets_used;
  table->free_entry_list = new_table->free_entry_list;
  /* table->n_entries already holds its value.  */
#if USE_OBSTACK
  table->entry_stack = new_table->entry_stack;
#endif
  free (new_table);

  return true;
}
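
/* A caller who knows a large batch of insertions is coming can pre-grow the
   table explicitly (a sketch; N_NEW is a hypothetical count of entries
   about to be added):

     if (!hash_rehash (table, hash_get_n_entries (table) + n_new))
       ... handle the allocation failure ...

   which avoids several intermediate automatic rehashes during the batch.  */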

/* If ENTRY matches an entry already in the hash table, return the pointer
   to the entry from the table.  Otherwise, insert ENTRY and return ENTRY.
   Return NULL if the storage required for insertion cannot be allocated.  */

void *
hash_insert (Hash_table *table, const void *entry)
{
  void *data;
  struct hash_entry *bucket;

  assert (entry);		/* cannot insert a NULL entry */

  /* If there's a matching entry already in the table, return that.  */
  if ((data = hash_find_entry (table, entry, &bucket, false)) != NULL)
    return data;

  /* ENTRY is not matched, it should be inserted.  */

  if (bucket->data)
    {
      struct hash_entry *new_entry = allocate_entry (table);

      if (new_entry == NULL)
        return NULL;

      /* Add ENTRY in the overflow of the bucket.  */

      new_entry->data = (void *) entry;
      new_entry->next = bucket->next;
      bucket->next = new_entry;
      table->n_entries++;
      return (void *) entry;
    }

  /* Add ENTRY right in the bucket head.  */

  bucket->data = (void *) entry;
  table->n_entries++;
  table->n_buckets_used++;

  /* If the growth threshold of the buckets in use has been reached, increase
     the table size and rehash.  There's no point in checking the number of
     entries: if the hashing function is ill-conditioned, rehashing is not
     likely to improve it.  */

  if (table->n_buckets_used
      > table->tuning->growth_threshold * table->n_buckets)
    {
      /* Check more fully, before starting real work.  If tuning arguments
         became invalid, the second check will rely on proper defaults.  */
      check_tuning (table);
      if (table->n_buckets_used
          > table->tuning->growth_threshold * table->n_buckets)
        {
          const Hash_tuning *tuning = table->tuning;
          unsigned candidate
            = (unsigned) (tuning->is_n_buckets
                          ? (table->n_buckets * tuning->growth_factor)
                          : (table->n_buckets * tuning->growth_factor
                             * tuning->growth_threshold));

          /* If the rehash fails, arrange to return NULL.  */
          if (!hash_rehash (table, candidate))
            entry = NULL;
        }
    }

  return (void *) entry;
}
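
/* Note the three possible outcomes of `hash_insert' (a usage sketch, with
   ITEM standing for some caller-owned entry):

     void *res = hash_insert (table, item);

     if (res == NULL)
       ... the storage required for insertion could not be allocated ...
     else if (res != item)
       ... an equal entry was already present; ITEM was not inserted ...
     else
       ... ITEM is now in the table ...
*/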

/* If ENTRY is already in the table, remove it and return the just-deleted
   data (the user may want to deallocate its storage).  If ENTRY is not in the
   table, don't modify the table and return NULL.  */

void *
hash_delete (Hash_table *table, const void *entry)
{
  void *data;
  struct hash_entry *bucket;

  data = hash_find_entry (table, entry, &bucket, true);
  if (!data)
    return NULL;

  table->n_entries--;
  if (!bucket->data)
    {
      table->n_buckets_used--;

      /* If the shrink threshold of the buckets in use has been reached,
         rehash into a smaller table.  */

      if (table->n_buckets_used
          < table->tuning->shrink_threshold * table->n_buckets)
        {
          /* Check more fully, before starting real work.  If tuning arguments
             became invalid, the second check will rely on proper defaults.  */
          check_tuning (table);
          if (table->n_buckets_used
              < table->tuning->shrink_threshold * table->n_buckets)
            {
              const Hash_tuning *tuning = table->tuning;
              unsigned candidate
                = (unsigned) (tuning->is_n_buckets
                              ? table->n_buckets * tuning->shrink_factor
                              : (table->n_buckets * tuning->shrink_factor
                                 * tuning->growth_threshold));

              hash_rehash (table, candidate);
            }
        }
    }

  return data;
}
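
/* Since `hash_delete' does not invoke DATA_FREER (that happens only from
   within `hash_free' or `hash_clear'), the caller disposes of the returned
   data when appropriate.  A sketch, for a table of malloc'd entries:

     void *data = hash_delete (table, item);

     if (data)
       free (data);
*/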

/* Testing.  */

#if TESTING

void
hash_print (const Hash_table *table)
{
  struct hash_entry *bucket;

  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      struct hash_entry *cursor;

      if (bucket->data)
        printf ("%d:\n", (int) (bucket - table->bucket));

      for (cursor = bucket; cursor; cursor = cursor->next)
        {
          char *s = (char *) cursor->data;
          /* FIXME */
          if (s)
            printf ("  %s\n", s);
        }
    }
}

#endif /* TESTING */