git.saurik.com Git - apple/libc.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*-
	2	* Copyright (c) 1990, 1993, 1994
	3	* The Regents of the University of California. All rights reserved.
	4	*
	5	* This code is derived from software contributed to Berkeley by
	6	* Margo Seltzer.
	7	*
	8	* Redistribution and use in source and binary forms, with or without
	9	* modification, are permitted provided that the following conditions
	10	* are met:
	11	* 1. Redistributions of source code must retain the above copyright
	12	* notice, this list of conditions and the following disclaimer.
	13	* 2. Redistributions in binary form must reproduce the above copyright
	14	* notice, this list of conditions and the following disclaimer in the
	15	* documentation and/or other materials provided with the distribution.
	16	* 4. Neither the name of the University nor the names of its contributors
	17	* may be used to endorse or promote products derived from this software
	18	* without specific prior written permission.
	19	*
	20	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
	21	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	22	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	23	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
	24	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	25	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	26	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	27	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	28	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	29	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	30	* SUCH DAMAGE.
	31	*/
	32
	33	#if defined(LIBC_SCCS) && !defined(lint)
	34	static char sccsid[] = "@(#)hash_bigkey.c 8.3 (Berkeley) 5/31/94";
	35	#endif /* LIBC_SCCS and not lint */
	36	#include <sys/cdefs.h>
	37	__FBSDID("$FreeBSD: src/lib/libc/db/hash/hash_bigkey.c,v 1.10 2009/03/28 06:47:05 delphij Exp $");
	38
	39	/*
	40	* PACKAGE: hash
	41	* DESCRIPTION:
	42	* Big key/data handling for the hashing package.
	43	*
	44	* ROUTINES:
	45	* External
	46	* __big_keydata
	47	* __big_split
	48	* __big_insert
	49	* __big_return
	50	* __big_delete
	51	* __find_last_page
	52	* Internal
	53	* collect_key
	54	* collect_data
	55	*/
	56
	57	#include <sys/param.h>
	58
	59	#include <errno.h>
	60	#include <stdio.h>
	61	#include <stdlib.h>
	62	#include <string.h>
	63
	64	#ifdef DEBUG
	65	#include <assert.h>
	66	#endif
	67
	68	#include <db.h>
	69	#include "hash.h"
	70	#include "page.h"
	71	#include "hash_extern.h"
	72
	73	static int collect_key(HTAB , BUFHEAD , int, DBT *, int);
	74	static int collect_data(HTAB , BUFHEAD , int, int);
	75
	76	/*
	77	* Big_insert
	78	*
	79	* You need to do an insert and the key/data pair is too big
	80	*
	81	* Returns:
	82	* 0 ==> OK
	83	*-1 ==> ERROR
	84	*/
	85	int
	86	__big_insert(HTAB hashp, BUFHEAD bufp, const DBT key, const DBT val)
	87	{
	88	u_int16_t *p;
	89	int key_size, n;
	90	unsigned int val_size;
	91	u_int16_t space, move_bytes, off;
	92	char cp, key_data, *val_data;
	93
	94	cp = bufp->page; /* Character pointer of p. */
	95	p = (u_int16_t *)cp;
	96
	97	key_data = (char *)key->data;
	98	key_size = key->size;
	99	val_data = (char *)val->data;
	100	val_size = val->size;
	101
	102	/* First move the Key */
	103	for (space = FREESPACE(p) - BIGOVERHEAD; key_size;
	104	space = FREESPACE(p) - BIGOVERHEAD) {
	105	move_bytes = MIN(space, key_size);
	106	off = OFFSET(p) - move_bytes;
	107	memmove(cp + off, key_data, move_bytes);
	108	key_size -= move_bytes;
	109	key_data += move_bytes;
	110	n = p[0];
	111	p[++n] = off;
	112	p[0] = ++n;
	113	FREESPACE(p) = off - PAGE_META(n);
	114	OFFSET(p) = off;
	115	p[n] = PARTIAL_KEY;
	116	bufp = __add_ovflpage(hashp, bufp);
	117	if (!bufp)
	118	return (-1);
	119	n = p[0];
	120	if (!key_size) {
	121	space = FREESPACE(p);
	122	if (space) {
	123	move_bytes = MIN(space, val_size);
	124	/*
	125	* If the data would fit exactly in the
	126	* remaining space, we must overflow it to the
	127	* next page; otherwise the invariant that the
	128	* data must end on a page with FREESPACE
	129	* non-zero would fail.
	130	*/
	131	if (space == val_size && val_size == val->size)
	132	goto toolarge;
	133	off = OFFSET(p) - move_bytes;
	134	memmove(cp + off, val_data, move_bytes);
	135	val_data += move_bytes;
	136	val_size -= move_bytes;
	137	p[n] = off;
	138	p[n - 2] = FULL_KEY_DATA;
	139	FREESPACE(p) = FREESPACE(p) - move_bytes;
	140	OFFSET(p) = off;
	141	} else {
	142	toolarge:
	143	p[n - 2] = FULL_KEY;
	144	}
	145	}
	146	p = (u_int16_t *)bufp->page;
	147	cp = bufp->page;
	148	bufp->flags \|= BUF_MOD;
	149	}
	150
	151	/* Now move the data */
	152	for (space = FREESPACE(p) - BIGOVERHEAD; val_size;
	153	space = FREESPACE(p) - BIGOVERHEAD) {
	154	move_bytes = MIN(space, val_size);
	155	/*
	156	* Here's the hack to make sure that if the data ends on the
	157	* same page as the key ends, FREESPACE is at least one.
	158	*/
	159	if (space == val_size && val_size == val->size)
	160	move_bytes--;
	161	off = OFFSET(p) - move_bytes;
	162	memmove(cp + off, val_data, move_bytes);
	163	val_size -= move_bytes;
	164	val_data += move_bytes;
	165	n = p[0];
	166	p[++n] = off;
	167	p[0] = ++n;
	168	FREESPACE(p) = off - PAGE_META(n);
	169	OFFSET(p) = off;
	170	if (val_size) {
	171	p[n] = FULL_KEY;
	172	bufp = __add_ovflpage(hashp, bufp);
	173	if (!bufp)
	174	return (-1);
	175	cp = bufp->page;
	176	p = (u_int16_t *)cp;
	177	} else
	178	p[n] = FULL_KEY_DATA;
	179	bufp->flags \|= BUF_MOD;
	180	}
	181	return (0);
	182	}
	183
	184	/*
	185	* Called when bufp's page contains a partial key (index should be 1)
	186	*
	187	* All pages in the big key/data pair except bufp are freed. We cannot
	188	* free bufp because the page pointing to it is lost and we can't get rid
	189	* of its pointer.
	190	*
	191	* Returns:
	192	* 0 => OK
	193	*-1 => ERROR
	194	*/
	195	int
	196	__big_delete(HTAB hashp, BUFHEAD bufp)
	197	{
	198	BUFHEAD last_bfp, rbufp;
	199	u_int16_t *bp, pageno;
	200	int key_done, n;
	201
	202	rbufp = bufp;
	203	last_bfp = NULL;
	204	bp = (u_int16_t *)bufp->page;
	205	pageno = 0;
	206	key_done = 0;
	207
	208	while (!key_done \|\| (bp[2] != FULL_KEY_DATA)) {
	209	if (bp[2] == FULL_KEY \|\| bp[2] == FULL_KEY_DATA)
	210	key_done = 1;
	211
	212	/*
	213	* If there is freespace left on a FULL_KEY_DATA page, then
	214	* the data is short and fits entirely on this page, and this
	215	* is the last page.
	216	*/
	217	if (bp[2] == FULL_KEY_DATA && FREESPACE(bp))
	218	break;
	219	pageno = bp[bp[0] - 1];
	220	rbufp->flags \|= BUF_MOD;
	221	rbufp = __get_buf(hashp, pageno, rbufp, 0);
	222	if (last_bfp)
	223	__free_ovflpage(hashp, last_bfp);
	224	last_bfp = rbufp;
	225	if (!rbufp)
	226	return (-1); /* Error. */
	227	bp = (u_int16_t *)rbufp->page;
	228	}
	229
	230	/*
	231	* If we get here then rbufp points to the last page of the big
	232	* key/data pair. Bufp points to the first one -- it should now be
	233	* empty pointing to the next page after this pair. Can't free it
	234	* because we don't have the page pointing to it.
	235	*/
	236
	237	/* This is information from the last page of the pair. */
	238	n = bp[0];
	239	pageno = bp[n - 1];
	240
	241	/* Now, bp is the first page of the pair. */
	242	bp = (u_int16_t *)bufp->page;
	243	if (n > 2) {
	244	/* There is an overflow page. */
	245	bp[1] = pageno;
	246	bp[2] = OVFLPAGE;
	247	bufp->ovfl = rbufp->ovfl;
	248	} else
	249	/* This is the last page. */
	250	bufp->ovfl = NULL;
	251	n -= 2;
	252	bp[0] = n;
	253	FREESPACE(bp) = hashp->BSIZE - PAGE_META(n);
	254	OFFSET(bp) = hashp->BSIZE;
	255
	256	bufp->flags \|= BUF_MOD;
	257	if (rbufp)
	258	__free_ovflpage(hashp, rbufp);
	259	if (last_bfp && last_bfp != rbufp)
	260	__free_ovflpage(hashp, last_bfp);
	261
	262	hashp->NKEYS--;
	263	return (0);
	264	}
	265	/*
	266	* Returns:
	267	* 0 = key not found
	268	* -1 = get next overflow page
	269	* -2 means key not found and this is big key/data
	270	* -3 error
	271	*/
	272	int
	273	__find_bigpair(HTAB hashp, BUFHEAD bufp, int ndx, char *key, int size)
	274	{
	275	u_int16_t *bp;
	276	char *p;
	277	int ksize;
	278	u_int16_t bytes;
	279	char *kkey;
	280
	281	bp = (u_int16_t *)bufp->page;
	282	p = bufp->page;
	283	ksize = size;
	284	kkey = key;
	285
	286	for (bytes = hashp->BSIZE - bp[ndx];
	287	bytes <= size && bp[ndx + 1] == PARTIAL_KEY;
	288	bytes = hashp->BSIZE - bp[ndx]) {
	289	if (memcmp(p + bp[ndx], kkey, bytes))
	290	return (-2);
	291	kkey += bytes;
	292	ksize -= bytes;
	293	bufp = __get_buf(hashp, bp[ndx + 2], bufp, 0);
	294	if (!bufp)
	295	return (-3);
	296	p = bufp->page;
	297	bp = (u_int16_t *)p;
	298	ndx = 1;
	299	}
	300
	301	if (bytes != ksize \|\| memcmp(p + bp[ndx], kkey, bytes)) {
	302	#ifdef HASH_STATISTICS
	303	++hash_collisions;
	304	#endif
	305	return (-2);
	306	} else
	307	return (ndx);
	308	}
	309
	310	/*
	311	* Given the buffer pointer of the first overflow page of a big pair,
	312	* find the end of the big pair
	313	*
	314	* This will set bpp to the buffer header of the last page of the big pair.
	315	* It will return the pageno of the overflow page following the last page
	316	* of the pair; 0 if there isn't any (i.e. big pair is the last key in the
	317	* bucket)
	318	*/
	319	u_int16_t
	320	__find_last_page(HTAB hashp, BUFHEAD *bpp)
	321	{
	322	BUFHEAD *bufp;
	323	u_int16_t *bp, pageno;
	324	int n;
	325
	326	bufp = *bpp;
	327	bp = (u_int16_t *)bufp->page;
	328	for (;;) {
	329	n = bp[0];
	330
	331	/*
	332	* This is the last page if: the tag is FULL_KEY_DATA and
	333	* either only 2 entries OVFLPAGE marker is explicit there
	334	* is freespace on the page.
	335	*/
	336	if (bp[2] == FULL_KEY_DATA &&
	337	((n == 2) \|\| (bp[n] == OVFLPAGE) \|\| (FREESPACE(bp))))
	338	break;
	339
	340	pageno = bp[n - 1];
	341	bufp = __get_buf(hashp, pageno, bufp, 0);
	342	if (!bufp)
	343	return (0); /* Need to indicate an error! */
	344	bp = (u_int16_t *)bufp->page;
	345	}
	346
	347	*bpp = bufp;
	348	if (bp[0] > 2)
	349	return (bp[3]);
	350	else
	351	return (0);
	352	}
	353
	354	/*
	355	* Return the data for the key/data pair that begins on this page at this
	356	* index (index should always be 1).
	357	*/
	358	int
	359	__big_return(HTAB hashp, BUFHEAD bufp, int ndx, DBT *val, int set_current)
	360	{
	361	BUFHEAD *save_p;
	362	u_int16_t *bp, len, off, save_addr;
	363	char *tp;
	364
	365	bp = (u_int16_t *)bufp->page;
	366	while (bp[ndx + 1] == PARTIAL_KEY) {
	367	bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
	368	if (!bufp)
	369	return (-1);
	370	bp = (u_int16_t *)bufp->page;
	371	ndx = 1;
	372	}
	373
	374	if (bp[ndx + 1] == FULL_KEY) {
	375	bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
	376	if (!bufp)
	377	return (-1);
	378	bp = (u_int16_t *)bufp->page;
	379	save_p = bufp;
	380	save_addr = save_p->addr;
	381	off = bp[1];
	382	len = 0;
	383	} else
	384	if (!FREESPACE(bp)) {
	385	/*
	386	* This is a hack. We can't distinguish between
	387	* FULL_KEY_DATA that contains complete data or
	388	* incomplete data, so we require that if the data
	389	* is complete, there is at least 1 byte of free
	390	* space left.
	391	*/
	392	off = bp[bp[0]];
	393	len = bp[1] - off;
	394	save_p = bufp;
	395	save_addr = bufp->addr;
	396	bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
	397	if (!bufp)
	398	return (-1);
	399	bp = (u_int16_t *)bufp->page;
	400	} else {
	401	/* The data is all on one page. */
	402	tp = (char *)bp;
	403	off = bp[bp[0]];
	404	val->data = (u_char *)tp + off;
	405	val->size = bp[1] - off;
	406	if (set_current) {
	407	if (bp[0] == 2) { /* No more buckets in
	408	* chain */
	409	hashp->cpage = NULL;
	410	hashp->cbucket++;
	411	hashp->cndx = 1;
	412	} else {
	413	hashp->cpage = __get_buf(hashp,
	414	bp[bp[0] - 1], bufp, 0);
	415	if (!hashp->cpage)
	416	return (-1);
	417	hashp->cndx = 1;
	418	if (!((u_int16_t *)
	419	hashp->cpage->page)[0]) {
	420	hashp->cbucket++;
	421	hashp->cpage = NULL;
	422	}
	423	}
	424	}
	425	return (0);
	426	}
	427
	428	val->size = (size_t)collect_data(hashp, bufp, (int)len, set_current);
	429	if (val->size == (size_t)-1)
	430	return (-1);
	431	if (save_p->addr != save_addr) {
	432	/* We are pretty short on buffers. */
	433	errno = EINVAL; /* OUT OF BUFFERS */
	434	return (-1);
	435	}
	436	memmove(hashp->tmp_buf, (save_p->page) + off, len);
	437	val->data = (u_char *)hashp->tmp_buf;
	438	return (0);
	439	}
	440	/*
	441	* Count how big the total datasize is by recursing through the pages. Then
	442	* allocate a buffer and copy the data as you recurse up.
	443	*/
	444	static int
	445	collect_data(HTAB hashp, BUFHEAD bufp, int len, int set)
	446	{
	447	u_int16_t *bp;
	448	char *p;
	449	BUFHEAD *xbp;
	450	u_int16_t save_addr;
	451	int mylen, totlen;
	452
	453	p = bufp->page;
	454	bp = (u_int16_t *)p;
	455	mylen = hashp->BSIZE - bp[1];
	456	save_addr = bufp->addr;
	457
	458	if (bp[2] == FULL_KEY_DATA) { /* End of Data */
	459	totlen = len + mylen;
	460	if (hashp->tmp_buf)
	461	free(hashp->tmp_buf);
	462	if ((hashp->tmp_buf = (char *)malloc(totlen)) == NULL)
	463	return (-1);
	464	if (set) {
	465	hashp->cndx = 1;
	466	if (bp[0] == 2) { /* No more buckets in chain */
	467	hashp->cpage = NULL;
	468	hashp->cbucket++;
	469	} else {
	470	hashp->cpage =
	471	__get_buf(hashp, bp[bp[0] - 1], bufp, 0);
	472	if (!hashp->cpage)
	473	return (-1);
	474	else if (!((u_int16_t *)hashp->cpage->page)[0]) {
	475	hashp->cbucket++;
	476	hashp->cpage = NULL;
	477	}
	478	}
	479	}
	480	} else {
	481	xbp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
	482	if (!xbp \|\| ((totlen =
	483	collect_data(hashp, xbp, len + mylen, set)) < 1))
	484	return (-1);
	485	}
	486	if (bufp->addr != save_addr) {
	487	errno = EINVAL; /* Out of buffers. */
	488	return (-1);
	489	}
	490	memmove(&hashp->tmp_buf[len], (bufp->page) + bp[1], mylen);
	491	return (totlen);
	492	}
	493
	494	/*
	495	* Fill in the key and data for this big pair.
	496	*/
	497	int
	498	__big_keydata(HTAB hashp, BUFHEAD bufp, DBT key, DBT val, int set)
	499	{
	500	key->size = (size_t)collect_key(hashp, bufp, 0, val, set);
	501	if (key->size == (size_t)-1)
	502	return (-1);
	503	key->data = (u_char *)hashp->tmp_key;
	504	return (0);
	505	}
	506
	507	/*
	508	* Count how big the total key size is by recursing through the pages. Then
	509	* collect the data, allocate a buffer and copy the key as you recurse up.
	510	*/
	511	static int
	512	collect_key(HTAB hashp, BUFHEAD bufp, int len, DBT *val, int set)
	513	{
	514	BUFHEAD *xbp;
	515	char *p;
	516	int mylen, totlen;
	517	u_int16_t *bp, save_addr;
	518
	519	p = bufp->page;
	520	bp = (u_int16_t *)p;
	521	mylen = hashp->BSIZE - bp[1];
	522
	523	save_addr = bufp->addr;
	524	totlen = len + mylen;
	525	if (bp[2] == FULL_KEY \|\| bp[2] == FULL_KEY_DATA) { /* End of Key. */
	526	if (hashp->tmp_key != NULL)
	527	free(hashp->tmp_key);
	528	if ((hashp->tmp_key = (char *)malloc(totlen)) == NULL)
	529	return (-1);
	530	if (__big_return(hashp, bufp, 1, val, set))
	531	return (-1);
	532	} else {
	533	xbp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
	534	if (!xbp \|\| ((totlen =
	535	collect_key(hashp, xbp, totlen, val, set)) < 1))
	536	return (-1);
	537	}
	538	if (bufp->addr != save_addr) {
	539	errno = EINVAL; /* MIS -- OUT OF BUFFERS */
	540	return (-1);
	541	}
	542	memmove(&hashp->tmp_key[len], (bufp->page) + bp[1], mylen);
	543	return (totlen);
	544	}
	545
	546	/*
	547	* Returns:
	548	* 0 => OK
	549	* -1 => error
	550	*/
	551	int
	552	__big_split(HTAB *hashp,
	553	BUFHEAD op, / Pointer to where to put keys that go in old bucket */
	554	BUFHEAD np, / Pointer to new bucket page */
	555	BUFHEAD big_keyp, / Pointer to first page containing the big key/data */
	556	int addr, /* Address of big_keyp */
	557	u_int32_t obucket, /* Old Bucket */
	558	SPLIT_RETURN *ret)
	559	{
	560	BUFHEAD bp, tmpp;
	561	DBT key, val;
	562	u_int32_t change;
	563	u_int16_t free_space, n, off, *tp;
	564
	565	bp = big_keyp;
	566
	567	/* Now figure out where the big key/data goes */
	568	if (__big_keydata(hashp, big_keyp, &key, &val, 0))
	569	return (-1);
	570	change = (__call_hash(hashp, key.data, key.size) != obucket);
	571
	572	if ( (ret->next_addr = __find_last_page(hashp, &big_keyp)) ) {
	573	if (!(ret->nextp =
	574	__get_buf(hashp, ret->next_addr, big_keyp, 0)))
	575	return (-1);
	576	} else
	577	ret->nextp = NULL;
	578
	579	/* Now make one of np/op point to the big key/data pair */
	580	#ifdef DEBUG
	581	assert(np->ovfl == NULL);
	582	#endif
	583	if (change)
	584	tmpp = np;
	585	else
	586	tmpp = op;
	587
	588	tmpp->flags \|= BUF_MOD;
	589	#ifdef DEBUG1
	590	(void)fprintf(stderr,
	591	"BIG_SPLIT: %d->ovfl was %d is now %d\n", tmpp->addr,
	592	(tmpp->ovfl ? tmpp->ovfl->addr : 0), (bp ? bp->addr : 0));
	593	#endif
	594	tmpp->ovfl = bp; /* one of op/np point to big_keyp */
	595	tp = (u_int16_t *)tmpp->page;
	596	#ifdef DEBUG
	597	assert(FREESPACE(tp) >= OVFLSIZE);
	598	#endif
	599	n = tp[0];
	600	off = OFFSET(tp);
	601	free_space = FREESPACE(tp);
	602	tp[++n] = (u_int16_t)addr;
	603	tp[++n] = OVFLPAGE;
	604	tp[0] = n;
	605	OFFSET(tp) = off;
	606	FREESPACE(tp) = free_space - OVFLSIZE;
	607
	608	/*
	609	* Finally, set the new and old return values. BIG_KEYP contains a
	610	* pointer to the last page of the big key_data pair. Make sure that
	611	* big_keyp has no following page (2 elements) or create an empty
	612	* following page.
	613	*/
	614
	615	ret->newp = np;
	616	ret->oldp = op;
	617
	618	tp = (u_int16_t *)big_keyp->page;
	619	big_keyp->flags \|= BUF_MOD;
	620	if (tp[0] > 2) {
	621	/*
	622	* There may be either one or two offsets on this page. If
	623	* there is one, then the overflow page is linked on normally
	624	* and tp[4] is OVFLPAGE. If there are two, tp[4] contains
	625	* the second offset and needs to get stuffed in after the
	626	* next overflow page is added.
	627	*/
	628	n = tp[4];
	629	free_space = FREESPACE(tp);
	630	off = OFFSET(tp);
	631	tp[0] -= 2;
	632	FREESPACE(tp) = free_space + OVFLSIZE;
	633	OFFSET(tp) = off;
	634	tmpp = __add_ovflpage(hashp, big_keyp);
	635	if (!tmpp)
	636	return (-1);
	637	tp[4] = n;
	638	} else
	639	tmpp = big_keyp;
	640
	641	if (change)
	642	ret->newp = tmpp;
	643	else
	644	ret->oldp = tmpp;
	645	return (0);
	646	}