]> git.saurik.com Git - apple/libc.git/blob - db/hash/hash_bigkey.c
91ed98a789fca397bdd4d30e23b4e06e19a5c50e
[apple/libc.git] / db / hash / hash_bigkey.c
1 /*
2 * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23 /*-
24 * Copyright (c) 1990, 1993, 1994
25 * The Regents of the University of California. All rights reserved.
26 *
27 * This code is derived from software contributed to Berkeley by
28 * Margo Seltzer.
29 *
30 * Redistribution and use in source and binary forms, with or without
31 * modification, are permitted provided that the following conditions
32 * are met:
33 * 1. Redistributions of source code must retain the above copyright
34 * notice, this list of conditions and the following disclaimer.
35 * 2. Redistributions in binary form must reproduce the above copyright
36 * notice, this list of conditions and the following disclaimer in the
37 * documentation and/or other materials provided with the distribution.
38 * 3. All advertising materials mentioning features or use of this software
39 * must display the following acknowledgement:
40 * This product includes software developed by the University of
41 * California, Berkeley and its contributors.
42 * 4. Neither the name of the University nor the names of its contributors
43 * may be used to endorse or promote products derived from this software
44 * without specific prior written permission.
45 *
46 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
47 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
48 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
49 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
50 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
51 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
52 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
53 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
54 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
55 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
56 * SUCH DAMAGE.
57 */
58
59 #if defined(LIBC_SCCS) && !defined(lint)
60 static char sccsid[] = "@(#)hash_bigkey.c 8.3 (Berkeley) 5/31/94";
61 #endif /* LIBC_SCCS and not lint */
62 #include <sys/cdefs.h>
63
64 /*
65 * PACKAGE: hash
66 * DESCRIPTION:
67 * Big key/data handling for the hashing package.
68 *
69 * ROUTINES:
70 * External
71 * __big_keydata
72 * __big_split
73 * __big_insert
74 * __big_return
75 * __big_delete
76 * __find_last_page
77 * Internal
78 * collect_key
79 * collect_data
80 */
81
82 #include <sys/param.h>
83
84 #include <errno.h>
85 #include <stdio.h>
86 #include <stdlib.h>
87 #include <string.h>
88
89 #ifdef DEBUG
90 #include <assert.h>
91 #endif
92
93 #include <db.h>
94 #include "hash.h"
95 #include "page.h"
96 #include "extern.h"
97
98 static int collect_key(HTAB *, BUFHEAD *, int, DBT *, int);
99 static int collect_data(HTAB *, BUFHEAD *, int, int);
100
101 /*
102 * Big_insert
103 *
104 * You need to do an insert and the key/data pair is too big
105 *
106 * Returns:
107 * 0 ==> OK
108 *-1 ==> ERROR
109 */
110 extern int
111 __big_insert(hashp, bufp, key, val)
112 HTAB *hashp;
113 BUFHEAD *bufp;
114 const DBT *key, *val;
115 {
116 u_int16_t *p;
117 int key_size, n, val_size;
118 u_int16_t space, move_bytes, off;
119 char *cp, *key_data, *val_data;
120
121 cp = bufp->page; /* Character pointer of p. */
122 p = (u_int16_t *)cp;
123
124 key_data = (char *)key->data;
125 key_size = key->size;
126 val_data = (char *)val->data;
127 val_size = val->size;
128
129 /* First move the Key */
130 for (space = FREESPACE(p) - BIGOVERHEAD; key_size;
131 space = FREESPACE(p) - BIGOVERHEAD) {
132 move_bytes = MIN(space, key_size);
133 off = OFFSET(p) - move_bytes;
134 memmove(cp + off, key_data, move_bytes);
135 key_size -= move_bytes;
136 key_data += move_bytes;
137 n = p[0];
138 p[++n] = off;
139 p[0] = ++n;
140 FREESPACE(p) = off - PAGE_META(n);
141 OFFSET(p) = off;
142 p[n] = PARTIAL_KEY;
143 bufp = __add_ovflpage(hashp, bufp);
144 if (!bufp)
145 return (-1);
146 n = p[0];
147 if (!key_size) {
148 if (FREESPACE(p)) {
149 move_bytes = MIN(FREESPACE(p), val_size);
150 off = OFFSET(p) - move_bytes;
151 p[n] = off;
152 memmove(cp + off, val_data, move_bytes);
153 val_data += move_bytes;
154 val_size -= move_bytes;
155 p[n - 2] = FULL_KEY_DATA;
156 FREESPACE(p) = FREESPACE(p) - move_bytes;
157 OFFSET(p) = off;
158 } else
159 p[n - 2] = FULL_KEY;
160 }
161 p = (u_int16_t *)bufp->page;
162 cp = bufp->page;
163 bufp->flags |= BUF_MOD;
164 }
165
166 /* Now move the data */
167 for (space = FREESPACE(p) - BIGOVERHEAD; val_size;
168 space = FREESPACE(p) - BIGOVERHEAD) {
169 move_bytes = MIN(space, val_size);
170 /*
171 * Here's the hack to make sure that if the data ends on the
172 * same page as the key ends, FREESPACE is at least one.
173 */
174 if (space == val_size && val_size == val->size)
175 move_bytes--;
176 off = OFFSET(p) - move_bytes;
177 memmove(cp + off, val_data, move_bytes);
178 val_size -= move_bytes;
179 val_data += move_bytes;
180 n = p[0];
181 p[++n] = off;
182 p[0] = ++n;
183 FREESPACE(p) = off - PAGE_META(n);
184 OFFSET(p) = off;
185 if (val_size) {
186 p[n] = FULL_KEY;
187 bufp = __add_ovflpage(hashp, bufp);
188 if (!bufp)
189 return (-1);
190 cp = bufp->page;
191 p = (u_int16_t *)cp;
192 } else
193 p[n] = FULL_KEY_DATA;
194 bufp->flags |= BUF_MOD;
195 }
196 return (0);
197 }
198
199 /*
200 * Called when bufp's page contains a partial key (index should be 1)
201 *
202 * All pages in the big key/data pair except bufp are freed. We cannot
203 * free bufp because the page pointing to it is lost and we can't get rid
204 * of its pointer.
205 *
206 * Returns:
207 * 0 => OK
208 *-1 => ERROR
209 */
210 extern int
211 __big_delete(hashp, bufp)
212 HTAB *hashp;
213 BUFHEAD *bufp;
214 {
215 BUFHEAD *last_bfp, *rbufp;
216 u_int16_t *bp, pageno;
217 int key_done, n;
218
219 rbufp = bufp;
220 last_bfp = NULL;
221 bp = (u_int16_t *)bufp->page;
222 pageno = 0;
223 key_done = 0;
224
225 while (!key_done || (bp[2] != FULL_KEY_DATA)) {
226 if (bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA)
227 key_done = 1;
228
229 /*
230 * If there is freespace left on a FULL_KEY_DATA page, then
231 * the data is short and fits entirely on this page, and this
232 * is the last page.
233 */
234 if (bp[2] == FULL_KEY_DATA && FREESPACE(bp))
235 break;
236 pageno = bp[bp[0] - 1];
237 rbufp->flags |= BUF_MOD;
238 rbufp = __get_buf(hashp, pageno, rbufp, 0);
239 if (last_bfp)
240 __free_ovflpage(hashp, last_bfp);
241 last_bfp = rbufp;
242 if (!rbufp)
243 return (-1); /* Error. */
244 bp = (u_int16_t *)rbufp->page;
245 }
246
247 /*
248 * If we get here then rbufp points to the last page of the big
249 * key/data pair. Bufp points to the first one -- it should now be
250 * empty pointing to the next page after this pair. Can't free it
251 * because we don't have the page pointing to it.
252 */
253
254 /* This is information from the last page of the pair. */
255 n = bp[0];
256 pageno = bp[n - 1];
257
258 /* Now, bp is the first page of the pair. */
259 bp = (u_int16_t *)bufp->page;
260 if (n > 2) {
261 /* There is an overflow page. */
262 bp[1] = pageno;
263 bp[2] = OVFLPAGE;
264 bufp->ovfl = rbufp->ovfl;
265 } else
266 /* This is the last page. */
267 bufp->ovfl = NULL;
268 n -= 2;
269 bp[0] = n;
270 FREESPACE(bp) = hashp->BSIZE - PAGE_META(n);
271 OFFSET(bp) = hashp->BSIZE - 1;
272
273 bufp->flags |= BUF_MOD;
274 if (rbufp)
275 __free_ovflpage(hashp, rbufp);
276 if (last_bfp != rbufp)
277 __free_ovflpage(hashp, last_bfp);
278
279 hashp->NKEYS--;
280 return (0);
281 }
282 /*
283 * Returns:
284 * 0 = key not found
285 * -1 = get next overflow page
286 * -2 means key not found and this is big key/data
287 * -3 error
288 */
289 extern int
290 __find_bigpair(hashp, bufp, ndx, key, size)
291 HTAB *hashp;
292 BUFHEAD *bufp;
293 int ndx;
294 char *key;
295 int size;
296 {
297 u_int16_t *bp;
298 char *p;
299 int ksize;
300 u_int16_t bytes;
301 char *kkey;
302
303 bp = (u_int16_t *)bufp->page;
304 p = bufp->page;
305 ksize = size;
306 kkey = key;
307
308 for (bytes = hashp->BSIZE - bp[ndx];
309 bytes <= size && bp[ndx + 1] == PARTIAL_KEY;
310 bytes = hashp->BSIZE - bp[ndx]) {
311 if (memcmp(p + bp[ndx], kkey, bytes))
312 return (-2);
313 kkey += bytes;
314 ksize -= bytes;
315 bufp = __get_buf(hashp, bp[ndx + 2], bufp, 0);
316 if (!bufp)
317 return (-3);
318 p = bufp->page;
319 bp = (u_int16_t *)p;
320 ndx = 1;
321 }
322
323 if (bytes != ksize || memcmp(p + bp[ndx], kkey, bytes)) {
324 #ifdef HASH_STATISTICS
325 ++hash_collisions;
326 #endif
327 return (-2);
328 } else
329 return (ndx);
330 }
331
332 /*
333 * Given the buffer pointer of the first overflow page of a big pair,
334 * find the end of the big pair
335 *
336 * This will set bpp to the buffer header of the last page of the big pair.
337 * It will return the pageno of the overflow page following the last page
338 * of the pair; 0 if there isn't any (i.e. big pair is the last key in the
339 * bucket)
340 */
341 extern u_int16_t
342 __find_last_page(hashp, bpp)
343 HTAB *hashp;
344 BUFHEAD **bpp;
345 {
346 BUFHEAD *bufp;
347 u_int16_t *bp, pageno;
348 int n;
349
350 bufp = *bpp;
351 bp = (u_int16_t *)bufp->page;
352 for (;;) {
353 n = bp[0];
354
355 /*
356 * This is the last page if: the tag is FULL_KEY_DATA and
357 * either only 2 entries OVFLPAGE marker is explicit there
358 * is freespace on the page.
359 */
360 if (bp[2] == FULL_KEY_DATA &&
361 ((n == 2) || (bp[n] == OVFLPAGE) || (FREESPACE(bp))))
362 break;
363
364 pageno = bp[n - 1];
365 bufp = __get_buf(hashp, pageno, bufp, 0);
366 if (!bufp)
367 return (0); /* Need to indicate an error! */
368 bp = (u_int16_t *)bufp->page;
369 }
370
371 *bpp = bufp;
372 if (bp[0] > 2)
373 return (bp[3]);
374 else
375 return (0);
376 }
377
378 /*
379 * Return the data for the key/data pair that begins on this page at this
380 * index (index should always be 1).
381 */
382 extern int
383 __big_return(hashp, bufp, ndx, val, set_current)
384 HTAB *hashp;
385 BUFHEAD *bufp;
386 int ndx;
387 DBT *val;
388 int set_current;
389 {
390 BUFHEAD *save_p;
391 u_int16_t *bp, len, off, save_addr;
392 char *tp;
393
394 bp = (u_int16_t *)bufp->page;
395 while (bp[ndx + 1] == PARTIAL_KEY) {
396 bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
397 if (!bufp)
398 return (-1);
399 bp = (u_int16_t *)bufp->page;
400 ndx = 1;
401 }
402
403 if (bp[ndx + 1] == FULL_KEY) {
404 bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
405 if (!bufp)
406 return (-1);
407 bp = (u_int16_t *)bufp->page;
408 save_p = bufp;
409 save_addr = save_p->addr;
410 off = bp[1];
411 len = 0;
412 } else
413 if (!FREESPACE(bp)) {
414 /*
415 * This is a hack. We can't distinguish between
416 * FULL_KEY_DATA that contains complete data or
417 * incomplete data, so we require that if the data
418 * is complete, there is at least 1 byte of free
419 * space left.
420 */
421 off = bp[bp[0]];
422 len = bp[1] - off;
423 save_p = bufp;
424 save_addr = bufp->addr;
425 bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
426 if (!bufp)
427 return (-1);
428 bp = (u_int16_t *)bufp->page;
429 } else {
430 /* The data is all on one page. */
431 tp = (char *)bp;
432 off = bp[bp[0]];
433 val->data = (u_char *)tp + off;
434 val->size = bp[1] - off;
435 if (set_current) {
436 if (bp[0] == 2) { /* No more buckets in
437 * chain */
438 hashp->cpage = NULL;
439 hashp->cbucket++;
440 hashp->cndx = 1;
441 } else {
442 hashp->cpage = __get_buf(hashp,
443 bp[bp[0] - 1], bufp, 0);
444 if (!hashp->cpage)
445 return (-1);
446 hashp->cndx = 1;
447 if (!((u_int16_t *)
448 hashp->cpage->page)[0]) {
449 hashp->cbucket++;
450 hashp->cpage = NULL;
451 }
452 }
453 }
454 return (0);
455 }
456
457 val->size = collect_data(hashp, bufp, (int)len, set_current);
458 if (val->size == -1)
459 return (-1);
460 if (save_p->addr != save_addr) {
461 /* We are pretty short on buffers. */
462 errno = EINVAL; /* OUT OF BUFFERS */
463 return (-1);
464 }
465 memmove(hashp->tmp_buf, (save_p->page) + off, len);
466 val->data = (u_char *)hashp->tmp_buf;
467 return (0);
468 }
469 /*
470 * Count how big the total datasize is by recursing through the pages. Then
471 * allocate a buffer and copy the data as you recurse up.
472 */
473 static int
474 collect_data(hashp, bufp, len, set)
475 HTAB *hashp;
476 BUFHEAD *bufp;
477 int len, set;
478 {
479 u_int16_t *bp;
480 char *p;
481 BUFHEAD *xbp;
482 u_int16_t save_addr;
483 int mylen, totlen;
484
485 p = bufp->page;
486 bp = (u_int16_t *)p;
487 mylen = hashp->BSIZE - bp[1];
488 save_addr = bufp->addr;
489
490 if (bp[2] == FULL_KEY_DATA) { /* End of Data */
491 totlen = len + mylen;
492 if (hashp->tmp_buf)
493 free(hashp->tmp_buf);
494 if ((hashp->tmp_buf = (char *)malloc(totlen)) == NULL)
495 return (-1);
496 if (set) {
497 hashp->cndx = 1;
498 if (bp[0] == 2) { /* No more buckets in chain */
499 hashp->cpage = NULL;
500 hashp->cbucket++;
501 } else {
502 hashp->cpage =
503 __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
504 if (!hashp->cpage)
505 return (-1);
506 else if (!((u_int16_t *)hashp->cpage->page)[0]) {
507 hashp->cbucket++;
508 hashp->cpage = NULL;
509 }
510 }
511 }
512 } else {
513 xbp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
514 if (!xbp || ((totlen =
515 collect_data(hashp, xbp, len + mylen, set)) < 1))
516 return (-1);
517 }
518 if (bufp->addr != save_addr) {
519 errno = EINVAL; /* Out of buffers. */
520 return (-1);
521 }
522 memmove(&hashp->tmp_buf[len], (bufp->page) + bp[1], mylen);
523 return (totlen);
524 }
525
526 /*
527 * Fill in the key and data for this big pair.
528 */
529 extern int
530 __big_keydata(hashp, bufp, key, val, set)
531 HTAB *hashp;
532 BUFHEAD *bufp;
533 DBT *key, *val;
534 int set;
535 {
536 key->size = collect_key(hashp, bufp, 0, val, set);
537 if (key->size == -1)
538 return (-1);
539 key->data = (u_char *)hashp->tmp_key;
540 return (0);
541 }
542
543 /*
544 * Count how big the total key size is by recursing through the pages. Then
545 * collect the data, allocate a buffer and copy the key as you recurse up.
546 */
547 static int
548 collect_key(hashp, bufp, len, val, set)
549 HTAB *hashp;
550 BUFHEAD *bufp;
551 int len;
552 DBT *val;
553 int set;
554 {
555 BUFHEAD *xbp;
556 char *p;
557 int mylen, totlen;
558 u_int16_t *bp, save_addr;
559
560 p = bufp->page;
561 bp = (u_int16_t *)p;
562 mylen = hashp->BSIZE - bp[1];
563
564 save_addr = bufp->addr;
565 totlen = len + mylen;
566 if (bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA) { /* End of Key. */
567 if (hashp->tmp_key != NULL)
568 free(hashp->tmp_key);
569 if ((hashp->tmp_key = (char *)malloc(totlen)) == NULL)
570 return (-1);
571 if (__big_return(hashp, bufp, 1, val, set))
572 return (-1);
573 } else {
574 xbp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
575 if (!xbp || ((totlen =
576 collect_key(hashp, xbp, totlen, val, set)) < 1))
577 return (-1);
578 }
579 if (bufp->addr != save_addr) {
580 errno = EINVAL; /* MIS -- OUT OF BUFFERS */
581 return (-1);
582 }
583 memmove(&hashp->tmp_key[len], (bufp->page) + bp[1], mylen);
584 return (totlen);
585 }
586
587 /*
588 * Returns:
589 * 0 => OK
590 * -1 => error
591 */
592 extern int
593 __big_split(hashp, op, np, big_keyp, addr, obucket, ret)
594 HTAB *hashp;
595 BUFHEAD *op; /* Pointer to where to put keys that go in old bucket */
596 BUFHEAD *np; /* Pointer to new bucket page */
597 /* Pointer to first page containing the big key/data */
598 BUFHEAD *big_keyp;
599 int addr; /* Address of big_keyp */
600 u_int32_t obucket;/* Old Bucket */
601 SPLIT_RETURN *ret;
602 {
603 BUFHEAD *tmpp;
604 u_int16_t *tp;
605 BUFHEAD *bp;
606 DBT key, val;
607 u_int32_t change;
608 u_int16_t free_space, n, off;
609
610 bp = big_keyp;
611
612 /* Now figure out where the big key/data goes */
613 if (__big_keydata(hashp, big_keyp, &key, &val, 0))
614 return (-1);
615 change = (__call_hash(hashp, key.data, key.size) != obucket);
616
617 if ( (ret->next_addr = __find_last_page(hashp, &big_keyp)) ) {
618 if (!(ret->nextp =
619 __get_buf(hashp, ret->next_addr, big_keyp, 0)))
620 return (-1);;
621 } else
622 ret->nextp = NULL;
623
624 /* Now make one of np/op point to the big key/data pair */
625 #ifdef DEBUG
626 assert(np->ovfl == NULL);
627 #endif
628 if (change)
629 tmpp = np;
630 else
631 tmpp = op;
632
633 tmpp->flags |= BUF_MOD;
634 #ifdef DEBUG1
635 (void)fprintf(stderr,
636 "BIG_SPLIT: %d->ovfl was %d is now %d\n", tmpp->addr,
637 (tmpp->ovfl ? tmpp->ovfl->addr : 0), (bp ? bp->addr : 0));
638 #endif
639 tmpp->ovfl = bp; /* one of op/np point to big_keyp */
640 tp = (u_int16_t *)tmpp->page;
641 #ifdef DEBUG
642 assert(FREESPACE(tp) >= OVFLSIZE);
643 #endif
644 n = tp[0];
645 off = OFFSET(tp);
646 free_space = FREESPACE(tp);
647 tp[++n] = (u_int16_t)addr;
648 tp[++n] = OVFLPAGE;
649 tp[0] = n;
650 OFFSET(tp) = off;
651 FREESPACE(tp) = free_space - OVFLSIZE;
652
653 /*
654 * Finally, set the new and old return values. BIG_KEYP contains a
655 * pointer to the last page of the big key_data pair. Make sure that
656 * big_keyp has no following page (2 elements) or create an empty
657 * following page.
658 */
659
660 ret->newp = np;
661 ret->oldp = op;
662
663 tp = (u_int16_t *)big_keyp->page;
664 big_keyp->flags |= BUF_MOD;
665 if (tp[0] > 2) {
666 /*
667 * There may be either one or two offsets on this page. If
668 * there is one, then the overflow page is linked on normally
669 * and tp[4] is OVFLPAGE. If there are two, tp[4] contains
670 * the second offset and needs to get stuffed in after the
671 * next overflow page is added.
672 */
673 n = tp[4];
674 free_space = FREESPACE(tp);
675 off = OFFSET(tp);
676 tp[0] -= 2;
677 FREESPACE(tp) = free_space + OVFLSIZE;
678 OFFSET(tp) = off;
679 tmpp = __add_ovflpage(hashp, big_keyp);
680 if (!tmpp)
681 return (-1);
682 tp[4] = n;
683 } else
684 tmpp = big_keyp;
685
686 if (change)
687 ret->newp = tmpp;
688 else
689 ret->oldp = tmpp;
690 return (0);
691 }