]>
Commit | Line | Data |
---|---|---|
1 | /*- | |
2 | * Copyright (c) 1990, 1993, 1994 | |
3 | * The Regents of the University of California. All rights reserved. | |
4 | * | |
5 | * This code is derived from software contributed to Berkeley by | |
6 | * Margo Seltzer. | |
7 | * | |
8 | * Redistribution and use in source and binary forms, with or without | |
9 | * modification, are permitted provided that the following conditions | |
10 | * are met: | |
11 | * 1. Redistributions of source code must retain the above copyright | |
12 | * notice, this list of conditions and the following disclaimer. | |
13 | * 2. Redistributions in binary form must reproduce the above copyright | |
14 | * notice, this list of conditions and the following disclaimer in the | |
15 | * documentation and/or other materials provided with the distribution. | |
16 | * 4. Neither the name of the University nor the names of its contributors | |
17 | * may be used to endorse or promote products derived from this software | |
18 | * without specific prior written permission. | |
19 | * | |
20 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
21 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
23 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
26 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
27 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
28 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
29 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
30 | * SUCH DAMAGE. | |
31 | */ | |
32 | ||
33 | #if defined(LIBC_SCCS) && !defined(lint) | |
34 | static char sccsid[] = "@(#)hash_page.c 8.7 (Berkeley) 8/16/94"; | |
35 | #endif /* LIBC_SCCS and not lint */ | |
36 | #include <sys/cdefs.h> | |
37 | __FBSDID("$FreeBSD: src/lib/libc/db/hash/hash_page.c,v 1.16 2009/03/28 06:30:43 delphij Exp $"); | |
38 | ||
39 | /* | |
40 | * PACKAGE: hashing | |
41 | * | |
42 | * DESCRIPTION: | |
43 | * Page manipulation for hashing package. | |
44 | * | |
45 | * ROUTINES: | |
46 | * | |
47 | * External | |
48 | * __get_page | |
49 | * __add_ovflpage | |
50 | * Internal | |
51 | * overflow_page | |
52 | * open_temp | |
53 | */ | |
54 | ||
55 | #include "namespace.h" | |
56 | #include <sys/param.h> | |
57 | ||
58 | #include <errno.h> | |
59 | #include <fcntl.h> | |
60 | #include <signal.h> | |
61 | #include <stdio.h> | |
62 | #include <stdlib.h> | |
63 | #include <string.h> | |
64 | #include <unistd.h> | |
65 | #ifdef DEBUG | |
66 | #include <assert.h> | |
67 | #endif | |
68 | #include "un-namespace.h" | |
69 | ||
70 | #include <db.h> | |
71 | #include "hash.h" | |
72 | #include "page.h" | |
73 | #include "hash_extern.h" | |
74 | ||
75 | static u_int32_t *fetch_bitmap(HTAB *, int); | |
76 | static u_int32_t first_free(u_int32_t); | |
77 | static int open_temp(HTAB *); | |
78 | static u_int16_t overflow_page(HTAB *); | |
79 | static void putpair(char *, const DBT *, const DBT *); | |
80 | static void squeeze_key(u_int16_t *, const DBT *, const DBT *); | |
81 | static int ugly_split(HTAB *, u_int32_t, BUFHEAD *, BUFHEAD *, int, int); | |
82 | ||
/*
 * Reset P to an empty page: word 0 (entry count) becomes 0, and words 1
 * and 2 receive the values that the split code in this file stores through
 * FREESPACE() and OFFSET() for a page holding zero entries.
 *
 * The expansion reads hashp->BSIZE, so a variable named hashp must be in
 * scope wherever the macro is used.  The body is wrapped in do/while(0) so
 * that "PAGE_INIT(p);" is a single statement and stays correct in an
 * unbraced if/else.
 */
#define	PAGE_INIT(P) do {						\
	((u_int16_t *)(P))[0] = 0;					\
	((u_int16_t *)(P))[1] = hashp->BSIZE - 3 * sizeof(u_int16_t);	\
	((u_int16_t *)(P))[2] = hashp->BSIZE;				\
} while (0)
88 | ||
89 | /* | |
90 | * This is called AFTER we have verified that there is room on the page for | |
91 | * the pair (PAIRFITS has returned true) so we go right ahead and start moving | |
92 | * stuff on. | |
93 | */ | |
94 | static void | |
95 | putpair(char *p, const DBT *key, const DBT *val) | |
96 | { | |
97 | u_int16_t *bp, n, off; | |
98 | ||
99 | bp = (u_int16_t *)p; | |
100 | ||
101 | /* Enter the key first. */ | |
102 | n = bp[0]; | |
103 | ||
104 | off = OFFSET(bp) - key->size; | |
105 | memmove(p + off, key->data, key->size); | |
106 | bp[++n] = off; | |
107 | ||
108 | /* Now the data. */ | |
109 | off -= val->size; | |
110 | memmove(p + off, val->data, val->size); | |
111 | bp[++n] = off; | |
112 | ||
113 | /* Adjust page info. */ | |
114 | bp[0] = n; | |
115 | bp[n + 1] = off - ((n + 3) * sizeof(u_int16_t)); | |
116 | bp[n + 2] = off; | |
117 | } | |
118 | ||
119 | /* | |
120 | * Returns: | |
121 | * 0 OK | |
122 | * -1 error | |
123 | */ | |
124 | int | |
125 | __delpair(HTAB *hashp, BUFHEAD *bufp, int ndx) | |
126 | { | |
127 | u_int16_t *bp, newoff, pairlen; | |
128 | int n; | |
129 | ||
130 | bp = (u_int16_t *)bufp->page; | |
131 | n = bp[0]; | |
132 | ||
133 | if (bp[ndx + 1] < REAL_KEY) | |
134 | return (__big_delete(hashp, bufp)); | |
135 | if (ndx != 1) | |
136 | newoff = bp[ndx - 1]; | |
137 | else | |
138 | newoff = hashp->BSIZE; | |
139 | pairlen = newoff - bp[ndx + 1]; | |
140 | ||
141 | if (ndx != (n - 1)) { | |
142 | /* Hard Case -- need to shuffle keys */ | |
143 | int i; | |
144 | char *src = bufp->page + (int)OFFSET(bp); | |
145 | char *dst = src + (int)pairlen; | |
146 | memmove(dst, src, bp[ndx + 1] - OFFSET(bp)); | |
147 | ||
148 | /* Now adjust the pointers */ | |
149 | for (i = ndx + 2; i <= n; i += 2) { | |
150 | if (bp[i + 1] == OVFLPAGE) { | |
151 | bp[i - 2] = bp[i]; | |
152 | bp[i - 1] = bp[i + 1]; | |
153 | } else { | |
154 | bp[i - 2] = bp[i] + pairlen; | |
155 | bp[i - 1] = bp[i + 1] + pairlen; | |
156 | } | |
157 | } | |
158 | if (ndx == hashp->cndx) { | |
159 | /* | |
160 | * We just removed pair we were "pointing" to. | |
161 | * By moving back the cndx we ensure subsequent | |
162 | * hash_seq() calls won't skip over any entries. | |
163 | */ | |
164 | hashp->cndx -= 2; | |
165 | } | |
166 | } | |
167 | /* Finally adjust the page data */ | |
168 | bp[n] = OFFSET(bp) + pairlen; | |
169 | bp[n - 1] = bp[n + 1] + pairlen + 2 * sizeof(u_int16_t); | |
170 | bp[0] = n - 2; | |
171 | hashp->NKEYS--; | |
172 | ||
173 | bufp->flags |= BUF_MOD; | |
174 | return (0); | |
175 | } | |
176 | /* | |
177 | * Returns: | |
178 | * 0 ==> OK | |
179 | * -1 ==> Error | |
180 | */ | |
181 | int | |
182 | __split_page(HTAB *hashp, u_int32_t obucket, u_int32_t nbucket) | |
183 | { | |
184 | BUFHEAD *new_bufp, *old_bufp; | |
185 | u_int16_t *ino; | |
186 | char *np; | |
187 | DBT key, val; | |
188 | int n, ndx, retval; | |
189 | u_int16_t copyto, diff, off, moved; | |
190 | char *op; | |
191 | ||
192 | copyto = (u_int16_t)hashp->BSIZE; | |
193 | off = (u_int16_t)hashp->BSIZE; | |
194 | old_bufp = __get_buf(hashp, obucket, NULL, 0); | |
195 | if (old_bufp == NULL) | |
196 | return (-1); | |
197 | new_bufp = __get_buf(hashp, nbucket, NULL, 0); | |
198 | if (new_bufp == NULL) | |
199 | return (-1); | |
200 | ||
201 | old_bufp->flags |= (BUF_MOD | BUF_PIN); | |
202 | new_bufp->flags |= (BUF_MOD | BUF_PIN); | |
203 | ||
204 | ino = (u_int16_t *)(op = old_bufp->page); | |
205 | np = new_bufp->page; | |
206 | ||
207 | moved = 0; | |
208 | ||
209 | for (n = 1, ndx = 1; n < ino[0]; n += 2) { | |
210 | if (ino[n + 1] < REAL_KEY) { | |
211 | retval = ugly_split(hashp, obucket, old_bufp, new_bufp, | |
212 | (int)copyto, (int)moved); | |
213 | old_bufp->flags &= ~BUF_PIN; | |
214 | new_bufp->flags &= ~BUF_PIN; | |
215 | return (retval); | |
216 | ||
217 | } | |
218 | key.data = (u_char *)op + ino[n]; | |
219 | key.size = off - ino[n]; | |
220 | ||
221 | if (__call_hash(hashp, key.data, key.size) == obucket) { | |
222 | /* Don't switch page */ | |
223 | diff = copyto - off; | |
224 | if (diff) { | |
225 | copyto = ino[n + 1] + diff; | |
226 | memmove(op + copyto, op + ino[n + 1], | |
227 | off - ino[n + 1]); | |
228 | ino[ndx] = copyto + ino[n] - ino[n + 1]; | |
229 | ino[ndx + 1] = copyto; | |
230 | } else | |
231 | copyto = ino[n + 1]; | |
232 | ndx += 2; | |
233 | } else { | |
234 | /* Switch page */ | |
235 | val.data = (u_char *)op + ino[n + 1]; | |
236 | val.size = ino[n] - ino[n + 1]; | |
237 | putpair(np, &key, &val); | |
238 | moved += 2; | |
239 | } | |
240 | ||
241 | off = ino[n + 1]; | |
242 | } | |
243 | ||
244 | /* Now clean up the page */ | |
245 | ino[0] -= moved; | |
246 | FREESPACE(ino) = copyto - sizeof(u_int16_t) * (ino[0] + 3); | |
247 | OFFSET(ino) = copyto; | |
248 | ||
249 | #ifdef DEBUG3 | |
250 | (void)fprintf(stderr, "split %d/%d\n", | |
251 | ((u_int16_t *)np)[0] / 2, | |
252 | ((u_int16_t *)op)[0] / 2); | |
253 | #endif | |
254 | /* unpin both pages */ | |
255 | old_bufp->flags &= ~BUF_PIN; | |
256 | new_bufp->flags &= ~BUF_PIN; | |
257 | return (0); | |
258 | } | |
259 | ||
260 | /* | |
261 | * Called when we encounter an overflow or big key/data page during split | |
262 | * handling. This is special cased since we have to begin checking whether | |
263 | * the key/data pairs fit on their respective pages and because we may need | |
264 | * overflow pages for both the old and new pages. | |
265 | * | |
266 | * The first page might be a page with regular key/data pairs in which case | |
267 | * we have a regular overflow condition and just need to go on to the next | |
268 | * page or it might be a big key/data pair in which case we need to fix the | |
269 | * big key/data pair. | |
270 | * | |
271 | * Returns: | |
272 | * 0 ==> success | |
273 | * -1 ==> failure | |
274 | */ | |
275 | static int | |
276 | ugly_split(HTAB *hashp, | |
277 | u_int32_t obucket, /* Same as __split_page. */ | |
278 | BUFHEAD *old_bufp, | |
279 | BUFHEAD *new_bufp, | |
280 | int copyto, /* First byte on page which contains key/data values. */ | |
281 | int moved) /* Number of pairs moved to new page. */ | |
282 | { | |
283 | BUFHEAD *bufp; /* Buffer header for ino */ | |
284 | u_int16_t *ino; /* Page keys come off of */ | |
285 | u_int16_t *np; /* New page */ | |
286 | u_int16_t *op; /* Page keys go on to if they aren't moving */ | |
287 | ||
288 | BUFHEAD *last_bfp; /* Last buf header OVFL needing to be freed */ | |
289 | DBT key, val; | |
290 | SPLIT_RETURN ret; | |
291 | u_int16_t n, off, ov_addr, scopyto; | |
292 | char *cino; /* Character value of ino */ | |
293 | ||
294 | bufp = old_bufp; | |
295 | ino = (u_int16_t *)old_bufp->page; | |
296 | np = (u_int16_t *)new_bufp->page; | |
297 | op = (u_int16_t *)old_bufp->page; | |
298 | last_bfp = NULL; | |
299 | scopyto = (u_int16_t)copyto; /* ANSI */ | |
300 | ||
301 | n = ino[0] - 1; | |
302 | while (n < ino[0]) { | |
303 | if (ino[2] < REAL_KEY && ino[2] != OVFLPAGE) { | |
304 | if (__big_split(hashp, old_bufp, | |
305 | new_bufp, bufp, bufp->addr, obucket, &ret)) | |
306 | return (-1); | |
307 | old_bufp = ret.oldp; | |
308 | if (!old_bufp) | |
309 | return (-1); | |
310 | op = (u_int16_t *)old_bufp->page; | |
311 | new_bufp = ret.newp; | |
312 | if (!new_bufp) | |
313 | return (-1); | |
314 | np = (u_int16_t *)new_bufp->page; | |
315 | bufp = ret.nextp; | |
316 | if (!bufp) | |
317 | return (0); | |
318 | cino = (char *)bufp->page; | |
319 | ino = (u_int16_t *)cino; | |
320 | last_bfp = ret.nextp; | |
321 | } else if (ino[n + 1] == OVFLPAGE) { | |
322 | ov_addr = ino[n]; | |
323 | /* | |
324 | * Fix up the old page -- the extra 2 are the fields | |
325 | * which contained the overflow information. | |
326 | */ | |
327 | ino[0] -= (moved + 2); | |
328 | FREESPACE(ino) = | |
329 | scopyto - sizeof(u_int16_t) * (ino[0] + 3); | |
330 | OFFSET(ino) = scopyto; | |
331 | ||
332 | bufp = __get_buf(hashp, ov_addr, bufp, 0); | |
333 | if (!bufp) | |
334 | return (-1); | |
335 | ||
336 | ino = (u_int16_t *)bufp->page; | |
337 | n = 1; | |
338 | scopyto = hashp->BSIZE; | |
339 | moved = 0; | |
340 | ||
341 | if (last_bfp) | |
342 | __free_ovflpage(hashp, last_bfp); | |
343 | last_bfp = bufp; | |
344 | } | |
345 | /* Move regular sized pairs of there are any */ | |
346 | off = hashp->BSIZE; | |
347 | for (n = 1; (n < ino[0]) && (ino[n + 1] >= REAL_KEY); n += 2) { | |
348 | cino = (char *)ino; | |
349 | key.data = (u_char *)cino + ino[n]; | |
350 | key.size = off - ino[n]; | |
351 | val.data = (u_char *)cino + ino[n + 1]; | |
352 | val.size = ino[n] - ino[n + 1]; | |
353 | off = ino[n + 1]; | |
354 | ||
355 | if (__call_hash(hashp, key.data, key.size) == obucket) { | |
356 | /* Keep on old page */ | |
357 | if (PAIRFITS(op, (&key), (&val))) | |
358 | putpair((char *)op, &key, &val); | |
359 | else { | |
360 | old_bufp = | |
361 | __add_ovflpage(hashp, old_bufp); | |
362 | if (!old_bufp) | |
363 | return (-1); | |
364 | op = (u_int16_t *)old_bufp->page; | |
365 | putpair((char *)op, &key, &val); | |
366 | } | |
367 | old_bufp->flags |= BUF_MOD; | |
368 | } else { | |
369 | /* Move to new page */ | |
370 | if (PAIRFITS(np, (&key), (&val))) | |
371 | putpair((char *)np, &key, &val); | |
372 | else { | |
373 | new_bufp = | |
374 | __add_ovflpage(hashp, new_bufp); | |
375 | if (!new_bufp) | |
376 | return (-1); | |
377 | np = (u_int16_t *)new_bufp->page; | |
378 | putpair((char *)np, &key, &val); | |
379 | } | |
380 | new_bufp->flags |= BUF_MOD; | |
381 | } | |
382 | } | |
383 | } | |
384 | if (last_bfp) | |
385 | __free_ovflpage(hashp, last_bfp); | |
386 | return (0); | |
387 | } | |
388 | ||
389 | /* | |
390 | * Add the given pair to the page | |
391 | * | |
392 | * Returns: | |
393 | * 0 ==> OK | |
394 | * 1 ==> failure | |
395 | */ | |
396 | int | |
397 | __addel(HTAB *hashp, BUFHEAD *bufp, const DBT *key, const DBT *val) | |
398 | { | |
399 | u_int16_t *bp, *sop; | |
400 | int do_expand; | |
401 | ||
402 | bp = (u_int16_t *)bufp->page; | |
403 | do_expand = 0; | |
404 | while (bp[0] && (bp[2] < REAL_KEY || bp[bp[0]] < REAL_KEY)) | |
405 | /* Exception case */ | |
406 | if (bp[2] == FULL_KEY_DATA && bp[0] == 2) | |
407 | /* This is the last page of a big key/data pair | |
408 | and we need to add another page */ | |
409 | break; | |
410 | else if (bp[2] < REAL_KEY && bp[bp[0]] != OVFLPAGE) { | |
411 | bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); | |
412 | if (!bufp) | |
413 | return (-1); | |
414 | bp = (u_int16_t *)bufp->page; | |
415 | } else if (bp[bp[0]] != OVFLPAGE) { | |
416 | /* Short key/data pairs, no more pages */ | |
417 | break; | |
418 | } else { | |
419 | /* Try to squeeze key on this page */ | |
420 | if (bp[2] >= REAL_KEY && | |
421 | FREESPACE(bp) >= PAIRSIZE(key, val)) { | |
422 | squeeze_key(bp, key, val); | |
423 | goto stats; | |
424 | } else { | |
425 | bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); | |
426 | if (!bufp) | |
427 | return (-1); | |
428 | bp = (u_int16_t *)bufp->page; | |
429 | } | |
430 | } | |
431 | ||
432 | if (PAIRFITS(bp, key, val)) | |
433 | putpair(bufp->page, key, val); | |
434 | else { | |
435 | do_expand = 1; | |
436 | bufp = __add_ovflpage(hashp, bufp); | |
437 | if (!bufp) | |
438 | return (-1); | |
439 | sop = (u_int16_t *)bufp->page; | |
440 | ||
441 | if (PAIRFITS(sop, key, val)) | |
442 | putpair((char *)sop, key, val); | |
443 | else | |
444 | if (__big_insert(hashp, bufp, key, val)) | |
445 | return (-1); | |
446 | } | |
447 | stats: | |
448 | bufp->flags |= BUF_MOD; | |
449 | /* | |
450 | * If the average number of keys per bucket exceeds the fill factor, | |
451 | * expand the table. | |
452 | */ | |
453 | hashp->NKEYS++; | |
454 | if (do_expand || | |
455 | (hashp->NKEYS / (hashp->MAX_BUCKET + 1) > hashp->FFACTOR)) | |
456 | return (__expand_table(hashp)); | |
457 | return (0); | |
458 | } | |
459 | ||
460 | /* | |
461 | * | |
462 | * Returns: | |
463 | * pointer on success | |
464 | * NULL on error | |
465 | */ | |
466 | BUFHEAD * | |
467 | __add_ovflpage(HTAB *hashp, BUFHEAD *bufp) | |
468 | { | |
469 | u_int16_t *sp, ndx, ovfl_num; | |
470 | #ifdef DEBUG1 | |
471 | int tmp1, tmp2; | |
472 | #endif | |
473 | sp = (u_int16_t *)bufp->page; | |
474 | ||
475 | /* Check if we are dynamically determining the fill factor */ | |
476 | if (hashp->FFACTOR == DEF_FFACTOR) { | |
477 | hashp->FFACTOR = sp[0] >> 1; | |
478 | if (hashp->FFACTOR < MIN_FFACTOR) | |
479 | hashp->FFACTOR = MIN_FFACTOR; | |
480 | } | |
481 | bufp->flags |= BUF_MOD; | |
482 | ovfl_num = overflow_page(hashp); | |
483 | #ifdef DEBUG1 | |
484 | tmp1 = bufp->addr; | |
485 | tmp2 = bufp->ovfl ? bufp->ovfl->addr : 0; | |
486 | #endif | |
487 | if (!ovfl_num || !(bufp->ovfl = __get_buf(hashp, ovfl_num, bufp, 1))) | |
488 | return (NULL); | |
489 | bufp->ovfl->flags |= BUF_MOD; | |
490 | #ifdef DEBUG1 | |
491 | (void)fprintf(stderr, "ADDOVFLPAGE: %d->ovfl was %d is now %d\n", | |
492 | tmp1, tmp2, bufp->ovfl->addr); | |
493 | #endif | |
494 | ndx = sp[0]; | |
495 | /* | |
496 | * Since a pair is allocated on a page only if there's room to add | |
497 | * an overflow page, we know that the OVFL information will fit on | |
498 | * the page. | |
499 | */ | |
500 | sp[ndx + 4] = OFFSET(sp); | |
501 | sp[ndx + 3] = FREESPACE(sp) - OVFLSIZE; | |
502 | sp[ndx + 1] = ovfl_num; | |
503 | sp[ndx + 2] = OVFLPAGE; | |
504 | sp[0] = ndx + 2; | |
505 | #ifdef HASH_STATISTICS | |
506 | hash_overflows++; | |
507 | #endif | |
508 | return (bufp->ovfl); | |
509 | } | |
510 | ||
511 | /* | |
512 | * Returns: | |
513 | * 0 indicates SUCCESS | |
514 | * -1 indicates FAILURE | |
515 | */ | |
516 | int | |
517 | __get_page(HTAB *hashp, char *p, u_int32_t bucket, int is_bucket, int is_disk, | |
518 | int is_bitmap) | |
519 | { | |
520 | int fd, page, size, rsize; | |
521 | u_int16_t *bp; | |
522 | ||
523 | fd = hashp->fp; | |
524 | size = hashp->BSIZE; | |
525 | ||
526 | if ((fd == -1) || !is_disk) { | |
527 | PAGE_INIT(p); | |
528 | return (0); | |
529 | } | |
530 | if (is_bucket) | |
531 | page = BUCKET_TO_PAGE(bucket); | |
532 | else | |
533 | page = OADDR_TO_PAGE(bucket); | |
534 | if ((rsize = pread(fd, p, size, (off_t)page << hashp->BSHIFT)) == -1) | |
535 | return (-1); | |
536 | bp = (u_int16_t *)p; | |
537 | if (!rsize) | |
538 | bp[0] = 0; /* We hit the EOF, so initialize a new page */ | |
539 | else | |
540 | if (rsize != size) { | |
541 | errno = EFTYPE; | |
542 | return (-1); | |
543 | } | |
544 | if (!is_bitmap && !bp[0]) { | |
545 | PAGE_INIT(p); | |
546 | } else | |
547 | if (hashp->LORDER != BYTE_ORDER) { | |
548 | int i, max; | |
549 | ||
550 | if (is_bitmap) { | |
551 | max = hashp->BSIZE >> 2; /* divide by 4 */ | |
552 | for (i = 0; i < max; i++) | |
553 | M_32_SWAP(((int *)p)[i]); | |
554 | } else { | |
555 | M_16_SWAP(bp[0]); | |
556 | max = bp[0] + 2; | |
557 | for (i = 1; i <= max; i++) | |
558 | M_16_SWAP(bp[i]); | |
559 | } | |
560 | } | |
561 | return (0); | |
562 | } | |
563 | ||
564 | /* | |
565 | * Write page p to disk | |
566 | * | |
567 | * Returns: | |
568 | * 0 ==> OK | |
569 | * -1 ==>failure | |
570 | */ | |
571 | int | |
572 | __put_page(HTAB *hashp, char *p, u_int32_t bucket, int is_bucket, int is_bitmap) | |
573 | { | |
574 | int fd, page, size, wsize, max; | |
575 | ||
576 | size = hashp->BSIZE; | |
577 | if ((hashp->fp == -1) && open_temp(hashp)) | |
578 | return (-1); | |
579 | fd = hashp->fp; | |
580 | ||
581 | if (hashp->LORDER != BYTE_ORDER) { | |
582 | int i; | |
583 | ||
584 | if (is_bitmap) { | |
585 | max = hashp->BSIZE >> 2; /* divide by 4 */ | |
586 | for (i = 0; i < max; i++) | |
587 | M_32_SWAP(((int *)p)[i]); | |
588 | } else { | |
589 | max = ((u_int16_t *)p)[0] + 2; | |
590 | for (i = 0; i <= max; i++) | |
591 | M_16_SWAP(((u_int16_t *)p)[i]); | |
592 | } | |
593 | } | |
594 | if (is_bucket) | |
595 | page = BUCKET_TO_PAGE(bucket); | |
596 | else | |
597 | page = OADDR_TO_PAGE(bucket); | |
598 | if ((wsize = pwrite(fd, p, size, (off_t)page << hashp->BSHIFT)) == -1) | |
599 | /* Errno is set */ | |
600 | return (-1); | |
601 | if (wsize != size) { | |
602 | errno = EFTYPE; | |
603 | return (-1); | |
604 | } | |
605 | /* 4485533 - reswap the in-memory copy */ | |
606 | if (hashp->LORDER != BYTE_ORDER) { | |
607 | int i; | |
608 | ||
609 | if (is_bitmap) { | |
610 | for (i = 0; i < max; i++) | |
611 | M_32_SWAP(((int *)p)[i]); | |
612 | } else { | |
613 | for (i = 0; i <= max; i++) | |
614 | M_16_SWAP(((u_int16_t *)p)[i]); | |
615 | } | |
616 | } | |
617 | return (0); | |
618 | } | |
619 | ||
620 | #define BYTE_MASK ((1 << INT_BYTE_SHIFT) -1) | |
621 | /* | |
622 | * Initialize a new bitmap page. Bitmap pages are left in memory | |
623 | * once they are read in. | |
624 | */ | |
625 | int | |
626 | __ibitmap(HTAB *hashp, int pnum, int nbits, int ndx) | |
627 | { | |
628 | u_int32_t *ip; | |
629 | int clearbytes, clearints; | |
630 | ||
631 | if ((ip = (u_int32_t *)malloc(hashp->BSIZE)) == NULL) | |
632 | return (1); | |
633 | hashp->nmaps++; | |
634 | clearints = ((nbits - 1) >> INT_BYTE_SHIFT) + 1; | |
635 | clearbytes = clearints << INT_TO_BYTE; | |
636 | (void)memset((char *)ip, 0, clearbytes); | |
637 | (void)memset(((char *)ip) + clearbytes, 0xFF, | |
638 | hashp->BSIZE - clearbytes); | |
639 | ip[clearints - 1] = ALL_SET << (nbits & BYTE_MASK); | |
640 | SETBIT(ip, 0); | |
641 | hashp->BITMAPS[ndx] = (u_int16_t)pnum; | |
642 | hashp->mapp[ndx] = ip; | |
643 | return (0); | |
644 | } | |
645 | ||
646 | static u_int32_t | |
647 | first_free(u_int32_t map) | |
648 | { | |
649 | u_int32_t i, mask; | |
650 | ||
651 | mask = 0x1; | |
652 | for (i = 0; i < BITS_PER_MAP; i++) { | |
653 | if (!(mask & map)) | |
654 | return (i); | |
655 | mask = mask << 1; | |
656 | } | |
657 | return (i); | |
658 | } | |
659 | ||
660 | static u_int16_t | |
661 | overflow_page(HTAB *hashp) | |
662 | { | |
663 | u_int32_t *freep; | |
664 | int max_free, offset, splitnum; | |
665 | u_int16_t addr; | |
666 | int bit, first_page, free_bit, free_page, i, in_use_bits, j; | |
667 | #ifdef DEBUG2 | |
668 | int tmp1, tmp2; | |
669 | #endif | |
670 | splitnum = hashp->OVFL_POINT; | |
671 | max_free = hashp->SPARES[splitnum]; | |
672 | ||
673 | free_page = (max_free - 1) >> (hashp->BSHIFT + BYTE_SHIFT); | |
674 | free_bit = (max_free - 1) & ((hashp->BSIZE << BYTE_SHIFT) - 1); | |
675 | ||
676 | /* Look through all the free maps to find the first free block */ | |
677 | first_page = hashp->LAST_FREED >>(hashp->BSHIFT + BYTE_SHIFT); | |
678 | for ( i = first_page; i <= free_page; i++ ) { | |
679 | if (!(freep = (u_int32_t *)hashp->mapp[i]) && | |
680 | !(freep = fetch_bitmap(hashp, i))) | |
681 | return (0); | |
682 | if (i == free_page) | |
683 | in_use_bits = free_bit; | |
684 | else | |
685 | in_use_bits = (hashp->BSIZE << BYTE_SHIFT) - 1; | |
686 | ||
687 | if (i == first_page) { | |
688 | bit = hashp->LAST_FREED & | |
689 | ((hashp->BSIZE << BYTE_SHIFT) - 1); | |
690 | j = bit / BITS_PER_MAP; | |
691 | bit = bit & ~(BITS_PER_MAP - 1); | |
692 | } else { | |
693 | bit = 0; | |
694 | j = 0; | |
695 | } | |
696 | for (; bit <= in_use_bits; j++, bit += BITS_PER_MAP) | |
697 | if (freep[j] != ALL_SET) | |
698 | goto found; | |
699 | } | |
700 | ||
701 | /* No Free Page Found */ | |
702 | hashp->LAST_FREED = hashp->SPARES[splitnum]; | |
703 | hashp->SPARES[splitnum]++; | |
704 | offset = hashp->SPARES[splitnum] - | |
705 | (splitnum ? hashp->SPARES[splitnum - 1] : 0); | |
706 | ||
707 | #define OVMSG "HASH: Out of overflow pages. Increase page size\n" | |
708 | if (offset > SPLITMASK) { | |
709 | if (++splitnum >= NCACHED) { | |
710 | (void)_write(STDERR_FILENO, OVMSG, sizeof(OVMSG) - 1); | |
711 | errno = EFBIG; | |
712 | return (0); | |
713 | } | |
714 | hashp->OVFL_POINT = splitnum; | |
715 | hashp->SPARES[splitnum] = hashp->SPARES[splitnum-1]; | |
716 | hashp->SPARES[splitnum-1]--; | |
717 | offset = 1; | |
718 | } | |
719 | ||
720 | /* Check if we need to allocate a new bitmap page */ | |
721 | if (free_bit == (hashp->BSIZE << BYTE_SHIFT) - 1) { | |
722 | free_page++; | |
723 | if (free_page >= NCACHED) { | |
724 | (void)_write(STDERR_FILENO, OVMSG, sizeof(OVMSG) - 1); | |
725 | errno = EFBIG; | |
726 | return (0); | |
727 | } | |
728 | /* | |
729 | * This is tricky. The 1 indicates that you want the new page | |
730 | * allocated with 1 clear bit. Actually, you are going to | |
731 | * allocate 2 pages from this map. The first is going to be | |
732 | * the map page, the second is the overflow page we were | |
733 | * looking for. The init_bitmap routine automatically, sets | |
734 | * the first bit of itself to indicate that the bitmap itself | |
735 | * is in use. We would explicitly set the second bit, but | |
736 | * don't have to if we tell init_bitmap not to leave it clear | |
737 | * in the first place. | |
738 | */ | |
739 | if (__ibitmap(hashp, | |
740 | (int)OADDR_OF(splitnum, offset), 1, free_page)) | |
741 | return (0); | |
742 | hashp->SPARES[splitnum]++; | |
743 | #ifdef DEBUG2 | |
744 | free_bit = 2; | |
745 | #endif | |
746 | offset++; | |
747 | if (offset > SPLITMASK) { | |
748 | if (++splitnum >= NCACHED) { | |
749 | (void)_write(STDERR_FILENO, OVMSG, | |
750 | sizeof(OVMSG) - 1); | |
751 | errno = EFBIG; | |
752 | return (0); | |
753 | } | |
754 | hashp->OVFL_POINT = splitnum; | |
755 | hashp->SPARES[splitnum] = hashp->SPARES[splitnum-1]; | |
756 | hashp->SPARES[splitnum-1]--; | |
757 | offset = 0; | |
758 | } | |
759 | } else { | |
760 | /* | |
761 | * Free_bit addresses the last used bit. Bump it to address | |
762 | * the first available bit. | |
763 | */ | |
764 | free_bit++; | |
765 | SETBIT(freep, free_bit); | |
766 | } | |
767 | ||
768 | /* Calculate address of the new overflow page */ | |
769 | addr = OADDR_OF(splitnum, offset); | |
770 | #ifdef DEBUG2 | |
771 | (void)fprintf(stderr, "OVERFLOW_PAGE: ADDR: %d BIT: %d PAGE %d\n", | |
772 | addr, free_bit, free_page); | |
773 | #endif | |
774 | return (addr); | |
775 | ||
776 | found: | |
777 | bit = bit + first_free(freep[j]); | |
778 | SETBIT(freep, bit); | |
779 | #ifdef DEBUG2 | |
780 | tmp1 = bit; | |
781 | tmp2 = i; | |
782 | #endif | |
783 | /* | |
784 | * Bits are addressed starting with 0, but overflow pages are addressed | |
785 | * beginning at 1. Bit is a bit addressnumber, so we need to increment | |
786 | * it to convert it to a page number. | |
787 | */ | |
788 | bit = 1 + bit + (i * (hashp->BSIZE << BYTE_SHIFT)); | |
789 | if (bit >= hashp->LAST_FREED) | |
790 | hashp->LAST_FREED = bit - 1; | |
791 | ||
792 | /* Calculate the split number for this page */ | |
793 | for (i = 0; (i < splitnum) && (bit > hashp->SPARES[i]); i++); | |
794 | offset = (i ? bit - hashp->SPARES[i - 1] : bit); | |
795 | if (offset >= SPLITMASK) { | |
796 | (void)_write(STDERR_FILENO, OVMSG, sizeof(OVMSG) - 1); | |
797 | errno = EFBIG; | |
798 | return (0); /* Out of overflow pages */ | |
799 | } | |
800 | addr = OADDR_OF(i, offset); | |
801 | #ifdef DEBUG2 | |
802 | (void)fprintf(stderr, "OVERFLOW_PAGE: ADDR: %d BIT: %d PAGE %d\n", | |
803 | addr, tmp1, tmp2); | |
804 | #endif | |
805 | ||
806 | /* Allocate and return the overflow page */ | |
807 | return (addr); | |
808 | } | |
809 | ||
810 | /* | |
811 | * Mark this overflow page as free. | |
812 | */ | |
813 | void | |
814 | __free_ovflpage(HTAB *hashp, BUFHEAD *obufp) | |
815 | { | |
816 | u_int16_t addr; | |
817 | u_int32_t *freep; | |
818 | int bit_address, free_page, free_bit; | |
819 | u_int16_t ndx; | |
820 | ||
821 | addr = obufp->addr; | |
822 | #ifdef DEBUG1 | |
823 | (void)fprintf(stderr, "Freeing %d\n", addr); | |
824 | #endif | |
825 | ndx = (((u_int16_t)addr) >> SPLITSHIFT); | |
826 | bit_address = | |
827 | (ndx ? hashp->SPARES[ndx - 1] : 0) + (addr & SPLITMASK) - 1; | |
828 | if (bit_address < hashp->LAST_FREED) | |
829 | hashp->LAST_FREED = bit_address; | |
830 | free_page = (bit_address >> (hashp->BSHIFT + BYTE_SHIFT)); | |
831 | free_bit = bit_address & ((hashp->BSIZE << BYTE_SHIFT) - 1); | |
832 | ||
833 | if (!(freep = hashp->mapp[free_page])) | |
834 | freep = fetch_bitmap(hashp, free_page); | |
835 | #ifdef DEBUG | |
836 | /* | |
837 | * This had better never happen. It means we tried to read a bitmap | |
838 | * that has already had overflow pages allocated off it, and we | |
839 | * failed to read it from the file. | |
840 | */ | |
841 | if (!freep) | |
842 | assert(0); | |
843 | #endif | |
844 | CLRBIT(freep, free_bit); | |
845 | #ifdef DEBUG2 | |
846 | (void)fprintf(stderr, "FREE_OVFLPAGE: ADDR: %d BIT: %d PAGE %d\n", | |
847 | obufp->addr, free_bit, free_page); | |
848 | #endif | |
849 | __reclaim_buf(hashp, obufp); | |
850 | } | |
851 | ||
852 | /* | |
853 | * Returns: | |
854 | * 0 success | |
855 | * -1 failure | |
856 | */ | |
857 | static int | |
858 | open_temp(HTAB *hashp) | |
859 | { | |
860 | sigset_t set, oset; | |
861 | int len; | |
862 | char *envtmp = NULL; | |
863 | char path[MAXPATHLEN]; | |
864 | ||
865 | if (issetugid() == 0) | |
866 | envtmp = getenv("TMPDIR"); | |
867 | len = snprintf(path, | |
868 | sizeof(path), "%s/_hash.XXXXXX", envtmp ? envtmp : "/tmp"); | |
869 | if (len < 0 || len >= sizeof(path)) { | |
870 | errno = ENAMETOOLONG; | |
871 | return (-1); | |
872 | } | |
873 | ||
874 | /* Block signals; make sure file goes away at process exit. */ | |
875 | (void)sigfillset(&set); | |
876 | (void)_sigprocmask(SIG_BLOCK, &set, &oset); | |
877 | if ((hashp->fp = mkstemp(path)) != -1) { | |
878 | (void)unlink(path); | |
879 | (void)_fcntl(hashp->fp, F_SETFD, 1); | |
880 | } | |
881 | (void)_sigprocmask(SIG_SETMASK, &oset, (sigset_t *)NULL); | |
882 | return (hashp->fp != -1 ? 0 : -1); | |
883 | } | |
884 | ||
885 | /* | |
886 | * We have to know that the key will fit, but the last entry on the page is | |
887 | * an overflow pair, so we need to shift things. | |
888 | */ | |
889 | static void | |
890 | squeeze_key(u_int16_t *sp, const DBT *key, const DBT *val) | |
891 | { | |
892 | char *p; | |
893 | u_int16_t free_space, n, off, pageno; | |
894 | ||
895 | p = (char *)sp; | |
896 | n = sp[0]; | |
897 | free_space = FREESPACE(sp); | |
898 | off = OFFSET(sp); | |
899 | ||
900 | pageno = sp[n - 1]; | |
901 | off -= key->size; | |
902 | sp[n - 1] = off; | |
903 | memmove(p + off, key->data, key->size); | |
904 | off -= val->size; | |
905 | sp[n] = off; | |
906 | memmove(p + off, val->data, val->size); | |
907 | sp[0] = n + 2; | |
908 | sp[n + 1] = pageno; | |
909 | sp[n + 2] = OVFLPAGE; | |
910 | FREESPACE(sp) = free_space - PAIRSIZE(key, val); | |
911 | OFFSET(sp) = off; | |
912 | } | |
913 | ||
914 | static u_int32_t * | |
915 | fetch_bitmap(HTAB *hashp, int ndx) | |
916 | { | |
917 | if (ndx >= hashp->nmaps) | |
918 | return (NULL); | |
919 | if ((hashp->mapp[ndx] = (u_int32_t *)malloc(hashp->BSIZE)) == NULL) | |
920 | return (NULL); | |
921 | if (__get_page(hashp, | |
922 | (char *)hashp->mapp[ndx], hashp->BITMAPS[ndx], 0, 1, 1)) { | |
923 | free(hashp->mapp[ndx]); | |
924 | return (NULL); | |
925 | } | |
926 | return (hashp->mapp[ndx]); | |
927 | } | |
928 | ||
#ifdef DEBUG4
/*
 * Debug aid: print to stderr the address of the page at addr followed by
 * the address of every page linked behind it, on one line.
 *
 * NOTE(review): hashp is neither a parameter nor a local here, so this
 * only builds when a symbol of that name is visible at file scope --
 * confirm before enabling DEBUG4.
 *
 * Returns:
 *	 0 when the whole chain was printed
 *	-1 when a page in the chain could not be fetched
 */
int
print_chain(int addr)
{
	BUFHEAD *bufp;
	u_int16_t *bp, oaddr;

	(void)fprintf(stderr, "%d ", addr);
	bufp = __get_buf(hashp, addr, NULL, 0);
	if (bufp == NULL) {
		(void)fprintf(stderr, "\n");
		return (-1);
	}
	/* Page words are unsigned everywhere else in this file. */
	bp = (u_int16_t *)bufp->page;
	while (bp[0] && ((bp[bp[0]] == OVFLPAGE) ||
	    ((bp[0] > 2) && bp[2] < REAL_KEY))) {
		/* The link to the next page is the second-to-last slot. */
		oaddr = bp[bp[0] - 1];
		(void)fprintf(stderr, "%d ", (int)oaddr);
		bufp = __get_buf(hashp, (int)oaddr, bufp, 0);
		if (bufp == NULL) {
			(void)fprintf(stderr, "\n");
			return (-1);
		}
		bp = (u_int16_t *)bufp->page;
	}
	(void)fprintf(stderr, "\n");
	return (0);
}
#endif