]> git.saurik.com Git - apple/libc.git/blob - db/mpool/mpool.c
55bdefe96aadbd1a45697aad0c81855fc8cbdd76
[apple/libc.git] / db / mpool / mpool.c
1 /*
2 * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23 /*-
24 * Copyright (c) 1990, 1993, 1994
25 * The Regents of the University of California. All rights reserved.
26 *
27 * Redistribution and use in source and binary forms, with or without
28 * modification, are permitted provided that the following conditions
29 * are met:
30 * 1. Redistributions of source code must retain the above copyright
31 * notice, this list of conditions and the following disclaimer.
32 * 2. Redistributions in binary form must reproduce the above copyright
33 * notice, this list of conditions and the following disclaimer in the
34 * documentation and/or other materials provided with the distribution.
35 * 3. All advertising materials mentioning features or use of this software
36 * must display the following acknowledgement:
37 * This product includes software developed by the University of
38 * California, Berkeley and its contributors.
39 * 4. Neither the name of the University nor the names of its contributors
40 * may be used to endorse or promote products derived from this software
41 * without specific prior written permission.
42 *
43 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
44 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
45 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
46 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
47 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
48 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
49 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
50 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
51 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
52 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
53 * SUCH DAMAGE.
54 */
55
56 #if defined(LIBC_SCCS) && !defined(lint)
57 static char sccsid[] = "@(#)mpool.c 8.5 (Berkeley) 7/26/94";
58 #endif /* LIBC_SCCS and not lint */
59 #include <sys/cdefs.h>
60
61 #include <sys/param.h>
62 #include <sys/queue.h>
63 #include <sys/stat.h>
64
65 #include <errno.h>
66 #include <stdio.h>
67 #include <stdlib.h>
68 #include <string.h>
69 #include <unistd.h>
70
71 #include <db.h>
72
73 #define __MPOOLINTERFACE_PRIVATE
74 #include <mpool.h>
75
76 static BKT *mpool_bkt(MPOOL *);
77 static BKT *mpool_look(MPOOL *, pgno_t);
78 static int mpool_write(MPOOL *, BKT *);
79
80 /*
81 * mpool_open --
82 * Initialize a memory pool.
83 */
84 MPOOL *
85 mpool_open(key, fd, pagesize, maxcache)
86 void *key;
87 int fd;
88 pgno_t pagesize, maxcache;
89 {
90 struct stat sb;
91 MPOOL *mp;
92 int entry;
93
94 /*
95 * Get information about the file.
96 *
97 * XXX
98 * We don't currently handle pipes, although we should.
99 */
100 if (fstat(fd, &sb))
101 return (NULL);
102 if (!S_ISREG(sb.st_mode)) {
103 errno = ESPIPE;
104 return (NULL);
105 }
106
107 /* Allocate and initialize the MPOOL cookie. */
108 if ((mp = (MPOOL *)calloc(1, sizeof(MPOOL))) == NULL)
109 return (NULL);
110 TAILQ_INIT(&mp->lqh);
111 for (entry = 0; entry < HASHSIZE; ++entry)
112 TAILQ_INIT(&mp->hqh[entry]);
113 mp->maxcache = maxcache;
114 mp->npages = sb.st_size / pagesize;
115 mp->pagesize = pagesize;
116 mp->fd = fd;
117 return (mp);
118 }
119
120 /*
121 * mpool_filter --
122 * Initialize input/output filters.
123 */
124 void
125 mpool_filter(mp, pgin, pgout, pgcookie)
126 MPOOL *mp;
127 void (*pgin)(void *, pgno_t, void *);
128 void (*pgout)(void *, pgno_t, void *);
129 void *pgcookie;
130 {
131 mp->pgin = pgin;
132 mp->pgout = pgout;
133 mp->pgcookie = pgcookie;
134 }
135
136 /*
137 * mpool_new --
138 * Get a new page of memory.
139 */
140 void *
141 mpool_new(mp, pgnoaddr)
142 MPOOL *mp;
143 pgno_t *pgnoaddr;
144 {
145 struct _hqh *head;
146 BKT *bp;
147
148 if (mp->npages == MAX_PAGE_NUMBER) {
149 (void)fprintf(stderr, "mpool_new: page allocation overflow.\n");
150 abort();
151 }
152 #ifdef STATISTICS
153 ++mp->pagenew;
154 #endif
155 /*
156 * Get a BKT from the cache. Assign a new page number, attach
157 * it to the head of the hash chain, the tail of the lru chain,
158 * and return.
159 */
160 if ((bp = mpool_bkt(mp)) == NULL)
161 return (NULL);
162 *pgnoaddr = bp->pgno = mp->npages++;
163 bp->flags = MPOOL_PINNED;
164
165 head = &mp->hqh[HASHKEY(bp->pgno)];
166 TAILQ_INSERT_HEAD(head, bp, hq);
167 TAILQ_INSERT_TAIL(&mp->lqh, bp, q);
168 return (bp->page);
169 }
170
171 /*
172 * mpool_get
173 * Get a page.
174 */
175 void *
176 mpool_get(mp, pgno, flags)
177 MPOOL *mp;
178 pgno_t pgno;
179 u_int flags; /* XXX not used? */
180 {
181 struct _hqh *head;
182 BKT *bp;
183 off_t off;
184 int nr;
185
186 /* Check for attempt to retrieve a non-existent page. */
187 if (pgno >= mp->npages) {
188 errno = EINVAL;
189 return (NULL);
190 }
191
192 #ifdef STATISTICS
193 ++mp->pageget;
194 #endif
195
196 /* Check for a page that is cached. */
197 if ((bp = mpool_look(mp, pgno)) != NULL) {
198 #ifdef DEBUG
199 if (bp->flags & MPOOL_PINNED) {
200 (void)fprintf(stderr,
201 "mpool_get: page %d already pinned\n", bp->pgno);
202 abort();
203 }
204 #endif
205 /*
206 * Move the page to the head of the hash chain and the tail
207 * of the lru chain.
208 */
209 head = &mp->hqh[HASHKEY(bp->pgno)];
210 TAILQ_REMOVE(head, bp, hq);
211 TAILQ_INSERT_HEAD(head, bp, hq);
212 TAILQ_REMOVE(&mp->lqh, bp, q);
213 TAILQ_INSERT_TAIL(&mp->lqh, bp, q);
214
215 /* Return a pinned page. */
216 bp->flags |= MPOOL_PINNED;
217 return (bp->page);
218 }
219
220 /* Get a page from the cache. */
221 if ((bp = mpool_bkt(mp)) == NULL)
222 return (NULL);
223
224 /* Read in the contents. */
225 #ifdef STATISTICS
226 ++mp->pageread;
227 #endif
228 off = mp->pagesize * pgno;
229 if (lseek(mp->fd, off, SEEK_SET) != off)
230 return (NULL);
231 if ((nr = read(mp->fd, bp->page, mp->pagesize)) != mp->pagesize) {
232 if (nr >= 0)
233 errno = EFTYPE;
234 return (NULL);
235 }
236
237 /* Set the page number, pin the page. */
238 bp->pgno = pgno;
239 bp->flags = MPOOL_PINNED;
240
241 /*
242 * Add the page to the head of the hash chain and the tail
243 * of the lru chain.
244 */
245 head = &mp->hqh[HASHKEY(bp->pgno)];
246 TAILQ_INSERT_HEAD(head, bp, hq);
247 TAILQ_INSERT_TAIL(&mp->lqh, bp, q);
248
249 /* Run through the user's filter. */
250 if (mp->pgin != NULL)
251 (mp->pgin)(mp->pgcookie, bp->pgno, bp->page);
252
253 return (bp->page);
254 }
255
256 /*
257 * mpool_put
258 * Return a page.
259 */
260 int
261 mpool_put(mp, page, flags)
262 MPOOL *mp;
263 void *page;
264 u_int flags;
265 {
266 BKT *bp;
267
268 #ifdef STATISTICS
269 ++mp->pageput;
270 #endif
271 bp = (BKT *)((char *)page - sizeof(BKT));
272 #ifdef DEBUG
273 if (!(bp->flags & MPOOL_PINNED)) {
274 (void)fprintf(stderr,
275 "mpool_put: page %d not pinned\n", bp->pgno);
276 abort();
277 }
278 #endif
279 bp->flags &= ~MPOOL_PINNED;
280 bp->flags |= flags & MPOOL_DIRTY;
281 return (RET_SUCCESS);
282 }
283
284 /*
285 * mpool_close
286 * Close the buffer pool.
287 */
288 int
289 mpool_close(mp)
290 MPOOL *mp;
291 {
292 BKT *bp;
293
294 /* Free up any space allocated to the lru pages. */
295 while (!TAILQ_EMPTY(&mp->lqh)) {
296 bp = TAILQ_FIRST(&mp->lqh);
297 TAILQ_REMOVE(&mp->lqh, bp, q);
298 free(bp);
299 }
300
301 /* Free the MPOOL cookie. */
302 free(mp);
303 return (RET_SUCCESS);
304 }
305
306 /*
307 * mpool_sync
308 * Sync the pool to disk.
309 */
310 int
311 mpool_sync(mp)
312 MPOOL *mp;
313 {
314 BKT *bp;
315
316 /* Walk the lru chain, flushing any dirty pages to disk. */
317 TAILQ_FOREACH(bp, &mp->lqh, q)
318 if (bp->flags & MPOOL_DIRTY &&
319 mpool_write(mp, bp) == RET_ERROR)
320 return (RET_ERROR);
321
322 /* Sync the file descriptor. */
323 return (fsync(mp->fd) ? RET_ERROR : RET_SUCCESS);
324 }
325
326 /*
327 * mpool_bkt
328 * Get a page from the cache (or create one).
329 */
330 static BKT *
331 mpool_bkt(mp)
332 MPOOL *mp;
333 {
334 struct _hqh *head;
335 BKT *bp;
336
337 /* If under the max cached, always create a new page. */
338 if (mp->curcache < mp->maxcache)
339 goto new;
340
341 /*
342 * If the cache is max'd out, walk the lru list for a buffer we
343 * can flush. If we find one, write it (if necessary) and take it
344 * off any lists. If we don't find anything we grow the cache anyway.
345 * The cache never shrinks.
346 */
347 TAILQ_FOREACH(bp, &mp->lqh, q)
348 if (!(bp->flags & MPOOL_PINNED)) {
349 /* Flush if dirty. */
350 if (bp->flags & MPOOL_DIRTY &&
351 mpool_write(mp, bp) == RET_ERROR)
352 return (NULL);
353 #ifdef STATISTICS
354 ++mp->pageflush;
355 #endif
356 /* Remove from the hash and lru queues. */
357 head = &mp->hqh[HASHKEY(bp->pgno)];
358 TAILQ_REMOVE(head, bp, hq);
359 TAILQ_REMOVE(&mp->lqh, bp, q);
360 #ifdef DEBUG
361 { void *spage;
362 spage = bp->page;
363 memset(bp, 0xff, sizeof(BKT) + mp->pagesize);
364 bp->page = spage;
365 }
366 #endif
367 return (bp);
368 }
369
370 new: if ((bp = (BKT *)malloc(sizeof(BKT) + mp->pagesize)) == NULL)
371 return (NULL);
372 #ifdef STATISTICS
373 ++mp->pagealloc;
374 #endif
375 #if defined(DEBUG) || defined(PURIFY)
376 memset(bp, 0xff, sizeof(BKT) + mp->pagesize);
377 #endif
378 bp->page = (char *)bp + sizeof(BKT);
379 ++mp->curcache;
380 return (bp);
381 }
382
383 /*
384 * mpool_write
385 * Write a page to disk.
386 */
387 static int
388 mpool_write(mp, bp)
389 MPOOL *mp;
390 BKT *bp;
391 {
392 off_t off;
393
394 #ifdef STATISTICS
395 ++mp->pagewrite;
396 #endif
397
398 /* Run through the user's filter. */
399 if (mp->pgout)
400 (mp->pgout)(mp->pgcookie, bp->pgno, bp->page);
401
402 off = mp->pagesize * bp->pgno;
403 if (lseek(mp->fd, off, SEEK_SET) != off)
404 return (RET_ERROR);
405 if (write(mp->fd, bp->page, mp->pagesize) != mp->pagesize)
406 return (RET_ERROR);
407
408 bp->flags &= ~MPOOL_DIRTY;
409 return (RET_SUCCESS);
410 }
411
412 /*
413 * mpool_look
414 * Lookup a page in the cache.
415 */
416 static BKT *
417 mpool_look(mp, pgno)
418 MPOOL *mp;
419 pgno_t pgno;
420 {
421 struct _hqh *head;
422 BKT *bp;
423
424 head = &mp->hqh[HASHKEY(pgno)];
425 TAILQ_FOREACH(bp, head, hq)
426 if (bp->pgno == pgno) {
427 #ifdef STATISTICS
428 ++mp->cachehit;
429 #endif
430 return (bp);
431 }
432 #ifdef STATISTICS
433 ++mp->cachemiss;
434 #endif
435 return (NULL);
436 }
437
#ifdef STATISTICS
/*
 * mpool_stat
 *	Print out cache statistics.
 *
 *	Writes the pool geometry, the event counters and the hit rate to
 *	stderr, followed by the page numbers on the lru list, ten per line;
 *	a page is suffixed with "d" when dirty and "P" when pinned.
 *
 *	Every integer argument is cast to unsigned long so that it matches
 *	its %lu conversion whatever the width of the underlying typedef.
 */
void
mpool_stat(mp)
	MPOOL *mp;
{
	BKT *bp;
	int cnt;
	const char *sep;

	(void)fprintf(stderr, "%lu pages in the file\n",
	    (unsigned long)mp->npages);
	(void)fprintf(stderr,
	    "page size %lu, cacheing %lu pages of %lu page max cache\n",
	    (unsigned long)mp->pagesize, (unsigned long)mp->curcache,
	    (unsigned long)mp->maxcache);
	(void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n",
	    (unsigned long)mp->pageput, (unsigned long)mp->pageget,
	    (unsigned long)mp->pagenew);
	(void)fprintf(stderr, "%lu page allocs, %lu page flushes\n",
	    (unsigned long)mp->pagealloc, (unsigned long)mp->pageflush);
	/* Skip the rate when there were no lookups (avoids 0 / 0). */
	if (mp->cachehit + mp->cachemiss)
		(void)fprintf(stderr,
		    "%.0f%% cache hit rate (%lu hits, %lu misses)\n",
		    ((double)mp->cachehit / (mp->cachehit + mp->cachemiss))
		    * 100, (unsigned long)mp->cachehit,
		    (unsigned long)mp->cachemiss);
	(void)fprintf(stderr, "%lu page reads, %lu page writes\n",
	    (unsigned long)mp->pageread, (unsigned long)mp->pagewrite);

	sep = "";
	cnt = 0;
	TAILQ_FOREACH(bp, &mp->lqh, q) {
		(void)fprintf(stderr, "%s%lu", sep, (unsigned long)bp->pgno);
		if (bp->flags & MPOOL_DIRTY)
			(void)fprintf(stderr, "d");
		if (bp->flags & MPOOL_PINNED)
			(void)fprintf(stderr, "P");
		/* Start a new output line after every tenth page. */
		if (++cnt == 10) {
			sep = "\n";
			cnt = 0;
		} else
			sep = ", ";
	}
	(void)fprintf(stderr, "\n");
}
#endif