1 /*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
23 /*-
24 * Copyright (c) 1994 Christopher G. Demetriou
25 * Copyright (c) 1982, 1986, 1989, 1993
26 * The Regents of the University of California. All rights reserved.
27 * (c) UNIX System Laboratories, Inc.
28 * All or some portions of this file are derived from material licensed
29 * to the University of California by American Telephone and Telegraph
30 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
31 * the permission of UNIX System Laboratories, Inc.
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 * notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer in the
40 * documentation and/or other materials provided with the distribution.
41 * 3. All advertising materials mentioning features or use of this software
42 * must display the following acknowledgement:
43 * This product includes software developed by the University of
44 * California, Berkeley and its contributors.
45 * 4. Neither the name of the University nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 * The NEXTSTEP Software License Agreement specifies the terms
62 * and conditions for redistribution.
63 *
64 * @(#)vfs_bio.c 8.6 (Berkeley) 1/11/94
65 */
66
67 /*
68 * Some references:
69 * Bach: The Design of the UNIX Operating System (Prentice Hall, 1986)
70 * Leffler, et al.: The Design and Implementation of the 4.3BSD
 71 * UNIX Operating System (Addison Wesley, 1989)
72 */
73 #define ZALLOC_METADATA 1
74
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/proc.h>
78 #include <sys/buf.h>
79 #include <sys/vnode.h>
80 #include <sys/mount.h>
81 #include <sys/trace.h>
82 #include <sys/malloc.h>
83 #include <sys/resourcevar.h>
84 #include <miscfs/specfs/specdev.h>
85 #include <sys/ubc.h>
86 #include <vm/vm_pageout.h>
87 #if DIAGNOSTIC
88 #include <kern/assert.h>
89 #endif /* DIAGNOSTIC */
90 #include <kern/task.h>
91 #include <kern/zalloc.h>
92
93 #include <sys/kdebug.h>
94
95 extern void bufqinc(int q);
96 extern void bufqdec(int q);
97 extern void bufq_balance_thread_init();
98
99 extern void reassignbuf(struct buf *, struct vnode *);
100 static struct buf *getnewbuf(int slpflag, int slptimeo, int *queue);
101
102 extern int niobuf; /* The number of IO buffer headers for cluster IO */
103
104 #if TRACE
105 struct proc *traceproc;
106 int tracewhich, tracebuf[TRCSIZ];
107 u_int tracex;
108 char traceflags[TR_NFLAGS];
109 #endif /* TRACE */
110
111 /*
112 * Definitions for the buffer hash lists.
113 */
114 #define BUFHASH(dvp, lbn) \
115 (&bufhashtbl[((long)(dvp) / sizeof(*(dvp)) + (int)(lbn)) & bufhash])
116 LIST_HEAD(bufhashhdr, buf) *bufhashtbl, invalhash;
117 u_long bufhash;
118
119 /* Definitions for the buffer stats. */
120 struct bufstats bufstats;
121
122 /*
123 * Insq/Remq for the buffer hash lists.
124 */
125 #if 0
126 #define binshash(bp, dp) LIST_INSERT_HEAD(dp, bp, b_hash)
127 #define bremhash(bp) LIST_REMOVE(bp, b_hash)
128 #endif /* 0 */
129
130
131 TAILQ_HEAD(ioqueue, buf) iobufqueue;
132 TAILQ_HEAD(bqueues, buf) bufqueues[BQUEUES];
133 int needbuffer;
134 int need_iobuffer;
135
136 /*
137 * Insq/Remq for the buffer free lists.
138 */
139 #define binsheadfree(bp, dp, whichq) do { \
140 TAILQ_INSERT_HEAD(dp, bp, b_freelist); \
141 bufqinc((whichq)); \
142 (bp)->b_whichq = whichq; \
143 (bp)->b_timestamp = time.tv_sec; \
144 } while (0)
145
146 #define binstailfree(bp, dp, whichq) do { \
147 TAILQ_INSERT_TAIL(dp, bp, b_freelist); \
148 bufqinc((whichq)); \
149 (bp)->b_whichq = whichq; \
150 (bp)->b_timestamp = time.tv_sec; \
151 } while (0)
152
153 #define BHASHENTCHECK(bp) \
154 if ((bp)->b_hash.le_prev != (struct buf **)0xdeadbeef) \
155 panic("%x: b_hash.le_prev is deadb", (bp));
156
157 #define BLISTNONE(bp) \
158 (bp)->b_hash.le_next = (struct buf *)0; \
159 (bp)->b_hash.le_prev = (struct buf **)0xdeadbeef;
160
161 simple_lock_data_t bufhashlist_slock; /* lock on buffer hash list */
162
163 /*
164 * Time in seconds before a buffer on a list is
165 * considered as a stale buffer
166 */
167 #define LRU_IS_STALE 120 /* default value for the LRU */
168 #define AGE_IS_STALE 60 /* default value for the AGE */
169 #define META_IS_STALE 180 /* default value for the BQ_META */
170
171 int lru_is_stale = LRU_IS_STALE;
172 int age_is_stale = AGE_IS_STALE;
173 int meta_is_stale = META_IS_STALE;
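
/*
 * Illustrative sketch (not referenced by the code below): staleness is
 * judged by comparing the current time against the timestamp recorded
 * when the buffer was put on its free list, using the per-queue limit
 * above, e.g. for the LRU queue:
 *
 *	stale = (time.tv_sec - bp->b_timestamp) > lru_is_stale;
 *
 * getnewbuf() and balancebufq() below apply tests of this form.
 */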
174
175 #if 1
176 void
177 blistenterhead(struct bufhashhdr * head, struct buf * bp)
178 {
179 if ((bp->b_hash.le_next = (head)->lh_first) != NULL)
180 (head)->lh_first->b_hash.le_prev = &(bp)->b_hash.le_next;
181 (head)->lh_first = bp;
182 bp->b_hash.le_prev = &(head)->lh_first;
183 if (bp->b_hash.le_prev == (struct buf **)0xdeadbeef)
184 panic("blistenterhead: le_prev is deadbeef");
185
186 }
187 #endif
188
189 #if 1
190 void
191 binshash(struct buf *bp, struct bufhashhdr *dp)
192 {
193 int s;
194
195 struct buf *nbp;
196
197 simple_lock(&bufhashlist_slock);
198 #if 0
199 if(incore(bp->b_vp, bp->b_lblkno)) {
200 panic("adding to queue already existing element");
201 }
202 #endif /* 0 */
203 BHASHENTCHECK(bp);
204
205 nbp = dp->lh_first;
206 for(; nbp != NULL; nbp = nbp->b_hash.le_next) {
207 if(nbp == bp)
208 panic("buf already in hashlist");
209 }
210
211 #if 0
212 LIST_INSERT_HEAD(dp, bp, b_hash);
213 #else
214 blistenterhead(dp, bp);
215 #endif
216 simple_unlock(&bufhashlist_slock);
217 }
218
219 void
220 bremhash(struct buf *bp)
221 {
222 int s;
223
224 simple_lock(&bufhashlist_slock);
225 if (bp->b_hash.le_prev == (struct buf **)0xdeadbeef)
226 panic("bremhash le_prev is deadbeef");
227 if (bp->b_hash.le_next == bp)
228 panic("bremhash: next points to self");
229
230 if (bp->b_hash.le_next != NULL)
231 bp->b_hash.le_next->b_hash.le_prev = bp->b_hash.le_prev;
232 *bp->b_hash.le_prev = (bp)->b_hash.le_next;
233 simple_unlock(&bufhashlist_slock);
234 }
235
236 #endif /* 1 */
237
238
239 /*
240 * Remove a buffer from the free list it's on
241 */
242 void
243 bremfree(bp)
244 struct buf *bp;
245 {
246 struct bqueues *dp = NULL;
247 int whichq = -1;
248
249 /*
250 * We only calculate the head of the freelist when removing
251 * the last element of the list as that is the only time that
252 * it is needed (e.g. to reset the tail pointer).
253 *
254 * NB: This makes an assumption about how tailq's are implemented.
255 */
256 if (bp->b_freelist.tqe_next == NULL) {
257 for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++)
258 if (dp->tqh_last == &bp->b_freelist.tqe_next)
259 break;
260 if (dp == &bufqueues[BQUEUES])
261 panic("bremfree: lost tail");
262 }
263 TAILQ_REMOVE(dp, bp, b_freelist);
264 whichq = bp->b_whichq;
265 bufqdec(whichq);
266 bp->b_whichq = -1;
267 bp->b_timestamp = 0;
268 }
269
270 /*
271 * Initialize buffers and hash links for buffers.
272 */
273 void
274 bufinit()
275 {
276 register struct buf *bp;
277 register struct bqueues *dp;
278 register int i;
279 int metabuf;
280 long whichq;
281 #if ZALLOC_METADATA
282 static void bufzoneinit();
283 #endif /* ZALLOC_METADATA */
284
285 /* Initialize the buffer queues ('freelists') and the hash table */
286 for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++)
287 TAILQ_INIT(dp);
288 bufhashtbl = hashinit(nbuf, M_CACHE, &bufhash);
289
290 simple_lock_init(&bufhashlist_slock );
291
292 metabuf = nbuf/8; /* reserved for meta buf */
293
294 /* Initialize the buffer headers */
295 for (i = 0; i < nbuf; i++) {
296 bp = &buf[i];
297 bzero((char *)bp, sizeof *bp);
298 bp->b_dev = NODEV;
299 bp->b_rcred = NOCRED;
300 bp->b_wcred = NOCRED;
301 bp->b_vnbufs.le_next = NOLIST;
302 bp->b_flags = B_INVAL;
303 /*
 304 * Put metabuf buffer headers on the meta-data list and
 305 * the rest of the buffer headers on the empty list.
306 */
307 if (--metabuf )
308 whichq = BQ_META;
309 else
310 whichq = BQ_EMPTY;
311
312 BLISTNONE(bp);
313 dp = &bufqueues[whichq];
314 binsheadfree(bp, dp, whichq);
315 binshash(bp, &invalhash);
316 }
317
318 for (; i < nbuf + niobuf; i++) {
319 bp = &buf[i];
320 bzero((char *)bp, sizeof *bp);
321 bp->b_dev = NODEV;
322 bp->b_rcred = NOCRED;
323 bp->b_wcred = NOCRED;
324 bp->b_vnbufs.le_next = NOLIST;
325 bp->b_flags = B_INVAL;
326 binsheadfree(bp, &iobufqueue, -1);
327 }
328
329 printf("using %d buffer headers and %d cluster IO buffer headers\n",
330 nbuf, niobuf);
331
332 #if ZALLOC_METADATA
333 /* Set up zones for meta-data */
334 bufzoneinit();
335 #endif
336
337 #if XXX
338 /* create a thread to do dynamic buffer queue balancing */
339 bufq_balance_thread_init();
340 #endif /* XXX */
341 }
342
343 /* __inline */
344 struct buf *
345 bio_doread(vp, blkno, size, cred, async, queuetype)
346 struct vnode *vp;
347 daddr_t blkno;
348 int size;
349 struct ucred *cred;
350 int async;
351 int queuetype;
352 {
353 register struct buf *bp;
354 struct proc *p = current_proc();
355
356 bp = getblk(vp, blkno, size, 0, 0, queuetype);
357
358 /*
 359 * If the buffer does not have valid data, start a read.
 360 * Note that if the buffer is B_INVAL, getblk() won't return it.
 361 * Therefore, it's valid if its I/O has completed or been delayed.
362 */
363 if (!ISSET(bp->b_flags, (B_DONE | B_DELWRI))) {
364 /* Start I/O for the buffer (keeping credentials). */
365 SET(bp->b_flags, B_READ | async);
366 if (cred != NOCRED && bp->b_rcred == NOCRED) {
367 /*
368 * NFS has embedded ucred.
369 * Can not crhold() here as that causes zone corruption
370 */
371 bp->b_rcred = crdup(cred);
372 }
373 VOP_STRATEGY(bp);
374
375 trace(TR_BREADMISS, pack(vp, size), blkno);
376
377 /* Pay for the read. */
378 if (p && p->p_stats)
379 p->p_stats->p_ru.ru_inblock++; /* XXX */
380 } else if (async) {
381 brelse(bp);
382 }
383
384 trace(TR_BREADHIT, pack(vp, size), blkno);
385
386 return (bp);
387 }
388 /*
389 * Read a disk block.
 390 * This algorithm is described in Bach (p. 54).
391 */
392 int
393 bread(vp, blkno, size, cred, bpp)
394 struct vnode *vp;
395 daddr_t blkno;
396 int size;
397 struct ucred *cred;
398 struct buf **bpp;
399 {
400 register struct buf *bp;
401
402 /* Get buffer for block. */
403 bp = *bpp = bio_doread(vp, blkno, size, cred, 0, BLK_READ);
404
405 /* Wait for the read to complete, and return result. */
406 return (biowait(bp));
407 }
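
/*
 * Illustrative sketch of a typical caller (not part of this file): read a
 * logical block, check the error propagated from biowait(), and release
 * the buffer when done. "lblkno" and "fs_bsize" are hypothetical values
 * supplied by the file system.
 *
 *	struct buf *bp;
 *	int error;
 *
 *	if (error = bread(vp, lblkno, fs_bsize, NOCRED, &bp)) {
 *		brelse(bp);
 *		return (error);
 *	}
 *	... examine or copy bp->b_data ...
 *	brelse(bp);
 *
 * If the block had been modified, bdwrite(bp) or bwrite(bp) would be used
 * in place of the final brelse().
 */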
408
409 /*
410 * Read a disk block. [bread() for meta-data]
 411 * This algorithm is described in Bach (p. 54).
412 */
413 int
414 meta_bread(vp, blkno, size, cred, bpp)
415 struct vnode *vp;
416 daddr_t blkno;
417 int size;
418 struct ucred *cred;
419 struct buf **bpp;
420 {
421 register struct buf *bp;
422
423 /* Get buffer for block. */
424 bp = *bpp = bio_doread(vp, blkno, size, cred, 0, BLK_META);
425
426 /* Wait for the read to complete, and return result. */
427 return (biowait(bp));
428 }
429
430 /*
431 * Read-ahead multiple disk blocks. The first is sync, the rest async.
432 * Trivial modification to the breada algorithm presented in Bach (p.55).
433 */
434 int
435 breadn(vp, blkno, size, rablks, rasizes, nrablks, cred, bpp)
436 struct vnode *vp;
437 daddr_t blkno; int size;
438 daddr_t rablks[]; int rasizes[];
439 int nrablks;
440 struct ucred *cred;
441 struct buf **bpp;
442 {
443 register struct buf *bp;
444 int i;
445
446 bp = *bpp = bio_doread(vp, blkno, size, cred, 0, BLK_READ);
447
448 /*
449 * For each of the read-ahead blocks, start a read, if necessary.
450 */
451 for (i = 0; i < nrablks; i++) {
452 /* If it's in the cache, just go on to next one. */
453 if (incore(vp, rablks[i]))
454 continue;
455
456 /* Get a buffer for the read-ahead block */
457 (void) bio_doread(vp, rablks[i], rasizes[i], cred, B_ASYNC, BLK_READ);
458 }
459
460 /* Otherwise, we had to start a read for it; wait until it's valid. */
461 return (biowait(bp));
462 }
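
/*
 * Illustrative sketch (hypothetical caller): read one block synchronously
 * and hint the next two blocks for asynchronous read-ahead; blocks that
 * are already incore are skipped by the loop above.
 *
 *	daddr_t rablks[2] = { lblkno + 1, lblkno + 2 };
 *	int rasizes[2] = { fs_bsize, fs_bsize };
 *
 *	error = breadn(vp, lblkno, fs_bsize, rablks, rasizes, 2, NOCRED, &bp);
 */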
463
464 /*
465 * Read with single-block read-ahead. Defined in Bach (p.55), but
466 * implemented as a call to breadn().
467 * XXX for compatibility with old file systems.
468 */
469 int
470 breada(vp, blkno, size, rablkno, rabsize, cred, bpp)
471 struct vnode *vp;
472 daddr_t blkno; int size;
473 daddr_t rablkno; int rabsize;
474 struct ucred *cred;
475 struct buf **bpp;
476 {
477
478 return (breadn(vp, blkno, size, &rablkno, &rabsize, 1, cred, bpp));
479 }
480
481 /*
482 * Block write. Described in Bach (p.56)
483 */
484 int
485 bwrite(bp)
486 struct buf *bp;
487 {
488 int rv, sync, wasdelayed;
489 struct proc *p = current_proc();
490 upl_t upl;
491 upl_page_info_t *pl;
492 void * object;
493 kern_return_t kret;
494 struct vnode *vp = bp->b_vp;
495
496 /* Remember buffer type, to switch on it later. */
497 sync = !ISSET(bp->b_flags, B_ASYNC);
498 wasdelayed = ISSET(bp->b_flags, B_DELWRI);
499 CLR(bp->b_flags, (B_READ | B_DONE | B_ERROR | B_DELWRI));
500
501 if (!sync) {
502 /*
503 * If not synchronous, pay for the I/O operation and make
504 * sure the buf is on the correct vnode queue. We have
505 * to do this now, because if we don't, the vnode may not
506 * be properly notified that its I/O has completed.
507 */
508 if (wasdelayed)
509 reassignbuf(bp, vp);
510 else
511 if (p && p->p_stats)
512 p->p_stats->p_ru.ru_oublock++; /* XXX */
513 }
514
515 trace(TR_BWRITE, pack(vp, bp->b_bcount), bp->b_lblkno);
516
517 /* Initiate disk write. Make sure the appropriate party is charged. */
518 SET(bp->b_flags, B_WRITEINPROG);
519 vp->v_numoutput++;
520
521 VOP_STRATEGY(bp);
522
523 if (sync) {
524 /*
525 * If I/O was synchronous, wait for it to complete.
526 */
527 rv = biowait(bp);
528
529 /*
 530 * Pay for the I/O operation, if it hasn't been paid for, and
 531 * make sure the buffer is on the correct vnode queue. (Async operations
 532 * were paid for above.)
533 */
534 if (wasdelayed)
535 reassignbuf(bp, vp);
536 else
537 if (p && p->p_stats)
538 p->p_stats->p_ru.ru_oublock++; /* XXX */
539
540 /* Release the buffer. */
541 brelse(bp);
542
543 return (rv);
544 } else {
545 return (0);
546 }
547 }
548
549 int
550 vn_bwrite(ap)
551 struct vop_bwrite_args *ap;
552 {
553 return (bwrite(ap->a_bp));
554 }
555
556 /*
557 * Delayed write.
558 *
559 * The buffer is marked dirty, but is not queued for I/O.
560 * This routine should be used when the buffer is expected
561 * to be modified again soon, typically a small write that
562 * partially fills a buffer.
563 *
564 * NB: magnetic tapes cannot be delayed; they must be
565 * written in the order that the writes are requested.
566 *
567 * Described in Leffler, et al. (pp. 208-213).
568 */
569 void
570 bdwrite(bp)
571 struct buf *bp;
572 {
573 struct proc *p = current_proc();
574 kern_return_t kret;
575 upl_t upl;
576 upl_page_info_t *pl;
577
578 /*
579 * If the block hasn't been seen before:
 580 * (1) mark it as having been seen,
 581 * (2) charge for the write, and
 582 * (3) make sure it's on its vnode's correct block list.
583 */
584 if (!ISSET(bp->b_flags, B_DELWRI)) {
585 SET(bp->b_flags, B_DELWRI);
586 if (p && p->p_stats)
587 p->p_stats->p_ru.ru_oublock++; /* XXX */
588
589 reassignbuf(bp, bp->b_vp);
590 }
591
592
 593 /* If this is a tape block, write the block now. */
594 if (ISSET(bp->b_flags, B_TAPE)) {
595 /* bwrite(bp); */
596 VOP_BWRITE(bp);
597 return;
598 }
599
600 /* Otherwise, the "write" is done, so mark and release the buffer. */
601 SET(bp->b_flags, B_DONE);
602 brelse(bp);
603 }
604
605 /*
606 * Asynchronous block write; just an asynchronous bwrite().
607 */
608 void
609 bawrite(bp)
610 struct buf *bp;
611 {
612
613 SET(bp->b_flags, B_ASYNC);
614 VOP_BWRITE(bp);
615 }
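
/*
 * Illustrative summary of how the three write paths above behave for a
 * buffer obtained from bread() or getblk():
 *
 *	bwrite(bp);	synchronous: starts the I/O, biowait()s for it,
 *			releases the buffer and returns the error.
 *	bdwrite(bp);	delayed: marks the buffer B_DELWRI and releases it;
 *			the actual write is issued later (for example when
 *			bcleanbuf() reclaims the buffer).
 *	bawrite(bp);	asynchronous: starts the I/O and returns at once;
 *			biodone() releases the buffer on completion.
 */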
616
617 /*
618 * Release a buffer on to the free lists.
619 * Described in Bach (p. 46).
620 */
621 void
622 brelse(bp)
623 struct buf *bp;
624 {
625 struct bqueues *bufq;
626 int s;
627 long whichq;
628
629 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 388)) | DBG_FUNC_START,
630 bp->b_lblkno * PAGE_SIZE, bp, bp->b_data, bp->b_flags, 0);
631
632 trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno);
633
634 /* IO is done. Cleanup the UPL state */
635 if (!ISSET(bp->b_flags, B_META)
636 && UBCINFOEXISTS(bp->b_vp) && bp->b_bufsize) {
637 kern_return_t kret;
638 upl_t upl;
639 int upl_flags;
640
641 if ( !ISSET(bp->b_flags, B_PAGELIST)) {
642 if ( !ISSET(bp->b_flags, B_INVAL)) {
643 kret = ubc_create_upl(bp->b_vp,
644 ubc_blktooff(bp->b_vp, bp->b_lblkno),
645 bp->b_bufsize,
646 &upl,
647 NULL,
648 UPL_PRECIOUS);
649 if (kret != KERN_SUCCESS)
650 panic("brelse: Failed to get pagelists");
651 #ifdef UBC_DEBUG
652 upl_ubc_alias_set(upl, bp, 5);
653 #endif /* UBC_DEBUG */
654 } else
655 upl = (upl_t) 0;
656 } else {
657 upl = bp->b_pagelist;
658 kret = ubc_upl_unmap(upl);
659
660 if (kret != KERN_SUCCESS)
661 panic("kernel_upl_unmap failed");
662 bp->b_data = 0;
663 }
664 if (upl) {
665 if (bp->b_flags & (B_ERROR | B_INVAL)) {
666 if (bp->b_flags & (B_READ | B_INVAL))
667 upl_flags = UPL_ABORT_DUMP_PAGES;
668 else
669 upl_flags = 0;
670 ubc_upl_abort(upl, upl_flags);
671 } else {
672 if (ISSET(bp->b_flags, (B_DELWRI | B_WASDIRTY)))
673 upl_flags = UPL_COMMIT_SET_DIRTY ;
674 else
675 upl_flags = UPL_COMMIT_CLEAR_DIRTY ;
676 ubc_upl_commit_range(upl, 0, bp->b_bufsize, upl_flags |
677 UPL_COMMIT_INACTIVATE | UPL_COMMIT_FREE_ON_EMPTY);
678 }
679 s = splbio();
680 CLR(bp->b_flags, B_PAGELIST);
681 bp->b_pagelist = 0;
682 splx(s);
683 }
684 } else {
685 if(ISSET(bp->b_flags, B_PAGELIST))
686 panic("brelse: pagelist set for non VREG; vp=%x", bp->b_vp);
687 }
688
689 /* Wake up any processes waiting for any buffer to become free. */
690 if (needbuffer) {
691 needbuffer = 0;
692 wakeup(&needbuffer);
693 }
694
 695 /* Wake up any processes waiting for _this_ buffer to become free. */
696 if (ISSET(bp->b_flags, B_WANTED)) {
697 CLR(bp->b_flags, B_WANTED);
698 wakeup(bp);
699 }
700
701 /* Block disk interrupts. */
702 s = splbio();
703
704 /*
705 * Determine which queue the buffer should be on, then put it there.
706 */
707
708 /* If it's locked, don't report an error; try again later. */
709 if (ISSET(bp->b_flags, (B_LOCKED|B_ERROR)) == (B_LOCKED|B_ERROR))
710 CLR(bp->b_flags, B_ERROR);
711
712 /* If it's not cacheable, or an error, mark it invalid. */
713 if (ISSET(bp->b_flags, (B_NOCACHE|B_ERROR)))
714 SET(bp->b_flags, B_INVAL);
715
716 if ((bp->b_bufsize <= 0) || ISSET(bp->b_flags, B_INVAL)) {
717 /*
718 * If it's invalid or empty, dissociate it from its vnode
 719 * and put it on the head of the appropriate queue.
720 */
721 if (bp->b_vp)
722 brelvp(bp);
723 CLR(bp->b_flags, B_DELWRI);
724 if (bp->b_bufsize <= 0)
725 whichq = BQ_EMPTY; /* no data */
726 else
727 whichq = BQ_AGE; /* invalid data */
728
729 bufq = &bufqueues[whichq];
730 binsheadfree(bp, bufq, whichq);
731 } else {
732 /*
733 * It has valid data. Put it on the end of the appropriate
734 * queue, so that it'll stick around for as long as possible.
735 */
736 if (ISSET(bp->b_flags, B_LOCKED))
737 whichq = BQ_LOCKED; /* locked in core */
738 else if (ISSET(bp->b_flags, B_META))
739 whichq = BQ_META; /* meta-data */
740 else if (ISSET(bp->b_flags, B_AGE))
741 whichq = BQ_AGE; /* stale but valid data */
742 else
743 whichq = BQ_LRU; /* valid data */
744
745 bufq = &bufqueues[whichq];
746 binstailfree(bp, bufq, whichq);
747 }
748
749 /* Unlock the buffer. */
750 CLR(bp->b_flags, (B_AGE | B_ASYNC | B_BUSY | B_NOCACHE));
751
752 /* Allow disk interrupts. */
753 splx(s);
754
755 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 388)) | DBG_FUNC_END,
756 bp, bp->b_data, bp->b_flags, 0, 0);
757 }
758
759 /*
760 * Determine if a block is in the cache.
 761 * Just look at what would be its hash chain. If it's there, return
762 * a pointer to it, unless it's marked invalid. If it's marked invalid,
763 * we normally don't return the buffer, unless the caller explicitly
764 * wants us to.
765 */
766 struct buf *
767 incore(vp, blkno)
768 struct vnode *vp;
769 daddr_t blkno;
770 {
771 struct buf *bp;
772 int bufseen = 0;
773
774 bp = BUFHASH(vp, blkno)->lh_first;
775
776 /* Search hash chain */
777 for (; bp != NULL; bp = bp->b_hash.le_next, bufseen++) {
778 if (bp->b_lblkno == blkno && bp->b_vp == vp &&
779 !ISSET(bp->b_flags, B_INVAL))
780 return (bp);
781 if(bufseen >= nbuf)
782 panic("walked more than nbuf in incore");
783
784 }
785
786 return (0);
787 }
788
789 /* XXX FIXME -- Update the comment to reflect the UBC changes -- */
790 /*
791 * Get a block of requested size that is associated with
792 * a given vnode and block offset. If it is found in the
793 * block cache, mark it as having been found, make it busy
794 * and return it. Otherwise, return an empty block of the
 795 * correct size. It is up to the caller to ensure that the
 796 * cached blocks are of the correct size.
797 */
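/*
 * Illustrative sketch (hypothetical caller): the "operation" argument
 * selects how the buffer's storage is backed. BLK_READ and BLK_WRITE go
 * through the UBC pagelist for regular files, BLK_META uses kernel
 * (zone) allocated memory, and BLK_PAGEIN / BLK_PAGEOUT must not use
 * getblk() at all. A meta-data read, for example, looks like:
 *
 *	bp = getblk(devvp, metablk, 512, 0, 0, BLK_META);
 *	if (!ISSET(bp->b_flags, (B_DONE | B_DELWRI))) {
 *		SET(bp->b_flags, B_READ);
 *		VOP_STRATEGY(bp);
 *		error = biowait(bp);
 *	}
 *
 * which is essentially what bio_doread() / meta_bread() above already do;
 * "devvp" and "metablk" are assumed caller-supplied values.
 */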
798 struct buf *
799 getblk(vp, blkno, size, slpflag, slptimeo, operation)
800 register struct vnode *vp;
801 daddr_t blkno;
802 int size, slpflag, slptimeo, operation;
803 {
804 struct buf *bp;
805 int s, err;
806 upl_t upl;
807 upl_page_info_t *pl;
808 kern_return_t kret;
809 int error=0;
810 int pagedirty = 0;
811
812 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 386)) | DBG_FUNC_START,
813 blkno * PAGE_SIZE, size, operation, 0, 0);
814 start:
815
816 s = splbio();
817 if (bp = incore(vp, blkno)) {
818 /* Found in the Buffer Cache */
819 if (ISSET(bp->b_flags, B_BUSY)) {
820 /* but is busy */
821 switch (operation) {
822 case BLK_READ:
823 case BLK_WRITE:
824 case BLK_META:
825 SET(bp->b_flags, B_WANTED);
826 bufstats.bufs_busyincore++;
827 err = tsleep(bp, slpflag | (PRIBIO + 1), "getblk",
828 slptimeo);
829 splx(s);
830 /*
831 * Callers who call with PCATCH or timeout are
832 * willing to deal with the NULL pointer
833 */
834 if (err && ((slpflag & PCATCH) ||
835 ((err == EWOULDBLOCK) && slptimeo)))
836 return (NULL);
837 goto start;
838 /*NOTREACHED*/
839 break;
840
841 case BLK_PAGEIN:
842 /* pagein operation must not use getblk */
843 panic("getblk: pagein for incore busy buffer");
844 splx(s);
845 /*NOTREACHED*/
846 break;
847
848 case BLK_PAGEOUT:
849 /* pageout operation must not use getblk */
850 panic("getblk: pageout for incore busy buffer");
851 splx(s);
852 /*NOTREACHED*/
853 break;
854
855 default:
856 panic("getblk: %d unknown operation 1", operation);
857 /*NOTREACHED*/
858 break;
859 }
860 } else {
861 /* not busy */
862 SET(bp->b_flags, (B_BUSY | B_CACHE));
863 bremfree(bp);
864 bufstats.bufs_incore++;
865 splx(s);
866
867 allocbuf(bp, size);
868 if (ISSET(bp->b_flags, B_PAGELIST))
869 panic("pagelist buffer is not busy");
870
871 switch (operation) {
872 case BLK_READ:
873 case BLK_WRITE:
874 if (UBCISVALID(bp->b_vp) && bp->b_bufsize) {
875 kret = ubc_create_upl(vp,
876 ubc_blktooff(vp, bp->b_lblkno),
877 bp->b_bufsize,
878 &upl,
879 &pl,
880 UPL_PRECIOUS);
881 if (kret != KERN_SUCCESS)
882 panic("Failed to get pagelists");
883
884 SET(bp->b_flags, B_PAGELIST);
885 bp->b_pagelist = upl;
886
887 if ( !upl_valid_page(pl, 0))
888 panic("getblk: incore buffer without valid page");
889
890 if (upl_dirty_page(pl, 0))
891 SET(bp->b_flags, B_WASDIRTY);
892 else
893 CLR(bp->b_flags, B_WASDIRTY);
894
895 kret = ubc_upl_map(upl, (vm_address_t *)&(bp->b_data));
896 if (kret != KERN_SUCCESS) {
897 panic("getblk: ubc_upl_map() failed with (%d)",
898 kret);
899 }
900 if (bp->b_data == 0) panic("ubc_upl_map mapped 0");
901 }
902 break;
903
904 case BLK_META:
905 /*
 906 * VM is not involved in I/O for the meta-data;
 907 * the buffer already has valid data.
908 */
909 if(bp->b_data == 0)
910 panic("bp->b_data null incore buf=%x", bp);
911 break;
912
913 case BLK_PAGEIN:
914 case BLK_PAGEOUT:
915 panic("getblk: paging operation 1");
916 break;
917
918 default:
919 panic("getblk: %d unknown operation 2", operation);
920 /*NOTREACHED*/
921 break;
922 }
923 }
924 } else { /* not incore() */
925 int queue = BQ_EMPTY; /* Start with no preference */
926 splx(s);
927
928 if ((operation == BLK_META) || (UBCINVALID(vp)) ||
929 !(UBCINFOEXISTS(vp))) {
930 operation = BLK_META;
931 }
932 if ((bp = getnewbuf(slpflag, slptimeo, &queue)) == NULL)
933 goto start;
934 if (incore(vp, blkno)) {
935 SET(bp->b_flags, B_INVAL);
936 binshash(bp, &invalhash);
937 brelse(bp);
938 goto start;
939 }
940
941 /*
 942 * If it is meta-data, the queue may have been set to another
 943 * type, so reset it and mark the buffer B_META so that when
 944 * the buffer is released it will go to the META queue.
 945 * Also, if the vnode is not VREG, then it is meta-data.
946 */
947 if (operation == BLK_META) {
948 SET(bp->b_flags, B_META);
949 queue = BQ_META;
950 }
951 /*
952 * Insert in the hash so that incore() can find it
953 */
954 binshash(bp, BUFHASH(vp, blkno));
955
956 allocbuf(bp, size);
957
958 switch (operation) {
959 case BLK_META:
960 /* buffer data is invalid */
961
962 #if !ZALLOC_METADATA
963 if (bp->b_data)
964 panic("bp->b_data is not nul; %x",bp);
965 kret = kmem_alloc(kernel_map,
966 &bp->b_data, bp->b_bufsize);
967 if (kret != KERN_SUCCESS)
968 panic("getblk: kmem_alloc() returned %d", kret);
969 #endif /* ZALLOC_METADATA */
970
971 if(bp->b_data == 0)
972 panic("bp->b_data is null %x",bp);
973
974 bp->b_blkno = bp->b_lblkno = blkno;
975 s = splbio();
976 bgetvp(vp, bp);
977 bufstats.bufs_miss++;
978 splx(s);
979 if (bp->b_data == 0)
980 panic("b_data is 0: 2");
981
982 /* wakeup the buffer */
983 CLR(bp->b_flags, B_WANTED);
984 wakeup(bp);
985 break;
986
987 case BLK_READ:
988 case BLK_WRITE:
989
990 if (ISSET(bp->b_flags, B_PAGELIST))
991 panic("B_PAGELIST in bp=%x",bp);
992
993 kret = ubc_create_upl(vp,
994 ubc_blktooff(vp, blkno),
995 bp->b_bufsize,
996 &upl,
997 &pl,
998 UPL_PRECIOUS);
999 if (kret != KERN_SUCCESS)
1000 panic("Failed to get pagelists");
1001
1002 #ifdef UBC_DEBUG
1003 upl_ubc_alias_set(upl, bp, 4);
1004 #endif /* UBC_DEBUG */
1005 bp->b_blkno = bp->b_lblkno = blkno;
1006 bp->b_pagelist = upl;
1007
1008 SET(bp->b_flags, B_PAGELIST);
1009
1010 if (upl_valid_page(pl, 0)) {
1011 SET(bp->b_flags, B_CACHE | B_DONE);
1012 bufstats.bufs_vmhits++;
1013
1014 pagedirty = upl_dirty_page(pl, 0);
1015
1016 if (pagedirty)
1017 SET(bp->b_flags, B_WASDIRTY);
1018
1019 if (vp->v_tag == VT_NFS) {
1020 off_t f_offset;
1021 int valid_size;
1022
1023 bp->b_validoff = 0;
1024 bp->b_dirtyoff = 0;
1025
1026 f_offset = ubc_blktooff(vp, blkno);
1027
1028 if (f_offset > vp->v_ubcinfo->ui_size) {
1029 CLR(bp->b_flags, (B_CACHE|B_DONE|B_WASDIRTY));
1030 bp->b_validend = 0;
1031 bp->b_dirtyend = 0;
1032 } else {
1033 valid_size = min(((unsigned int)(vp->v_ubcinfo->ui_size - f_offset)), PAGE_SIZE);
1034 bp->b_validend = valid_size;
1035
1036 if (pagedirty)
1037 bp->b_dirtyend = valid_size;
1038 else
1039 bp->b_dirtyend = 0;
1040
1041 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 386)) | DBG_FUNC_NONE,
1042 bp->b_validend, bp->b_dirtyend,
1043 (int)vp->v_ubcinfo->ui_size, 0, 0);
1044 }
1045 } else {
1046 bp->b_validoff = 0;
1047 bp->b_dirtyoff = 0;
1048
1049 if (pagedirty) {
1050 /* page is dirty */
1051 bp->b_validend = bp->b_bcount;
1052 bp->b_dirtyend = bp->b_bcount;
1053 } else {
1054 /* page is clean */
1055 bp->b_validend = bp->b_bcount;
1056 bp->b_dirtyend = 0;
1057 }
1058 }
1059 if (error = VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL)) {
1060 panic("VOP_BMAP failed in getblk");
1061 /*NOTREACHED*/
1062 /*
1063 * XXX: We probably should invalidate the VM Page
1064 */
1065 bp->b_error = error;
1066 SET(bp->b_flags, (B_ERROR | B_INVAL));
1067 /* undo B_DONE that was set before upl_commit() */
1068 CLR(bp->b_flags, B_DONE);
1069 brelse(bp);
1070 return (0);
1071 }
1072 } else {
1073 bufstats.bufs_miss++;
1074 }
1075 kret = ubc_upl_map(upl, (vm_address_t *)&(bp->b_data));
1076 if (kret != KERN_SUCCESS) {
1077 panic("getblk: ubc_upl_map() "
1078 "failed with (%d)", kret);
1079 }
1080 if (bp->b_data == 0) panic("kernel_upl_map mapped 0");
1081
1082 s = splbio();
1083 bgetvp(vp, bp);
1084 splx(s);
1085
1086 break;
1087
1088 case BLK_PAGEIN:
1089 case BLK_PAGEOUT:
1090 panic("getblk: paging operation 2");
1091 break;
1092 default:
1093 panic("getblk: %d unknown operation 3", operation);
1094 /*NOTREACHED*/
1095 break;
1096 }
1097 }
1098
1099 if (bp->b_data == NULL)
1100 panic("getblk: bp->b_addr is null");
1101
1102 if (bp->b_bufsize & 0xfff) {
1103 #if ZALLOC_METADATA
1104 if (ISSET(bp->b_flags, B_META) && (bp->b_bufsize & 0x1ff))
1105 #endif /* ZALLOC_METADATA */
1106 panic("getblk: bp->b_bufsize = %d", bp->b_bufsize);
1107 }
1108
1109 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 386)) | DBG_FUNC_END,
1110 bp, bp->b_data, bp->b_flags, 3, 0);
1111
1112 return (bp);
1113 }
1114
1115 /*
1116 * Get an empty, disassociated buffer of given size.
1117 */
1118 struct buf *
1119 geteblk(size)
1120 int size;
1121 {
1122 struct buf *bp;
1123 int queue = BQ_EMPTY;
1124 #if !ZALLOC_METADATA
1125 kern_return_t kret;
1126 vm_size_t desired_size = roundup(size, CLBYTES);
1127
1128 if (desired_size > MAXBSIZE)
1129 panic("geteblk: buffer larger than MAXBSIZE requested");
1130 #endif /* ZALLOC_METADATA */
1131
1132 while ((bp = getnewbuf(0, 0, &queue)) == 0)
1133 ;
1134 #if ZALLOC_METADATA
1135 SET(bp->b_flags, (B_META|B_INVAL));
1136 #else
1137 SET(bp->b_flags, B_INVAL);
1138 #endif /* ZALLOC_METADATA */
1139
1140 #if DIAGNOSTIC
1141 assert(queue == BQ_EMPTY);
1142 #endif /* DIAGNOSTIC */
1143 /* XXX need to implement logic to deal with other queues */
1144
1145 #if !ZALLOC_METADATA
1146 /* Empty buffer - allocate pages */
1147 kret = kmem_alloc_aligned(kernel_map, &bp->b_data, desired_size);
1148 if (kret != KERN_SUCCESS)
1149 panic("geteblk: kmem_alloc_aligned returned %d", kret);
1150 #endif /* ZALLOC_METADATA */
1151
1152 binshash(bp, &invalhash);
1153 allocbuf(bp, size);
1154 bufstats.bufs_eblk++;
1155
1156 return (bp);
1157 }
1158
1159 #if ZALLOC_METADATA
1160 /*
1161 * Zones for the meta data buffers
1162 */
1163
1164 #define MINMETA 512
1165 #define MAXMETA 4096
1166
1167 struct meta_zone_entry {
1168 zone_t mz_zone;
1169 vm_size_t mz_size;
1170 vm_size_t mz_max;
1171 char *mz_name;
1172 };
1173
1174 struct meta_zone_entry meta_zones[] = {
1175 {NULL, (MINMETA * 1), 128 * (MINMETA * 1), "buf.512" },
1176 {NULL, (MINMETA * 2), 64 * (MINMETA * 2), "buf.1024" },
1177 {NULL, (MINMETA * 3), 16 * (MINMETA * 3), "buf.1536" },
1178 {NULL, (MINMETA * 4), 16 * (MINMETA * 4), "buf.2048" },
1179 {NULL, (MINMETA * 5), 16 * (MINMETA * 5), "buf.2560" },
1180 {NULL, (MINMETA * 6), 16 * (MINMETA * 6), "buf.3072" },
1181 {NULL, (MINMETA * 7), 16 * (MINMETA * 7), "buf.3584" },
1182 {NULL, (MINMETA * 8), 512 * (MINMETA * 8), "buf.4096" },
1183 {NULL, 0, 0, "" } /* End */
1184 };
1185
1186 /*
1187 * Initialize the meta data zones
1188 */
1189 static void
1190 bufzoneinit(void)
1191 {
1192 int i;
1193
1194 for (i = 0; meta_zones[i].mz_size != 0; i++) {
1195 meta_zones[i].mz_zone =
1196 zinit(meta_zones[i].mz_size,
1197 meta_zones[i].mz_max,
1198 PAGE_SIZE,
1199 meta_zones[i].mz_name);
1200 }
1201 }
1202
1203 static zone_t
1204 getbufzone(size_t size)
1205 {
1206 int i;
1207
1208 if (size % 512)
1209 panic("getbufzone: incorect size = %d", size);
1210
1211 i = (size / 512) - 1;
1212 return (meta_zones[i].mz_zone);
1213 }
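
/*
 * Illustrative example of the mapping above: allocbuf() first rounds a
 * meta-data request up to a multiple of MINMETA (512), so a request for
 * 1700 bytes (an assumed caller value) becomes:
 *
 *	nsize = roundup(1700, MINMETA);		nsize == 2048
 *	z = getbufzone(nsize);			meta_zones[3], i.e. "buf.2048"
 *
 * Requests larger than MAXMETA (4096) bypass the zones and are served
 * by kmem_alloc() in allocbuf() instead.
 */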
1214 #endif /* ZALLOC_METADATA */
1215
1216 /*
1217 * With UBC, there is no need to expand / shrink the file data
1218 * buffer. The VM uses the same pages, hence no waste.
1219 * All the file data buffers can have one size.
 1220 * In fact, expanding / shrinking would be an expensive operation.
 1221 *
 1222 * The only exception to this is meta-data buffers. Most
 1223 * meta-data operations are smaller than PAGE_SIZE. Having the
 1224 * meta-data buffers grow and shrink as needed optimizes use
 1225 * of the kernel wired memory.
1226 */
1227
1228 int
1229 allocbuf(bp, size)
1230 struct buf *bp;
1231 int size;
1232 {
1233 vm_size_t desired_size;
1234
1235 desired_size = roundup(size, CLBYTES);
1236
1237 if(desired_size < PAGE_SIZE)
1238 desired_size = PAGE_SIZE;
1239 if (desired_size > MAXBSIZE)
1240 panic("allocbuf: buffer larger than MAXBSIZE requested");
1241
1242 #if ZALLOC_METADATA
1243 if (ISSET(bp->b_flags, B_META)) {
1244 kern_return_t kret;
1245 zone_t zprev, z;
1246 size_t nsize = roundup(size, MINMETA);
1247
1248 if (bp->b_data) {
1249 vm_offset_t elem = (vm_offset_t)bp->b_data;
1250
1251 if (ISSET(bp->b_flags, B_ZALLOC))
1252 if (bp->b_bufsize <= MAXMETA) {
1253 if (bp->b_bufsize < nsize) {
1254 /* reallocate to a bigger size */
1255 desired_size = nsize;
1256
1257 zprev = getbufzone(bp->b_bufsize);
1258 z = getbufzone(nsize);
1259 bp->b_data = (caddr_t)zalloc(z);
1260 if(bp->b_data == 0)
1261 panic("allocbuf: zalloc() returned NULL");
1262 bcopy(elem, bp->b_data, bp->b_bufsize);
1263 zfree(zprev, elem);
1264 } else {
1265 desired_size = bp->b_bufsize;
1266 }
1267 } else
1268 panic("allocbuf: B_ZALLOC set incorrectly");
1269 else
1270 if (bp->b_bufsize < desired_size) {
1271 /* reallocate to a bigger size */
1272 kret = kmem_alloc(kernel_map, &bp->b_data, desired_size);
1273 if (kret != KERN_SUCCESS)
1274 panic("allocbuf: kmem_alloc() returned %d", kret);
1275 if(bp->b_data == 0)
1276 panic("allocbuf: null b_data");
1277 bcopy(elem, bp->b_data, bp->b_bufsize);
1278 kmem_free(kernel_map, elem, bp->b_bufsize);
1279 } else {
1280 desired_size = bp->b_bufsize;
1281 }
1282 } else {
1283 /* new allocation */
1284 if (nsize <= MAXMETA) {
1285 desired_size = nsize;
1286 z = getbufzone(nsize);
1287 bp->b_data = (caddr_t)zalloc(z);
1288 if(bp->b_data == 0)
1289 panic("allocbuf: zalloc() returned NULL 2");
1290 SET(bp->b_flags, B_ZALLOC);
1291 } else {
1292 kret = kmem_alloc(kernel_map, &bp->b_data, desired_size);
1293 if (kret != KERN_SUCCESS)
1294 panic("allocbuf: kmem_alloc() 2 returned %d", kret);
1295 if(bp->b_data == 0)
1296 panic("allocbuf: null b_data 2");
1297 }
1298 }
1299 }
1300
1301 if (ISSET(bp->b_flags, B_META) && (bp->b_data == 0))
1302 panic("allocbuf: bp->b_data is NULL");
1303 #endif /* ZALLOC_METADATA */
1304
1305 bp->b_bufsize = desired_size;
1306 bp->b_bcount = size;
1307 }
1308
1309 /*
1310 * Get a new buffer from one of the free lists.
1311 *
 1312 * A request for a queue is passed in. The queue from which the buffer was
 1313 * taken is returned. Out of range queue requests get BQ_EMPTY. A request for
 1314 * BQUEUES means no preference; use heuristics in that case.
 1315 * The heuristic is as follows:
 1316 * Try BQ_AGE, BQ_LRU, BQ_EMPTY, BQ_META in that order.
 1317 * If none are available, block until one is made available.
 1318 * If buffers are available on both BQ_AGE and BQ_LRU, check the timestamps.
 1319 * Pick the most stale buffer.
 1320 * If the found buffer was marked delayed write, start the async write
 1321 * and restart the search.
1322 * Initialize the fields and disassociate the buffer from the vnode.
1323 * Remove the buffer from the hash. Return the buffer and the queue
1324 * on which it was found.
1325 */
1326
1327 static struct buf *
1328 getnewbuf(slpflag, slptimeo, queue)
1329 int slpflag, slptimeo;
1330 int *queue;
1331 {
1332 register struct buf *bp;
1333 register struct buf *lru_bp;
1334 register struct buf *age_bp;
1335 register struct buf *meta_bp;
1336 register int age_time, lru_time, bp_time, meta_time;
1337 int s;
1338 struct ucred *cred;
1339 int req = *queue; /* save it for restarts */
1340
1341 start:
1342 s = splbio();
1343
1344 /* invalid request gets empty queue */
1345 if ((*queue > BQUEUES) || (*queue < 0))
1346 *queue = BQ_EMPTY;
1347
1348 /* (*queue == BQUEUES) means no preference */
1349 if (*queue != BQUEUES) {
1350 /* Try for the requested queue first */
1351 bp = bufqueues[*queue].tqh_first;
1352 if (bp)
1353 goto found;
1354 }
1355
1356 /* Unable to use requested queue */
1357 age_bp = bufqueues[BQ_AGE].tqh_first;
1358 lru_bp = bufqueues[BQ_LRU].tqh_first;
1359 meta_bp = bufqueues[BQ_META].tqh_first;
1360
1361 if (!age_bp && !lru_bp && !meta_bp) { /* Unavailble on AGE or LRU */
1362 /* Try the empty list first */
1363 bp = bufqueues[BQ_EMPTY].tqh_first;
1364 if (bp) {
1365 *queue = BQ_EMPTY;
1366 goto found;
1367 }
1368 #if DIAGNOSTIC
1369 /* with UBC this is a fatal condition */
1370 panic("getnewbuf: No useful buffers");
1371 #else
1372 /* Log this error condition */
1373 printf("getnewbuf: No useful buffers");
1374 #endif /* DIAGNOSTIC */
1375
1376 /* wait for a free buffer of any kind */
1377 needbuffer = 1;
1378 bufstats.bufs_sleeps++;
1379 tsleep(&needbuffer, slpflag|(PRIBIO+1), "getnewbuf", slptimeo);
1380 splx(s);
1381 return (0);
1382 }
1383
1384 /* Buffer available either on AGE or LRU or META */
1385 bp = NULL;
1386 *queue = -1;
1387
1388 /* Buffer available either on AGE or LRU */
1389 if (!age_bp) {
1390 bp = lru_bp;
1391 *queue = BQ_LRU;
1392 } else if (!lru_bp) {
1393 bp = age_bp;
1394 *queue = BQ_AGE;
1395 } else { /* buffer available on both AGE and LRU */
1396 age_time = time.tv_sec - age_bp->b_timestamp;
1397 lru_time = time.tv_sec - lru_bp->b_timestamp;
1398 if ((age_time < 0) || (lru_time < 0)) { /* time set backwards */
1399 bp = age_bp;
1400 *queue = BQ_AGE;
1401 /*
 1402 * we should probably re-timestamp everything in the
1403 * queues at this point with the current time
1404 */
1405 } else {
1406 if ((lru_time >= lru_is_stale) && (age_time < age_is_stale)) {
1407 bp = lru_bp;
1408 *queue = BQ_LRU;
1409 } else {
1410 bp = age_bp;
1411 *queue = BQ_AGE;
1412 }
1413 }
1414 }
1415
1416 if (!bp) { /* Neither on AGE nor on LRU */
1417 bp = meta_bp;
1418 *queue = BQ_META;
1419 } else if (meta_bp) {
1420 bp_time = time.tv_sec - bp->b_timestamp;
1421 meta_time = time.tv_sec - meta_bp->b_timestamp;
1422
1423 if (!(bp_time < 0) && !(meta_time < 0)) {
1424 /* time not set backwards */
1425 int bp_is_stale;
1426 bp_is_stale = (*queue == BQ_LRU) ?
1427 lru_is_stale : age_is_stale;
1428
1429 if ((meta_time >= meta_is_stale) &&
1430 (bp_time < bp_is_stale)) {
1431 bp = meta_bp;
1432 *queue = BQ_META;
1433 }
1434 }
1435 }
1436
1437 if (bp == NULL)
1438 panic("getnewbuf: null bp");
1439
1440 found:
1441 if (bp->b_hash.le_prev == (struct buf **)0xdeadbeef)
1442 panic("getnewbuf: le_prev is deadbeef");
1443
1444 if(ISSET(bp->b_flags, B_BUSY))
1445 panic("getnewbuf reusing BUSY buf");
1446
1447 /* Clean it */
1448 if (bcleanbuf(bp)) {
1449 /* bawrite() issued, buffer not ready */
1450 splx(s);
1451 *queue = req;
1452 goto start;
1453 }
1454 splx(s);
1455 return (bp);
1456 }
1457 #include <mach/mach_types.h>
1458 #include <mach/memory_object_types.h>
1459
1460 /*
1461 * Clean a buffer.
 1462 * Returns 0 if the buffer is ready to use;
 1463 * returns 1 if a bawrite() was issued to indicate
 1464 * that the buffer is not ready.
1465 */
1466 int
1467 bcleanbuf(struct buf *bp)
1468 {
1469 int s;
1470 struct ucred *cred;
1471
1472 s = splbio();
1473
1474 /* Remove from the queue */
1475 bremfree(bp);
1476
1477 /* Buffer is no longer on free lists. */
1478 SET(bp->b_flags, B_BUSY);
1479
1480 if (bp->b_hash.le_prev == (struct buf **)0xdeadbeef)
1481 panic("bcleanbuf: le_prev is deadbeef");
1482
1483 /* If buffer was a delayed write, start it, and return 1 */
1484 if (ISSET(bp->b_flags, B_DELWRI)) {
1485 splx(s);
1486 bawrite (bp);
1487 return (1);
1488 }
1489
1490 if (bp->b_vp)
1491 brelvp(bp);
1492 bremhash(bp);
1493 BLISTNONE(bp);
1494
1495 splx(s);
1496
1497 if (ISSET(bp->b_flags, B_META)) {
1498 #if ZALLOC_METADATA
1499 vm_offset_t elem = (vm_offset_t)bp->b_data;
1500 if (elem == 0)
1501 panic("bcleanbuf: NULL bp->b_data B_META buffer");
1502
1503 if (ISSET(bp->b_flags, B_ZALLOC)) {
1504 if (bp->b_bufsize <= MAXMETA) {
1505 zone_t z;
1506
1507 z = getbufzone(bp->b_bufsize);
1508 bp->b_data = (caddr_t)0xdeadbeef;
1509 zfree(z, elem);
1510 CLR(bp->b_flags, B_ZALLOC);
1511 } else
1512 panic("bcleanbuf: B_ZALLOC set incorrectly");
1513 } else {
1514 bp->b_data = (caddr_t)0xdeadbeef;
1515 kmem_free(kernel_map, elem, bp->b_bufsize);
1516 }
1517 #else
1518 if (bp->b_data == 0)
1519 panic("bcleanbuf: bp->b_data == NULL for B_META buffer");
1520
1521 kmem_free(kernel_map, bp->b_data, bp->b_bufsize);
1522 #endif /* ZALLOC_METADATA */
1523 }
1524
1525 trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno);
1526
1527 /* disassociate us from our vnode, if we had one... */
1528 s = splbio();
1529
1530 /* clear out various other fields */
1531 bp->b_bufsize = 0;
1532 bp->b_data = 0;
1533 bp->b_flags = B_BUSY;
1534 bp->b_dev = NODEV;
1535 bp->b_blkno = bp->b_lblkno = 0;
1536 bp->b_iodone = 0;
1537 bp->b_error = 0;
1538 bp->b_resid = 0;
1539 bp->b_bcount = 0;
1540 bp->b_dirtyoff = bp->b_dirtyend = 0;
1541 bp->b_validoff = bp->b_validend = 0;
1542
1543 /* nuke any credentials we were holding */
1544 cred = bp->b_rcred;
1545 if (cred != NOCRED) {
1546 bp->b_rcred = NOCRED;
1547 crfree(cred);
1548 }
1549 cred = bp->b_wcred;
1550 if (cred != NOCRED) {
1551 bp->b_wcred = NOCRED;
1552 crfree(cred);
1553 }
1554 splx(s);
1555 return (0);
1556 }
1557
1558
1559 /*
1560 * Wait for operations on the buffer to complete.
1561 * When they do, extract and return the I/O's error value.
1562 */
1563 int
1564 biowait(bp)
1565 struct buf *bp;
1566 {
1567 upl_t upl;
1568 upl_page_info_t *pl;
1569 int s;
1570 kern_return_t kret;
1571
1572 s = splbio();
1573 while (!ISSET(bp->b_flags, B_DONE))
1574 tsleep(bp, PRIBIO + 1, "biowait", 0);
1575 splx(s);
1576
1577 /* check for interruption of I/O (e.g. via NFS), then errors. */
1578 if (ISSET(bp->b_flags, B_EINTR)) {
1579 CLR(bp->b_flags, B_EINTR);
1580 return (EINTR);
1581 } else if (ISSET(bp->b_flags, B_ERROR))
1582 return (bp->b_error ? bp->b_error : EIO);
1583 else
1584 return (0);
1585 }
1586
1587 /*
1588 * Mark I/O complete on a buffer.
1589 *
1590 * If a callback has been requested, e.g. the pageout
1591 * daemon, do so. Otherwise, awaken waiting processes.
1592 *
1593 * [ Leffler, et al., says on p.247:
1594 * "This routine wakes up the blocked process, frees the buffer
1595 * for an asynchronous write, or, for a request by the pagedaemon
1596 * process, invokes a procedure specified in the buffer structure" ]
1597 *
1598 * In real life, the pagedaemon (or other system processes) wants
 1599 * to do async stuff too, and doesn't want the buffer brelse()'d.
1600 * (for swap pager, that puts swap buffers on the free lists (!!!),
1601 * for the vn device, that puts malloc'd buffers on the free lists!)
1602 */
1603 void
1604 biodone(bp)
1605 struct buf *bp;
1606 {
1607 boolean_t funnel_state;
1608 int s;
1609
1610 funnel_state = thread_funnel_set(kernel_flock, TRUE);
1611
1612 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 387)) | DBG_FUNC_START,
1613 bp, bp->b_data, bp->b_flags, 0, 0);
1614
1615 if (ISSET(bp->b_flags, B_DONE))
1616 panic("biodone already");
1617 SET(bp->b_flags, B_DONE); /* note that it's done */
1618 /*
1619 * I/O was done, so don't believe
1620 * the DIRTY state from VM anymore
1621 */
1622 CLR(bp->b_flags, B_WASDIRTY);
1623
1624 if (!ISSET(bp->b_flags, B_READ) && !ISSET(bp->b_flags, B_RAW))
1625 vwakeup(bp); /* wake up reader */
1626
1627 if (ISSET(bp->b_flags, B_CALL)) { /* if necessary, call out */
1628 CLR(bp->b_flags, B_CALL); /* but note callout done */
1629 (*bp->b_iodone)(bp);
1630 } else if (ISSET(bp->b_flags, B_ASYNC)) /* if async, release it */
1631 brelse(bp);
1632 else { /* or just wakeup the buffer */
1633 CLR(bp->b_flags, B_WANTED);
1634 wakeup(bp);
1635 }
1636
1637 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 387)) | DBG_FUNC_END,
1638 bp, bp->b_data, bp->b_flags, 0, 0);
1639
1640 thread_funnel_set(kernel_flock, funnel_state);
1641 }
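
/*
 * Illustrative sketch (hypothetical caller): a subsystem that wants a
 * completion callback instead of sleeping in biowait() sets B_CALL and
 * b_iodone before issuing the I/O; biodone() above then invokes the
 * callback with the buffer.
 *
 *	bp->b_iodone = my_done_routine;		assumed callback
 *	SET(bp->b_flags, B_CALL);
 *	VOP_STRATEGY(bp);
 *
 * The callback, not biodone(), is then responsible for releasing or
 * reusing the buffer.
 */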
1642
1643 /*
1644 * Return a count of buffers on the "locked" queue.
1645 */
1646 int
1647 count_lock_queue()
1648 {
1649 register struct buf *bp;
1650 register int n = 0;
1651
1652 for (bp = bufqueues[BQ_LOCKED].tqh_first; bp;
1653 bp = bp->b_freelist.tqe_next)
1654 n++;
1655 return (n);
1656 }
1657
1658 /*
1659 * Return a count of 'busy' buffers. Used at the time of shutdown.
1660 */
1661 int
1662 count_busy_buffers()
1663 {
1664 register struct buf *bp;
1665 register int nbusy = 0;
1666
1667 for (bp = &buf[nbuf]; --bp >= buf; )
1668 if ((bp->b_flags & (B_BUSY|B_INVAL)) == B_BUSY)
1669 nbusy++;
1670 return (nbusy);
1671 }
1672
1673 #if 1 /*DIAGNOSTIC */
1674 /*
1675 * Print out statistics on the current allocation of the buffer pool.
1676 * Can be enabled to print out on every ``sync'' by setting "syncprt"
1677 * in vfs_syscalls.c using sysctl.
1678 */
1679 void
1680 vfs_bufstats()
1681 {
1682 int s, i, j, count;
1683 register struct buf *bp;
1684 register struct bqueues *dp;
1685 int counts[MAXBSIZE/CLBYTES+1];
1686 static char *bname[BQUEUES] = { "LOCKED", "LRU", "AGE", "EMPTY", "META" };
1687
1688 for (dp = bufqueues, i = 0; dp < &bufqueues[BQUEUES]; dp++, i++) {
1689 count = 0;
1690 for (j = 0; j <= MAXBSIZE/CLBYTES; j++)
1691 counts[j] = 0;
1692 s = splbio();
1693 for (bp = dp->tqh_first; bp; bp = bp->b_freelist.tqe_next) {
1694 counts[bp->b_bufsize/CLBYTES]++;
1695 count++;
1696 }
1697 splx(s);
1698 printf("%s: total-%d", bname[i], count);
1699 for (j = 0; j <= MAXBSIZE/CLBYTES; j++)
1700 if (counts[j] != 0)
1701 printf(", %d-%d", j * CLBYTES, counts[j]);
1702 printf("\n");
1703 }
1704 }
1705 #endif /* DIAGNOSTIC */
1706
1707 #define NRESERVEDIOBUFS 16
1708
1709 struct buf *
1710 alloc_io_buf(vp, priv)
1711 struct vnode *vp;
1712 int priv;
1713 {
1714 register struct buf *bp;
1715 int s;
1716
1717 s = splbio();
1718
1719 while (niobuf - NRESERVEDIOBUFS < bufstats.bufs_iobufinuse && !priv) {
1720 need_iobuffer = 1;
1721 bufstats.bufs_iobufsleeps++;
1722 (void) tsleep(&need_iobuffer, (PRIBIO+1), "alloc_io_buf", 0);
1723 }
1724
1725 while ((bp = iobufqueue.tqh_first) == NULL) {
1726 need_iobuffer = 1;
1727 bufstats.bufs_iobufsleeps++;
1728 (void) tsleep(&need_iobuffer, (PRIBIO+1), "alloc_io_buf1", 0);
1729 }
1730
1731 TAILQ_REMOVE(&iobufqueue, bp, b_freelist);
1732 bp->b_timestamp = 0;
1733
1734 /* clear out various fields */
1735 bp->b_flags = B_BUSY;
1736 bp->b_blkno = bp->b_lblkno = 0;
1737 bp->b_iodone = 0;
1738 bp->b_error = 0;
1739 bp->b_resid = 0;
1740 bp->b_bcount = 0;
1741 bp->b_bufsize = 0;
1742 bp->b_vp = vp;
1743
1744 if (vp->v_type == VBLK || vp->v_type == VCHR)
1745 bp->b_dev = vp->v_rdev;
1746 else
1747 bp->b_dev = NODEV;
1748 bufstats.bufs_iobufinuse++;
1749 if (bufstats.bufs_iobufinuse > bufstats.bufs_iobufmax)
1750 bufstats.bufs_iobufmax = bufstats.bufs_iobufinuse;
1751 splx(s);
1752
1753 return (bp);
1754 }
1755
1756 void
1757 free_io_buf(bp)
1758 struct buf *bp;
1759 {
1760 int s;
1761
1762 s = splbio();
1763 /* put buffer back on the head of the iobufqueue */
1764 bp->b_vp = NULL;
1765 bp->b_flags = B_INVAL;
1766
1767 binsheadfree(bp, &iobufqueue, -1);
1768
 1769 /* Wake up any processes waiting for an I/O buffer to become free. */
1770 if (need_iobuffer) {
1771 need_iobuffer = 0;
1772 wakeup(&need_iobuffer);
1773 }
1774 bufstats.bufs_iobufinuse--;
1775 splx(s);
1776 }
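
/*
 * Illustrative sketch (hypothetical cluster-I/O caller): an I/O buffer
 * header is borrowed from iobufqueue, pointed at the caller's data,
 * sent down via the strategy routine, and returned when the I/O is done.
 *
 *	bp = alloc_io_buf(vp, 0);		0 = not a privileged request
 *	bp->b_blkno = bp->b_lblkno = blkno;
 *	bp->b_bcount = bp->b_bufsize = io_size;
 *	bp->b_data = (caddr_t)io_addr;		assumed mapping of the pages
 *	SET(bp->b_flags, B_READ);
 *	VOP_STRATEGY(bp);
 *	error = biowait(bp);
 *	free_io_buf(bp);
 *
 * "blkno", "io_size" and "io_addr" are assumed caller-supplied values.
 */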
1777
1778
1779 /* not hookedup yet */
1780
1781 /* XXX move this to a separate file */
1782 /*
1783 * Dynamic Scaling of the Buffer Queues
1784 */
1785
1786 typedef long long blsize_t;
1787
1788 blsize_t MAXNBUF; /* initialize to (mem_size / PAGE_SIZE) */
1789 /* Global tunable limits */
1790 blsize_t nbufh; /* number of buffer headers */
1791 blsize_t nbuflow; /* minimum number of buffer headers required */
1792 blsize_t nbufhigh; /* maximum number of buffer headers allowed */
1793 blsize_t nbuftarget; /* preferred number of buffer headers */
1794
1795 /*
1796 * assertions:
1797 *
1798 * 1. 0 < nbuflow <= nbufh <= nbufhigh
1799 * 2. nbufhigh <= MAXNBUF
1800 * 3. 0 < nbuflow <= nbuftarget <= nbufhigh
1801 * 4. nbufh can not be set by sysctl().
1802 */
1803
1804 /* Per queue tunable limits */
1805
1806 struct bufqlim {
1807 blsize_t bl_nlow; /* minimum number of buffer headers required */
1808 blsize_t bl_num; /* number of buffer headers on the queue */
1809 blsize_t bl_nlhigh; /* maximum number of buffer headers allowed */
1810 blsize_t bl_target; /* preferred number of buffer headers */
1811 long bl_stale; /* Seconds after which a buffer is considered stale */
1812 } bufqlim[BQUEUES];
1813
1814 /*
1815 * assertions:
1816 *
1817 * 1. 0 <= bl_nlow <= bl_num <= bl_nlhigh
1818 * 2. bl_nlhigh <= MAXNBUF
1819 * 3. bufqlim[BQ_META].bl_nlow != 0
1820 * 4. bufqlim[BQ_META].bl_nlow > (number of possible concurrent
1821 * file system IO operations)
1822 * 5. bl_num can not be set by sysctl().
 1823 * 6. bl_nlhigh <= nbufhigh
1824 */
1825
1826 /*
1827 * Rationale:
1828 * ----------
 1829 * Defining blsize_t as long would permit 2^31 buffer headers per queue,
 1830 * which can describe (2^31 * PAGE_SIZE) bytes of memory per queue.
 1831 *
 1832 * These limits are exported by means of sysctl().
1833 * It was decided to define blsize_t as a 64 bit quantity.
1834 * This will make sure that we will not be required to change it
1835 * as long as we do not exceed 64 bit address space for the kernel.
1836 *
 1837 * The low and high parameters are initialized at compile time,
 1838 * and boot arguments can be used to override them; sysctl()
 1839 * does not change them. sysctl() can get all the values
 1840 * but can set only the target. num is the current level.
 1841 *
 1842 * Advantages of having a "bufqscan" thread doing the balancing are:
 1843 * It keeps enough bufs on BQ_EMPTY.
 1844 * getnewbuf() by default will always select a buffer from BQ_EMPTY;
 1845 * getnewbuf() performs best if a buffer is found there.
 1846 * Also, this minimizes the possibility of starting IO
 1847 * from getnewbuf(). That's a performance win, too.
 1848 *
 1849 * It localizes complex logic [balancing as well as time aging]
 1850 * to balancebufq().
 1851 *
 1852 * It simplifies getnewbuf() logic by eliminating the time aging code.
1853 */
1854
1855 /*
1856 * Algorithm:
1857 * -----------
 1858 * The goal of the dynamic scaling of the buffer queues is to keep
1859 * the size of the LRU close to bl_target. Buffers on a queue would
1860 * be time aged.
1861 *
1862 * There would be a thread which will be responsible for "balancing"
1863 * the buffer cache queues.
1864 *
1865 * The scan order would be: AGE, LRU, META, EMPTY.
1866 */
1867
1868 long bufqscanwait = 0;
1869
1870 extern void bufqscan_thread();
1871 extern int balancebufq(int q);
1872 extern int btrimempty(int n);
1873 extern int initbufqscan(void);
1874 extern int nextbufq(int q);
1875 extern void buqlimprt(int all);
1876
1877 void
1878 bufq_balance_thread_init()
1879 {
1880
1881 if (bufqscanwait++ == 0) {
1882 int i;
1883
 1884 /* Initialize globals */
1885 MAXNBUF = (mem_size / PAGE_SIZE);
1886 nbufh = nbuf;
1887 nbuflow = min(nbufh, 100);
1888 nbufhigh = min(MAXNBUF, max(nbufh, 2048));
1889 nbuftarget = (mem_size >> 5) / PAGE_SIZE;
1890 nbuftarget = max(nbuflow, nbuftarget);
1891 nbuftarget = min(nbufhigh, nbuftarget);
1892
1893 /*
1894 * Initialize the bufqlim
1895 */
1896
1897 /* LOCKED queue */
1898 bufqlim[BQ_LOCKED].bl_nlow = 0;
1899 bufqlim[BQ_LOCKED].bl_nlhigh = 32;
1900 bufqlim[BQ_LOCKED].bl_target = 0;
1901 bufqlim[BQ_LOCKED].bl_stale = 30;
1902
1903 /* LRU queue */
1904 bufqlim[BQ_LRU].bl_nlow = 0;
1905 bufqlim[BQ_LRU].bl_nlhigh = nbufhigh/4;
1906 bufqlim[BQ_LRU].bl_target = nbuftarget/4;
1907 bufqlim[BQ_LRU].bl_stale = LRU_IS_STALE;
1908
1909 /* AGE queue */
1910 bufqlim[BQ_AGE].bl_nlow = 0;
1911 bufqlim[BQ_AGE].bl_nlhigh = nbufhigh/4;
1912 bufqlim[BQ_AGE].bl_target = nbuftarget/4;
1913 bufqlim[BQ_AGE].bl_stale = AGE_IS_STALE;
1914
1915 /* EMPTY queue */
1916 bufqlim[BQ_EMPTY].bl_nlow = 0;
1917 bufqlim[BQ_EMPTY].bl_nlhigh = nbufhigh/4;
1918 bufqlim[BQ_EMPTY].bl_target = nbuftarget/4;
1919 bufqlim[BQ_EMPTY].bl_stale = 600000;
1920
1921 /* META queue */
1922 bufqlim[BQ_META].bl_nlow = 0;
1923 bufqlim[BQ_META].bl_nlhigh = nbufhigh/4;
1924 bufqlim[BQ_META].bl_target = nbuftarget/4;
1925 bufqlim[BQ_META].bl_stale = META_IS_STALE;
1926
1927 buqlimprt(1);
1928 }
1929
1930 /* create worker thread */
1931 kernel_thread(kernel_task, bufqscan_thread);
1932 }
1933
1934 /* The workloop for the buffer balancing thread */
1935 void
1936 bufqscan_thread()
1937 {
1938 boolean_t funnel_state;
1939 int moretodo = 0;
1940
1941 funnel_state = thread_funnel_set(kernel_flock, TRUE);
1942
1943 for(;;) {
1944 do {
1945 int q; /* buffer queue to process */
1946
1947 for (q = initbufqscan(); q; ) {
1948 moretodo |= balancebufq(q);
1949 q = nextbufq(q);
1950 }
1951 } while (moretodo);
1952
1953 #if 1 || DIAGNOSTIC
1954 vfs_bufstats();
1955 buqlimprt(0);
1956 #endif
1957 (void)tsleep((void *)&bufqscanwait, PRIBIO, "bufqscanwait", 60 * hz);
1958 moretodo = 0;
1959 }
1960
1961 (void) thread_funnel_set(kernel_flock, FALSE);
1962 }
1963
1964 /* Seed for the buffer queue balancing */
1965 int
1966 initbufqscan()
1967 {
1968 /* Start with AGE queue */
1969 return (BQ_AGE);
1970 }
1971
1972 /* Pick next buffer queue to balance */
1973 int
1974 nextbufq(int q)
1975 {
1976 int order[] = { BQ_AGE, BQ_LRU, BQ_META, BQ_EMPTY, 0 };
1977
1978 q++;
 1979 q %= sizeof(order) / sizeof(order[0]);
1980 return (order[q]);
1981 }
1982
1983 /* function to balance the buffer queues */
1984 int
1985 balancebufq(int q)
1986 {
1987 int moretodo = 0;
1988 int s = splbio();
1989 int n;
1990
1991 /* reject invalid q */
1992 if ((q < 0) || (q >= BQUEUES))
1993 goto out;
1994
1995 /* LOCKED queue MUST not be balanced */
1996 if (q == BQ_LOCKED)
1997 goto out;
1998
1999 n = (bufqlim[q].bl_num - bufqlim[q].bl_target);
2000
2001 /* If queue has less than target nothing more to do */
2002 if (n < 0)
2003 goto out;
2004
2005 if ( n > 8 ) {
2006 /* Balance only a small amount (12.5%) at a time */
2007 n >>= 3;
2008 }
2009
2010 /* EMPTY queue needs special handling */
2011 if (q == BQ_EMPTY) {
2012 moretodo |= btrimempty(n);
2013 goto out;
2014 }
2015
2016 for (; n > 0; n--) {
2017 struct buf *bp = bufqueues[q].tqh_first;
2018 if (!bp)
2019 break;
2020
2021 /* check if it's stale */
2022 if ((time.tv_sec - bp->b_timestamp) > bufqlim[q].bl_stale) {
2023 if (bcleanbuf(bp)) {
2024 /* bawrite() issued, bp not ready */
2025 moretodo = 1;
2026 } else {
2027 /* release the cleaned buffer to BQ_EMPTY */
2028 SET(bp->b_flags, B_INVAL);
2029 brelse(bp);
2030 }
2031 } else
2032 break;
2033 }
2034
2035 out:
2036 splx(s);
2037 return (moretodo);
2038 }
2039
2040 int
2041 btrimempty(int n)
2042 {
2043 /*
 2044 * When struct bufs are allocated dynamically, this would
 2045 * reclaim up to 'n' struct bufs from the empty queue.
2046 */
2047
2048 return (0);
2049 }
2050
2051 void
2052 bufqinc(int q)
2053 {
2054 if ((q < 0) || (q >= BQUEUES))
2055 return;
2056
2057 bufqlim[q].bl_num++;
2058 return;
2059 }
2060
2061 void
2062 bufqdec(int q)
2063 {
2064 if ((q < 0) || (q >= BQUEUES))
2065 return;
2066
2067 bufqlim[q].bl_num--;
2068 return;
2069 }
2070
2071 void
2072 buqlimprt(int all)
2073 {
2074 int i;
2075 static char *bname[BQUEUES] = { "LOCKED", "LRU", "AGE", "EMPTY", "META" };
2076
2077 if (all)
2078 for (i = 0; i < BQUEUES; i++) {
2079 printf("%s : ", bname[i]);
2080 printf("min = %d, ", (long)bufqlim[i].bl_nlow);
2081 printf("cur = %d, ", (long)bufqlim[i].bl_num);
2082 printf("max = %d, ", (long)bufqlim[i].bl_nlhigh);
2083 printf("target = %d, ", (long)bufqlim[i].bl_target);
2084 printf("stale after %d seconds\n", bufqlim[i].bl_stale);
2085 }
2086 else
2087 for (i = 0; i < BQUEUES; i++) {
2088 printf("%s : ", bname[i]);
2089 printf("cur = %d, ", (long)bufqlim[i].bl_num);
2090 }
2091 }