static struct buf *getnewbuf(int slpflag, int slptimeo, int *queue);
extern int niobuf; /* The number of IO buffer headers for cluster IO */
+int blaundrycnt;
#if TRACE
struct proc *traceproc;
#define BHASHENTCHECK(bp) \
if ((bp)->b_hash.le_prev != (struct buf **)0xdeadbeef) \
- panic("%x: b_hash.le_prev is deadb", (bp));
+ panic("%x: b_hash.le_prev is not deadbeef", (bp));
#define BLISTNONE(bp) \
(bp)->b_hash.le_next = (struct buf *)0; \
bp->b_timestamp = 0;
}
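+/*
+ * Common initialization for a buffer header: zero it and mark it
+ * invalid, with no device, credentials, or vnode-list membership.
+ */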
+static __inline__ void
+bufhdrinit(struct buf *bp)
+{
+ bzero((char *)bp, sizeof *bp);
+ bp->b_dev = NODEV;
+ bp->b_rcred = NOCRED;
+ bp->b_wcred = NOCRED;
+ bp->b_vnbufs.le_next = NOLIST;
+ bp->b_flags = B_INVAL;
+
+ return;
+}
+
/*
* Initialize buffers and hash links for buffers.
*/
register int i;
int metabuf;
long whichq;
-#if ZALLOC_METADATA
static void bufzoneinit();
-#endif /* ZALLOC_METADATA */
+ static void bcleanbuf_thread_init();
/* Initialize the buffer queues ('freelists') and the hash table */
for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++)
/* Initialize the buffer headers */
for (i = 0; i < nbuf; i++) {
bp = &buf[i];
- bzero((char *)bp, sizeof *bp);
- bp->b_dev = NODEV;
- bp->b_rcred = NOCRED;
- bp->b_wcred = NOCRED;
- bp->b_vnbufs.le_next = NOLIST;
- bp->b_flags = B_INVAL;
+ bufhdrinit(bp);
+
/*
* metabuf buffer headers on the meta-data list and
* rest of the buffer headers on the empty list
*/
- if (--metabuf )
+ if (--metabuf)
whichq = BQ_META;
else
whichq = BQ_EMPTY;
for (; i < nbuf + niobuf; i++) {
bp = &buf[i];
- bzero((char *)bp, sizeof *bp);
- bp->b_dev = NODEV;
- bp->b_rcred = NOCRED;
- bp->b_wcred = NOCRED;
- bp->b_vnbufs.le_next = NOLIST;
- bp->b_flags = B_INVAL;
+ bufhdrinit(bp);
binsheadfree(bp, &iobufqueue, -1);
}
printf("using %d buffer headers and %d cluster IO buffer headers\n",
nbuf, niobuf);
-#if ZALLOC_METADATA
- /* Set up zones for meta-data */
+ /* Set up zones used by the buffer cache */
bufzoneinit();
-#endif
-#if XXX
+	/* start the bcleanbuf_thread() worker thread */
+ bcleanbuf_thread_init();
+
+#if 0 /* notyet */
/* create a thread to do dynamic buffer queue balancing */
bufq_balance_thread_init();
-#endif /* XXX */
+#endif /* notyet */
/* Start I/O for the buffer (keeping credentials). */
SET(bp->b_flags, B_READ | async);
if (cred != NOCRED && bp->b_rcred == NOCRED) {
- crhold(cred);
- bp->b_rcred = cred;
+		/*
+		 * NFS has an embedded ucred, so crhold() here would
+		 * cause zone corruption; take a copy with crdup() instead.
+		 */
+ bp->b_rcred = crdup(cred);
}
VOP_STRATEGY(bp);
&& UBCINFOEXISTS(bp->b_vp) && bp->b_bufsize) {
kern_return_t kret;
upl_t upl;
- upl_page_info_t *pl;
int upl_flags;
if ( !ISSET(bp->b_flags, B_PAGELIST)) {
if ( !ISSET(bp->b_flags, B_INVAL)) {
- void *object;
- off_t file_offset;
-
- object = ubc_getobject(bp->b_vp, UBC_NOREACTIVATE);
- if (object == (void *)NULL)
- panic("vmobject for vp is null");
- if (bp->b_bufsize & 0xfff)
- panic("list request is with less than 4k");
-
- file_offset = ubc_blktooff(bp->b_vp, bp->b_lblkno);
-
- kret = vm_fault_list_request(object,
- (vm_object_offset_t)file_offset, bp->b_bufsize,
- &upl, NULL, 0,
- (UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_PRECIOUS
- | UPL_SET_INTERNAL));
+ kret = ubc_create_upl(bp->b_vp,
+ ubc_blktooff(bp->b_vp, bp->b_lblkno),
+ bp->b_bufsize,
+ &upl,
+ NULL,
+ UPL_PRECIOUS);
if (kret != KERN_SUCCESS)
panic("brelse: Failed to get pagelists");
#ifdef UBC_DEBUG
upl_ubc_alias_set(upl, bp, 5);
#endif /* UBC_DEBUG */
} else
- upl = (upl_t) 0;
+ upl = (upl_t) 0;
} else {
- upl = bp->b_pagelist;
- kret = kernel_upl_unmap(kernel_map, upl);
+ upl = bp->b_pagelist;
+ kret = ubc_upl_unmap(upl);
if (kret != KERN_SUCCESS)
panic("kernel_upl_unmap failed");
bp->b_data = 0;
}
if (upl) {
- pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
-
if (bp->b_flags & (B_ERROR | B_INVAL)) {
- if (bp->b_flags & (B_READ | B_INVAL))
+ if (bp->b_flags & (B_READ | B_INVAL))
upl_flags = UPL_ABORT_DUMP_PAGES;
else
upl_flags = 0;
- kernel_upl_abort(upl, upl_flags);
+ ubc_upl_abort(upl, upl_flags);
} else {
- if (ISSET(bp->b_flags, (B_DELWRI | B_WASDIRTY)))
- upl_flags = UPL_COMMIT_SET_DIRTY | UPL_COMMIT_FREE_ON_EMPTY;
+ if (ISSET(bp->b_flags, (B_DELWRI | B_WASDIRTY)))
+				upl_flags = UPL_COMMIT_SET_DIRTY;
else
- upl_flags = UPL_COMMIT_CLEAR_DIRTY | UPL_COMMIT_FREE_ON_EMPTY;
- kernel_upl_commit_range(upl, 0, bp->b_bufsize,
- upl_flags
- | UPL_COMMIT_INACTIVATE,
- pl, MAX_UPL_TRANSFER);
+				upl_flags = UPL_COMMIT_CLEAR_DIRTY;
+ ubc_upl_commit_range(upl, 0, bp->b_bufsize, upl_flags |
+ UPL_COMMIT_INACTIVATE | UPL_COMMIT_FREE_ON_EMPTY);
}
s = splbio();
CLR(bp->b_flags, B_PAGELIST);
int s, err;
upl_t upl;
upl_page_info_t *pl;
- void * object;
kern_return_t kret;
- void *pager;
- off_t file_offset;
int error=0;
int pagedirty = 0;
-start:
KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 386)) | DBG_FUNC_START,
blkno * PAGE_SIZE, size, operation, 0, 0);
+start:
s = splbio();
if (bp = incore(vp, blkno)) {
case BLK_READ:
case BLK_WRITE:
if (UBCISVALID(bp->b_vp) && bp->b_bufsize) {
-
- if (bp->b_bufsize & 0xfff)
- panic("list request is with less than 4k");
-
- object = ubc_getobject(vp, UBC_NOREACTIVATE);
- if (object == (void *)NULL)
- panic("vmobject for vp is null");
-
- file_offset = ubc_blktooff(vp, bp->b_lblkno);
-
- kret = vm_fault_list_request(object,
- (vm_object_offset_t)file_offset, bp->b_bufsize,
- &upl, NULL, 0,
- (UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_PRECIOUS | UPL_SET_INTERNAL));
-
+ kret = ubc_create_upl(vp,
+ ubc_blktooff(vp, bp->b_lblkno),
+ bp->b_bufsize,
+ &upl,
+ &pl,
+ UPL_PRECIOUS);
if (kret != KERN_SUCCESS)
panic("Failed to get pagelists");
SET(bp->b_flags, B_PAGELIST);
bp->b_pagelist = upl;
- pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
-
if ( !upl_valid_page(pl, 0))
panic("getblk: incore buffer without valid page");
else
CLR(bp->b_flags, B_WASDIRTY);
- kret = kernel_upl_map(kernel_map, upl, (vm_address_t *)&(bp->b_data));
+ kret = ubc_upl_map(upl, (vm_address_t *)&(bp->b_data));
if (kret != KERN_SUCCESS) {
- panic("getblk: kernel_upl_map() "
- "failed with (%d)", kret);
+ panic("getblk: ubc_upl_map() failed with (%d)",
+ kret);
}
- if (bp->b_data == 0) panic("kernel_upl_map mapped 0");
+ if (bp->b_data == 0) panic("ubc_upl_map mapped 0");
}
break;
}
if ((bp = getnewbuf(slpflag, slptimeo, &queue)) == NULL)
goto start;
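+	/*
+	 * getnewbuf() may have slept, so the block may have been
+	 * brought incore by another thread in the meantime; if it
+	 * was, toss this header and start over.
+	 */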
+ if (incore(vp, blkno)) {
+ SET(bp->b_flags, B_INVAL);
+ binshash(bp, &invalhash);
+ brelse(bp);
+ goto start;
+ }
+
/*
* if it is meta, the queue may be set to other
* type so reset as well as mark it to be B_META
SET(bp->b_flags, B_META);
queue = BQ_META;
}
+ /*
+ * Insert in the hash so that incore() can find it
+ */
+ binshash(bp, BUFHASH(vp, blkno));
+
allocbuf(bp, size);
switch (operation) {
case BLK_META:
/* buffer data is invalid */
- /*
- * Insert in the hash so that incore() can find it
- */
- binshash(bp, BUFHASH(vp, blkno));
#if !ZALLOC_METADATA
if (bp->b_data)
panic("bp->b_data is not nul; %x",bp);
case BLK_READ:
case BLK_WRITE:
- /*
- * Insert in the hash so that incore() can find it
- */
- binshash(bp, BUFHASH(vp, blkno));
- pager = ubc_getpager(vp);
- file_offset = ubc_blktooff(vp, blkno);
-
- object = ubc_getobject(vp, UBC_NOREACTIVATE);
- if (object == (void *)NULL)
- panic("vmobject for vp is null");
- if (bp->b_bufsize & 0xfff)
- panic("list request is with less than 4k");
if (ISSET(bp->b_flags, B_PAGELIST))
panic("B_PAGELIST in bp=%x",bp);
- kret = vm_fault_list_request(object,
- (vm_object_offset_t)file_offset, bp->b_bufsize,
- &upl, NULL, 0,
- (UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_PRECIOUS | UPL_SET_INTERNAL));
-
+ kret = ubc_create_upl(vp,
+ ubc_blktooff(vp, blkno),
+ bp->b_bufsize,
+ &upl,
+ &pl,
+ UPL_PRECIOUS);
if (kret != KERN_SUCCESS)
panic("Failed to get pagelists");
bp->b_pagelist = upl;
SET(bp->b_flags, B_PAGELIST);
- pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
if (upl_valid_page(pl, 0)) {
SET(bp->b_flags, B_CACHE | B_DONE);
} else {
bufstats.bufs_miss++;
}
- kret = kernel_upl_map(kernel_map, upl, (vm_address_t *)&(bp->b_data));
+ kret = ubc_upl_map(upl, (vm_address_t *)&(bp->b_data));
if (kret != KERN_SUCCESS) {
- panic("getblk: kernel_upl_map() "
+ panic("getblk: ubc_upl_map() "
"failed with (%d)", kret);
}
if (bp->b_data == 0) panic("kernel_upl_map mapped 0");
{NULL, (MINMETA * 8), 512 * (MINMETA * 8), "buf.4096" },
{NULL, 0, 0, "" } /* End */
};
+#endif /* ZALLOC_METADATA */
+
+zone_t buf_hdr_zone;
+int buf_hdr_count;
/*
* Initialize the meta data zones
static void
bufzoneinit(void)
{
+#if ZALLOC_METADATA
int i;
for (i = 0; meta_zones[i].mz_size != 0; i++) {
PAGE_SIZE,
meta_zones[i].mz_name);
}
+#endif /* ZALLOC_METADATA */
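+	/* zone for buffer headers allocated on demand by getnewbuf() */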
+ buf_hdr_zone = zinit(sizeof(struct buf), 32, PAGE_SIZE, "buf headers");
}
+#if ZALLOC_METADATA
static zone_t
getbufzone(size_t size)
{
s = splbio();
/* invalid request gets empty queue */
- if ((*queue > BQUEUES) || (*queue < 0))
+ if ((*queue > BQUEUES) || (*queue < 0)
+ || (*queue == BQ_LAUNDRY) || (*queue == BQ_LOCKED))
*queue = BQ_EMPTY;
/* (*queue == BQUEUES) means no preference */
*queue = BQ_EMPTY;
goto found;
}
-#if DIAGNOSTIC
- /* with UBC this is a fatal condition */
- panic("getnewbuf: No useful buffers");
-#else
+
+	/* Create a new temporary buffer header */
+ bp = (struct buf *)zalloc(buf_hdr_zone);
+
+ if (bp) {
+ bufhdrinit(bp);
+ BLISTNONE(bp);
+ binshash(bp, &invalhash);
+ SET(bp->b_flags, B_HDRALLOC);
+ *queue = BQ_EMPTY;
+ binsheadfree(bp, &bufqueues[BQ_EMPTY], BQ_EMPTY);
+ buf_hdr_count++;
+ goto found;
+ }
+
/* Log this error condition */
printf("getnewbuf: No useful buffers");
-#endif /* DIAGNOSTIC */
-
+
/* wait for a free buffer of any kind */
needbuffer = 1;
bufstats.bufs_sleeps++;
if (bp->b_hash.le_prev == (struct buf **)0xdeadbeef)
panic("bcleanbuf: le_prev is deadbeef");
- /* If buffer was a delayed write, start it, and return 1 */
+	/*
+	 * If the buffer was a delayed write, start the I/O by
+	 * queuing it on the LAUNDRY queue, and return 1.
+	 */
if (ISSET(bp->b_flags, B_DELWRI)) {
splx(s);
- bawrite (bp);
+ binstailfree(bp, &bufqueues[BQ_LAUNDRY], BQ_LAUNDRY);
+ blaundrycnt++;
+ wakeup(&blaundrycnt);
return (1);
}
s = splbio();
/* clear out various other fields */
+ bp->b_bufsize = 0;
bp->b_data = 0;
bp->b_flags = B_BUSY;
bp->b_dev = NODEV;
register struct buf *bp;
register struct bqueues *dp;
int counts[MAXBSIZE/CLBYTES+1];
- static char *bname[BQUEUES] = { "LOCKED", "LRU", "AGE", "EMPTY", "META" };
+ static char *bname[BQUEUES] =
+ { "LOCKED", "LRU", "AGE", "EMPTY", "META", "LAUNDRY" };
for (dp = bufqueues, i = 0; dp < &bufqueues[BQUEUES]; dp++, i++) {
count = 0;
}
#endif /* DIAGNOSTIC */
+#define NRESERVEDIOBUFS 16
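+
+/*
+ * Allocate an I/O buffer header.  Non-privileged callers (priv == 0)
+ * sleep once fewer than NRESERVEDIOBUFS headers remain free, so a
+ * critical path can still make progress by passing priv != 0 and
+ * dipping into the reserve; e.g. (illustrative call, not part of
+ * this change):
+ *
+ *	bp = alloc_io_buf(vp, 1);
+ */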
struct buf *
-alloc_io_buf(vp)
+alloc_io_buf(vp, priv)
struct vnode *vp;
+ int priv;
{
register struct buf *bp;
int s;
s = splbio();
+ while (niobuf - NRESERVEDIOBUFS < bufstats.bufs_iobufinuse && !priv) {
+ need_iobuffer = 1;
+ bufstats.bufs_iobufsleeps++;
+ (void) tsleep(&need_iobuffer, (PRIBIO+1), "alloc_io_buf", 0);
+ }
+
while ((bp = iobufqueue.tqh_first) == NULL) {
need_iobuffer = 1;
bufstats.bufs_iobufsleeps++;
- tsleep(&need_iobuffer, (PRIBIO+1), "alloc_io_buf", 0);
+ (void) tsleep(&need_iobuffer, (PRIBIO+1), "alloc_io_buf1", 0);
}
+
TAILQ_REMOVE(&iobufqueue, bp, b_freelist);
bp->b_timestamp = 0;
bufqlim[BQ_META].bl_target = nbuftarget/4;
bufqlim[BQ_META].bl_stale = META_IS_STALE;
+	/* LAUNDRY queue */
+	bufqlim[BQ_LAUNDRY].bl_nlow = 0;
+	bufqlim[BQ_LAUNDRY].bl_nlhigh = 32;
+	bufqlim[BQ_LAUNDRY].bl_target = 0;
+	bufqlim[BQ_LAUNDRY].bl_stale = 30;
+
buqlimprt(1);
}
if ((q < 0) || (q >= BQUEUES))
goto out;
- /* LOCKED queue MUST not be balanced */
- if (q == BQ_LOCKED)
+ /* LOCKED or LAUNDRY queue MUST not be balanced */
+ if ((q == BQ_LOCKED) || (q == BQ_LAUNDRY))
goto out;
n = (bufqlim[q].bl_num - bufqlim[q].bl_target);
buqlimprt(int all)
{
int i;
- static char *bname[BQUEUES] = { "LOCKED", "LRU", "AGE", "EMPTY", "META" };
+ static char *bname[BQUEUES] =
+ { "LOCKED", "LRU", "AGE", "EMPTY", "META", "LAUNDRY" };
if (all)
for (i = 0; i < BQUEUES; i++) {
printf("cur = %d, ", (long)bufqlim[i].bl_num);
}
}
+
+/*
+ * If getnewbuf() calls bcleanbuf() on the same thread, there is
+ * a potential for stack overrun and deadlock.  So we always hand
+ * the work off to a worker thread for completion.
+ */
+
+static void
+bcleanbuf_thread_init()
+{
+ static void bcleanbuf_thread();
+
+ /* create worker thread */
+ kernel_thread(kernel_task, bcleanbuf_thread);
+}
+
+static void
+bcleanbuf_thread()
+{
+ boolean_t funnel_state;
+ struct buf *bp;
+
+ funnel_state = thread_funnel_set(kernel_flock, TRUE);
+
+doit:
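+	/*
+	 * wait for a dirty buffer to be queued for laundering; the
+	 * one-minute timeout is a backstop against a missed wakeup
+	 */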
+ while (blaundrycnt == 0)
+ (void)tsleep((void *)&blaundrycnt, PRIBIO, "blaundry", 60 * hz);
+ bp = TAILQ_FIRST(&bufqueues[BQ_LAUNDRY]);
+ /* Remove from the queue */
+ bremfree(bp);
+ blaundrycnt--;
+ /* do the IO */
+ bawrite(bp);
+ /* start again */
+ goto doit;
+
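+	/* NOTREACHED */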
+ (void) thread_funnel_set(kernel_flock, funnel_state);
+}
+