+int
+bufattr_meta(bufattr_t bap)
+{
+ if ((bap->ba_flags & BA_META)) {
+ return 1;
+ }
+ return 0;
+}
+
+void
+bufattr_markmeta(bufattr_t bap)
+{
+ SET(bap->ba_flags, BA_META);
+}
+
+int
+#if !CONFIG_EMBEDDED
+bufattr_delayidlesleep(bufattr_t bap)
+#else /* !CONFIG_EMBEDDED */
+bufattr_delayidlesleep(__unused bufattr_t bap)
+#endif /* !CONFIG_EMBEDDED */
+{
+#if !CONFIG_EMBEDDED
+ if ((bap->ba_flags & BA_DELAYIDLESLEEP)) {
+ return 1;
+ }
+#endif /* !CONFIG_EMBEDDED */
+ return 0;
+}
+
+bufattr_t
+buf_attr(buf_t bp)
+{
+ return &bp->b_attr;
+}
+
+void
+buf_markstatic(buf_t bp __unused)
+{
+ SET(bp->b_flags, B_STATICCONTENT);
+}
+
+int
+buf_static(buf_t bp)
+{
+ if ((bp->b_flags & B_STATICCONTENT)) {
+ return 1;
+ }
+ return 0;
+}
+
+void
+bufattr_markgreedymode(bufattr_t bap)
+{
+ SET(bap->ba_flags, BA_GREEDY_MODE);
+}
+
+int
+bufattr_greedymode(bufattr_t bap)
+{
+ if ((bap->ba_flags & BA_GREEDY_MODE)) {
+ return 1;
+ }
+ return 0;
+}
+
+void
+bufattr_markisochronous(bufattr_t bap)
+{
+ SET(bap->ba_flags, BA_ISOCHRONOUS);
+}
+
+int
+bufattr_isochronous(bufattr_t bap)
+{
+ if ((bap->ba_flags & BA_ISOCHRONOUS)) {
+ return 1;
+ }
+ return 0;
+}
+
+void
+bufattr_markquickcomplete(bufattr_t bap)
+{
+ SET(bap->ba_flags, BA_QUICK_COMPLETE);
+}
+
+int
+bufattr_quickcomplete(bufattr_t bap)
+{
+ if ((bap->ba_flags & BA_QUICK_COMPLETE)) {
+ return 1;
+ }
+ return 0;
+}
+
+void
+bufattr_markioscheduled(bufattr_t bap)
+{
+ SET(bap->ba_flags, BA_IO_SCHEDULED);
+}
+
+
+int
+bufattr_ioscheduled(bufattr_t bap)
+{
+ if ((bap->ba_flags & BA_IO_SCHEDULED)) {
+ return 1;
+ }
+ return 0;
+}
+
+errno_t
+buf_error(buf_t bp)
+{
+ return bp->b_error;
+}
+
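+/*
+ * Set the buffer's error: a non-zero error also sets B_ERROR,
+ * while an error of 0 clears it.
+ */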
+void
+buf_seterror(buf_t bp, errno_t error)
+{
+ if ((bp->b_error = error)) {
+ SET(bp->b_flags, B_ERROR);
+ } else {
+ CLR(bp->b_flags, B_ERROR);
+ }
+}
+
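+/*
+ * External flag accessors... callers may only modify the bits
+ * covered by BUF_X_WRFLAGS, and buf_flags() reports only the
+ * bits covered by BUF_X_RDFLAGS.
+ */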
+void
+buf_setflags(buf_t bp, int32_t flags)
+{
+ SET(bp->b_flags, (flags & BUF_X_WRFLAGS));
+}
+
+void
+buf_clearflags(buf_t bp, int32_t flags)
+{
+ CLR(bp->b_flags, (flags & BUF_X_WRFLAGS));
+}
+
+int32_t
+buf_flags(buf_t bp)
+{
+ return bp->b_flags & BUF_X_RDFLAGS;
+}
+
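+/*
+ * Reset a buffer for reuse: clear the prior I/O state flags,
+ * apply the direction/caching bits from io_flags, and clear b_error.
+ */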
+void
+buf_reset(buf_t bp, int32_t io_flags)
+{
+ CLR(bp->b_flags, (B_READ | B_WRITE | B_ERROR | B_DONE | B_INVAL | B_ASYNC | B_NOCACHE | B_FUA));
+ SET(bp->b_flags, (io_flags & (B_ASYNC | B_READ | B_WRITE | B_NOCACHE)));
+
+ bp->b_error = 0;
+}
+
+uint32_t
+buf_count(buf_t bp)
+{
+ return bp->b_bcount;
+}
+
+void
+buf_setcount(buf_t bp, uint32_t bcount)
+{
+ bp->b_bcount = bcount;
+}
+
+uint32_t
+buf_size(buf_t bp)
+{
+ return bp->b_bufsize;
+}
+
+void
+buf_setsize(buf_t bp, uint32_t bufsize)
+{
+ bp->b_bufsize = bufsize;
+}
+
+uint32_t
+buf_resid(buf_t bp)
+{
+ return bp->b_resid;
+}
+
+void
+buf_setresid(buf_t bp, uint32_t resid)
+{
+ bp->b_resid = resid;
+}
+
+uint32_t
+buf_dirtyoff(buf_t bp)
+{
+ return bp->b_dirtyoff;
+}
+
+uint32_t
+buf_dirtyend(buf_t bp)
+{
+ return bp->b_dirtyend;
+}
+
+void
+buf_setdirtyoff(buf_t bp, uint32_t dirtyoff)
+{
+ bp->b_dirtyoff = dirtyoff;
+}
+
+void
+buf_setdirtyend(buf_t bp, uint32_t dirtyend)
+{
+ bp->b_dirtyend = dirtyend;
+}
+
+uintptr_t
+buf_dataptr(buf_t bp)
+{
+ return bp->b_datap;
+}
+
+void
+buf_setdataptr(buf_t bp, uintptr_t data)
+{
+ bp->b_datap = data;
+}
+
+vnode_t
+buf_vnode(buf_t bp)
+{
+ return bp->b_vp;
+}
+
+void
+buf_setvnode(buf_t bp, vnode_t vp)
+{
+ bp->b_vp = vp;
+}
+
+
+void *
+buf_callback(buf_t bp)
+{
+ if (!(bp->b_flags & B_CALL)) {
+ return (void *) NULL;
+ }
+
+ return (void *)bp->b_iodone;
+}
+
+
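+/*
+ * Register an iodone callback and its transaction argument.
+ * A non-NULL callback marks the buffer B_CALL | B_ASYNC; a NULL
+ * callback clears B_CALL.  The caller must hold the buffer busy,
+ * and no filter may currently be installed.
+ */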
+errno_t
+buf_setcallback(buf_t bp, void (*callback)(buf_t, void *), void *transaction)
+{
+ assert(!ISSET(bp->b_flags, B_FILTER) && ISSET(bp->b_lflags, BL_BUSY));
+
+ if (callback) {
+ bp->b_flags |= (B_CALL | B_ASYNC);
+ } else {
+ bp->b_flags &= ~B_CALL;
+ }
+ bp->b_transaction = transaction;
+ bp->b_iodone = callback;
+
+ return 0;
+}
+
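+/*
+ * Attach (or detach) a UPL and offset to an I/O buffer.
+ * Only valid for iobufs (BL_IOBUF); returns EINVAL otherwise.
+ * A non-NULL upl marks the buffer B_CLUSTER.
+ */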
+errno_t
+buf_setupl(buf_t bp, upl_t upl, uint32_t offset)
+{
+ if (!(bp->b_lflags & BL_IOBUF)) {
+ return EINVAL;
+ }
+
+ if (upl) {
+ bp->b_flags |= B_CLUSTER;
+ } else {
+ bp->b_flags &= ~B_CLUSTER;
+ }
+ bp->b_upl = upl;
+ bp->b_uploffset = offset;
+
+ return 0;
+}
+
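+/*
+ * Clone a sub-range [io_offset, io_offset + io_size) of 'bp' into a new
+ * io buffer that shares the original's data (or UPL).  For UPL-backed
+ * (B_CLUSTER) buffers the sub-range must line up on page boundaries,
+ * except at the very end of the original transfer.  Returns NULL if the
+ * requested range is invalid.
+ */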
+buf_t
+buf_clone(buf_t bp, int io_offset, int io_size, void (*iodone)(buf_t, void *), void *arg)
+{
+ buf_t io_bp;
+
+ if (io_offset < 0 || io_size < 0) {
+ return NULL;
+ }
+
+ if ((unsigned)(io_offset + io_size) > (unsigned)bp->b_bcount) {
+ return NULL;
+ }
+
+ if (bp->b_flags & B_CLUSTER) {
+ if (io_offset && ((bp->b_uploffset + io_offset) & PAGE_MASK)) {
+ return NULL;
+ }
+
+ if (((bp->b_uploffset + io_offset + io_size) & PAGE_MASK) && ((io_offset + io_size) < bp->b_bcount)) {
+ return NULL;
+ }
+ }
+ io_bp = alloc_io_buf(bp->b_vp, 0);
+
+ io_bp->b_flags = bp->b_flags & (B_COMMIT_UPL | B_META | B_PAGEIO | B_CLUSTER | B_PHYS | B_RAW | B_ASYNC | B_READ | B_FUA);
+
+ if (iodone) {
+ io_bp->b_transaction = arg;
+ io_bp->b_iodone = iodone;
+ io_bp->b_flags |= B_CALL;
+ }
+ if (bp->b_flags & B_CLUSTER) {
+ io_bp->b_upl = bp->b_upl;
+ io_bp->b_uploffset = bp->b_uploffset + io_offset;
+ } else {
+ io_bp->b_datap = (uintptr_t)(((char *)bp->b_datap) + io_offset);
+ }
+ io_bp->b_bcount = io_size;
+
+ return io_bp;
+}
+
+
+int
+buf_shadow(buf_t bp)
+{
+ if (bp->b_lflags & BL_SHADOW) {
+ return 1;
+ }
+ return 0;
+}
+
+
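+/*
+ * Both of these are thin wrappers around buf_create_shadow_internal();
+ * the _priv variant asks alloc_io_buf for a privileged io buffer.
+ */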
+buf_t
+buf_create_shadow_priv(buf_t bp, boolean_t force_copy, uintptr_t external_storage, void (*iodone)(buf_t, void *), void *arg)
+{
+ return buf_create_shadow_internal(bp, force_copy, external_storage, iodone, arg, 1);
+}
+
+buf_t
+buf_create_shadow(buf_t bp, boolean_t force_copy, uintptr_t external_storage, void (*iodone)(buf_t, void *), void *arg)
+{
+ return buf_create_shadow_internal(bp, force_copy, external_storage, iodone, arg, 0);
+}
+
+
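+/*
+ * Create a shadow of a meta data buffer.  Unless force_copy is set
+ * (or external storage is supplied), the shadow shares the original's
+ * data and is linked onto bp's b_shadow chain under buf_mtxp, bumping
+ * b_shadow_ref.  Returns NULL if 'bp' isn't a B_META buffer or is an
+ * iobuf.
+ */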
+static buf_t
+buf_create_shadow_internal(buf_t bp, boolean_t force_copy, uintptr_t external_storage, void (*iodone)(buf_t, void *), void *arg, int priv)
+{
+ buf_t io_bp;
+
+ KERNEL_DEBUG(0xbbbbc000 | DBG_FUNC_START, bp, 0, 0, 0, 0);
+
+ if (!(bp->b_flags & B_META) || (bp->b_lflags & BL_IOBUF)) {
+ KERNEL_DEBUG(0xbbbbc000 | DBG_FUNC_END, bp, 0, 0, 0, 0);
+ return NULL;
+ }
+#ifdef BUF_MAKE_PRIVATE
+ if (bp->b_shadow_ref && bp->b_data_ref == 0 && external_storage == 0) {
+ panic("buf_create_shadow: %p is in the private state (%d, %d)", bp, bp->b_shadow_ref, bp->b_data_ref);
+ }
+#endif
+ io_bp = alloc_io_buf(bp->b_vp, priv);
+
+ io_bp->b_flags = bp->b_flags & (B_META | B_ZALLOC | B_ASYNC | B_READ | B_FUA);
+ io_bp->b_blkno = bp->b_blkno;
+ io_bp->b_lblkno = bp->b_lblkno;
+
+ if (iodone) {
+ io_bp->b_transaction = arg;
+ io_bp->b_iodone = iodone;
+ io_bp->b_flags |= B_CALL;
+ }
+ if (force_copy == FALSE) {
+ io_bp->b_bcount = bp->b_bcount;
+ io_bp->b_bufsize = bp->b_bufsize;
+
+ if (external_storage) {
+ io_bp->b_datap = external_storage;
+#ifdef BUF_MAKE_PRIVATE
+ io_bp->b_data_store = NULL;
+#endif
+ } else {
+ io_bp->b_datap = bp->b_datap;
+#ifdef BUF_MAKE_PRIVATE
+ io_bp->b_data_store = bp;
+#endif
+ }
+ *(buf_t *)(&io_bp->b_orig) = bp;
+
+ lck_mtx_lock_spin(buf_mtxp);
+
+ io_bp->b_lflags |= BL_SHADOW;
+ io_bp->b_shadow = bp->b_shadow;
+ bp->b_shadow = io_bp;
+ bp->b_shadow_ref++;
+
+#ifdef BUF_MAKE_PRIVATE
+ if (external_storage) {
+ io_bp->b_lflags |= BL_EXTERNAL;
+ } else {
+ bp->b_data_ref++;
+ }
+#endif
+ lck_mtx_unlock(buf_mtxp);
+ } else {
+ if (external_storage) {
+#ifdef BUF_MAKE_PRIVATE
+ io_bp->b_lflags |= BL_EXTERNAL;
+#endif
+ io_bp->b_bcount = bp->b_bcount;
+ io_bp->b_bufsize = bp->b_bufsize;
+ io_bp->b_datap = external_storage;
+ } else {
+ allocbuf(io_bp, bp->b_bcount);
+
+ io_bp->b_lflags |= BL_IOBUF_ALLOC;
+ }
+ bcopy((caddr_t)bp->b_datap, (caddr_t)io_bp->b_datap, bp->b_bcount);
+
+#ifdef BUF_MAKE_PRIVATE
+ io_bp->b_data_store = NULL;
+#endif
+ }
+ KERNEL_DEBUG(0xbbbbc000 | DBG_FUNC_END, bp, bp->b_shadow_ref, 0, io_bp, 0);
+
+ return io_bp;
+}
+
+
+#ifdef BUF_MAKE_PRIVATE
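+/*
+ * Give 'bp' its own private copy of the data it currently shares with
+ * its shadows: copy the contents into freshly allocated storage, point
+ * the non-external shadows at a new data store, and transfer b_data_ref
+ * to that shadow.  Returns EINVAL if 'bp' has no shadows, no data
+ * reference, or is itself a shadow.
+ */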
+errno_t
+buf_make_private(buf_t bp)
+{
+ buf_t ds_bp;
+ buf_t t_bp;
+ struct buf my_buf;
+
+ KERNEL_DEBUG(0xbbbbc004 | DBG_FUNC_START, bp, bp->b_shadow_ref, 0, 0, 0);
+
+ if (bp->b_shadow_ref == 0 || bp->b_data_ref == 0 || ISSET(bp->b_lflags, BL_SHADOW)) {
+ KERNEL_DEBUG(0xbbbbc004 | DBG_FUNC_END, bp, bp->b_shadow_ref, 0, EINVAL, 0);
+ return EINVAL;
+ }
+ my_buf.b_flags = B_META;
+ my_buf.b_datap = (uintptr_t)NULL;
+ allocbuf(&my_buf, bp->b_bcount);
+
+ bcopy((caddr_t)bp->b_datap, (caddr_t)my_buf.b_datap, bp->b_bcount);
+
+ lck_mtx_lock_spin(buf_mtxp);
+
+ for (t_bp = bp->b_shadow; t_bp; t_bp = t_bp->b_shadow) {
+ if (!ISSET(bp->b_lflags, BL_EXTERNAL)) {
+ break;
+ }
+ }
+ ds_bp = t_bp;
+
+ if (ds_bp == NULL && bp->b_data_ref) {
+ panic("buf_make_private: b_data_ref != 0 && ds_bp == NULL");
+ }
+
+ if (ds_bp && (bp->b_data_ref == 0 || bp->b_shadow_ref == 0)) {
+ panic("buf_make_private: ref_count == 0 && ds_bp != NULL");
+ }
+
+ if (ds_bp == NULL) {
+ lck_mtx_unlock(buf_mtxp);
+
+ buf_free_meta_store(&my_buf);
+
+ KERNEL_DEBUG(0xbbbbc004 | DBG_FUNC_END, bp, bp->b_shadow_ref, 0, EINVAL, 0);
+ return EINVAL;
+ }
+ for (t_bp = bp->b_shadow; t_bp; t_bp = t_bp->b_shadow) {
+ if (!ISSET(t_bp->b_lflags, BL_EXTERNAL)) {
+ t_bp->b_data_store = ds_bp;
+ }
+ }
+ ds_bp->b_data_ref = bp->b_data_ref;
+
+ bp->b_data_ref = 0;
+ bp->b_datap = my_buf.b_datap;
+
+ lck_mtx_unlock(buf_mtxp);
+
+ KERNEL_DEBUG(0xbbbbc004 | DBG_FUNC_END, bp, bp->b_shadow_ref, 0, 0, 0);
+ return 0;
+}
+#endif
+
+
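+/*
+ * Install (or remove) a filter function in place of the current iodone,
+ * optionally returning the previous iodone/transaction to the caller.
+ * A non-NULL filter sets B_FILTER; the buffer must be held busy.
+ */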
+void
+buf_setfilter(buf_t bp, void (*filter)(buf_t, void *), void *transaction,
+ void(**old_iodone)(buf_t, void *), void **old_transaction)
+{
+ assert(ISSET(bp->b_lflags, BL_BUSY));
+
+ if (old_iodone) {
+ *old_iodone = bp->b_iodone;
+ }
+ if (old_transaction) {
+ *old_transaction = bp->b_transaction;
+ }
+
+ bp->b_transaction = transaction;
+ bp->b_iodone = filter;
+ if (filter) {
+ bp->b_flags |= B_FILTER;
+ } else {
+ bp->b_flags &= ~B_FILTER;
+ }
+}
+
+
+daddr64_t
+buf_blkno(buf_t bp)
+{
+ return bp->b_blkno;
+}
+
+daddr64_t
+buf_lblkno(buf_t bp)
+{
+ return bp->b_lblkno;
+}
+
+void
+buf_setblkno(buf_t bp, daddr64_t blkno)
+{
+ bp->b_blkno = blkno;
+}
+
+void
+buf_setlblkno(buf_t bp, daddr64_t lblkno)
+{
+ bp->b_lblkno = lblkno;
+}
+
+dev_t
+buf_device(buf_t bp)
+{
+ return bp->b_dev;
+}
+
+errno_t
+buf_setdevice(buf_t bp, vnode_t vp)
+{
+ if ((vp->v_type != VBLK) && (vp->v_type != VCHR)) {
+ return EINVAL;
+ }
+ bp->b_dev = vp->v_rdev;
+
+ return 0;
+}
+
+
+void *
+buf_drvdata(buf_t bp)
+{
+ return bp->b_drvdata;
+}
+
+void
+buf_setdrvdata(buf_t bp, void *drvdata)
+{
+ bp->b_drvdata = drvdata;
+}
+
+void *
+buf_fsprivate(buf_t bp)
+{
+ return bp->b_fsprivate;
+}
+
+void
+buf_setfsprivate(buf_t bp, void *fsprivate)
+{
+ bp->b_fsprivate = fsprivate;
+}
+
+kauth_cred_t
+buf_rcred(buf_t bp)
+{
+ return bp->b_rcred;
+}
+
+kauth_cred_t
+buf_wcred(buf_t bp)
+{
+ return bp->b_wcred;
+}
+
+void *
+buf_upl(buf_t bp)
+{
+ return bp->b_upl;
+}
+
+uint32_t
+buf_uploffset(buf_t bp)
+{
+ return (uint32_t)(bp->b_uploffset);
+}
+
+proc_t
+buf_proc(buf_t bp)
+{
+ return bp->b_proc;
+}
+
+
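+/*
+ * Return a kernel virtual address for the buffer's data.  For ordinary
+ * buffers this is simply b_datap; for UPL-backed (B_CLUSTER) buffers the
+ * UPL is mapped in via ubc_upl_map and offset by b_uploffset, unless the
+ * underlying 'real' buffer already carries a mapping.  Returns ENOMEM if
+ * the mapping fails.
+ */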
+errno_t
+buf_map(buf_t bp, caddr_t *io_addr)
+{
+ buf_t real_bp;
+ vm_offset_t vaddr;
+ kern_return_t kret;
+
+ if (!(bp->b_flags & B_CLUSTER)) {
+ *io_addr = (caddr_t)bp->b_datap;
+ return 0;
+ }
+ real_bp = (buf_t)(bp->b_real_bp);
+
+ if (real_bp && real_bp->b_datap) {
+ /*
+ * b_real_bp is only valid if B_CLUSTER is set;
+ * if it's non-zero, then someone did a cluster_bp call.
+ * If the backing physical pages were already mapped
+ * in before the call to cluster_bp (non-zero b_datap),
+ * then we just reuse that mapping.
+ */
+ *io_addr = (caddr_t)real_bp->b_datap;
+ return 0;
+ }
+ kret = ubc_upl_map(bp->b_upl, &vaddr); /* Map it in */
+
+ if (kret != KERN_SUCCESS) {
+ *io_addr = NULL;
+
+ return ENOMEM;
+ }
+ vaddr += bp->b_uploffset;
+
+ *io_addr = (caddr_t)vaddr;
+
+ return 0;
+}
+
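+/*
+ * Release the mapping established by buf_map for a UPL-backed buffer;
+ * a no-op for non-cluster buffers.
+ */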
+errno_t
+buf_unmap(buf_t bp)
+{
+ buf_t real_bp;
+ kern_return_t kret;
+
+ if (!(bp->b_flags & B_CLUSTER)) {
+ return 0;
+ }
+ /*
+ * see buf_map for the explanation
+ */
+ real_bp = (buf_t)(bp->b_real_bp);
+
+ if (real_bp && real_bp->b_datap) {
+ return 0;
+ }
+
+ if ((bp->b_lflags & BL_IOBUF) &&
+ ((bp->b_flags & (B_PAGEIO | B_READ)) != (B_PAGEIO | B_READ))) {
+ /*
+ * ignore pageins... the 'right' thing will
+ * happen due to the way we handle speculative
+ * clusters...
+ *
+ * when we commit these pages, we'll hit
+ * it with UPL_COMMIT_INACTIVE which
+ * will clear the reference bit that got
+ * turned on when we touched the mapping
+ */
+ bp->b_flags |= B_AGE;
+ }
+ kret = ubc_upl_unmap(bp->b_upl);
+
+ if (kret != KERN_SUCCESS) {
+ return EINVAL;
+ }
+ return 0;
+}
+
+
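+/*
+ * Zero-fill the buffer's data and clear b_resid.
+ */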
+void
+buf_clear(buf_t bp)
+{
+ caddr_t baddr;
+
+ if (buf_map(bp, &baddr) == 0) {
+ bzero(baddr, bp->b_bcount);
+ buf_unmap(bp);
+ }
+ bp->b_resid = 0;
+}
+
+/*
+ * Read or write a buffer that is not contiguous on disk.
+ * buffer is marked done/error at the conclusion
+ */
+static int
+buf_strategy_fragmented(vnode_t devvp, buf_t bp, off_t f_offset, size_t contig_bytes)
+{
+ vnode_t vp = buf_vnode(bp);
+ buf_t io_bp; /* For reading or writing a single block */
+ int io_direction;
+ int io_resid;
+ size_t io_contig_bytes;
+ daddr64_t io_blkno;
+ int error = 0;
+ int bmap_flags;
+
+ /*
+ * save our starting point... the bp was already mapped
+ * in buf_strategy before we got called
+ * no sense doing it again.
+ */
+ io_blkno = bp->b_blkno;
+ /*
+ * Make sure we redo this mapping for the next I/O
+ * i.e. this can never be a 'permanent' mapping
+ */
+ bp->b_blkno = bp->b_lblkno;
+
+ /*
+ * Get an io buffer to do the deblocking
+ */
+ io_bp = alloc_io_buf(devvp, 0);
+
+ io_bp->b_lblkno = bp->b_lblkno;
+ io_bp->b_datap = bp->b_datap;
+ io_resid = bp->b_bcount;
+ io_direction = bp->b_flags & B_READ;
+ io_contig_bytes = contig_bytes;
+
+ if (bp->b_flags & B_READ) {
+ bmap_flags = VNODE_READ;
+ } else {
+ bmap_flags = VNODE_WRITE;
+ }
+
+ for (;;) {
+ if (io_blkno == -1) {
+ /*
+ * this is unexpected, but we'll allow for it
+ */
+ bzero((caddr_t)io_bp->b_datap, (int)io_contig_bytes);
+ } else {
+ io_bp->b_bcount = io_contig_bytes;
+ io_bp->b_bufsize = io_contig_bytes;
+ io_bp->b_resid = io_contig_bytes;
+ io_bp->b_blkno = io_blkno;
+
+ buf_reset(io_bp, io_direction);
+
+ /*
+ * Call the device to do the I/O and wait for it. Make sure the appropriate party is charged for write
+ */
+
+ if (!ISSET(bp->b_flags, B_READ)) {
+ OSAddAtomic(1, &devvp->v_numoutput);
+ }
+
+ if ((error = VNOP_STRATEGY(io_bp))) {
+ break;
+ }
+ if ((error = (int)buf_biowait(io_bp))) {
+ break;
+ }
+ if (io_bp->b_resid) {
+ io_resid -= (io_contig_bytes - io_bp->b_resid);
+ break;
+ }
+ }
+ if ((io_resid -= io_contig_bytes) == 0) {
+ break;
+ }
+ f_offset += io_contig_bytes;
+ io_bp->b_datap += io_contig_bytes;
+
+ /*
+ * Map the current position to a physical block number
+ */
+ if ((error = VNOP_BLOCKMAP(vp, f_offset, io_resid, &io_blkno, &io_contig_bytes, NULL, bmap_flags, NULL))) {
+ break;
+ }
+ }
+ buf_free(io_bp);
+
+ if (error) {
+ buf_seterror(bp, error);
+ }
+ bp->b_resid = io_resid;
+ /*
+ * This I/O is now complete
+ */
+ buf_biodone(bp);
+
+ return error;
+}
+
+
+/*
+ * struct vnop_strategy_args {
+ * struct buf *a_bp;
+ * } *ap;
+ */
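+/*
+ * Issue the I/O described by 'bp' against the device vnode 'devvp'.
+ * If the buffer hasn't been mapped to a physical block yet
+ * (b_blkno == b_lblkno), VNOP_BLKTOOFF/VNOP_BLOCKMAP are used to
+ * resolve it; transfers that aren't physically contiguous are handed
+ * off to buf_strategy_fragmented.
+ */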
+errno_t
+buf_strategy(vnode_t devvp, void *ap)
+{
+ buf_t bp = ((struct vnop_strategy_args *)ap)->a_bp;
+ vnode_t vp = bp->b_vp;
+ int bmap_flags;
+ errno_t error;
+#if CONFIG_DTRACE
+ int dtrace_io_start_flag = 0; /* We only want to trip the io:::start
+ * probe once, with the true physical
+ * block in place (b_blkno)
+ */
+
+#endif
+
+ if (vp == NULL || vp->v_type == VCHR || vp->v_type == VBLK) {
+ panic("buf_strategy: b_vp == NULL || vtype == VCHR | VBLK\n");
+ }
+ /*
+ * associate the physical device
+ * with this buf_t even if we don't
+ * end up issuing the I/O...
+ */
+ bp->b_dev = devvp->v_rdev;
+
+ if (bp->b_flags & B_READ) {
+ bmap_flags = VNODE_READ;
+ } else {
+ bmap_flags = VNODE_WRITE;
+ }
+
+ if (!(bp->b_flags & B_CLUSTER)) {
+ if ((bp->b_upl)) {
+ /*
+ * we have a UPL associated with this bp
+ * go through cluster_bp which knows how
+ * to deal with filesystem block sizes
+ * that aren't equal to the page size
+ */
+ DTRACE_IO1(start, buf_t, bp);
+ return cluster_bp(bp);
+ }
+ if (bp->b_blkno == bp->b_lblkno) {
+ off_t f_offset;
+ size_t contig_bytes;
+
+ if ((error = VNOP_BLKTOOFF(vp, bp->b_lblkno, &f_offset))) {
+ DTRACE_IO1(start, buf_t, bp);
+ buf_seterror(bp, error);
+ buf_biodone(bp);
+
+ return error;
+ }
+
+ if ((error = VNOP_BLOCKMAP(vp, f_offset, bp->b_bcount, &bp->b_blkno, &contig_bytes, NULL, bmap_flags, NULL))) {
+ DTRACE_IO1(start, buf_t, bp);
+ buf_seterror(bp, error);
+ buf_biodone(bp);
+
+ return error;
+ }
+
+ DTRACE_IO1(start, buf_t, bp);
+#if CONFIG_DTRACE
+ dtrace_io_start_flag = 1;
+#endif /* CONFIG_DTRACE */
+
+ if ((bp->b_blkno == -1) || (contig_bytes == 0)) {
+ /* Set block number to force biodone later */
+ bp->b_blkno = -1;
+ buf_clear(bp);
+ } else if ((long)contig_bytes < bp->b_bcount) {
+ return buf_strategy_fragmented(devvp, bp, f_offset, contig_bytes);
+ }
+ }
+
+#if CONFIG_DTRACE
+ if (dtrace_io_start_flag == 0) {
+ DTRACE_IO1(start, buf_t, bp);
+ dtrace_io_start_flag = 1;
+ }
+#endif /* CONFIG_DTRACE */
+
+ if (bp->b_blkno == -1) {
+ buf_biodone(bp);
+ return 0;
+ }
+ }
+
+#if CONFIG_DTRACE
+ if (dtrace_io_start_flag == 0) {
+ DTRACE_IO1(start, buf_t, bp);
+ }
+#endif /* CONFIG_DTRACE */
+
+#if CONFIG_PROTECT
+ /* Capture f_offset in the bufattr*/
+ cpx_t cpx = bufattr_cpx(buf_attr(bp));
+ if (cpx) {
+ /* No need to go here for older EAs */
+ if (cpx_use_offset_for_iv(cpx) && !cpx_synthetic_offset_for_iv(cpx)) {
+ off_t f_offset;
+ if ((error = VNOP_BLKTOOFF(bp->b_vp, bp->b_lblkno, &f_offset))) {
+ return error;
+ }
+
+ /*
+ * Attach the file offset to this buffer. The
+ * bufattr attributes will be passed down the stack
+ * until they reach the storage driver (whether
+ * IOFlashStorage, ASP, or IONVMe). The driver
+ * will retain the offset in a local variable when it
+ * issues its I/Os to the NAND controller.
+ *
+ * Note that LwVM may end up splitting this I/O
+ * into sub-I/Os if it crosses a chunk boundary. In this
+ * case, LwVM will update this field when it dispatches
+ * each I/O to IOFlashStorage. But from our perspective
+ * we have only issued a single I/O.
+ *
+ * In the case of APFS we do not bounce through another
+ * intermediate layer (such as CoreStorage). APFS will
+ * issue the I/Os directly to the block device / IOMedia
+ * via buf_strategy on the specfs node.
+ */
+ buf_setcpoff(bp, f_offset);
+ CP_DEBUG((CPDBG_OFFSET_IO | DBG_FUNC_NONE), (uint32_t) f_offset, (uint32_t) bp->b_lblkno, (uint32_t) bp->b_blkno, (uint32_t) bp->b_bcount, 0);
+ }
+ }
+#endif
+
+ /*
+ * we can issue the I/O because...
+ * either B_CLUSTER is set which
+ * means that the I/O is properly set
+ * up to be a multiple of the page size, or
+ * we were able to successfully set up the
+ * physical block mapping
+ */
+ error = VOCALL(devvp->v_op, VOFFSET(vnop_strategy), ap);
+ DTRACE_FSINFO(strategy, vnode_t, vp);
+ return error;
+}
+
+
+
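+/*
+ * Allocate and free io buffers; buf_alloc requests a privileged
+ * buffer when the calling thread is VM privileged.
+ */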
+buf_t
+buf_alloc(vnode_t vp)
+{
+ return alloc_io_buf(vp, is_vm_privileged());
+}
+
+void
+buf_free(buf_t bp)
+{
+ free_io_buf(bp);
+}
+
+
+/*
+ * iterate buffers for the specified vp.
+ * if BUF_SCAN_DIRTY is set, do the dirty list
+ * if BUF_SCAN_CLEAN is set, do the clean list
+ * if neither flag is set, default to BUF_SCAN_DIRTY
+ * if BUF_NOTIFY_BUSY is set, call the callout function using a NULL bp for busy pages
+ */
+
+struct buf_iterate_info_t {
+ int flag;
+ struct buflists *listhead;
+};
+
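+/*
+ * For example, a filesystem could walk a vnode's dirty buffers with
+ * something like
+ *
+ *	buf_iterate(vp, my_callout, BUF_SCAN_DIRTY | BUF_SKIP_LOCKED, arg);
+ *
+ * where my_callout (a placeholder for the caller's function) returns
+ * BUF_RETURNED to have each buffer released, or BUF_CLAIMED to keep it.
+ */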
+void
+buf_iterate(vnode_t vp, int (*callout)(buf_t, void *), int flags, void *arg)
+{
+ buf_t bp;
+ int retval;
+ struct buflists local_iterblkhd;
+ int lock_flags = BAC_NOWAIT | BAC_REMOVE;
+ int notify_busy = flags & BUF_NOTIFY_BUSY;
+ struct buf_iterate_info_t list[2];
+ int num_lists, i;
+
+ if (flags & BUF_SKIP_LOCKED) {
+ lock_flags |= BAC_SKIP_LOCKED;
+ }
+ if (flags & BUF_SKIP_NONLOCKED) {
+ lock_flags |= BAC_SKIP_NONLOCKED;
+ }
+
+ if (!(flags & (BUF_SCAN_DIRTY | BUF_SCAN_CLEAN))) {
+ flags |= BUF_SCAN_DIRTY;
+ }
+
+ num_lists = 0;
+
+ if (flags & BUF_SCAN_DIRTY) {
+ list[num_lists].flag = VBI_DIRTY;
+ list[num_lists].listhead = &vp->v_dirtyblkhd;
+ num_lists++;
+ }
+ if (flags & BUF_SCAN_CLEAN) {
+ list[num_lists].flag = VBI_CLEAN;
+ list[num_lists].listhead = &vp->v_cleanblkhd;
+ num_lists++;
+ }
+
+ for (i = 0; i < num_lists; i++) {
+ lck_mtx_lock(buf_mtxp);
+
+ if (buf_iterprepare(vp, &local_iterblkhd, list[i].flag)) {
+ lck_mtx_unlock(buf_mtxp);
+ continue;
+ }
+ while (!LIST_EMPTY(&local_iterblkhd)) {
+ bp = LIST_FIRST(&local_iterblkhd);
+ LIST_REMOVE(bp, b_vnbufs);
+ LIST_INSERT_HEAD(list[i].listhead, bp, b_vnbufs);
+
+ if (buf_acquire_locked(bp, lock_flags, 0, 0)) {
+ if (notify_busy) {
+ bp = NULL;
+ } else {
+ continue;
+ }
+ }
+
+ lck_mtx_unlock(buf_mtxp);
+
+ retval = callout(bp, arg);
+
+ switch (retval) {
+ case BUF_RETURNED:
+ if (bp) {
+ buf_brelse(bp);
+ }
+ break;
+ case BUF_CLAIMED:
+ break;
+ case BUF_RETURNED_DONE:
+ if (bp) {
+ buf_brelse(bp);
+ }
+ lck_mtx_lock(buf_mtxp);
+ goto out;
+ case BUF_CLAIMED_DONE:
+ lck_mtx_lock(buf_mtxp);
+ goto out;
+ }
+ lck_mtx_lock(buf_mtxp);
+ } /* while list has more nodes */
+out:
+ buf_itercomplete(vp, &local_iterblkhd, list[i].flag);
+ lck_mtx_unlock(buf_mtxp);
+ } /* for each list */
+} /* buf_iterate */
+
+
+/*
+ * Flush out and invalidate all buffers associated with a vnode.
+ */
+int
+buf_invalidateblks(vnode_t vp, int flags, int slpflag, int slptimeo)
+{
+ buf_t bp;
+ int aflags;
+ int error = 0;
+ int must_rescan = 1;
+ struct buflists local_iterblkhd;
+
+
+ if (LIST_EMPTY(&vp->v_cleanblkhd) && LIST_EMPTY(&vp->v_dirtyblkhd)) {
+ return 0;
+ }
+
+ lck_mtx_lock(buf_mtxp);
+
+ for (;;) {
+ if (must_rescan == 0) {
+ /*
+ * the lists may not be empty, but all that's left at this
+ * point are metadata or B_LOCKED buffers which are being
+ * skipped... we know this because we made it through both
+ * the clean and dirty lists without dropping buf_mtxp...
+ * each time we drop buf_mtxp we bump "must_rescan"
+ */
+ break;
+ }
+ if (LIST_EMPTY(&vp->v_cleanblkhd) && LIST_EMPTY(&vp->v_dirtyblkhd)) {
+ break;
+ }
+ must_rescan = 0;
+ /*
+ * iterate the clean list
+ */
+ if (buf_iterprepare(vp, &local_iterblkhd, VBI_CLEAN)) {
+ goto try_dirty_list;
+ }
+ while (!LIST_EMPTY(&local_iterblkhd)) {
+ bp = LIST_FIRST(&local_iterblkhd);
+
+ LIST_REMOVE(bp, b_vnbufs);
+ LIST_INSERT_HEAD(&vp->v_cleanblkhd, bp, b_vnbufs);
+
+ /*
+ * some filesystems distinguish meta data blocks with a negative logical block #
+ */
+ if ((flags & BUF_SKIP_META) && (bp->b_lblkno < 0 || ISSET(bp->b_flags, B_META))) {
+ continue;
+ }
+
+ aflags = BAC_REMOVE;
+
+ if (!(flags & BUF_INVALIDATE_LOCKED)) {
+ aflags |= BAC_SKIP_LOCKED;
+ }
+
+ if ((error = (int)buf_acquire_locked(bp, aflags, slpflag, slptimeo))) {
+ if (error == EDEADLK) {
+ /*
+ * this buffer was marked B_LOCKED...
+ * we didn't drop buf_mtxp, so
+ * we don't need to rescan
+ */
+ continue;
+ }
+ if (error == EAGAIN) {
+ /*
+ * found a busy buffer... we blocked and
+ * dropped buf_mtxp, so we're going to
+ * need to rescan after this pass is completed
+ */
+ must_rescan++;
+ continue;
+ }
+ /*
+ * got some kind of 'real' error out of the msleep
+ * in buf_acquire_locked, terminate the scan and return the error
+ */
+ buf_itercomplete(vp, &local_iterblkhd, VBI_CLEAN);
+
+ lck_mtx_unlock(buf_mtxp);
+ return error;
+ }
+ lck_mtx_unlock(buf_mtxp);
+
+ if (bp->b_flags & B_LOCKED) {
+ KERNEL_DEBUG(0xbbbbc038, bp, 0, 0, 0, 0);
+ }
+
+ CLR(bp->b_flags, B_LOCKED);
+ SET(bp->b_flags, B_INVAL);
+ buf_brelse(bp);
+
+ lck_mtx_lock(buf_mtxp);
+
+ /*
+ * by dropping buf_mtxp, we allow new
+ * buffers to be added to the vnode list(s)
+ * we'll have to rescan at least once more
+ * if the queues aren't empty
+ */
+ must_rescan++;
+ }
+ buf_itercomplete(vp, &local_iterblkhd, VBI_CLEAN);
+
+try_dirty_list:
+ /*
+ * Now iterate on dirty blks
+ */
+ if (buf_iterprepare(vp, &local_iterblkhd, VBI_DIRTY)) {
+ continue;
+ }
+ while (!LIST_EMPTY(&local_iterblkhd)) {
+ bp = LIST_FIRST(&local_iterblkhd);
+
+ LIST_REMOVE(bp, b_vnbufs);
+ LIST_INSERT_HEAD(&vp->v_dirtyblkhd, bp, b_vnbufs);
+
+ /*
+ * some filesystems distinguish meta data blocks with a negative logical block #
+ */
+ if ((flags & BUF_SKIP_META) && (bp->b_lblkno < 0 || ISSET(bp->b_flags, B_META))) {
+ continue;
+ }
+
+ aflags = BAC_REMOVE;
+
+ if (!(flags & BUF_INVALIDATE_LOCKED)) {
+ aflags |= BAC_SKIP_LOCKED;
+ }
+
+ if ((error = (int)buf_acquire_locked(bp, aflags, slpflag, slptimeo))) {
+ if (error == EDEADLK) {
+ /*
+ * this buffer was marked B_LOCKED...
+ * we didn't drop buf_mtxp, so
+ * we don't need to rescan
+ */
+ continue;
+ }
+ if (error == EAGAIN) {
+ /*
+ * found a busy buffer... we blocked and
+ * dropped buf_mtxp, so we're going to
+ * need to rescan after this pass is completed
+ */
+ must_rescan++;
+ continue;
+ }
+ /*
+ * got some kind of 'real' error out of the msleep
+ * in buf_acquire_locked, terminate the scan and return the error
+ */
+ buf_itercomplete(vp, &local_iterblkhd, VBI_DIRTY);
+
+ lck_mtx_unlock(buf_mtxp);
+ return error;
+ }
+ lck_mtx_unlock(buf_mtxp);
+
+ if (bp->b_flags & B_LOCKED) {
+ KERNEL_DEBUG(0xbbbbc038, bp, 0, 0, 1, 0);
+ }
+
+ CLR(bp->b_flags, B_LOCKED);
+ SET(bp->b_flags, B_INVAL);
+
+ if (ISSET(bp->b_flags, B_DELWRI) && (flags & BUF_WRITE_DATA)) {
+ (void) VNOP_BWRITE(bp);
+ } else {
+ buf_brelse(bp);
+ }
+
+ lck_mtx_lock(buf_mtxp);
+ /*
+ * by dropping buf_mtxp, we allow new
+ * buffers to be added to the vnode list(s)
+ * we'll have to rescan at least once more
+ * if the queues aren't empty
+ */
+ must_rescan++;
+ }
+ buf_itercomplete(vp, &local_iterblkhd, VBI_DIRTY);
+ }
+ lck_mtx_unlock(buf_mtxp);
+
+ return 0;
+}
+
+void
+buf_flushdirtyblks(vnode_t vp, int wait, int flags, const char *msg)
+{
+ (void) buf_flushdirtyblks_skipinfo(vp, wait, flags, msg);
+ return;
+}
+
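+/*
+ * Flush the dirty buffers of 'vp', optionally waiting for the writes
+ * to drain.  Returns non-zero if any B_LOCKED buffers had to be
+ * skipped (i.e. buf_acquire_locked returned EDEADLK).
+ */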
+int
+buf_flushdirtyblks_skipinfo(vnode_t vp, int wait, int flags, const char *msg)
+{
+ buf_t bp;
+ int writes_issued = 0;
+ errno_t error;
+ int busy = 0;
+ struct buflists local_iterblkhd;
+ int lock_flags = BAC_NOWAIT | BAC_REMOVE;
+ int any_locked = 0;
+
+ if (flags & BUF_SKIP_LOCKED) {
+ lock_flags |= BAC_SKIP_LOCKED;
+ }
+ if (flags & BUF_SKIP_NONLOCKED) {
+ lock_flags |= BAC_SKIP_NONLOCKED;
+ }
+loop:
+ lck_mtx_lock(buf_mtxp);
+
+ if (buf_iterprepare(vp, &local_iterblkhd, VBI_DIRTY) == 0) {
+ while (!LIST_EMPTY(&local_iterblkhd)) {
+ bp = LIST_FIRST(&local_iterblkhd);
+ LIST_REMOVE(bp, b_vnbufs);
+ LIST_INSERT_HEAD(&vp->v_dirtyblkhd, bp, b_vnbufs);
+
+ if ((error = buf_acquire_locked(bp, lock_flags, 0, 0)) == EBUSY) {
+ busy++;
+ }
+ if (error) {
+ /*
+ * If we passed in BUF_SKIP_LOCKED or BUF_SKIP_NONLOCKED,
+ * we may want to do something differently if a locked or unlocked
+ * buffer was encountered (depending on the arg specified).
+ * In this case, we know that one of those two was set, and the
+ * buf acquisition failed above.
+ *
+ * If it failed with EDEADLK, then save state which can be emitted
+ * later on to the caller. Most callers should not care.
+ */
+ if (error == EDEADLK) {
+ any_locked++;
+ }
+ continue;
+ }
+ lck_mtx_unlock(buf_mtxp);
+
+ bp->b_flags &= ~B_LOCKED;
+
+ /*
+ * Wait for I/O associated with indirect blocks to complete,
+ * since there is no way to quickly wait for them below.
+ */
+ if ((bp->b_vp == vp) || (wait == 0)) {
+ (void) buf_bawrite(bp);
+ } else {
+ (void) VNOP_BWRITE(bp);
+ }
+ writes_issued++;
+
+ lck_mtx_lock(buf_mtxp);
+ }
+ buf_itercomplete(vp, &local_iterblkhd, VBI_DIRTY);
+ }
+ lck_mtx_unlock(buf_mtxp);
+
+ if (wait) {
+ (void)vnode_waitforwrites(vp, 0, 0, 0, msg);
+
+ if (vp->v_dirtyblkhd.lh_first && busy) {
+ /*
+ * we had one or more BUSY buffers on
+ * the dirtyblock list... most likely
+ * these are due to delayed writes that
+ * were moved to the bclean queue but
+ * have not yet been 'written'.
+ * if we issued some writes on the
+ * previous pass, we try again immediately;
+ * if we didn't, we'll sleep for some time
+ * to allow the state to change...
+ */
+ if (writes_issued == 0) {
+ (void)tsleep((caddr_t)&vp->v_numoutput,
+ PRIBIO + 1, "vnode_flushdirtyblks", hz / 20);
+ }
+ writes_issued = 0;
+ busy = 0;
+
+ goto loop;
+ }
+ }
+
+ return any_locked;
+}
+
+
+/*
+ * called with buf_mtxp held...
+ * this lock protects the queue manipulation
+ */
+static int
+buf_iterprepare(vnode_t vp, struct buflists *iterheadp, int flags)
+{
+ struct buflists * listheadp;
+
+ if (flags & VBI_DIRTY) {
+ listheadp = &vp->v_dirtyblkhd;
+ } else {
+ listheadp = &vp->v_cleanblkhd;
+ }
+
+ while (vp->v_iterblkflags & VBI_ITER) {
+ vp->v_iterblkflags |= VBI_ITERWANT;
+ msleep(&vp->v_iterblkflags, buf_mtxp, 0, "buf_iterprepare", NULL);
+ }
+ if (LIST_EMPTY(listheadp)) {
+ LIST_INIT(iterheadp);
+ return EINVAL;
+ }
+ vp->v_iterblkflags |= VBI_ITER;
+
+ iterheadp->lh_first = listheadp->lh_first;
+ listheadp->lh_first->b_vnbufs.le_prev = &iterheadp->lh_first;
+ LIST_INIT(listheadp);
+
+ return 0;
+}
+
+/*
+ * called with buf_mtxp held...
+ * this lock protects the queue manipulation
+ */
+static void
+buf_itercomplete(vnode_t vp, struct buflists *iterheadp, int flags)
+{
+ struct buflists * listheadp;
+ buf_t bp;
+
+ if (flags & VBI_DIRTY) {
+ listheadp = &vp->v_dirtyblkhd;
+ } else {
+ listheadp = &vp->v_cleanblkhd;
+ }
+
+ while (!LIST_EMPTY(iterheadp)) {
+ bp = LIST_FIRST(iterheadp);
+ LIST_REMOVE(bp, b_vnbufs);
+ LIST_INSERT_HEAD(listheadp, bp, b_vnbufs);
+ }
+ vp->v_iterblkflags &= ~VBI_ITER;
+
+ if (vp->v_iterblkflags & VBI_ITERWANT) {
+ vp->v_iterblkflags &= ~VBI_ITERWANT;
+ wakeup(&vp->v_iterblkflags);
+ }
+}
+
+
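+/*
+ * Remove 'bp' from the free queue it is on; buf_mtxp must be held.
+ * A buffer with b_whichq == -1 is not on any freelist, which is only
+ * legitimate while shadow references are outstanding.
+ */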
+static void
+bremfree_locked(buf_t bp)
+{
+ struct bqueues *dp = NULL;
+ int whichq;
+
+ whichq = bp->b_whichq;
+
+ if (whichq == -1) {
+ if (bp->b_shadow_ref == 0) {
+ panic("bremfree_locked: %p not on freelist", bp);
+ }
+ /*
+ * there are clones pointing to 'bp'...
+ * therefore, it was not put on a freelist
+ * when buf_brelse was last called on 'bp'
+ */
+ return;
+ }
+ /*
+ * We only calculate the head of the freelist when removing
+ * the last element of the list as that is the only time that
+ * it is needed (e.g. to reset the tail pointer).
+ *
+ * NB: This makes an assumption about how tailq's are implemented.
+ */
+ if (bp->b_freelist.tqe_next == NULL) {
+ dp = &bufqueues[whichq];
+
+ if (dp->tqh_last != &bp->b_freelist.tqe_next) {
+ panic("bremfree: lost tail");
+ }
+ }
+ TAILQ_REMOVE(dp, bp, b_freelist);
+
+ if (whichq == BQ_LAUNDRY) {
+ blaundrycnt--;
+ }
+
+ bp->b_whichq = -1;
+ bp->b_timestamp = 0;
+ bp->b_shadow = 0;
+}
+
+/*
+ * Associate a buffer with a vnode.
+ * buf_mtxp must be locked on entry
+ */
+static void
+bgetvp_locked(vnode_t vp, buf_t bp)
+{
+ if (bp->b_vp != vp) {
+ panic("bgetvp_locked: not free");
+ }
+
+ if (vp->v_type == VBLK || vp->v_type == VCHR) {
+ bp->b_dev = vp->v_rdev;
+ } else {
+ bp->b_dev = NODEV;
+ }
+ /*
+ * Insert onto list for new vnode.
+ */
+ bufinsvn(bp, &vp->v_cleanblkhd);
+}
+
+/*
+ * Disassociate a buffer from a vnode.
+ * buf_mtxp must be locked on entry
+ */
+static void
+brelvp_locked(buf_t bp)
+{
+ /*
+ * Delete from old vnode list, if on one.
+ */
+ if (bp->b_vnbufs.le_next != NOLIST) {
+ bufremvn(bp);
+ }
+
+ bp->b_vp = (vnode_t)NULL;
+}
+
+/*
+ * Reassign a buffer from one vnode to another.
+ * Used to assign file specific control information
+ * (indirect blocks) to the vnode to which they belong.
+ */
+static void
+buf_reassign(buf_t bp, vnode_t newvp)
+{
+ struct buflists *listheadp;
+
+ if (newvp == NULL) {
+ printf("buf_reassign: NULL");
+ return;
+ }
+ lck_mtx_lock_spin(buf_mtxp);
+
+ /*
+ * Delete from old vnode list, if on one.
+ */
+ if (bp->b_vnbufs.le_next != NOLIST) {
+ bufremvn(bp);
+ }
+ /*
+ * If dirty, put on list of dirty buffers;
+ * otherwise insert onto list of clean buffers.
+ */
+ if (ISSET(bp->b_flags, B_DELWRI)) {
+ listheadp = &newvp->v_dirtyblkhd;
+ } else {
+ listheadp = &newvp->v_cleanblkhd;
+ }
+ bufinsvn(bp, listheadp);
+
+ lck_mtx_unlock(buf_mtxp);
+}
+
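+/*
+ * Initialize a buffer header to a known, empty state.
+ */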
+static __inline__ void
+bufhdrinit(buf_t bp)
+{
+ bzero((char *)bp, sizeof *bp);
+ bp->b_dev = NODEV;
+ bp->b_rcred = NOCRED;
+ bp->b_wcred = NOCRED;
+ bp->b_vnbufs.le_next = NOLIST;
+ bp->b_flags = B_INVAL;
+
+ return;
+}
+
+/*
+ * Initialize buffers and hash links for buffers.
+ */
+__private_extern__ void
+bufinit(void)
+{
+ buf_t bp;
+ struct bqueues *dp;
+ int i;
+
+ nbuf_headers = 0;
+ /* Initialize the buffer queues ('freelists') and the hash table */
+ for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++) {
+ TAILQ_INIT(dp);
+ }
+ bufhashtbl = hashinit(nbuf_hashelements, M_CACHE, &bufhash);
+
+ buf_busycount = 0;
+
+ /* Initialize the buffer headers */
+ for (i = 0; i < max_nbuf_headers; i++) {
+ nbuf_headers++;
+ bp = &buf_headers[i];
+ bufhdrinit(bp);
+
+ BLISTNONE(bp);
+ dp = &bufqueues[BQ_EMPTY];
+ bp->b_whichq = BQ_EMPTY;
+ bp->b_timestamp = buf_timestamp();
+ binsheadfree(bp, dp, BQ_EMPTY);
+ binshash(bp, &invalhash);
+ }
+ boot_nbuf_headers = nbuf_headers;
+
+ TAILQ_INIT(&iobufqueue);
+ TAILQ_INIT(&delaybufqueue);
+
+ for (; i < nbuf_headers + niobuf_headers; i++) {
+ bp = &buf_headers[i];
+ bufhdrinit(bp);
+ bp->b_whichq = -1;
+ binsheadfree(bp, &iobufqueue, -1);
+ }
+
+ /*
+ * allocate lock group attribute and group
+ */
+ buf_mtx_grp_attr = lck_grp_attr_alloc_init();
+ buf_mtx_grp = lck_grp_alloc_init("buffer cache", buf_mtx_grp_attr);
+
+ /*
+ * allocate the lock attribute
+ */
+ buf_mtx_attr = lck_attr_alloc_init();
+
+ /*
+ * allocate and initialize mutex's for the buffer and iobuffer pools
+ */
+ buf_mtxp = lck_mtx_alloc_init(buf_mtx_grp, buf_mtx_attr);
+ iobuffer_mtxp = lck_mtx_alloc_init(buf_mtx_grp, buf_mtx_attr);
+ buf_gc_callout = lck_mtx_alloc_init(buf_mtx_grp, buf_mtx_attr);
+
+ if (iobuffer_mtxp == NULL) {
+ panic("couldn't create iobuffer mutex");
+ }
+
+ if (buf_mtxp == NULL) {
+ panic("couldn't create buf mutex");
+ }
+
+ if (buf_gc_callout == NULL) {
+ panic("couldn't create buf_gc_callout mutex");
+ }
+
+ /*
+ * allocate and initialize cluster specific global locks...
+ */
+ cluster_init();
+
+ printf("using %d buffer headers and %d cluster IO buffer headers\n",
+ nbuf_headers, niobuf_headers);
+
+ /* Set up zones used by the buffer cache */
+ bufzoneinit();
+
+ /* start the bcleanbuf() thread */
+ bcleanbuf_thread_init();
+
+ /* Register a callout for relieving vm pressure */
+ if (vm_set_buffer_cleanup_callout(buffer_cache_gc) != KERN_SUCCESS) {
+ panic("Couldn't register buffer cache callout for vm pressure!\n");
+ }
+}
+
+/*
+ * Zones for the meta data buffers