X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/fe8ab488e9161c46dd9885d58fc52996dc0249ff..d26ffc64f583ab2d29df48f13518685602bc8832:/bsd/vfs/vfs_bio.c diff --git a/bsd/vfs/vfs_bio.c b/bsd/vfs/vfs_bio.c index c6e919d9e..c1019a327 100644 --- a/bsd/vfs/vfs_bio.c +++ b/bsd/vfs/vfs_bio.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2014 Apple Inc. All rights reserved. + * Copyright (c) 2000-2016 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -95,6 +95,7 @@ #include #include /* fslog_io_error() */ +#include /* dk_error_description_t */ #include #include @@ -110,7 +111,6 @@ #include #include -#include int bcleanbuf(buf_t bp, boolean_t discard); static int brecover_data(buf_t bp); @@ -130,7 +130,9 @@ static buf_t buf_create_shadow_internal(buf_t bp, boolean_t force_copy, uintptr_t external_storage, void (*iodone)(buf_t, void *), void *arg, int priv); -__private_extern__ int bdwrite_internal(buf_t, int); +int bdwrite_internal(buf_t, int); + +extern void disk_conditioner_delay(buf_t, int, int, uint64_t); /* zone allocated buffer headers */ static void bufzoneinit(void); @@ -171,9 +173,18 @@ static lck_attr_t *buf_mtx_attr; static lck_grp_attr_t *buf_mtx_grp_attr; static lck_mtx_t *iobuffer_mtxp; static lck_mtx_t *buf_mtxp; +static lck_mtx_t *buf_gc_callout; static int buf_busycount; +#define FS_BUFFER_CACHE_GC_CALLOUTS_MAX_SIZE 16 +typedef struct { + void (* callout)(int, void *); + void *context; +} fs_buffer_cache_gc_callout_t; + +fs_buffer_cache_gc_callout_t fs_callouts[FS_BUFFER_CACHE_GC_CALLOUTS_MAX_SIZE] = { {NULL, NULL} }; + static __inline__ int buf_timestamp(void) { @@ -366,9 +377,14 @@ buf_markfua(buf_t bp) { } #if CONFIG_PROTECT -void -buf_setcpaddr(buf_t bp, struct cprotect *entry) { - bp->b_attr.ba_cpentry = entry; +cpx_t bufattr_cpx(bufattr_t bap) +{ + return bap->ba_cpx; +} + +void bufattr_setcpx(bufattr_t bap, cpx_t cpx) +{ + bap->ba_cpx = cpx; } void @@ -376,46 +392,38 @@ buf_setcpoff (buf_t bp, uint64_t foffset) { bp->b_attr.ba_cp_file_off = foffset; } -void * -bufattr_cpaddr(bufattr_t bap) { - return (bap->ba_cpentry); -} - uint64_t bufattr_cpoff(bufattr_t bap) { - return (bap->ba_cp_file_off); -} - -void -bufattr_setcpaddr(bufattr_t bap, void *cp_entry_addr) { - bap->ba_cpentry = cp_entry_addr; + return bap->ba_cp_file_off; } void bufattr_setcpoff(bufattr_t bap, uint64_t foffset) { - bap->ba_cp_file_off = foffset; + bap->ba_cp_file_off = foffset; } -#else -void * -bufattr_cpaddr(bufattr_t bap __unused) { - return NULL; -} +#else // !CONTECT_PROTECT uint64_t bufattr_cpoff(bufattr_t bap __unused) { return 0; } -void -bufattr_setcpaddr(bufattr_t bap __unused, void *cp_entry_addr __unused) { -} - void bufattr_setcpoff(__unused bufattr_t bap, __unused uint64_t foffset) { return; } -#endif /* CONFIG_PROTECT */ + +struct cpx *bufattr_cpx(__unused bufattr_t bap) +{ + return NULL; +} + +void bufattr_setcpx(__unused bufattr_t bap, __unused struct cpx *cpx) +{ +} + +#endif /* !CONFIG_PROTECT */ bufattr_t bufattr_alloc() { @@ -485,10 +493,16 @@ bufattr_markmeta(bufattr_t bap) { } int +#if !CONFIG_EMBEDDED bufattr_delayidlesleep(bufattr_t bap) +#else /* !CONFIG_EMBEDDED */ +bufattr_delayidlesleep(__unused bufattr_t bap) +#endif /* !CONFIG_EMBEDDED */ { +#if !CONFIG_EMBEDDED if ( (bap->ba_flags & BA_DELAYIDLESLEEP) ) return 1; +#endif /* !CONFIG_EMBEDDED */ return 0; } @@ -685,6 +699,8 @@ buf_callback(buf_t bp) errno_t buf_setcallback(buf_t bp, void (*callback)(buf_t, void *), void *transaction) { + assert(!ISSET(bp->b_flags, B_FILTER) && 
ISSET(bp->b_lflags, BL_BUSY)); + if (callback) bp->b_flags |= (B_CALL | B_ASYNC); else @@ -920,6 +936,8 @@ void buf_setfilter(buf_t bp, void (*filter)(buf_t, void *), void *transaction, void (**old_iodone)(buf_t, void *), void **old_transaction) { + assert(ISSET(bp->b_lflags, BL_BUSY)); + if (old_iodone) *old_iodone = bp->b_iodone; if (old_transaction) @@ -1317,9 +1335,10 @@ buf_strategy(vnode_t devvp, void *ap) #if CONFIG_PROTECT /* Capture f_offset in the bufattr*/ - if (bp->b_attr.ba_cpentry != 0) { + cpx_t cpx = bufattr_cpx(buf_attr(bp)); + if (cpx) { /* No need to go here for older EAs */ - if(bp->b_attr.ba_cpentry->cp_flags & CP_OFF_IV_ENABLED) { + if(cpx_use_offset_for_iv(cpx) && !cpx_synthetic_offset_for_iv(cpx)) { off_t f_offset; if ((error = VNOP_BLKTOOFF(bp->b_vp, bp->b_lblkno, &f_offset))) return error; @@ -1327,7 +1346,8 @@ buf_strategy(vnode_t devvp, void *ap) /* * Attach the file offset to this buffer. The * bufattr attributes will be passed down the stack - * until they reach IOFlashStorage. IOFlashStorage + * until they reach the storage driver (whether + * IOFlashStorage, ASP, or IONVMe). The driver * will retain the offset in a local variable when it * issues its I/Os to the NAND controller. * @@ -1336,8 +1356,13 @@ buf_strategy(vnode_t devvp, void *ap) * case, LwVM will update this field when it dispatches * each I/O to IOFlashStorage. But from our perspective * we have only issued a single I/O. + * + * In the case of APFS we do not bounce through another + * intermediate layer (such as CoreStorage). APFS will + * issue the I/Os directly to the block device / IOMedia + * via buf_strategy on the specfs node. */ - bufattr_setcpoff (&(bp->b_attr), (u_int64_t)f_offset); + buf_setcpoff(bp, f_offset); CP_DEBUG((CPDBG_OFFSET_IO | DBG_FUNC_NONE), (uint32_t) f_offset, (uint32_t) bp->b_lblkno, (uint32_t) bp->b_blkno, (uint32_t) bp->b_bcount, 0); } } @@ -1361,7 +1386,7 @@ buf_strategy(vnode_t devvp, void *ap) buf_t buf_alloc(vnode_t vp) { - return(alloc_io_buf(vp, 0)); + return(alloc_io_buf(vp, is_vm_privileged())); } void @@ -1989,6 +2014,7 @@ bufinit(void) */ buf_mtxp = lck_mtx_alloc_init(buf_mtx_grp, buf_mtx_attr); iobuffer_mtxp = lck_mtx_alloc_init(buf_mtx_grp, buf_mtx_attr); + buf_gc_callout = lck_mtx_alloc_init(buf_mtx_grp, buf_mtx_attr); if (iobuffer_mtxp == NULL) panic("couldn't create iobuffer mutex"); @@ -1996,6 +2022,9 @@ bufinit(void) if (buf_mtxp == NULL) panic("couldn't create buf mutex"); + if (buf_gc_callout == NULL) + panic("couldn't create buf_gc_callout mutex"); + /* * allocate and initialize cluster specific global locks... */ @@ -2022,7 +2051,7 @@ bufinit(void) */ #define MINMETA 512 -#define MAXMETA 8192 +#define MAXMETA 16384 struct meta_zone_entry { zone_t mz_zone; @@ -2037,6 +2066,7 @@ struct meta_zone_entry meta_zones[] = { {NULL, (MINMETA * 4), 16 * (MINMETA * 4), "buf.2048" }, {NULL, (MINMETA * 8), 512 * (MINMETA * 8), "buf.4096" }, {NULL, (MINMETA * 16), 512 * (MINMETA * 16), "buf.8192" }, + {NULL, (MINMETA * 32), 512 * (MINMETA * 32), "buf.16384" }, {NULL, 0, 0, "" } /* End */ }; @@ -2275,12 +2305,7 @@ buf_bwrite(buf_t bp) } /* Release the buffer. */ - // XXXdbg - only if the unused bit is set - if (!ISSET(bp->b_flags, B_NORELSE)) { - buf_brelse(bp); - } else { - CLR(bp->b_flags, B_NORELSE); - } + buf_brelse(bp); return (rv); } else { @@ -2313,7 +2338,7 @@ vn_bwrite(struct vnop_bwrite_args *ap) * buffers faster than the disks can service. Doing a buf_bawrite() in * cases where we have "too many" outstanding buf_bdwrite()s avoids that. 
*/ -__private_extern__ int +int bdwrite_internal(buf_t bp, int return_error) { proc_t p = current_proc(); @@ -2447,7 +2472,7 @@ buf_brelse_shadow(buf_t bp) lck_mtx_lock_spin(buf_mtxp); - bp_head = (buf_t)bp->b_orig; + __IGNORE_WCASTALIGN(bp_head = (buf_t)bp->b_orig); if (bp_head->b_whichq != -1) panic("buf_brelse_shadow: bp_head on freelist %d\n", bp_head->b_whichq); @@ -2612,12 +2637,13 @@ buf_brelse(buf_t bp) if (upl == NULL) { if ( !ISSET(bp->b_flags, B_INVAL)) { - kret = ubc_create_upl(bp->b_vp, + kret = ubc_create_upl_kernel(bp->b_vp, ubc_blktooff(bp->b_vp, bp->b_lblkno), bp->b_bufsize, &upl, NULL, - UPL_PRECIOUS); + UPL_PRECIOUS, + VM_KERN_MEMORY_FILE); if (kret != KERN_SUCCESS) panic("brelse: Failed to create UPL"); @@ -2939,7 +2965,6 @@ start: return (NULL); goto start; /*NOTREACHED*/ - break; default: /* @@ -2950,6 +2975,8 @@ start: break; } } else { + int clear_bdone; + /* * buffer in core and not busy */ @@ -2968,8 +2995,41 @@ start: if ( (bp->b_upl) ) panic("buffer has UPL, but not marked BUSY: %p", bp); - if ( !ret_only_valid && bp->b_bufsize != size) - allocbuf(bp, size); + clear_bdone = FALSE; + if (!ret_only_valid) { + /* + * If the number bytes that are valid is going + * to increase (even if we end up not doing a + * reallocation through allocbuf) we have to read + * the new size first. + * + * This is required in cases where we doing a read + * modify write of a already valid data on disk but + * in cases where the data on disk beyond (blkno + b_bcount) + * is invalid, we may end up doing extra I/O. + */ + if (operation == BLK_META && bp->b_bcount < size) { + /* + * Since we are going to read in the whole size first + * we first have to ensure that any pending delayed write + * is flushed to disk first. + */ + if (ISSET(bp->b_flags, B_DELWRI)) { + CLR(bp->b_flags, B_CACHE); + buf_bwrite(bp); + goto start; + } + /* + * clear B_DONE before returning from + * this function so that the caller can + * can issue a read for the new size. + */ + clear_bdone = TRUE; + } + + if (bp->b_bufsize != size) + allocbuf(bp, size); + } upl_flags = 0; switch (operation) { @@ -2983,12 +3043,13 @@ start: case BLK_READ: upl_flags |= UPL_PRECIOUS; if (UBCINFOEXISTS(bp->b_vp) && bp->b_bufsize) { - kret = ubc_create_upl(vp, + kret = ubc_create_upl_kernel(vp, ubc_blktooff(vp, bp->b_lblkno), bp->b_bufsize, &upl, &pl, - upl_flags); + upl_flags, + VM_KERN_MEMORY_FILE); if (kret != KERN_SUCCESS) panic("Failed to create UPL"); @@ -3021,6 +3082,9 @@ start: /*NOTREACHED*/ break; } + + if (clear_bdone) + CLR(bp->b_flags, B_DONE); } } else { /* not incore() */ int queue = BQ_EMPTY; /* Start with no preference */ @@ -3104,18 +3168,38 @@ start: size_t contig_bytes; int bmap_flags; +#if DEVELOPMENT || DEBUG + /* + * Apple implemented file systems use UBC excludively; they should + * not call in here." + */ + const char* excldfs[] = {"hfs", "afpfs", "smbfs", "acfs", + "exfat", "msdos", "webdav", NULL}; + + for (int i = 0; excldfs[i] != NULL; i++) { + if (vp->v_mount && + !strcmp(vp->v_mount->mnt_vfsstat.f_fstypename, + excldfs[i])) { + panic("%s %s calls buf_getblk", + excldfs[i], + operation == BLK_READ ? 
"BLK_READ" : "BLK_WRITE"); + } + } +#endif + if ( (bp->b_upl) ) panic("bp already has UPL: %p",bp); f_offset = ubc_blktooff(vp, blkno); upl_flags |= UPL_PRECIOUS; - kret = ubc_create_upl(vp, + kret = ubc_create_upl_kernel(vp, f_offset, bp->b_bufsize, &upl, &pl, - upl_flags); + upl_flags, + VM_KERN_MEMORY_FILE); if (kret != KERN_SUCCESS) panic("Failed to create UPL"); @@ -3355,7 +3439,7 @@ allocbuf(buf_t bp, int size) *(void **)(&bp->b_datap) = grab_memory_for_meta_buf(nsize); } else { bp->b_datap = (uintptr_t)NULL; - kmem_alloc_kobject(kernel_map, (vm_offset_t *)&bp->b_datap, desired_size); + kmem_alloc_kobject(kernel_map, (vm_offset_t *)&bp->b_datap, desired_size, VM_KERN_MEMORY_FILE); CLR(bp->b_flags, B_ZALLOC); } bcopy((void *)elem, (caddr_t)bp->b_datap, bp->b_bufsize); @@ -3368,7 +3452,7 @@ allocbuf(buf_t bp, int size) if ((vm_size_t)bp->b_bufsize < desired_size) { /* reallocate to a bigger size */ bp->b_datap = (uintptr_t)NULL; - kmem_alloc_kobject(kernel_map, (vm_offset_t *)&bp->b_datap, desired_size); + kmem_alloc_kobject(kernel_map, (vm_offset_t *)&bp->b_datap, desired_size, VM_KERN_MEMORY_FILE); bcopy((const void *)elem, (caddr_t)bp->b_datap, bp->b_bufsize); kmem_free(kernel_map, elem, bp->b_bufsize); } else { @@ -3384,7 +3468,7 @@ allocbuf(buf_t bp, int size) *(void **)(&bp->b_datap) = grab_memory_for_meta_buf(nsize); SET(bp->b_flags, B_ZALLOC); } else - kmem_alloc_kobject(kernel_map, (vm_offset_t *)&bp->b_datap, desired_size); + kmem_alloc_kobject(kernel_map, (vm_offset_t *)&bp->b_datap, desired_size, VM_KERN_MEMORY_FILE); } if (bp->b_datap == 0) @@ -3660,8 +3744,6 @@ bcleanbuf(buf_t bp, boolean_t discard) buf_release_credentials(bp); - bp->b_redundancy_flags = 0; - /* If discarding, just move to the empty queue */ if (discard) { lck_mtx_lock_spin(buf_mtxp); @@ -3676,6 +3758,7 @@ bcleanbuf(buf_t bp, boolean_t discard) bp->b_bufsize = 0; bp->b_datap = (uintptr_t)NULL; bp->b_upl = (void *)NULL; + bp->b_fsprivate = (void *)NULL; /* * preserve the state of whether this buffer * was allocated on the fly or not... 
@@ -3688,6 +3771,7 @@ bcleanbuf(buf_t bp, boolean_t discard) #endif bp->b_lflags = BL_BUSY; bp->b_flags = (bp->b_flags & B_HDRALLOC); + bp->b_redundancy_flags = 0; bp->b_dev = NODEV; bp->b_blkno = bp->b_lblkno = 0; bp->b_iodone = NULL; @@ -3895,6 +3979,8 @@ buf_biodone(buf_t bp) { mount_t mp; struct bufattr *bap; + struct timeval real_elapsed; + uint64_t real_elapsed_usec = 0; KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 387)) | DBG_FUNC_START, bp, bp->b_datap, bp->b_flags, 0, 0); @@ -3910,6 +3996,16 @@ buf_biodone(buf_t bp) mp = NULL; } + if (ISSET(bp->b_flags, B_ERROR)) { + if (mp && (MNT_ROOTFS & mp->mnt_flag)) { + dk_error_description_t desc; + bzero(&desc, sizeof(desc)); + desc.description = panic_disk_error_description; + desc.description_size = panic_disk_error_description_size; + VNOP_IOCTL(mp->mnt_devvp, DKIOCGETERRORDESCRIPTION, (caddr_t)&desc, 0, vfs_context_kernel()); + } + } + if (mp && (bp->b_flags & B_READ) == 0) { update_last_io_time(mp); INCR_PENDING_IO(-(pending_io_t)buf_count(bp), mp->mnt_pending_write_size); @@ -3917,6 +4013,8 @@ buf_biodone(buf_t bp) INCR_PENDING_IO(-(pending_io_t)buf_count(bp), mp->mnt_pending_read_size); } + throttle_info_end_io(bp); + if (kdebug_enable) { int code = DKIO_DONE; int io_tier = GET_BUFATTR_IO_TIER(bap); @@ -3942,10 +4040,19 @@ buf_biodone(buf_t bp) if (bap->ba_flags & BA_NOCACHE) code |= DKIO_NOCACHE; + if (bap->ba_flags & BA_IO_TIER_UPGRADE) { + code |= DKIO_TIER_UPGRADE; + } + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_COMMON, FSDBG_CODE(DBG_DKRW, code) | DBG_FUNC_NONE, buf_kernel_addrperm_addr(bp), (uintptr_t)VM_KERNEL_ADDRPERM(bp->b_vp), bp->b_resid, bp->b_error, 0); } + microuptime(&real_elapsed); + timevalsub(&real_elapsed, &bp->b_timestamp_tv); + real_elapsed_usec = real_elapsed.tv_sec * USEC_PER_SEC + real_elapsed.tv_usec; + disk_conditioner_delay(bp, 1, bp->b_bcount, real_elapsed_usec); + /* * I/O was done, so don't believe * the DIRTY state from VM anymore... @@ -3953,7 +4060,7 @@ buf_biodone(buf_t bp) * indicators */ CLR(bp->b_flags, (B_WASDIRTY | B_PASSIVE)); - CLR(bap->ba_flags, (BA_META | BA_NOCACHE | BA_DELAYIDLESLEEP)); + CLR(bap->ba_flags, (BA_META | BA_NOCACHE | BA_DELAYIDLESLEEP | BA_IO_TIER_UPGRADE)); SET_BUFATTR_IO_TIER(bap, 0); @@ -4113,20 +4220,48 @@ vfs_bufstats() #define NRESERVEDIOBUFS 128 +#define MNT_VIRTUALDEV_MAX_IOBUFS 16 +#define VIRTUALDEV_MAX_IOBUFS ((40*niobuf_headers)/100) buf_t alloc_io_buf(vnode_t vp, int priv) { buf_t bp; + mount_t mp = NULL; + int alloc_for_virtualdev = FALSE; lck_mtx_lock_spin(iobuffer_mtxp); + /* + * We subject iobuf requests for diskimages to additional restrictions. + * + * a) A single diskimage mount cannot use up more than + * MNT_VIRTUALDEV_MAX_IOBUFS. However,vm privileged (pageout) requests + * are not subject to this restriction. + * b) iobuf headers used by all diskimage headers by all mount + * points cannot exceed VIRTUALDEV_MAX_IOBUFS. 
+ */ + if (vp && ((mp = vp->v_mount)) && mp != dead_mountp && + mp->mnt_kern_flag & MNTK_VIRTUALDEV) { + alloc_for_virtualdev = TRUE; + while ((!priv && mp->mnt_iobufinuse > MNT_VIRTUALDEV_MAX_IOBUFS) || + bufstats.bufs_iobufinuse_vdev > VIRTUALDEV_MAX_IOBUFS) { + bufstats.bufs_iobufsleeps++; + + need_iobuffer = 1; + (void)msleep(&need_iobuffer, iobuffer_mtxp, + PSPIN | (PRIBIO+1), (const char *)"alloc_io_buf (1)", + NULL); + } + } + while (((niobuf_headers - NRESERVEDIOBUFS < bufstats.bufs_iobufinuse) && !priv) || (bp = iobufqueue.tqh_first) == NULL) { bufstats.bufs_iobufsleeps++; need_iobuffer = 1; - (void) msleep(&need_iobuffer, iobuffer_mtxp, PSPIN | (PRIBIO+1), (const char *)"alloc_io_buf", NULL); + (void)msleep(&need_iobuffer, iobuffer_mtxp, PSPIN | (PRIBIO+1), + (const char *)"alloc_io_buf (2)", NULL); } TAILQ_REMOVE(&iobufqueue, bp, b_freelist); @@ -4134,6 +4269,11 @@ alloc_io_buf(vnode_t vp, int priv) if (bufstats.bufs_iobufinuse > bufstats.bufs_iobufmax) bufstats.bufs_iobufmax = bufstats.bufs_iobufinuse; + if (alloc_for_virtualdev) { + mp->mnt_iobufinuse++; + bufstats.bufs_iobufinuse_vdev++; + } + lck_mtx_unlock(iobuffer_mtxp); /* @@ -4148,6 +4288,8 @@ alloc_io_buf(vnode_t vp, int priv) bp->b_datap = 0; bp->b_flags = 0; bp->b_lflags = BL_BUSY | BL_IOBUF; + if (alloc_for_virtualdev) + bp->b_lflags |= BL_IOBUF_VDEV; bp->b_redundancy_flags = 0; bp->b_blkno = bp->b_lblkno = 0; #ifdef JOE_DEBUG @@ -4160,6 +4302,7 @@ alloc_io_buf(vnode_t vp, int priv) bp->b_bcount = 0; bp->b_bufsize = 0; bp->b_upl = NULL; + bp->b_fsprivate = (void *)NULL; bp->b_vp = vp; bzero(&bp->b_attr, sizeof(struct bufattr)); @@ -4175,7 +4318,16 @@ alloc_io_buf(vnode_t vp, int priv) void free_io_buf(buf_t bp) { - int need_wakeup = 0; + int need_wakeup = 0; + int free_for_virtualdev = FALSE; + mount_t mp = NULL; + + /* Was this iobuf for a diskimage ? 
*/ + if (bp->b_lflags & BL_IOBUF_VDEV) { + free_for_virtualdev = TRUE; + if (bp->b_vp) + mp = bp->b_vp->v_mount; + } /* * put buffer back on the head of the iobufqueue @@ -4208,6 +4360,12 @@ free_io_buf(buf_t bp) bufstats.bufs_iobufinuse--; + if (free_for_virtualdev) { + bufstats.bufs_iobufinuse_vdev--; + if (mp && mp != dead_mountp) + mp->mnt_iobufinuse--; + } + lck_mtx_unlock(iobuffer_mtxp); if (need_wakeup) @@ -4246,6 +4404,7 @@ bcleanbuf_thread_init(void) typedef int (*bcleanbufcontinuation)(int); +__attribute__((noreturn)) static void bcleanbuf_thread(void) { @@ -4343,12 +4502,13 @@ brecover_data(buf_t bp) upl_flags |= UPL_WILL_MODIFY; } - kret = ubc_create_upl(vp, + kret = ubc_create_upl_kernel(vp, ubc_blktooff(vp, bp->b_lblkno), bp->b_bufsize, &upl, &pl, - upl_flags); + upl_flags, + VM_KERN_MEMORY_FILE); if (kret != KERN_SUCCESS) panic("Failed to create UPL"); @@ -4375,6 +4535,50 @@ dump_buffer: return(0); } +int +fs_buffer_cache_gc_register(void (* callout)(int, void *), void *context) +{ + lck_mtx_lock(buf_gc_callout); + for (int i = 0; i < FS_BUFFER_CACHE_GC_CALLOUTS_MAX_SIZE; i++) { + if (fs_callouts[i].callout == NULL) { + fs_callouts[i].callout = callout; + fs_callouts[i].context = context; + lck_mtx_unlock(buf_gc_callout); + return 0; + } + } + + lck_mtx_unlock(buf_gc_callout); + return ENOMEM; +} + +int +fs_buffer_cache_gc_unregister(void (* callout)(int, void *), void *context) +{ + lck_mtx_lock(buf_gc_callout); + for (int i = 0; i < FS_BUFFER_CACHE_GC_CALLOUTS_MAX_SIZE; i++) { + if (fs_callouts[i].callout == callout && + fs_callouts[i].context == context) { + fs_callouts[i].callout = NULL; + fs_callouts[i].context = NULL; + } + } + lck_mtx_unlock(buf_gc_callout); + return 0; +} + +static void +fs_buffer_cache_gc_dispatch_callouts(int all) +{ + lck_mtx_lock(buf_gc_callout); + for(int i = 0; i < FS_BUFFER_CACHE_GC_CALLOUTS_MAX_SIZE; i++) { + if (fs_callouts[i].callout != NULL) { + fs_callouts[i].callout(all, fs_callouts[i].context); + } + } + lck_mtx_unlock(buf_gc_callout); +} + boolean_t buffer_cache_gc(int all) { @@ -4504,6 +4708,8 @@ buffer_cache_gc(int all) lck_mtx_unlock(buf_mtxp); + fs_buffer_cache_gc_dispatch_callouts(all); + return did_large_zfree; }
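
The buf_getblk() change above adds a grow-and-reread path for cached metadata buffers: when an incore BLK_META buffer is found with b_bcount smaller than the requested size, any pending delayed write is flushed with buf_bwrite() first and B_DONE is cleared, so the caller's subsequent read really pulls the full new size from disk. A minimal sketch of what that buys a filesystem client follows; buf_meta_bread(), buf_brelse() and NOCRED are the existing buf KPIs, while my_fs_grow_meta_block() and the 4KB-to-8KB scenario are illustrative assumptions.

#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/ucred.h>

/*
 * Sketch: re-reading a metadata block at a larger size. Assume blkno is
 * already cached (possibly dirty) at 4096 bytes and the on-disk structure
 * has grown to 8192 bytes.
 */
static errno_t
my_fs_grow_meta_block(vnode_t devvp, daddr64_t blkno)
{
	buf_t bp = NULL;
	errno_t err;

	/*
	 * buf_meta_bread() goes through buf_getblk(..., BLK_META). With this
	 * change, the smaller dirty buffer is written back first and B_DONE
	 * is cleared, so the 8192-byte read below is actually issued instead
	 * of returning a buffer whose tail is stale.
	 */
	err = buf_meta_bread(devvp, blkno, 8192, NOCRED, &bp);
	if (err) {
		if (bp)
			buf_brelse(bp);
		return err;
	}

	/* ... consume the full 8192 bytes of metadata ... */

	buf_brelse(bp);
	return 0;
}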
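
buf_biodone() now records per-I/O latency for the disk conditioner: it subtracts the buffer's start timestamp (b_timestamp_tv) from microuptime() and passes the result, in microseconds, to disk_conditioner_delay(). The arithmetic in isolation, as a sketch (the helper name is mine; the calls mirror the diff):

#include <sys/time.h>

/*
 * Elapsed time of an I/O, in microseconds, computed the same way
 * buf_biodone() does before calling disk_conditioner_delay(). "start"
 * stands in for bp->b_timestamp_tv, captured when the I/O was issued.
 */
static uint64_t
io_elapsed_usec(struct timeval start)
{
	struct timeval now;

	microuptime(&now);        /* monotonic uptime, not wall-clock time */
	timevalsub(&now, &start); /* now -= start */
	return ((uint64_t)now.tv_sec * USEC_PER_SEC) + now.tv_usec;
}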
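
alloc_io_buf() now throttles iobuf headers consumed by disk-image (MNTK_VIRTUALDEV) mounts: a single mount may hold at most MNT_VIRTUALDEV_MAX_IOBUFS (16) unless the request is VM-privileged, and all such mounts together may hold at most 40% of niobuf_headers; requests beyond either limit sleep on need_iobuffer. The admission test, pulled out as a standalone predicate for clarity (the macros and counter meanings come from the diff; the wrapper function is illustrative):

extern int niobuf_headers;	/* total iobuf headers, sized at bufinit() */

#define MNT_VIRTUALDEV_MAX_IOBUFS	16
#define VIRTUALDEV_MAX_IOBUFS		((40 * niobuf_headers) / 100)

/*
 * Returns non-zero when a disk-image iobuf request must wait:
 *  a) an unprivileged request would push one mount past 16 in-use iobufs, or
 *  b) all disk-image mounts combined already hold more than 40% of the pool.
 * mnt_iobufinuse mirrors mp->mnt_iobufinuse; vdev_iobufinuse mirrors
 * bufstats.bufs_iobufinuse_vdev.
 */
static int
virtualdev_iobuf_must_wait(int priv, int mnt_iobufinuse, int vdev_iobufinuse)
{
	return (!priv && mnt_iobufinuse > MNT_VIRTUALDEV_MAX_IOBUFS) ||
	    (vdev_iobufinuse > VIRTUALDEV_MAX_IOBUFS);
}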
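
Finally, the new fs_buffer_cache_gc_register() / fs_buffer_cache_gc_unregister() pair lets a filesystem hook buffer_cache_gc(): up to FS_BUFFER_CACHE_GC_CALLOUTS_MAX_SIZE (16) callouts are kept under the buf_gc_callout mutex and dispatched with the same "all" flag buffer_cache_gc() received, and registration returns ENOMEM once the table is full. A sketch of how a hypothetical filesystem might use it; only the two registration functions and their signatures come from this change, while the my_fs_* names, the per-mount cache, and the mount/unmount placement are assumptions:

#include <sys/malloc.h>

/* Prototypes as added by this change (the declaration site may differ). */
extern int fs_buffer_cache_gc_register(void (*callout)(int, void *), void *context);
extern int fs_buffer_cache_gc_unregister(void (*callout)(int, void *), void *context);

struct my_fs_mount {
	void	*mf_private_cache;	/* illustrative per-mount cache */
};

static void
my_fs_bc_gc_callout(int all, void *context)
{
	struct my_fs_mount *mfp = context;

	/*
	 * "all" is the argument buffer_cache_gc() was invoked with: non-zero
	 * means free everything that can be freed, not just aged entries.
	 */
	if (all && mfp->mf_private_cache != NULL) {
		FREE(mfp->mf_private_cache, M_TEMP);	/* illustrative */
		mfp->mf_private_cache = NULL;
	}
}

static int
my_fs_mount_register_gc(struct my_fs_mount *mfp)
{
	/* ENOMEM means all 16 callout slots are taken; run without the hook. */
	return fs_buffer_cache_gc_register(my_fs_bc_gc_callout, mfp);
}

static void
my_fs_unmount_unregister_gc(struct my_fs_mount *mfp)
{
	(void)fs_buffer_cache_gc_unregister(my_fs_bc_gc_callout, mfp);
}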