apple/xnu.git, xnu-7195.50.7.100.1: bsd/vfs/vfs_cluster.c

diff --git a/bsd/vfs/vfs_cluster.c b/bsd/vfs/vfs_cluster.c
index 181614fcbf156111948e4acfa2341bab8b4e8f2f..5de58feb966e3e887b83e425af9674d8b5d54383 100644
--- a/bsd/vfs/vfs_cluster.c
+++ b/bsd/vfs/vfs_cluster.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -67,7 +67,7 @@
 #include <sys/mount_internal.h>
 #include <sys/vnode_internal.h>
 #include <sys/trace.h>
-#include <sys/malloc.h>
+#include <kern/kalloc.h>
 #include <sys/time.h>
 #include <sys/kernel.h>
 #include <sys/resourcevar.h>
@@ -157,12 +157,15 @@ struct cl_direct_read_lock {
 static LIST_HEAD(cl_direct_read_locks, cl_direct_read_lock)
 cl_direct_read_locks[CL_DIRECT_READ_LOCK_BUCKETS];
 
-static lck_spin_t cl_direct_read_spin_lock;
+static LCK_GRP_DECLARE(cl_mtx_grp, "cluster I/O");
+static LCK_MTX_DECLARE(cl_transaction_mtxp, &cl_mtx_grp);
+static LCK_SPIN_DECLARE(cl_direct_read_spin_lock, &cl_mtx_grp);
 
-static lck_grp_t        *cl_mtx_grp;
-static lck_attr_t       *cl_mtx_attr;
-static lck_grp_attr_t   *cl_mtx_grp_attr;
-static lck_mtx_t        *cl_transaction_mtxp;
+static ZONE_DECLARE(cl_rd_zone, "cluster_read",
+    sizeof(struct cl_readahead), ZC_ZFREE_CLEARMEM | ZC_NOENCRYPT);
+
+static ZONE_DECLARE(cl_wr_zone, "cluster_write",
+    sizeof(struct cl_writebehind), ZC_ZFREE_CLEARMEM | ZC_NOENCRYPT);
 
 #define IO_UNKNOWN      0
 #define IO_DIRECT       1
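The hunk above trades runtime lock-group, mutex, and spinlock allocation (and the old MALLOC_ZONE memory types) for compile-time declarations. A minimal sketch of the same pattern for a hypothetical subsystem; the names my_grp, my_mtx, my_zone, and struct my_obj are illustrative, not part of this diff:

    #include <kern/locks.h>
    #include <kern/zalloc.h>

    struct my_obj {
        lck_mtx_t mo_lock;
        int       mo_state;
    };

    static LCK_GRP_DECLARE(my_grp, "my subsystem");
    static LCK_MTX_DECLARE(my_mtx, &my_grp);
    static ZONE_DECLARE(my_zone, "my.objects",
        sizeof(struct my_obj), ZC_ZFREE_CLEARMEM | ZC_NOENCRYPT);

    static struct my_obj *
    my_obj_alloc(void)
    {
        /* Z_WAITOK may block; Z_ZERO returns zero-filled memory, which
         * is what lets the MALLOC_ZONE + bzero() pairs below collapse
         * into a single call. */
        return zalloc_flags(my_zone, Z_WAITOK | Z_ZERO);
    }

    static void
    my_obj_free(struct my_obj *p)
    {
        zfree(my_zone, p);      /* element size comes from the zone */
    }

ZC_ZFREE_CLEARMEM additionally scrubs elements when they are freed, and ZC_NOENCRYPT keeps the zone out of encrypted swap.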
@@ -194,18 +197,18 @@ static void cluster_read_upl_release(upl_t upl, int start_pg, int last_pg, int t
 static int cluster_copy_ubc_data_internal(vnode_t vp, struct uio *uio, int *io_resid, int mark_dirty, int take_reference);
 
 static int cluster_read_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t filesize, int flags,
-    int (*)(buf_t, void *), void *callback_arg);
+    int (*)(buf_t, void *), void *callback_arg) __attribute__((noinline));
 static int cluster_read_direct(vnode_t vp, struct uio *uio, off_t filesize, int *read_type, u_int32_t *read_length,
-    int flags, int (*)(buf_t, void *), void *callback_arg);
+    int flags, int (*)(buf_t, void *), void *callback_arg) __attribute__((noinline));
 static int cluster_read_contig(vnode_t vp, struct uio *uio, off_t filesize, int *read_type, u_int32_t *read_length,
-    int (*)(buf_t, void *), void *callback_arg, int flags);
+    int (*)(buf_t, void *), void *callback_arg, int flags) __attribute__((noinline));
 
 static int cluster_write_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t oldEOF, off_t newEOF,
-    off_t headOff, off_t tailOff, int flags, int (*)(buf_t, void *), void *callback_arg);
+    off_t headOff, off_t tailOff, int flags, int (*)(buf_t, void *), void *callback_arg) __attribute__((noinline));
 static int cluster_write_direct(vnode_t vp, struct uio *uio, off_t oldEOF, off_t newEOF,
-    int *write_type, u_int32_t *write_length, int flags, int (*)(buf_t, void *), void *callback_arg);
+    int *write_type, u_int32_t *write_length, int flags, int (*)(buf_t, void *), void *callback_arg) __attribute__((noinline));
 static int cluster_write_contig(vnode_t vp, struct uio *uio, off_t newEOF,
-    int *write_type, u_int32_t *write_length, int (*)(buf_t, void *), void *callback_arg, int bflag);
+    int *write_type, u_int32_t *write_length, int (*)(buf_t, void *), void *callback_arg, int bflag) __attribute__((noinline));
 
 static void cluster_update_state_internal(vnode_t vp, struct cl_extent *cl, int flags, boolean_t defer_writes, boolean_t *first_pass,
     off_t write_off, int write_cnt, off_t newEOF, int (*callback)(buf_t, void *), void *callback_arg, boolean_t vm_initiated);
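Each of these helper prototypes gains __attribute__((noinline)), plausibly to keep the heavyweight per-variant stack frames from being inlined into the common cluster_read()/cluster_write() entry points. The attribute rides on the forward declaration, exactly as in the hunk above:

    static int cluster_read_copy(vnode_t vp, struct uio *uio,
        u_int32_t io_req_size, off_t filesize, int flags,
        int (*)(buf_t, void *), void *callback_arg)
        __attribute__((noinline));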
@@ -276,17 +279,17 @@ int (*bootcache_contains_block)(dev_t device, u_int64_t blkno) = NULL;
 #define WRITE_BEHIND            1
 #define WRITE_BEHIND_SSD        1
 
-#if CONFIG_EMBEDDED
+#if !defined(XNU_TARGET_OS_OSX)
 #define PREFETCH                1
 #define PREFETCH_SSD            1
 uint32_t speculative_prefetch_max = (2048 * 1024);              /* maximum bytes in a speculative read-ahead */
 uint32_t speculative_prefetch_max_iosize = (512 * 1024);        /* maximum I/O size to use in a speculative read-ahead */
-#else
+#else /* XNU_TARGET_OS_OSX */
 #define PREFETCH                3
 #define PREFETCH_SSD            2
 uint32_t speculative_prefetch_max = (MAX_UPL_SIZE_BYTES * 3);   /* maximum bytes in a speculative read-ahead */
 uint32_t speculative_prefetch_max_iosize = (512 * 1024);        /* maximum I/O size to use in a speculative read-ahead on SSDs */
-#endif
+#endif /* ! XNU_TARGET_OS_OSX */
 
 
 #define IO_SCALE(vp, base)              (vp->v_mount->mnt_ioscale * (base))
@@ -312,25 +315,6 @@ SYSCTL_INT(_debug, OID_AUTO, lowpri_throttle_max_iosize, CTLFLAG_RW | CTLFLAG_LO
 void
 cluster_init(void)
 {
-       /*
-        * allocate lock group attribute and group
-        */
-       cl_mtx_grp_attr = lck_grp_attr_alloc_init();
-       cl_mtx_grp = lck_grp_alloc_init("cluster I/O", cl_mtx_grp_attr);
-
-       /*
-        * allocate the lock attribute
-        */
-       cl_mtx_attr = lck_attr_alloc_init();
-
-       cl_transaction_mtxp = lck_mtx_alloc_init(cl_mtx_grp, cl_mtx_attr);
-
-       if (cl_transaction_mtxp == NULL) {
-               panic("cluster_init: failed to allocate cl_transaction_mtxp");
-       }
-
-       lck_spin_init(&cl_direct_read_spin_lock, cl_mtx_grp, cl_mtx_attr);
-
        for (int i = 0; i < CL_DIRECT_READ_LOCK_BUCKETS; ++i) {
                LIST_INIT(&cl_direct_read_locks[i]);
        }
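With locks and zones now declared at compile time, the body of cluster_init() shrinks to just the surviving context lines; the resulting function, reconstructed for reference:

    void
    cluster_init(void)
    {
        /* locks and zones are set up by the LCK_*_DECLARE / ZONE_DECLARE
         * definitions above; only the direct-read lock buckets still
         * need runtime initialization */
        for (int i = 0; i < CL_DIRECT_READ_LOCK_BUCKETS; ++i) {
            LIST_INIT(&cl_direct_read_locks[i]);
        }
    }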
@@ -414,19 +398,17 @@ cluster_get_rap(vnode_t vp)
        ubc = vp->v_ubcinfo;
 
        if ((rap = ubc->cl_rahead) == NULL) {
-               MALLOC_ZONE(rap, struct cl_readahead *, sizeof *rap, M_CLRDAHEAD, M_WAITOK);
-
-               bzero(rap, sizeof *rap);
+               rap = zalloc_flags(cl_rd_zone, Z_WAITOK | Z_ZERO);
                rap->cl_lastr = -1;
-               lck_mtx_init(&rap->cl_lockr, cl_mtx_grp, cl_mtx_attr);
+               lck_mtx_init(&rap->cl_lockr, &cl_mtx_grp, LCK_ATTR_NULL);
 
                vnode_lock(vp);
 
                if (ubc->cl_rahead == NULL) {
                        ubc->cl_rahead = rap;
                } else {
-                       lck_mtx_destroy(&rap->cl_lockr, cl_mtx_grp);
-                       FREE_ZONE(rap, sizeof *rap, M_CLRDAHEAD);
+                       lck_mtx_destroy(&rap->cl_lockr, &cl_mtx_grp);
+                       zfree(cl_rd_zone, rap);
                        rap = ubc->cl_rahead;
                }
                vnode_unlock(vp);
@@ -465,18 +447,17 @@ cluster_get_wbp(vnode_t vp, int flags)
                        return (struct cl_writebehind *)NULL;
                }
 
-               MALLOC_ZONE(wbp, struct cl_writebehind *, sizeof *wbp, M_CLWRBEHIND, M_WAITOK);
+               wbp = zalloc_flags(cl_wr_zone, Z_WAITOK | Z_ZERO);
 
-               bzero(wbp, sizeof *wbp);
-               lck_mtx_init(&wbp->cl_lockw, cl_mtx_grp, cl_mtx_attr);
+               lck_mtx_init(&wbp->cl_lockw, &cl_mtx_grp, LCK_ATTR_NULL);
 
                vnode_lock(vp);
 
                if (ubc->cl_wbehind == NULL) {
                        ubc->cl_wbehind = wbp;
                } else {
-                       lck_mtx_destroy(&wbp->cl_lockw, cl_mtx_grp);
-                       FREE_ZONE(wbp, sizeof *wbp, M_CLWRBEHIND);
+                       lck_mtx_destroy(&wbp->cl_lockw, &cl_mtx_grp);
+                       zfree(cl_wr_zone, wbp);
                        wbp = ubc->cl_wbehind;
                }
                vnode_unlock(vp);
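After the conversion, cluster_get_rap() and cluster_get_wbp() keep the same optimistic shape: allocate and initialize outside the vnode lock, publish under it, and tear down the loser if another thread raced ahead. The read-ahead side, assembled from the hunk above with the +/- noise stripped:

    rap = zalloc_flags(cl_rd_zone, Z_WAITOK | Z_ZERO);
    rap->cl_lastr = -1;
    lck_mtx_init(&rap->cl_lockr, &cl_mtx_grp, LCK_ATTR_NULL);

    vnode_lock(vp);
    if (ubc->cl_rahead == NULL) {
        ubc->cl_rahead = rap;           /* we won the race */
    } else {
        /* another thread published first: destroy ours, adopt theirs */
        lck_mtx_destroy(&rap->cl_lockr, &cl_mtx_grp);
        zfree(cl_rd_zone, rap);
        rap = ubc->cl_rahead;
    }
    vnode_unlock(vp);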
@@ -767,7 +748,7 @@ cluster_iodone(buf_t bp, void *callback_arg)
            cbp_head, bp->b_lblkno, bp->b_bcount, bp->b_flags, 0);
 
        if (cbp_head->b_trans_next || !(cbp_head->b_flags & B_EOT)) {
-               lck_mtx_lock_spin(cl_transaction_mtxp);
+               lck_mtx_lock_spin(&cl_transaction_mtxp);
 
                bp->b_flags |= B_TDONE;
 
@@ -780,7 +761,7 @@ cluster_iodone(buf_t bp, void *callback_arg)
                                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END,
                                    cbp_head, cbp, cbp->b_bcount, cbp->b_flags, 0);
 
-                               lck_mtx_unlock(cl_transaction_mtxp);
+                               lck_mtx_unlock(&cl_transaction_mtxp);
 
                                return 0;
                        }
@@ -789,7 +770,7 @@ cluster_iodone(buf_t bp, void *callback_arg)
                                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END,
                                    cbp_head, cbp, cbp->b_bcount, cbp->b_flags, 0);
 
-                               lck_mtx_unlock(cl_transaction_mtxp);
+                               lck_mtx_unlock(&cl_transaction_mtxp);
                                wakeup(cbp);
 
                                return 0;
@@ -799,7 +780,7 @@ cluster_iodone(buf_t bp, void *callback_arg)
                                transaction_complete = TRUE;
                        }
                }
-               lck_mtx_unlock(cl_transaction_mtxp);
+               lck_mtx_unlock(&cl_transaction_mtxp);
 
                if (transaction_complete == FALSE) {
                        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END,
@@ -1024,7 +1005,7 @@ cluster_wait_IO(buf_t cbp_head, int async)
                bool done = true;
                buf_t last = NULL;
 
-               lck_mtx_lock_spin(cl_transaction_mtxp);
+               lck_mtx_lock_spin(&cl_transaction_mtxp);
 
                for (cbp = cbp_head; cbp; last = cbp, cbp = cbp->b_trans_next) {
                        if (!ISSET(cbp->b_flags, B_TDONE)) {
@@ -1037,7 +1018,7 @@ cluster_wait_IO(buf_t cbp_head, int async)
 
                        DTRACE_IO1(wait__start, buf_t, last);
                        do {
-                               msleep(last, cl_transaction_mtxp, PSPIN | (PRIBIO + 1), "cluster_wait_IO", NULL);
+                               msleep(last, &cl_transaction_mtxp, PSPIN | (PRIBIO + 1), "cluster_wait_IO", NULL);
 
                                /*
                                 * We should only have been woken up if all the
@@ -1056,7 +1037,7 @@ cluster_wait_IO(buf_t cbp_head, int async)
                        last->b_trans_next = NULL;
                }
 
-               lck_mtx_unlock(cl_transaction_mtxp);
+               lck_mtx_unlock(&cl_transaction_mtxp);
        } else { // !async
                for (cbp = cbp_head; cbp; cbp = cbp->b_trans_next) {
                        buf_biowait(cbp);
@@ -1299,7 +1280,7 @@ cluster_io(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, int no
                 * so we'll go ahead and zero out the portion of the page we can't
                 * read in from the file
                 */
-               zero_offset = upl_offset + non_rounded_size;
+               zero_offset = (int)(upl_offset + non_rounded_size);
        } else if (!ISSET(flags, CL_READ) && ISSET(flags, CL_DIRECT_IO)) {
                assert(ISSET(flags, CL_COMMIT));
 
@@ -1430,7 +1411,7 @@ cluster_io(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, int no
 
                        if (cbp_head) {
                                buf_t prev_cbp;
-                               int   bytes_in_last_page;
+                               uint32_t   bytes_in_last_page;
 
                                /*
                                 * first we have to wait for the current outstanding I/Os
@@ -1510,11 +1491,11 @@ cluster_io(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, int no
                                        trans_count = 0;
                                }
                        }
-                       if (vnode_pageout(vp, upl, trunc_page(upl_offset), trunc_page_64(f_offset), PAGE_SIZE, pageout_flags, NULL) != PAGER_SUCCESS) {
+                       if (vnode_pageout(vp, upl, (upl_offset_t)trunc_page(upl_offset), trunc_page_64(f_offset), PAGE_SIZE, pageout_flags, NULL) != PAGER_SUCCESS) {
                                error = EINVAL;
                        }
                        e_offset = round_page_64(f_offset + 1);
-                       io_size = e_offset - f_offset;
+                       io_size = (u_int)(e_offset - f_offset);
 
                        f_offset   += io_size;
                        upl_offset += io_size;
@@ -1588,7 +1569,7 @@ cluster_io(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, int no
                                 */
                                bytes_to_zero = non_rounded_size;
                                if (!(flags & CL_NOZERO)) {
-                                       bytes_to_zero = (((upl_offset + io_size) + (PAGE_SIZE - 1)) & ~PAGE_MASK) - upl_offset;
+                                       bytes_to_zero = (int)((((upl_offset + io_size) + (PAGE_SIZE - 1)) & ~PAGE_MASK) - upl_offset);
                                }
 
                                zero_offset = 0;
@@ -1598,7 +1579,7 @@ cluster_io(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, int no
 
                        pg_count = 0;
 
-                       cluster_zero(upl, upl_offset, bytes_to_zero, real_bp);
+                       cluster_zero(upl, (upl_offset_t)upl_offset, bytes_to_zero, real_bp);
 
                        if (cbp_head) {
                                int     pg_resid;
@@ -1611,7 +1592,7 @@ cluster_io(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, int no
                                 */
                                commit_offset = (upl_offset + (PAGE_SIZE - 1)) & ~PAGE_MASK;
 
-                               pg_resid = commit_offset - upl_offset;
+                               pg_resid = (int)(commit_offset - upl_offset);
 
                                if (bytes_to_zero >= pg_resid) {
                                        /*
@@ -1654,7 +1635,8 @@ cluster_io(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, int no
                        assert(!upl_associated_upl(upl));
 
                        if ((flags & CL_COMMIT) && pg_count) {
-                               ubc_upl_commit_range(upl, commit_offset, pg_count * PAGE_SIZE,
+                               ubc_upl_commit_range(upl, (upl_offset_t)commit_offset,
+                                   pg_count * PAGE_SIZE,
                                    UPL_COMMIT_CLEAR_DIRTY | UPL_COMMIT_FREE_ON_EMPTY);
                        }
                        upl_offset += io_size;
@@ -1723,7 +1705,7 @@ cluster_io(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, int no
                         * we can finally issue the i/o on the transaction.
                         */
                        if (aligned_ofs > upl_offset) {
-                               io_size = aligned_ofs - upl_offset;
+                               io_size = (u_int)(aligned_ofs - upl_offset);
                                pg_count--;
                        }
                }
@@ -1736,9 +1718,13 @@ cluster_io(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, int no
                         * bufs from the alloc_io_buf pool
                         */
                        priv = 1;
-               } else if ((flags & CL_ASYNC) && !(flags & CL_PAGEOUT)) {
+               } else if ((flags & CL_ASYNC) && !(flags & CL_PAGEOUT) && !cbp_head) {
                        /*
                         * Throttle the speculative IO
+                        *
+                        * We can only throttle this if it is the first iobuf
+                        * for the transaction. alloc_io_buf implements
+                        * additional restrictions for diskimages anyway.
                         */
                        priv = 0;
                } else {
@@ -1775,7 +1761,7 @@ cluster_io(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, int no
                cbp->b_blkno  = blkno;
                cbp->b_bcount = io_size;
 
-               if (buf_setupl(cbp, upl, upl_offset)) {
+               if (buf_setupl(cbp, upl, (uint32_t)upl_offset)) {
                        panic("buf_setupl failed\n");
                }
 #if CONFIG_IOSCHED
@@ -1909,8 +1895,9 @@ cluster_io(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, int no
                }
 
                if (ISSET(flags, CL_COMMIT)) {
-                       cluster_handle_associated_upl(iostate, upl, upl_offset,
-                           upl_end_offset - upl_offset);
+                       cluster_handle_associated_upl(iostate, upl,
+                           (upl_offset_t)upl_offset,
+                           (upl_size_t)(upl_end_offset - upl_offset));
                }
 
                // Free all the IO buffers in this transaction
@@ -1959,9 +1946,10 @@ cluster_io(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, int no
                        int     upl_flags;
 
                        pg_offset  = upl_offset & PAGE_MASK;
-                       abort_size = (upl_end_offset - upl_offset + PAGE_MASK) & ~PAGE_MASK;
+                       abort_size = (int)((upl_end_offset - upl_offset + PAGE_MASK) & ~PAGE_MASK);
 
-                       upl_flags = cluster_ioerror(upl, upl_offset - pg_offset, abort_size, error, io_flags, vp);
+                       upl_flags = cluster_ioerror(upl, (int)(upl_offset - pg_offset),
+                           abort_size, error, io_flags, vp);
 
                        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 28)) | DBG_FUNC_NONE,
                            upl, upl_offset - pg_offset, abort_size, (error << 24) | upl_flags, 0);
@@ -2023,7 +2011,7 @@ cluster_read_prefetch(vnode_t vp, off_t f_offset, u_int size, off_t filesize, in
                return 0;
        }
        if ((off_t)size > (filesize - f_offset)) {
-               size = filesize - f_offset;
+               size = (u_int)(filesize - f_offset);
        }
        pages_in_prefetch = (size + (PAGE_SIZE - 1)) / PAGE_SIZE;
 
@@ -2082,7 +2070,7 @@ cluster_read_ahead(vnode_t vp, struct cl_extent *extent, off_t filesize, struct
                        return;
                }
        }
-       r_addr = max(extent->e_addr, rap->cl_maxra) + 1;
+       r_addr = MAX(extent->e_addr, rap->cl_maxra) + 1;
        f_offset = (off_t)(r_addr * PAGE_SIZE_64);
 
        size_of_prefetch = 0;
@@ -2105,7 +2093,7 @@ cluster_read_ahead(vnode_t vp, struct cl_extent *extent, off_t filesize, struct
                        if (read_size > max_prefetch / PAGE_SIZE) {
                                rap->cl_ralen = max_prefetch / PAGE_SIZE;
                        } else {
-                               rap->cl_ralen = read_size;
+                               rap->cl_ralen = (int)read_size;
                        }
                }
                size_of_prefetch = cluster_read_prefetch(vp, f_offset, rap->cl_ralen * PAGE_SIZE, filesize, callback, callback_arg, bflag);
@@ -2188,7 +2176,7 @@ cluster_pageout_ext(vnode_t vp, upl_t upl, upl_offset_t upl_offset, off_t f_offs
        if (size < max_size) {
                io_size = size;
        } else {
-               io_size = max_size;
+               io_size = (int)max_size;
        }
 
        rounded_size = (io_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;
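This clamp is the template for the many explicit narrowing casts added throughout the file: a 64-bit quantity derived from file offsets is stored into a 32-bit variable only after a guard proves it fits, so the new cast changes no behavior and apparently exists to satisfy stricter 64-to-32-bit truncation warnings:

    /* max_size is off_t-derived (64-bit); io_size is an int.
     * The branch guarantees the stored value fits, so the cast
     * cannot drop significant bits. */
    if (size < max_size) {
        io_size = size;
    } else {
        io_size = (int)max_size;
    }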
@@ -2261,7 +2249,7 @@ cluster_pagein_ext(vnode_t vp, upl_t upl, upl_offset_t upl_offset, off_t f_offse
        if (size < max_size) {
                io_size = size;
        } else {
-               io_size = max_size;
+               io_size = (int)max_size;
        }
 
        rounded_size = (io_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;
@@ -2504,6 +2492,8 @@ cluster_write_direct(vnode_t vp, struct uio *uio, off_t oldEOF, off_t newEOF, in
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 75)) | DBG_FUNC_START,
            (int)uio->uio_offset, *write_length, (int)newEOF, 0, 0);
 
+       assert(vm_map_page_shift(current_map()) >= PAGE_SHIFT);
+
        max_upl_size = cluster_max_io_size(vp->v_mount, CL_WRITE);
 
        io_flag = CL_ASYNC | CL_PRESERVE | CL_COMMIT | CL_THROTTLE | CL_DIRECT_IO;
@@ -2525,7 +2515,7 @@ cluster_write_direct(vnode_t vp, struct uio *uio, off_t oldEOF, off_t newEOF, in
        iostate.io_error = 0;
        iostate.io_wanted = 0;
 
-       lck_mtx_init(&iostate.io_mtxp, cl_mtx_grp, cl_mtx_attr);
+       lck_mtx_init(&iostate.io_mtxp, &cl_mtx_grp, LCK_ATTR_NULL);
 
        mem_alignment_mask = (u_int32_t)vp->v_mount->mnt_alignmentmask;
        devblocksize = (u_int32_t)vp->v_mount->mnt_devblocksize;
@@ -2634,12 +2624,12 @@ next_dwrite:
                vm_map_t map = UIO_SEG_IS_USER_SPACE(uio->uio_segflg) ? current_map() : kernel_map;
                for (force_data_sync = 0; force_data_sync < 3; force_data_sync++) {
                        pages_in_pl = 0;
-                       upl_size = upl_needed_size;
+                       upl_size = (upl_size_t)upl_needed_size;
                        upl_flags = UPL_FILE_IO | UPL_COPYOUT_FROM | UPL_NO_SYNC |
                            UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL | UPL_SET_LITE | UPL_SET_IO_WIRE;
 
                        kret = vm_map_get_upl(map,
-                           (vm_map_offset_t)(iov_base & ~((user_addr_t)PAGE_MASK)),
+                           vm_map_trunc_page(iov_base, vm_map_page_mask(map)),
                            &upl_size,
                            &upl,
                            NULL,
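The replaced masking expression and its replacement differ only when the target map's page size differs from the kernel's. A side-by-side sketch (upl_base is an illustrative name; vm_map_trunc_page and vm_map_page_mask are the existing VM macros used in the hunk):

    vm_map_offset_t upl_base;

    /* before: truncates with the kernel's own PAGE_MASK, assuming the
     * target map uses kernel-sized pages */
    upl_base = (vm_map_offset_t)(iov_base & ~((user_addr_t)PAGE_MASK));

    /* after: truncates with the target map's page mask, e.g. 0xfff for
     * a 4K user map under a 16K-page kernel whose PAGE_MASK is 0x3fff */
    upl_base = vm_map_trunc_page(iov_base, vm_map_page_mask(map));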
@@ -2832,7 +2822,7 @@ wait_for_dwrites:
                retval = iostate.io_error;
        }
 
-       lck_mtx_destroy(&iostate.io_mtxp, cl_mtx_grp);
+       lck_mtx_destroy(&iostate.io_mtxp, &cl_mtx_grp);
 
        if (io_throttled == TRUE && retval == 0) {
                retval = EAGAIN;
@@ -2901,7 +2891,7 @@ cluster_write_contig(vnode_t vp, struct uio *uio, off_t newEOF, int *write_type,
        iostate.io_error = 0;
        iostate.io_wanted = 0;
 
-       lck_mtx_init(&iostate.io_mtxp, cl_mtx_grp, cl_mtx_attr);
+       lck_mtx_init(&iostate.io_mtxp, &cl_mtx_grp, LCK_ATTR_NULL);
 
 next_cwrite:
        io_size = *write_length;
@@ -2912,13 +2902,13 @@ next_cwrite:
        upl_needed_size = upl_offset + io_size;
 
        pages_in_pl = 0;
-       upl_size = upl_needed_size;
+       upl_size = (upl_size_t)upl_needed_size;
        upl_flags = UPL_FILE_IO | UPL_COPYOUT_FROM | UPL_NO_SYNC |
            UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL | UPL_SET_LITE | UPL_SET_IO_WIRE;
 
        vm_map_t map = UIO_SEG_IS_USER_SPACE(uio->uio_segflg) ? current_map() : kernel_map;
        kret = vm_map_get_upl(map,
-           (vm_map_offset_t)(iov_base & ~((user_addr_t)PAGE_MASK)),
+           vm_map_trunc_page(iov_base, vm_map_page_mask(map)),
            &upl_size, &upl[cur_upl], NULL, &pages_in_pl, &upl_flags, VM_KERN_MEMORY_FILE, 0);
 
        if (kret != KERN_SUCCESS) {
@@ -3045,7 +3035,7 @@ wait_for_cwrites:
                error = iostate.io_error;
        }
 
-       lck_mtx_destroy(&iostate.io_mtxp, cl_mtx_grp);
+       lck_mtx_destroy(&iostate.io_mtxp, &cl_mtx_grp);
 
        if (error == 0 && tail_size) {
                error = cluster_align_phys_io(vp, uio, src_paddr, tail_size, 0, callback, callback_arg);
@@ -3497,7 +3487,7 @@ cluster_write_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t old
        }
        if (uio) {
                write_off = uio->uio_offset;
-               write_cnt = uio_resid(uio);
+               write_cnt = (int)uio_resid(uio);
                /*
                 * delay updating the sequential write info
                 * in the control block until we've obtained
@@ -3536,7 +3526,7 @@ cluster_write_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t old
                        if ((start_offset + total_size) > max_io_size) {
                                total_size = max_io_size - start_offset;
                        }
-                       xfer_resid = total_size;
+                       xfer_resid = (int)total_size;
 
                        retval = cluster_copy_ubc_data_internal(vp, uio, &xfer_resid, 1, 1);
 
@@ -3580,11 +3570,11 @@ cluster_write_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t old
                        upl_size = max_io_size;
                }
 
-               pages_in_upl = upl_size / PAGE_SIZE;
-               io_size      = upl_size - start_offset;
+               pages_in_upl = (int)(upl_size / PAGE_SIZE);
+               io_size      = (int)(upl_size - start_offset);
 
                if ((long long)io_size > total_size) {
-                       io_size = total_size;
+                       io_size = (int)total_size;
                }
 
                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 41)) | DBG_FUNC_START, upl_size, io_size, total_size, 0, 0);
@@ -3597,7 +3587,7 @@ cluster_write_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t old
                 */
                kret = ubc_create_upl_kernel(vp,
                    upl_f_offset,
-                   upl_size,
+                   (int)upl_size,
                    &upl,
                    &pl,
                    UPL_SET_LITE | ((uio != NULL && (uio->uio_flags & UIO_FLAGS_IS_COMPRESSED_FILE)) ? 0 : UPL_WILL_MODIFY),
@@ -3620,7 +3610,7 @@ cluster_write_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t old
                        read_size = PAGE_SIZE;
 
                        if ((upl_f_offset + read_size) > oldEOF) {
-                               read_size = oldEOF - upl_f_offset;
+                               read_size = (int)(oldEOF - upl_f_offset);
                        }
 
                        retval = cluster_io(vp, upl, 0, upl_f_offset, read_size,
@@ -3635,7 +3625,8 @@ cluster_write_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t old
                                ubc_upl_abort_range(upl, 0, PAGE_SIZE, UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY);
 
                                if (upl_size > PAGE_SIZE) {
-                                       ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);
+                                       ubc_upl_abort_range(upl, 0, (upl_size_t)upl_size,
+                                           UPL_ABORT_FREE_ON_EMPTY);
                                }
 
                                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 45)) | DBG_FUNC_NONE,
@@ -3652,13 +3643,13 @@ cluster_write_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t old
                        upl_offset = upl_size - PAGE_SIZE;
 
                        if ((upl_f_offset + start_offset + io_size) < oldEOF &&
-                           !upl_valid_page(pl, upl_offset / PAGE_SIZE)) {
+                           !upl_valid_page(pl, (int)(upl_offset / PAGE_SIZE))) {
                                int   read_size;
 
                                read_size = PAGE_SIZE;
 
                                if ((off_t)(upl_f_offset + upl_offset + read_size) > oldEOF) {
-                                       read_size = oldEOF - (upl_f_offset + upl_offset);
+                                       read_size = (int)(oldEOF - (upl_f_offset + upl_offset));
                                }
 
                                retval = cluster_io(vp, upl, upl_offset, upl_f_offset + upl_offset, read_size,
@@ -3670,10 +3661,10 @@ cluster_write_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t old
                                         * need to release the rest of the pages in the upl without
                                         * modifying their state and mark the failed page in error
                                         */
-                                       ubc_upl_abort_range(upl, upl_offset, PAGE_SIZE, UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY);
+                                       ubc_upl_abort_range(upl, (upl_offset_t)upl_offset, PAGE_SIZE, UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY);
 
                                        if (upl_size > PAGE_SIZE) {
-                                               ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);
+                                               ubc_upl_abort_range(upl, 0, (upl_size_t)upl_size, UPL_ABORT_FREE_ON_EMPTY);
                                        }
 
                                        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 45)) | DBG_FUNC_NONE,
@@ -3687,7 +3678,7 @@ cluster_write_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t old
 
                while (zero_cnt && xfer_resid) {
                        if (zero_cnt < (long long)xfer_resid) {
-                               bytes_to_zero = zero_cnt;
+                               bytes_to_zero = (int)zero_cnt;
                        } else {
                                bytes_to_zero = xfer_resid;
                        }
@@ -3708,7 +3699,7 @@ cluster_write_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t old
                        retval = cluster_copy_upl_data(uio, upl, io_offset, (int *)&io_requested);
 
                        if (retval) {
-                               ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);
+                               ubc_upl_abort_range(upl, 0, (upl_size_t)upl_size, UPL_ABORT_FREE_ON_EMPTY);
 
                                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 45)) | DBG_FUNC_NONE,
                                    upl, 0, 0, retval, 0);
@@ -3720,7 +3711,7 @@ cluster_write_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t old
                }
                while (xfer_resid && zero_cnt1 && retval == 0) {
                        if (zero_cnt1 < (long long)xfer_resid) {
-                               bytes_to_zero = zero_cnt1;
+                               bytes_to_zero = (int)zero_cnt1;
                        } else {
                                bytes_to_zero = xfer_resid;
                        }
@@ -3749,7 +3740,7 @@ cluster_write_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t old
                                 * if the file gets extended again in such a way as to leave a
                                 * hole starting at this EOF, we'll have zero's in the correct spot
                                 */
-                               cluster_zero(upl, io_size, upl_size - io_size, NULL);
+                               cluster_zero(upl, io_size, (int)(upl_size - io_size), NULL);
                        }
                        /*
                         * release the upl now if we hold one since...
@@ -3769,7 +3760,7 @@ cluster_write_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t old
                         *    of this vnode is in progress, we will deadlock if the pages being flushed intersect the pages
                         *    we hold since the flushing context is holding the cluster lock.
                         */
-                       ubc_upl_commit_range(upl, 0, upl_size,
+                       ubc_upl_commit_range(upl, 0, (upl_size_t)upl_size,
                            UPL_COMMIT_SET_DIRTY | UPL_COMMIT_INACTIVATE | UPL_COMMIT_FREE_ON_EMPTY);
 check_cluster:
                        /*
@@ -4022,11 +4013,12 @@ cluster_read_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t file
 
        while (io_req_size && uio->uio_offset < filesize && retval == 0) {
                max_size = filesize - uio->uio_offset;
+               bool leftover_upl_aborted = false;
 
                if ((off_t)(io_req_size) < max_size) {
                        io_size = io_req_size;
                } else {
-                       io_size = max_size;
+                       io_size = (u_int32_t)max_size;
                }
 
                if (!(flags & IO_NOCACHE)) {
@@ -4041,7 +4033,7 @@ cluster_read_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t file
                                 * the cache and have issued an I/O, than we'll assume that we're likely
                                 * to continue to miss in the cache and it's to our advantage to try and prefetch
                                 */
-                               if (last_request_offset && last_ioread_offset && (size_of_prefetch = (last_request_offset - last_ioread_offset))) {
+                               if (last_request_offset && last_ioread_offset && (size_of_prefetch = (u_int32_t)(last_request_offset - last_ioread_offset))) {
                                        if ((last_ioread_offset - uio->uio_offset) <= max_rd_size && prefetch_enabled) {
                                                /*
                                                 * we've already issued I/O for this request and
@@ -4224,7 +4216,7 @@ cluster_read_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t file
                         * the end of the last physical block associated with the file
                         */
                        if (iolock_inited == FALSE) {
-                               lck_mtx_init(&iostate.io_mtxp, cl_mtx_grp, cl_mtx_attr);
+                               lck_mtx_init(&iostate.io_mtxp, &cl_mtx_grp, LCK_ATTR_NULL);
 
                                iolock_inited = TRUE;
                        }
@@ -4232,7 +4224,33 @@ cluster_read_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t file
                        io_size    = (last_pg - start_pg) * PAGE_SIZE;
 
                        if ((off_t)(upl_f_offset + upl_offset + io_size) > filesize) {
-                               io_size = filesize - (upl_f_offset + upl_offset);
+                               io_size = (u_int32_t)(filesize - (upl_f_offset + upl_offset));
+                       }
+
+                       /*
+                        * Find out if this read needs verification; if so, we'll have to
+                        * manage the UPL differently. Note that this call only tells us
+                        * whether verification is enabled on this mount point; the actual
+                        * verification is performed by the file system.
+                        */
+                       size_t verify_block_size = 0;
+                       if ((VNOP_VERIFY(vp, start_offset, NULL, 0, &verify_block_size, VNODE_VERIFY_DEFAULT, NULL) == 0) /* && verify_block_size */) {
+                               for (uio_last = last_pg; uio_last < pages_in_upl; uio_last++) {
+                                       if (!upl_valid_page(pl, uio_last)) {
+                                               break;
+                                       }
+                               }
+                               if (uio_last < pages_in_upl) {
+                                       /*
+                                        * there were some invalid pages beyond the valid pages
+                                        * that we didn't issue an I/O for, just release them
+                                        * unchanged now, so that any prefetch/readahed can
+                                        * unchanged now, so that any prefetch/readahead can
+                                        */
+                                       ubc_upl_abort_range(upl, uio_last * PAGE_SIZE,
+                                           (pages_in_upl - uio_last) * PAGE_SIZE, UPL_ABORT_FREE_ON_EMPTY);
+                                       leftover_upl_aborted = true;
+                               }
                        }
 
                        /*
@@ -4263,20 +4281,22 @@ cluster_read_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t file
                         */
                        u_int  val_size;
 
-                       for (uio_last = last_pg; uio_last < pages_in_upl; uio_last++) {
-                               if (!upl_valid_page(pl, uio_last)) {
-                                       break;
+                       if (!leftover_upl_aborted) {
+                               for (uio_last = last_pg; uio_last < pages_in_upl; uio_last++) {
+                                       if (!upl_valid_page(pl, uio_last)) {
+                                               break;
+                                       }
+                               }
+                               if (uio_last < pages_in_upl) {
+                                       /*
+                                        * there were some invalid pages beyond the valid pages
+                                        * that we didn't issue an I/O for, just release them
+                                        * unchanged now, so that any prefetch/readahed can
+                                        * unchanged now, so that any prefetch/readahead can
+                                        */
+                                       ubc_upl_abort_range(upl, uio_last * PAGE_SIZE,
+                                           (pages_in_upl - uio_last) * PAGE_SIZE, UPL_ABORT_FREE_ON_EMPTY);
                                }
-                       }
-                       if (uio_last < pages_in_upl) {
-                               /*
-                                * there were some invalid pages beyond the valid pages
-                                * that we didn't issue an I/O for, just release them
-                                * unchanged now, so that any prefetch/readahed can
-                                * include them
-                                */
-                               ubc_upl_abort_range(upl, uio_last * PAGE_SIZE,
-                                   (pages_in_upl - uio_last) * PAGE_SIZE, UPL_ABORT_FREE_ON_EMPTY);
                        }
 
                        /*
@@ -4287,7 +4307,7 @@ cluster_read_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t file
                        val_size = (uio_last * PAGE_SIZE) - start_offset;
 
                        if (val_size > max_size) {
-                               val_size = max_size;
+                               val_size = (u_int)max_size;
                        }
 
                        if (val_size > io_req_size) {
@@ -4298,7 +4318,7 @@ cluster_read_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t file
                                last_ioread_offset = uio->uio_offset + val_size;
                        }
 
-                       if ((size_of_prefetch = (last_request_offset - last_ioread_offset)) && prefetch_enabled) {
+                       if ((size_of_prefetch = (u_int32_t)(last_request_offset - last_ioread_offset)) && prefetch_enabled) {
                                if ((last_ioread_offset - (uio->uio_offset + val_size)) <= upl_size) {
                                        /*
                                         * if there's still I/O left to do for this request, and...
@@ -4391,7 +4411,12 @@ cluster_read_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t file
                         * their state
                         */
                        if (error) {
-                               ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);
+                               if (leftover_upl_aborted) {
+                                       ubc_upl_abort_range(upl, start_pg * PAGE_SIZE, (uio_last - start_pg) * PAGE_SIZE,
+                                           UPL_ABORT_FREE_ON_EMPTY);
+                               } else {
+                                       ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);
+                               }
                        } else {
                                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 35)) | DBG_FUNC_START,
                                    upl, -1, pages_in_upl - (last_pg - start_pg), 0, 0);
@@ -4453,7 +4478,7 @@ cluster_read_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t file
                 */
                cluster_iostate_wait(&iostate, 0, "cluster_read_copy");
 
-               lck_mtx_destroy(&iostate.io_mtxp, cl_mtx_grp);
+               lck_mtx_destroy(&iostate.io_mtxp, &cl_mtx_grp);
        }
        if (rap != NULL) {
                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 32)) | DBG_FUNC_END,
@@ -4491,8 +4516,8 @@ cluster_lock_direct_read(vnode_t vp, lck_rw_type_t type)
                                lck_spin_unlock(&cl_direct_read_spin_lock);
                                if (new_lck) {
                                        // Someone beat us to it, ditch the allocation
-                                       lck_rw_destroy(&new_lck->rw_lock, cl_mtx_grp);
-                                       FREE(new_lck, M_TEMP);
+                                       lck_rw_destroy(&new_lck->rw_lock, &cl_mtx_grp);
+                                       kheap_free(KHEAP_DEFAULT, new_lck, sizeof(cl_direct_read_lock_t));
                                }
                                lck_rw_lock(&lck->rw_lock, type);
                                return lck;
@@ -4510,9 +4535,9 @@ cluster_lock_direct_read(vnode_t vp, lck_rw_type_t type)
                lck_spin_unlock(&cl_direct_read_spin_lock);
 
                // Allocate a new lock
-               MALLOC(new_lck, cl_direct_read_lock_t *, sizeof(*new_lck),
-                   M_TEMP, M_WAITOK);
-               lck_rw_init(&new_lck->rw_lock, cl_mtx_grp, cl_mtx_attr);
+               new_lck = kheap_alloc(KHEAP_DEFAULT, sizeof(cl_direct_read_lock_t),
+                   Z_WAITOK);
+               lck_rw_init(&new_lck->rw_lock, &cl_mtx_grp, LCK_ATTR_NULL);
                new_lck->vp = vp;
                new_lck->ref_count = 1;
 
@@ -4529,8 +4554,8 @@ cluster_unlock_direct_read(cl_direct_read_lock_t *lck)
        if (lck->ref_count == 1) {
                LIST_REMOVE(lck, chain);
                lck_spin_unlock(&cl_direct_read_spin_lock);
-               lck_rw_destroy(&lck->rw_lock, cl_mtx_grp);
-               FREE(lck, M_TEMP);
+               lck_rw_destroy(&lck->rw_lock, &cl_mtx_grp);
+               kheap_free(KHEAP_DEFAULT, lck, sizeof(cl_direct_read_lock_t));
        } else {
                --lck->ref_count;
                lck_spin_unlock(&cl_direct_read_spin_lock);
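The per-file direct-read locks move from MALLOC/FREE with the M_TEMP type onto the default kernel heap. Unlike zfree(), kheap_free() must be told the allocation size, which is why both call sites pass sizeof(cl_direct_read_lock_t). The paired pattern in isolation:

    cl_direct_read_lock_t *lck;

    lck = kheap_alloc(KHEAP_DEFAULT, sizeof(*lck), Z_WAITOK);
    lck_rw_init(&lck->rw_lock, &cl_mtx_grp, LCK_ATTR_NULL);
    /* ... insert into a hash bucket under cl_direct_read_spin_lock ... */

    lck_rw_destroy(&lck->rw_lock, &cl_mtx_grp);
    kheap_free(KHEAP_DEFAULT, lck, sizeof(*lck));   /* size must match */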
@@ -4583,6 +4608,8 @@ cluster_read_direct(vnode_t vp, struct uio *uio, off_t filesize, int *read_type,
        user_addr_t      last_iov_base = 0;
        user_addr_t      next_iov_base = 0;
 
+       assert(vm_map_page_shift(current_map()) >= PAGE_SHIFT);
+
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_START,
            (int)uio->uio_offset, (int)filesize, *read_type, *read_length, 0);
 
@@ -4614,7 +4641,7 @@ cluster_read_direct(vnode_t vp, struct uio *uio, off_t filesize, int *read_type,
        iostate.io_error = 0;
        iostate.io_wanted = 0;
 
-       lck_mtx_init(&iostate.io_mtxp, cl_mtx_grp, cl_mtx_attr);
+       lck_mtx_init(&iostate.io_mtxp, &cl_mtx_grp, LCK_ATTR_NULL);
 
        devblocksize = (u_int32_t)vp->v_mount->mnt_devblocksize;
        mem_alignment_mask = (u_int32_t)vp->v_mount->mnt_alignmentmask;
@@ -4646,6 +4673,20 @@ next_dread:
        offset_in_file = (u_int32_t)uio->uio_offset & (devblocksize - 1);
        offset_in_iovbase = (u_int32_t)iov_base & mem_alignment_mask;
 
+       if (vm_map_page_mask(current_map()) < PAGE_MASK) {
+               /*
+                * XXX TODO4K
+                * Direct I/O might not work as expected from a 16k kernel space
+                * to a 4k user space because each 4k chunk might point to
+                * a different 16k physical page...
+                * Let's go the "misaligned" way.
+                */
+               if (!misaligned) {
+                       DEBUG4K_VFS("forcing misaligned\n");
+               }
+               misaligned = 1;
+       }
+
        if (offset_in_file || offset_in_iovbase) {
                /*
                 * one of the 2 important offsets is misaligned
@@ -4678,7 +4719,7 @@ next_dread:
        }
 
        if ((off_t)io_req_size > max_io_size) {
-               io_req_size = max_io_size;
+               io_req_size = (u_int32_t)max_io_size;
        }
 
        /*
@@ -4875,7 +4916,7 @@ next_dread:
                vm_map_t map = UIO_SEG_IS_USER_SPACE(uio->uio_segflg) ? current_map() : kernel_map;
                for (force_data_sync = 0; force_data_sync < 3; force_data_sync++) {
                        pages_in_pl = 0;
-                       upl_size = upl_needed_size;
+                       upl_size = (upl_size_t)upl_needed_size;
                        upl_flags = UPL_FILE_IO | UPL_NO_SYNC | UPL_SET_INTERNAL | UPL_SET_LITE | UPL_SET_IO_WIRE;
                        if (no_zero_fill) {
                                upl_flags |= UPL_NOZEROFILL;
@@ -5055,19 +5096,24 @@ wait_for_dreads:
                retval = iostate.io_error;
        }
 
-       lck_mtx_destroy(&iostate.io_mtxp, cl_mtx_grp);
+       lck_mtx_destroy(&iostate.io_mtxp, &cl_mtx_grp);
 
        if (io_throttled == TRUE && retval == 0) {
                retval = EAGAIN;
        }
 
-       for (next_iov_base = orig_iov_base; next_iov_base < last_iov_base; next_iov_base += PAGE_SIZE) {
+       vm_map_offset_t current_page_size, current_page_mask;
+       current_page_size = vm_map_page_size(current_map());
+       current_page_mask = vm_map_page_mask(current_map());
+       for (next_iov_base = orig_iov_base;
+           next_iov_base < last_iov_base;
+           next_iov_base += current_page_size) {
                /*
                 * This is specifically done for pmap accounting purposes.
                 * vm_pre_fault() will call vm_fault() to enter the page into
                 * the pmap if there isn't _a_ physical page for that VA already.
                 */
-               vm_pre_fault(vm_map_trunc_page(next_iov_base, PAGE_MASK), VM_PROT_READ);
+               vm_pre_fault(vm_map_trunc_page(next_iov_base, current_page_mask), VM_PROT_READ);
        }
 
        if (io_req_size && retval == 0) {
@@ -5147,7 +5193,7 @@ cluster_read_contig(vnode_t vp, struct uio *uio, off_t filesize, int *read_type,
        iostate.io_error = 0;
        iostate.io_wanted = 0;
 
-       lck_mtx_init(&iostate.io_mtxp, cl_mtx_grp, cl_mtx_attr);
+       lck_mtx_init(&iostate.io_mtxp, &cl_mtx_grp, LCK_ATTR_NULL);
 
 next_cread:
        io_size = *read_length;
@@ -5155,7 +5201,7 @@ next_cread:
        max_size = filesize - uio->uio_offset;
 
        if (io_size > max_size) {
-               io_size = max_size;
+               io_size = (u_int32_t)max_size;
        }
 
        iov_base = uio_curriovbase(uio);
@@ -5164,7 +5210,7 @@ next_cread:
        upl_needed_size = upl_offset + io_size;
 
        pages_in_pl = 0;
-       upl_size = upl_needed_size;
+       upl_size = (upl_size_t)upl_needed_size;
        upl_flags = UPL_FILE_IO | UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL | UPL_SET_LITE | UPL_SET_IO_WIRE;
 
 
@@ -5173,7 +5219,7 @@ next_cread:
 
        vm_map_t map = UIO_SEG_IS_USER_SPACE(uio->uio_segflg) ? current_map() : kernel_map;
        kret = vm_map_get_upl(map,
-           (vm_map_offset_t)(iov_base & ~((user_addr_t)PAGE_MASK)),
+           vm_map_trunc_page(iov_base, vm_map_page_mask(map)),
            &upl_size, &upl[cur_upl], NULL, &pages_in_pl, &upl_flags, VM_KERN_MEMORY_FILE, 0);
 
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 92)) | DBG_FUNC_END,
@@ -5298,7 +5344,7 @@ wait_for_creads:
                error = iostate.io_error;
        }
 
-       lck_mtx_destroy(&iostate.io_mtxp, cl_mtx_grp);
+       lck_mtx_destroy(&iostate.io_mtxp, &cl_mtx_grp);
 
        if (error == 0 && tail_size) {
                error = cluster_align_phys_io(vp, uio, dst_paddr, tail_size, CL_READ, callback, callback_arg);
@@ -5352,7 +5398,7 @@ cluster_io_type(struct uio *uio, int *io_type, u_int32_t *io_length, u_int32_t m
 
                vm_map_t map = UIO_SEG_IS_USER_SPACE(uio->uio_segflg) ? current_map() : kernel_map;
                if ((vm_map_get_upl(map,
-                   (vm_map_offset_t)(iov_base & ~((user_addr_t)PAGE_MASK)),
+                   vm_map_trunc_page(iov_base, vm_map_page_mask(map)),
                    &upl_size, &upl, NULL, NULL, &upl_flags, VM_KERN_MEMORY_FILE, 0)) != KERN_SUCCESS) {
                        /*
                         * the user app must have passed in an invalid address
@@ -5381,6 +5427,13 @@ cluster_io_type(struct uio *uio, int *io_type, u_int32_t *io_length, u_int32_t m
        }
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 94)) | DBG_FUNC_END, iov_base, *io_type, *io_length, retval, 0);
 
+       if (*io_type == IO_DIRECT &&
+           vm_map_page_shift(current_map()) < PAGE_SHIFT) {
+               /* no direct I/O for sub-page-size address spaces */
+               DEBUG4K_VFS("io_type IO_DIRECT -> IO_COPY\n");
+               *io_type = IO_COPY;
+       }
+
        return retval;
 }
 
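Several hunks in this revision fence the direct I/O paths off from sub-page user address spaces (the TODO4K work): under a 16K kernel, adjacent 4K chunks of a 4K user mapping may sit on different 16K physical pages, so the wired-UPL direct path cannot be used. The two guards side by side, with illustrative values assuming a 16K kernel page; the asserts added to cluster_write_direct() and cluster_read_direct() then document the invariant that those functions only run for maps with at-least-kernel-sized pages:

    /* 16K kernel: PAGE_SHIFT == 14, PAGE_MASK == 0x3fff
     * 4K user map: vm_map_page_shift() == 12, vm_map_page_mask() == 0xfff */

    /* in cluster_read_direct(): force the copy engine by treating the
     * request as misaligned */
    if (vm_map_page_mask(current_map()) < PAGE_MASK) {
        misaligned = 1;
    }

    /* in cluster_io_type(): downgrade the chosen strategy outright */
    if (*io_type == IO_DIRECT &&
        vm_map_page_shift(current_map()) < PAGE_SHIFT) {
        *io_type = IO_COPY;
    }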
@@ -5420,23 +5473,17 @@ advisory_read_ext(vnode_t vp, off_t filesize, off_t f_offset, int resid, int (*c
                return EINVAL;
        }
 
-       if (resid < 0) {
+       if (f_offset < 0 || resid < 0) {
                return EINVAL;
        }
 
        max_io_size = cluster_max_io_size(vp->v_mount, CL_READ);
 
-#if CONFIG_EMBEDDED
-       if (max_io_size > speculative_prefetch_max_iosize) {
-               max_io_size = speculative_prefetch_max_iosize;
-       }
-#else
        if (disk_conditioner_mount_is_ssd(vp->v_mount)) {
                if (max_io_size > speculative_prefetch_max_iosize) {
                        max_io_size = speculative_prefetch_max_iosize;
                }
        }
-#endif
 
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 60)) | DBG_FUNC_START,
            (int)f_offset, resid, (int)filesize, 0, 0);
@@ -5457,7 +5504,7 @@ advisory_read_ext(vnode_t vp, off_t filesize, off_t f_offset, int resid, int (*c
                if (resid < max_size) {
                        io_size = resid;
                } else {
-                       io_size = max_size;
+                       io_size = (int)max_size;
                }
 
                upl_size = (start_offset + io_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;
@@ -5563,7 +5610,7 @@ advisory_read_ext(vnode_t vp, off_t filesize, off_t f_offset, int resid, int (*c
                                io_size    = (last_pg - start_pg) * PAGE_SIZE;
 
                                if ((off_t)(upl_f_offset + upl_offset + io_size) > filesize) {
-                                       io_size = filesize - (upl_f_offset + upl_offset);
+                                       io_size = (int)(filesize - (upl_f_offset + upl_offset));
                                }
 
                                /*
@@ -5756,22 +5803,18 @@ cluster_release(struct ubc_info *ubc)
                if (wbp->cl_scmap) {
                        vfs_drt_control(&(wbp->cl_scmap), 0);
                }
+               lck_mtx_destroy(&wbp->cl_lockw, &cl_mtx_grp);
+               zfree(cl_wr_zone, wbp);
+               ubc->cl_wbehind = NULL;
        } else {
                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 81)) | DBG_FUNC_START, ubc, 0, 0, 0, 0);
        }
 
-       rap = ubc->cl_rahead;
-
-       if (wbp != NULL) {
-               lck_mtx_destroy(&wbp->cl_lockw, cl_mtx_grp);
-               FREE_ZONE(wbp, sizeof *wbp, M_CLWRBEHIND);
-       }
        if ((rap = ubc->cl_rahead)) {
-               lck_mtx_destroy(&rap->cl_lockr, cl_mtx_grp);
-               FREE_ZONE(rap, sizeof *rap, M_CLRDAHEAD);
+               lck_mtx_destroy(&rap->cl_lockr, &cl_mtx_grp);
+               zfree(cl_rd_zone, rap);
+               ubc->cl_rahead  = NULL;
        }
-       ubc->cl_rahead  = NULL;
-       ubc->cl_wbehind = NULL;
 
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 81)) | DBG_FUNC_END, ubc, rap, wbp, 0, 0);
 }
@@ -6036,7 +6079,7 @@ cluster_push_now(vnode_t vp, struct cl_extent *cl, off_t EOF, int flags,
 
                        return 0;
                }
-               size = EOF - upl_f_offset;
+               size = (int)(EOF - upl_f_offset);
 
                upl_size = (size + (PAGE_SIZE - 1)) & ~PAGE_MASK;
                pages_in_upl = upl_size / PAGE_SIZE;
@@ -6558,7 +6601,7 @@ cluster_copy_ubc_data_internal(vnode_t vp, struct uio *uio, int *io_resid, int m
 
        if ((io_size = *io_resid)) {
                start_offset = (int)(uio->uio_offset & PAGE_MASK_64);
-               xsize = uio_resid(uio);
+               xsize = (int)uio_resid(uio);
 
                retval = memory_object_control_uiomove(control, uio->uio_offset - start_offset, uio,
                    start_offset, io_size, mark_dirty, take_reference);
@@ -6663,7 +6706,7 @@ is_file_clean(vnode_t vp, off_t filesize)
        } while(0);
 
 
-#if CONFIG_EMBEDDED
+#if !defined(XNU_TARGET_OS_OSX)
 /*
  * Hash table moduli.
  *
@@ -6696,7 +6739,7 @@ is_file_clean(vnode_t vp, off_t filesize)
 #define DRT_LARGE_ALLOCATION    32768   /* 144 bytes spare */
 #define DRT_XLARGE_ALLOCATION    131072  /* 208 bytes spare */
 
-#else
+#else /* XNU_TARGET_OS_OSX */
 /*
  * Hash table moduli.
  *
@@ -6729,7 +6772,7 @@ is_file_clean(vnode_t vp, off_t filesize)
 #define DRT_LARGE_ALLOCATION    131072  /* 208 bytes spare */
 #define DRT_XLARGE_ALLOCATION   524288  /* 304 bytes spare */
 
-#endif
+#endif /* ! XNU_TARGET_OS_OSX */
 
 /* *** nothing below here has secret dependencies on DRT_BITVECTOR_PAGES *** */
 
@@ -7179,7 +7222,7 @@ vfs_drt_do_mark_pages(
                 * Work out how many pages we're modifying in this
                 * hashtable entry.
                 */
-               pgoff = (offset - DRT_ALIGN_ADDRESS(offset)) / PAGE_SIZE;
+               pgoff = (int)((offset - DRT_ALIGN_ADDRESS(offset)) / PAGE_SIZE);
                pgcount = min((length / PAGE_SIZE), (DRT_BITVECTOR_PAGES - pgoff));
 
                /*