diff --git a/bsd/nfs/nfs_bio.c b/bsd/nfs/nfs_bio.c
index 8b2ab3e1f7f52da37d08d8a622aae5885771edbb..b9c2b5ac1e773f564de12066f909239ea7b27176 100644
--- a/bsd/nfs/nfs_bio.c
+++ b/bsd/nfs/nfs_bio.c
@@ -1,8 +1,8 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
+ *
  * This file contains Original Code and/or Modifications of Original Code
  * as defined in and that are subject to the Apple Public Source License
  * Version 2.0 (the 'License'). You may not use this file except in
  * unlawful or unlicensed copies of an Apple operating system, or to
  * circumvent, violate, or enable the circumvention or violation of, any
  * terms of an Apple operating system software license agreement.
- * 
+ *
  * Please obtain a copy of the License at
  * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
+ *
  * The Original Code and all software distributed under the License are
  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
@@ -22,7 +22,7 @@
  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  * Please see the License for the specific language governing rights and
  * limitations under the License.
- * 
+ *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
  *     @(#)nfs_bio.c   8.9 (Berkeley) 3/30/95
  * FreeBSD-Id: nfs_bio.c,v 1.44 1997/09/10 19:52:25 phk Exp $
  */
+
+#include <nfs/nfs_conf.h>
+#if CONFIG_NFS_CLIENT
+
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/resourcevar.h>
@@ -77,6 +81,7 @@
 #include <sys/kernel.h>
 #include <sys/ubc_internal.h>
 #include <sys/uio_internal.h>
+#include <sys/kpi_mbuf.h>
 
 #include <sys/vm.h>
 #include <sys/vmparam.h>
 #include <nfs/nfsnode.h>
 #include <sys/buf_internal.h>
 #include <libkern/OSAtomic.h>
+#include <os/refcnt.h>
 
-kern_return_t  thread_terminate(thread_t); /* XXX */
+#define NFS_BIO_DBG(...) NFS_DBG(NFS_FAC_BIO, 7, ## __VA_ARGS__)
 
-#define        NFSBUFHASH(np, lbn)     \
+kern_return_t   thread_terminate(thread_t); /* XXX */
+
+#define NFSBUFHASH(np, lbn)     \
        (&nfsbufhashtbl[((long)(np) / sizeof(*(np)) + (int)(lbn)) & nfsbufhash])
-LIST_HEAD(nfsbufhashhead, nfsbuf) *nfsbufhashtbl;
+LIST_HEAD(nfsbufhashhead, nfsbuf) * nfsbufhashtbl;
 struct nfsbuffreehead nfsbuffree, nfsbuffreemeta, nfsbufdelwri;
 u_long nfsbufhash;
 int nfsbufcnt, nfsbufmin, nfsbufmax, nfsbufmetacnt, nfsbufmetamax;
@@ -109,37 +117,98 @@ int nfs_nbdwrite;
 int nfs_buf_timer_on = 0;
 thread_t nfsbufdelwrithd = NULL;
 
+ZONE_DECLARE(nfsbuf_zone, "NFS bio", sizeof(struct nfsbuf), ZC_NONE);
+
 lck_grp_t *nfs_buf_lck_grp;
 lck_mtx_t *nfs_buf_mutex;
 
-#define NFSBUF_FREE_PERIOD     30      /* seconds */
-#define NFSBUF_LRU_STALE       120
-#define NFSBUF_META_STALE      240
+#define NFSBUF_FREE_PERIOD      30      /* seconds */
+#define NFSBUF_LRU_STALE        120
+#define NFSBUF_META_STALE       240
 
 /* number of nfsbufs nfs_buf_freeup() should attempt to free from nfsbuffree list */
-#define LRU_TO_FREEUP                  6
+#define LRU_TO_FREEUP                   6
 /* number of nfsbufs nfs_buf_freeup() should attempt to free from nfsbuffreemeta list */
-#define META_TO_FREEUP                 3
+#define META_TO_FREEUP                  3
 /* total number of nfsbufs nfs_buf_freeup() should attempt to free */
-#define TOTAL_TO_FREEUP                        (LRU_TO_FREEUP+META_TO_FREEUP)
+#define TOTAL_TO_FREEUP                 (LRU_TO_FREEUP+META_TO_FREEUP)
 /* fraction of nfsbufs nfs_buf_freeup() should attempt to free from nfsbuffree list when called from timer */
-#define LRU_FREEUP_FRAC_ON_TIMER       8
+#define LRU_FREEUP_FRAC_ON_TIMER        8
 /* fraction of nfsbufs nfs_buf_freeup() should attempt to free from nfsbuffreemeta list when called from timer */
-#define META_FREEUP_FRAC_ON_TIMER      16
+#define META_FREEUP_FRAC_ON_TIMER       16
 /* fraction of total nfsbufs that nfsbuffreecnt should exceed before bothering to call nfs_buf_freeup() */
-#define LRU_FREEUP_MIN_FRAC            4
+#define LRU_FREEUP_MIN_FRAC             4
 /* fraction of total nfsbufs that nfsbuffreemetacnt should exceed before bothering to call nfs_buf_freeup() */
-#define META_FREEUP_MIN_FRAC           2
+#define META_FREEUP_MIN_FRAC            2
+
+#define NFS_ROUND_BLOCK(p, blksize)         ((((uint64_t)(p) + blksize - 1) & ~((uint64_t)blksize - 1)) / blksize)
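[Review note] NFS_ROUND_BLOCK(p, blksize) computes the number of blksize-sized
blocks needed to cover p bytes, i.e. ceil(p / blksize). A quick sanity check
(illustrative values only, assuming blksize = 4096):

    NFS_ROUND_BLOCK(0, 4096);    /* ((0 + 4095) & ~4095) / 4096 == 0 */
    NFS_ROUND_BLOCK(1, 4096);    /* ((1 + 4095) & ~4095) / 4096 == 1 */
    NFS_ROUND_BLOCK(4096, 4096); /* == 1: exactly one block */
    NFS_ROUND_BLOCK(4097, 4096); /* == 2: one full block plus one byte */
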
 
 #define NFS_BUF_FREEUP() \
        do { \
-               /* only call nfs_buf_freeup() if it has work to do: */ \
-               if (((nfsbuffreecnt > nfsbufcnt/LRU_FREEUP_MIN_FRAC) || \
-                    (nfsbuffreemetacnt > nfsbufcnt/META_FREEUP_MIN_FRAC)) && \
-                   ((nfsbufcnt - TOTAL_TO_FREEUP) > nfsbufmin)) \
-                       nfs_buf_freeup(0); \
+       /* only call nfs_buf_freeup() if it has work to do: */ \
+               if (((nfsbuffreecnt > nfsbufcnt/LRU_FREEUP_MIN_FRAC) || \
+                    (nfsbuffreemetacnt > nfsbufcnt/META_FREEUP_MIN_FRAC)) && \
+                   ((nfsbufcnt - TOTAL_TO_FREEUP) > nfsbufmin)) \
+                       nfs_buf_freeup(0); \
        } while (0)
 
+void
+nfs_buf_pgs_get_page_mask(nfsbufpgs *nfsbp, off_t page)
+{
+       off_t page_pos = page / NBPGS_ELEMENT_PAGES;
+       off_t max_page = NBPGS_STRUCT_SIZE * 8;
+       NBPGS_ERASE(nfsbp);
+
+       if (page >= max_page) {
+               nfs_buf_pgs_bit_not(nfsbp);
+               return;
+       }
+
+       NBPGS_SET(nfsbp, page);
+       nfsbp->pages[page_pos]--;
+       for (off_t i = page_pos - 1; i >= 0; i--) {
+               nfsbp->pages[i] = ~0;
+       }
+}
+
+void
+nfs_buf_pgs_bit_not(nfsbufpgs *nfsbp)
+{
+       for (uint32_t i = 0; i < NBPGS_ELEMENTS; i++) {
+               nfsbp->pages[i] = ~nfsbp->pages[i];
+       }
+}
+
+void
+nfs_buf_pgs_bit_and(nfsbufpgs *nfsbp_src1, nfsbufpgs *nfsbp_src2, nfsbufpgs *nfsbp_dst)
+{
+       for (uint32_t i = 0; i < NBPGS_ELEMENTS; i++) {
+               nfsbp_dst->pages[i] = nfsbp_src1->pages[i] & nfsbp_src2->pages[i];
+       }
+}
+
+void
+nfs_buf_pgs_set_pages_between(nfsbufpgs *nfsbp, off_t firstpg, off_t lastpg)
+{
+       nfsbufpgs pagemaskfirst, pagemasklast;
+
+       nfs_buf_pgs_get_page_mask(&pagemasklast, lastpg);
+       nfs_buf_pgs_get_page_mask(&pagemaskfirst, firstpg);
+       nfs_buf_pgs_bit_not(&pagemaskfirst);
+       nfs_buf_pgs_bit_and(&pagemaskfirst, &pagemasklast, nfsbp);
+}
+
+int
+nfs_buf_pgs_is_set(nfsbufpgs *nfsbp)
+{
+       for (uint32_t i = 0; i < NBPGS_ELEMENTS; i++) {
+               if (nfsbp->pages[i] != 0) {
+                       return 1;
+               }
+       }
+       return 0;
+}
+
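[Review note] These helpers replace the old single-word nb_valid/nb_dirty
bitmasks with the multi-word nfsbufpgs bitmap, so a buffer can track more than
32 pages. nfs_buf_pgs_get_page_mask(&m, n) builds a prefix mask with bits
[0, n) set (all ones if n is past the end of the bitmap), and
nfs_buf_pgs_set_pages_between() derives the range mask [firstpg, lastpg) as
~mask(firstpg) & mask(lastpg). A naive equivalent, shown only to illustrate
the intended semantics:

    /* Sketch: set bits [firstpg, lastpg) one page at a time. The real code
     * composes two prefix masks instead, touching each word only once. */
    static void
    pgs_set_between_naive(nfsbufpgs *m, off_t firstpg, off_t lastpg)
    {
            NBPGS_ERASE(m);
            for (off_t pg = firstpg; pg < lastpg; pg++) {
                    NBPGS_SET(m, pg);
            }
    }
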
 /*
  * Initialize nfsbuf lists
  */
@@ -150,19 +219,18 @@ nfs_nbinit(void)
        nfs_buf_mutex = lck_mtx_alloc_init(nfs_buf_lck_grp, LCK_ATTR_NULL);
 
        nfsbufcnt = nfsbufmetacnt =
-       nfsbuffreecnt = nfsbuffreemetacnt = nfsbufdelwricnt = 0;
+           nfsbuffreecnt = nfsbuffreemetacnt = nfsbufdelwricnt = 0;
        nfsbufmin = 128;
        /* size nfsbufmax to cover at most half sane_size (w/default buf size) */
-       nfsbufmax = (sane_size >> PAGE_SHIFT) / (2 * (NFS_RWSIZE >> PAGE_SHIFT));
+       nfsbufmax = (int)(sane_size >> PAGE_SHIFT) / (2 * (NFS_RWSIZE >> PAGE_SHIFT));
        nfsbufmetamax = nfsbufmax / 4;
        nfsneedbuffer = 0;
        nfs_nbdwrite = 0;
 
-       nfsbufhashtbl = hashinit(nfsbufmax/4, M_TEMP, &nfsbufhash);
+       nfsbufhashtbl = hashinit(nfsbufmax / 4, M_NFSBIO, &nfsbufhash);
        TAILQ_INIT(&nfsbuffree);
        TAILQ_INIT(&nfsbuffreemeta);
        TAILQ_INIT(&nfsbufdelwri);
-
 }
 
 /*
@@ -182,7 +250,7 @@ nfs_buf_timer(__unused void *param0, __unused void *param1)
        lck_mtx_unlock(nfs_buf_mutex);
 
        nfs_interval_timer_start(nfs_buf_timer_call,
-               NFSBUF_FREE_PERIOD * 1000);
+           NFSBUF_FREE_PERIOD * 1000);
 }
 
 /*
@@ -204,16 +272,19 @@ nfs_buf_freeup(int timer)
 
        FSDBG(320, nfsbufcnt, nfsbuffreecnt, nfsbuffreemetacnt, 0);
 
-       count = timer ? nfsbuffreecnt/LRU_FREEUP_FRAC_ON_TIMER : LRU_TO_FREEUP;
+       count = timer ? nfsbuffreecnt / LRU_FREEUP_FRAC_ON_TIMER : LRU_TO_FREEUP;
        while ((nfsbufcnt > nfsbufmin) && (count-- > 0)) {
                fbp = TAILQ_FIRST(&nfsbuffree);
-               if (!fbp)
+               if (!fbp) {
                        break;
-               if (fbp->nb_refs)
+               }
+               if (os_ref_get_count(&fbp->nb_refs) > 1) {
                        break;
+               }
                if (NBUFSTAMPVALID(fbp) &&
-                   (fbp->nb_timestamp + (2*NFSBUF_LRU_STALE)) > now.tv_sec)
+                   (fbp->nb_timestamp + (2 * NFSBUF_LRU_STALE)) > now.tv_sec) {
                        break;
+               }
                nfs_buf_remfree(fbp);
                /* disassociate buffer from any nfsnode */
                if (fbp->nb_np) {
@@ -228,16 +299,19 @@ nfs_buf_freeup(int timer)
                nfsbufcnt--;
        }
 
-       count = timer ? nfsbuffreemetacnt/META_FREEUP_FRAC_ON_TIMER : META_TO_FREEUP;
+       count = timer ? nfsbuffreemetacnt / META_FREEUP_FRAC_ON_TIMER : META_TO_FREEUP;
        while ((nfsbufcnt > nfsbufmin) && (count-- > 0)) {
                fbp = TAILQ_FIRST(&nfsbuffreemeta);
-               if (!fbp)
+               if (!fbp) {
                        break;
-               if (fbp->nb_refs)
+               }
+               if (os_ref_get_count(&fbp->nb_refs) > 1) {
                        break;
+               }
                if (NBUFSTAMPVALID(fbp) &&
-                   (fbp->nb_timestamp + (2*NFSBUF_META_STALE)) > now.tv_sec)
+                   (fbp->nb_timestamp + (2 * NFSBUF_META_STALE)) > now.tv_sec) {
                        break;
+               }
                nfs_buf_remfree(fbp);
                /* disassociate buffer from any nfsnode */
                if (fbp->nb_np) {
@@ -261,16 +335,18 @@ nfs_buf_freeup(int timer)
        while ((fbp = TAILQ_FIRST(&nfsbuffreeup))) {
                TAILQ_REMOVE(&nfsbuffreeup, fbp, nb_free);
                /* nuke any creds */
-               if (IS_VALID_CRED(fbp->nb_rcred))
+               if (IS_VALID_CRED(fbp->nb_rcred)) {
                        kauth_cred_unref(&fbp->nb_rcred);
-               if (IS_VALID_CRED(fbp->nb_wcred))
+               }
+               if (IS_VALID_CRED(fbp->nb_wcred)) {
                        kauth_cred_unref(&fbp->nb_wcred);
+               }
                /* if buf was NB_META, dump buffer */
-               if (ISSET(fbp->nb_flags, NB_META) && fbp->nb_data)
-                       kfree(fbp->nb_data, fbp->nb_bufsize);
-               FREE(fbp, M_TEMP);
+               if (ISSET(fbp->nb_flags, NB_META) && fbp->nb_data) {
+                       kheap_free(KHEAP_DATA_BUFFERS, fbp->nb_data, fbp->nb_bufsize);
+               }
+               NFS_ZFREE(nfsbuf_zone, fbp);
        }
-
 }
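
[Review note] Buffer headers now come from a dedicated zone (see
ZONE_DECLARE(nfsbuf_zone, ...) above) rather than MALLOC/FREE on M_TEMP, and
meta-buffer data moves from kalloc to the KHEAP_DATA_BUFFERS heap. Allocation
and free sites must pair up; a sketch of the pairing this patch introduces
(bufsize stands in for the buffer's nb_bufsize):

    struct nfsbuf *bp = zalloc(nfsbuf_zone);       /* was MALLOC(..., M_TEMP, M_WAITOK) */
    void *data = kheap_alloc(KHEAP_DATA_BUFFERS, bufsize, Z_WAITOK); /* was kalloc() */
    /* ... */
    kheap_free(KHEAP_DATA_BUFFERS, data, bufsize); /* was kfree(data, bufsize) */
    NFS_ZFREE(nfsbuf_zone, bp);                    /* was FREE(bp, M_TEMP) */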
 
 /*
@@ -280,8 +356,9 @@ nfs_buf_freeup(int timer)
 void
 nfs_buf_remfree(struct nfsbuf *bp)
 {
-       if (bp->nb_free.tqe_next == NFSNOLIST)
+       if (bp->nb_free.tqe_next == NFSNOLIST) {
                panic("nfsbuf not on free list");
+       }
        if (ISSET(bp->nb_flags, NB_DELWRI)) {
                nfsbufdelwricnt--;
                TAILQ_REMOVE(&nfsbufdelwri, bp, nb_free);
@@ -304,12 +381,13 @@ nfs_buf_is_incore(nfsnode_t np, daddr64_t blkno)
 {
        boolean_t rv;
        lck_mtx_lock(nfs_buf_mutex);
-       if (nfs_buf_incore(np, blkno))
+       if (nfs_buf_incore(np, blkno)) {
                rv = TRUE;
-       else
+       } else {
                rv = FALSE;
+       }
        lck_mtx_unlock(nfs_buf_mutex);
-       return (rv);
+       return rv;
 }
 
 /*
@@ -320,14 +398,15 @@ nfs_buf_incore(nfsnode_t np, daddr64_t blkno)
 {
        /* Search hash chain */
        struct nfsbuf * bp = NFSBUFHASH(np, blkno)->lh_first;
-       for (; bp != NULL; bp = bp->nb_hash.le_next)
+       for (; bp != NULL; bp = bp->nb_hash.le_next) {
                if ((bp->nb_lblkno == blkno) && (bp->nb_np == np)) {
                        if (!ISSET(bp->nb_flags, NB_INVAL)) {
                                FSDBG(547, bp, blkno, bp->nb_flags, bp->nb_np);
-                               return (bp);
+                               return bp;
                        }
                }
-       return (NULL);
+       }
+       return NULL;
 }
 
 /*
@@ -345,13 +424,15 @@ nfs_buf_page_inval(vnode_t vp, off_t offset)
        struct nfsbuf *bp;
        int error = 0;
 
-       if (!nmp)
-               return (ENXIO);
+       if (nfs_mount_gone(nmp)) {
+               return ENXIO;
+       }
 
        lck_mtx_lock(nfs_buf_mutex);
        bp = nfs_buf_incore(VTONFS(vp), (daddr64_t)(offset / nmp->nm_biosize));
-       if (!bp)
+       if (!bp) {
                goto out;
+       }
        FSDBG(325, bp, bp->nb_flags, bp->nb_dirtyoff, bp->nb_dirtyend);
        if (ISSET(bp->nb_lflags, NBL_BUSY)) {
                error = EBUSY;
@@ -363,16 +444,25 @@ nfs_buf_page_inval(vnode_t vp, off_t offset)
         * If it does, we can't let the pager drop the page.
         */
        if (bp->nb_dirtyend > 0) {
-               int start = offset - NBOFF(bp);
-               if (bp->nb_dirtyend <= start ||
-                   bp->nb_dirtyoff >= (start + PAGE_SIZE))
-                       error = 0;
-               else
+               off_t start = offset - NBOFF(bp);
+               if ((bp->nb_dirtyend > start) &&
+                   (bp->nb_dirtyoff < (start + PAGE_SIZE))) {
+                       /*
+                        * Before returning the bad news, move the
+                        * buffer to the start of the delwri list and
+                        * give the list a push to try to flush the
+                        * buffer out.
+                        */
                        error = EBUSY;
+                       nfs_buf_remfree(bp);
+                       TAILQ_INSERT_HEAD(&nfsbufdelwri, bp, nb_free);
+                       nfsbufdelwricnt++;
+                       nfs_buf_delwri_push(1);
+               }
        }
 out:
        lck_mtx_unlock(nfs_buf_mutex);
-       return (error);
+       return error;
 }
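
[Review note] The rewritten test is the standard half-open interval overlap
check: the page [start, start + PAGE_SIZE) intersects the dirty region
[nb_dirtyoff, nb_dirtyend) exactly when nb_dirtyend > start and
nb_dirtyoff < start + PAGE_SIZE. On overlap the buffer is now also moved to
the head of the delwri queue and the flush thread is kicked, so a later retry
of the page invalidation is more likely to succeed. Illustrative numbers:

    /* PAGE_SIZE 4096, dirty region [100, 200) within the buffer: */
    /* page at start 0:    200 > 0 && 100 < 4096  -> overlap, EBUSY   */
    /* page at start 4096: 200 > 4096 is false    -> page may be dropped */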
 
 /*
@@ -386,8 +476,9 @@ nfs_buf_upl_setup(struct nfsbuf *bp)
        upl_t upl;
        int upl_flags;
 
-       if (ISSET(bp->nb_flags, NB_PAGELIST))
-               return (0);
+       if (ISSET(bp->nb_flags, NB_PAGELIST)) {
+               return 0;
+       }
 
        upl_flags = UPL_PRECIOUS;
        if (!ISSET(bp->nb_flags, NB_READ)) {
@@ -397,24 +488,24 @@ nfs_buf_upl_setup(struct nfsbuf *bp)
                 */
                upl_flags |= UPL_WILL_MODIFY;
        }
-       kret = ubc_create_upl(NFSTOV(bp->nb_np), NBOFF(bp), bp->nb_bufsize,
-                               &upl, NULL, upl_flags);
+       kret = ubc_create_upl_kernel(NFSTOV(bp->nb_np), NBOFF(bp), bp->nb_bufsize,
+           &upl, NULL, upl_flags, VM_KERN_MEMORY_FILE);
        if (kret == KERN_INVALID_ARGUMENT) {
                /* vm object probably doesn't exist any more */
                bp->nb_pagelist = NULL;
-               return (EINVAL);
+               return EINVAL;
        }
        if (kret != KERN_SUCCESS) {
                printf("nfs_buf_upl_setup(): failed to get pagelist %d\n", kret);
                bp->nb_pagelist = NULL;
-               return (EIO);
+               return EIO;
        }
 
        FSDBG(538, bp, NBOFF(bp), bp->nb_bufsize, bp->nb_np);
 
        bp->nb_pagelist = upl;
        SET(bp->nb_flags, NB_PAGELIST);
-       return (0);
+       return 0;
 }
 
 /*
@@ -428,38 +519,44 @@ nfs_buf_upl_check(struct nfsbuf *bp)
        off_t filesize, fileoffset;
        int i, npages;
 
-       if (!ISSET(bp->nb_flags, NB_PAGELIST))
+       if (!ISSET(bp->nb_flags, NB_PAGELIST)) {
                return;
+       }
 
        npages = round_page_32(bp->nb_bufsize) / PAGE_SIZE;
        filesize = ubc_getsize(NFSTOV(bp->nb_np));
        fileoffset = NBOFF(bp);
-       if (fileoffset < filesize)
+       if (fileoffset < filesize) {
                SET(bp->nb_flags, NB_CACHE);
-       else
+       } else {
                CLR(bp->nb_flags, NB_CACHE);
+       }
 
        pl = ubc_upl_pageinfo(bp->nb_pagelist);
-       bp->nb_valid = bp->nb_dirty = 0;
+       NBPGS_ERASE(&bp->nb_valid);
+       NBPGS_ERASE(&bp->nb_dirty);
 
-       for (i=0; i < npages; i++, fileoffset += PAGE_SIZE_64) {
+       for (i = 0; i < npages; i++, fileoffset += PAGE_SIZE_64) {
                /* anything beyond the end of the file is not valid or dirty */
-               if (fileoffset >= filesize)
+               if (fileoffset >= filesize) {
                        break;
+               }
                if (!upl_valid_page(pl, i)) {
                        CLR(bp->nb_flags, NB_CACHE);
                        continue;
                }
-               NBPGVALID_SET(bp,i);
-               if (upl_dirty_page(pl, i))
+               NBPGVALID_SET(bp, i);
+               if (upl_dirty_page(pl, i)) {
                        NBPGDIRTY_SET(bp, i);
+               }
        }
        fileoffset = NBOFF(bp);
        if (ISSET(bp->nb_flags, NB_CACHE)) {
                bp->nb_validoff = 0;
                bp->nb_validend = bp->nb_bufsize;
-               if (fileoffset + bp->nb_validend > filesize)
+               if (fileoffset + bp->nb_validend > filesize) {
                        bp->nb_validend = filesize - fileoffset;
+               }
        } else {
                bp->nb_validoff = bp->nb_validend = -1;
        }
@@ -476,18 +573,22 @@ nfs_buf_map(struct nfsbuf *bp)
 {
        kern_return_t kret;
 
-       if (bp->nb_data)
-               return (0);
-       if (!ISSET(bp->nb_flags, NB_PAGELIST))
-               return (EINVAL);
+       if (bp->nb_data) {
+               return 0;
+       }
+       if (!ISSET(bp->nb_flags, NB_PAGELIST)) {
+               return EINVAL;
+       }
 
-       kret = ubc_upl_map(bp->nb_pagelist, (vm_address_t *)&(bp->nb_data));
-       if (kret != KERN_SUCCESS)
+       kret = ubc_upl_map(bp->nb_pagelist, (vm_offset_t *)&(bp->nb_data));
+       if (kret != KERN_SUCCESS) {
                panic("nfs_buf_map: ubc_upl_map() failed with (%d)", kret);
-       if (bp->nb_data == 0)
+       }
+       if (bp->nb_data == 0) {
                panic("ubc_upl_map mapped 0");
+       }
        FSDBG(540, bp, bp->nb_flags, NBOFF(bp), bp->nb_data);
-       return (0);
+       return 0;
 }
 
 /*
@@ -502,28 +603,31 @@ nfs_buf_map(struct nfsbuf *bp)
 void
 nfs_buf_normalize_valid_range(nfsnode_t np, struct nfsbuf *bp)
 {
-       int pg, npg;
+       off_t pg, npg;
        /* pull validoff back to start of contiguous valid page range */
-       pg = bp->nb_validoff/PAGE_SIZE;
-       while (pg >= 0 && NBPGVALID(bp,pg))
+       pg = bp->nb_validoff / PAGE_SIZE;
+       while (pg >= 0 && NBPGVALID(bp, pg)) {
                pg--;
-       bp->nb_validoff = (pg+1) * PAGE_SIZE;
+       }
+       bp->nb_validoff = (pg + 1) * PAGE_SIZE;
        /* push validend forward to end of contiguous valid page range */
-       npg = bp->nb_bufsize/PAGE_SIZE;
-       pg = bp->nb_validend/PAGE_SIZE;
-       while (pg < npg && NBPGVALID(bp,pg))
+       npg = bp->nb_bufsize / PAGE_SIZE;
+       pg = bp->nb_validend / PAGE_SIZE;
+       while (pg < npg && NBPGVALID(bp, pg)) {
                pg++;
+       }
        bp->nb_validend = pg * PAGE_SIZE;
        /* clip to EOF */
-       if (NBOFF(bp) + bp->nb_validend > (off_t)np->n_size)
+       if (NBOFF(bp) + bp->nb_validend > (off_t)np->n_size) {
                bp->nb_validend = np->n_size % bp->nb_bufsize;
+       }
 }
 
 /*
  * process some entries on the delayed write queue
  * (must be called with nfs_buf_mutex held)
  */
-static void
+void
 nfs_buf_delwri_service(void)
 {
        struct nfsbuf *bp;
@@ -534,17 +638,21 @@ nfs_buf_delwri_service(void)
                np = bp->nb_np;
                nfs_buf_remfree(bp);
                nfs_buf_refget(bp);
-               while ((error = nfs_buf_acquire(bp, 0, 0, 0)) == EAGAIN);
+               while ((error = nfs_buf_acquire(bp, 0, 0, 0)) == EAGAIN) {
+                       ;
+               }
                nfs_buf_refrele(bp);
-               if (error)
+               if (error) {
                        break;
+               }
                if (!bp->nb_np) {
                        /* buffer is no longer valid */
                        nfs_buf_drop(bp);
                        continue;
                }
-               if (ISSET(bp->nb_flags, NB_NEEDCOMMIT))
+               if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
                        nfs_buf_check_write_verifier(np, bp);
+               }
                if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
                        /* put buffer at end of delwri list */
                        TAILQ_INSERT_TAIL(&nfsbufdelwri, bp, nb_free);
@@ -565,10 +673,10 @@ nfs_buf_delwri_service(void)
 /*
  * thread to service the delayed write queue when asked
  */
-static void
+void
 nfs_buf_delwri_thread(__unused void *arg, __unused wait_result_t wr)
 {
-       struct timespec ts = { 30, 0 };
+       struct timespec ts = { .tv_sec = 30, .tv_nsec = 0 };
        int error = 0;
 
        lck_mtx_lock(nfs_buf_mutex);
@@ -585,23 +693,28 @@ nfs_buf_delwri_thread(__unused void *arg, __unused wait_result_t wr)
  * try to push out some delayed/uncommitted writes
  * ("locked" indicates whether nfs_buf_mutex is already held)
  */
-static void
+void
 nfs_buf_delwri_push(int locked)
 {
-       if (TAILQ_EMPTY(&nfsbufdelwri))
+       if (TAILQ_EMPTY(&nfsbufdelwri)) {
                return;
-       if (!locked)
+       }
+       if (!locked) {
                lck_mtx_lock(nfs_buf_mutex);
+       }
        /* wake up the delayed write service thread */
-       if (nfsbufdelwrithd)
+       if (nfsbufdelwrithd) {
                wakeup(&nfsbufdelwrithd);
-       else if (kernel_thread_start(nfs_buf_delwri_thread, NULL, &nfsbufdelwrithd) == KERN_SUCCESS)
+       } else if (kernel_thread_start(nfs_buf_delwri_thread, NULL, &nfsbufdelwrithd) == KERN_SUCCESS) {
                thread_deallocate(nfsbufdelwrithd);
+       }
        /* otherwise, try to do some of the work ourselves */
-       if (!nfsbufdelwrithd)
+       if (!nfsbufdelwrithd) {
                nfs_buf_delwri_service();
-       if (!locked)
+       }
+       if (!locked) {
                lck_mtx_unlock(nfs_buf_mutex);
+       }
 }
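
[Review note] kernel_thread_start() returns holding an extra reference on the
new thread, so the immediate thread_deallocate() here merely drops that
reference; the thread keeps running. If creation fails, nfsbufdelwrithd stays
NULL and the work is done inline by nfs_buf_delwri_service(). The idiom, with
a hypothetical worker function:

    thread_t thd;
    if (kernel_thread_start(worker_fn /* hypothetical */, NULL, &thd) == KERN_SUCCESS) {
            thread_deallocate(thd); /* drop the ref kernel_thread_start took */
    }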
 
 /*
@@ -627,7 +740,7 @@ int
 nfs_buf_get(
        nfsnode_t np,
        daddr64_t blkno,
-       int size,
+       uint32_t size,
        thread_t thd,
        int flags,
        struct nfsbuf **bpp)
@@ -635,7 +748,7 @@ nfs_buf_get(
        vnode_t vp = NFSTOV(np);
        struct nfsmount *nmp = VTONMP(vp);
        struct nfsbuf *bp;
-       int bufsize;
+       uint32_t bufsize;
        int slpflag = PCATCH;
        int operation = (flags & NBLK_OPMASK);
        int error = 0;
@@ -645,17 +758,18 @@ nfs_buf_get(
        *bpp = NULL;
 
        bufsize = size;
-       if (bufsize > NFS_MAXBSIZE)
+       if (bufsize > NFS_MAXBSIZE) {
                panic("nfs_buf_get: buffer larger than NFS_MAXBSIZE requested");
+       }
 
-       if (!nmp) {
+       if (nfs_mount_gone(nmp)) {
                FSDBG_BOT(541, np, blkno, 0, ENXIO);
-               return (ENXIO);
+               return ENXIO;
        }
 
        if (!UBCINFOEXISTS(vp)) {
                operation = NBLK_META;
-       } else if (bufsize < nmp->nm_biosize) {
+       } else if (bufsize < (uint32_t)nmp->nm_biosize) {
                /* reg files should always have biosize blocks */
                bufsize = nmp->nm_biosize;
        }
@@ -675,6 +789,22 @@ nfs_buf_get(
 loop:
        lck_mtx_lock(nfs_buf_mutex);
 
+       /* wait for any buffer invalidation/flushing to complete */
+       while (np->n_bflag & NBINVALINPROG) {
+               np->n_bflag |= NBINVALWANT;
+               ts.tv_sec = 2;
+               ts.tv_nsec = 0;
+               msleep(&np->n_bflag, nfs_buf_mutex, slpflag, "nfs_buf_get_invalwait", &ts);
+               if ((error = nfs_sigintr(VTONMP(vp), NULL, thd, 0))) {
+                       lck_mtx_unlock(nfs_buf_mutex);
+                       FSDBG_BOT(541, np, blkno, 0, error);
+                       return error;
+               }
+               if (np->n_bflag & NBINVALINPROG) {
+                       slpflag = 0;
+               }
+       }
+
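[Review note] This new wait loop mirrors the busy-buffer wait below: set a
*WANT flag, msleep with a 2-second timeout so a lost wakeup cannot hang the
thread, then re-check nfs_sigintr() so a signalled thread bails out; slpflag
is cleared after the first pass so a pending signal cannot spin the loop. The
invalidation path (not shown in this hunk) presumably pairs it roughly like:

    np->n_bflag &= ~NBINVALINPROG;
    if (np->n_bflag & NBINVALWANT) {
            np->n_bflag &= ~NBINVALWANT;
            wakeup(&np->n_bflag);
    }
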
        /* check for existence of nfsbuf in cache */
        if ((bp = nfs_buf_incore(np, blkno))) {
                /* if busy, set wanted and wait */
@@ -682,40 +812,40 @@ loop:
                        if (flags & NBLK_NOWAIT) {
                                lck_mtx_unlock(nfs_buf_mutex);
                                FSDBG_BOT(541, np, blkno, bp, 0xbcbcbcbc);
-                               return (0);
+                               return 0;
                        }
                        FSDBG_TOP(543, np, blkno, bp, bp->nb_flags);
                        SET(bp->nb_lflags, NBL_WANTED);
 
                        ts.tv_sec = 2;
                        ts.tv_nsec = 0;
-                       error = msleep(bp, nfs_buf_mutex, slpflag|(PRIBIO+1)|PDROP,
-                                       "nfsbufget", (slpflag == PCATCH) ? NULL : &ts);
-                       if (error == EWOULDBLOCK)
-                               error = 0;
+                       msleep(bp, nfs_buf_mutex, slpflag | (PRIBIO + 1) | PDROP,
+                           "nfsbufget", (slpflag == PCATCH) ? NULL : &ts);
                        slpflag = 0;
                        FSDBG_BOT(543, np, blkno, bp, bp->nb_flags);
-                       if (error || ((error = nfs_sigintr(VTONMP(vp), NULL, thd, 0)))) {
+                       if ((error = nfs_sigintr(VTONMP(vp), NULL, thd, 0))) {
                                FSDBG_BOT(541, np, blkno, 0, error);
-                               return (error);
+                               return error;
                        }
                        goto loop;
                }
-               if (bp->nb_bufsize != bufsize)
+               if (bp->nb_bufsize != bufsize) {
                        panic("nfsbuf size mismatch");
+               }
                SET(bp->nb_lflags, NBL_BUSY);
                SET(bp->nb_flags, NB_CACHE);
                nfs_buf_remfree(bp);
                /* additional paranoia: */
-               if (ISSET(bp->nb_flags, NB_PAGELIST))
+               if (ISSET(bp->nb_flags, NB_PAGELIST)) {
                        panic("pagelist buffer was not busy");
+               }
                goto buffer_setup;
        }
 
        if (flags & NBLK_ONLYVALID) {
                lck_mtx_unlock(nfs_buf_mutex);
                FSDBG_BOT(541, np, blkno, 0, 0x0000cace);
-               return (0);
+               return 0;
        }
 
        /*
@@ -743,28 +873,31 @@ loop:
                /* if the next LRU or META buffer is invalid or stale, use it */
                lrubp = TAILQ_FIRST(&nfsbuffree);
                if (lrubp && (!NBUFSTAMPVALID(lrubp) ||
-                   ((lrubp->nb_timestamp + NFSBUF_LRU_STALE) < now.tv_sec)))
+                   ((lrubp->nb_timestamp + NFSBUF_LRU_STALE) < now.tv_sec))) {
                        bp = lrubp;
+               }
                metabp = TAILQ_FIRST(&nfsbuffreemeta);
                if (!bp && metabp && (!NBUFSTAMPVALID(metabp) ||
-                   ((metabp->nb_timestamp + NFSBUF_META_STALE) < now.tv_sec)))
+                   ((metabp->nb_timestamp + NFSBUF_META_STALE) < now.tv_sec))) {
                        bp = metabp;
+               }
 
                if (!bp && (nfsbufcnt >= nfsbufmax)) {
                        /* we've already allocated all bufs, so */
                        /* choose the buffer that'll go stale first */
-                       if (!metabp)
+                       if (!metabp) {
                                bp = lrubp;
-                       else if (!lrubp)
+                       } else if (!lrubp) {
                                bp = metabp;
-                       else {
-                               int32_t lru_stale_time, meta_stale_time;
+                       else {
+                               time_t lru_stale_time, meta_stale_time;
                                lru_stale_time = lrubp->nb_timestamp + NFSBUF_LRU_STALE;
                                meta_stale_time = metabp->nb_timestamp + NFSBUF_META_STALE;
-                               if (lru_stale_time <= meta_stale_time)
+                               if (lru_stale_time <= meta_stale_time) {
                                        bp = lrubp;
-                               else
+                               } else {
                                        bp = metabp;
+                               }
                        }
                }
        }
@@ -773,8 +906,9 @@ loop:
                /* we have a buffer to reuse */
                FSDBG(544, np, blkno, bp, bp->nb_flags);
                nfs_buf_remfree(bp);
-               if (ISSET(bp->nb_flags, NB_DELWRI))
+               if (ISSET(bp->nb_flags, NB_DELWRI)) {
                        panic("nfs_buf_get: delwri");
+               }
                SET(bp->nb_lflags, NBL_BUSY);
                /* disassociate buffer from previous nfsnode */
                if (bp->nb_np) {
@@ -786,17 +920,20 @@ loop:
                }
                LIST_REMOVE(bp, nb_hash);
                /* nuke any creds we're holding */
-               if (IS_VALID_CRED(bp->nb_rcred))
+               if (IS_VALID_CRED(bp->nb_rcred)) {
                        kauth_cred_unref(&bp->nb_rcred);
-               if (IS_VALID_CRED(bp->nb_wcred))
+               }
+               if (IS_VALID_CRED(bp->nb_wcred)) {
                        kauth_cred_unref(&bp->nb_wcred);
+               }
                /* if buf will no longer be NB_META, dump old buffer */
                if (operation == NBLK_META) {
-                       if (!ISSET(bp->nb_flags, NB_META))
+                       if (!ISSET(bp->nb_flags, NB_META)) {
                                nfsbufmetacnt++;
+                       }
                } else if (ISSET(bp->nb_flags, NB_META)) {
                        if (bp->nb_data) {
-                               kfree(bp->nb_data, bp->nb_bufsize);
+                               kheap_free(KHEAP_DATA_BUFFERS, bp->nb_data, bp->nb_bufsize);
                                bp->nb_data = NULL;
                        }
                        nfsbufmetacnt--;
@@ -805,20 +942,15 @@ loop:
                bp->nb_error = 0;
                bp->nb_validoff = bp->nb_validend = -1;
                bp->nb_dirtyoff = bp->nb_dirtyend = 0;
-               bp->nb_valid = 0;
-               bp->nb_dirty = 0;
+               NBPGS_ERASE(&bp->nb_valid);
+               NBPGS_ERASE(&bp->nb_dirty);
                bp->nb_verf = 0;
        } else {
                /* no buffer to reuse */
                if ((nfsbufcnt < nfsbufmax) &&
                    ((operation != NBLK_META) || (nfsbufmetacnt < nfsbufmetamax))) {
                        /* just alloc a new one */
-                       MALLOC(bp, struct nfsbuf *, sizeof(struct nfsbuf), M_TEMP, M_WAITOK);
-                       if (!bp) {
-                               lck_mtx_unlock(nfs_buf_mutex);
-                               FSDBG_BOT(541, np, blkno, 0, error);
-                               return (ENOMEM);
-                       }
+                       bp = zalloc(nfsbuf_zone);
                        nfsbufcnt++;
 
                        /*
@@ -828,14 +960,17 @@ loop:
                        if (nfsbufcnt > nfsbufmin && !nfs_buf_timer_on) {
                                nfs_buf_timer_on = 1;
                                nfs_interval_timer_start(nfs_buf_timer_call,
-                                       NFSBUF_FREE_PERIOD * 1000);
+                                   NFSBUF_FREE_PERIOD * 1000);
                        }
 
-                       if (operation == NBLK_META)
+                       if (operation == NBLK_META) {
                                nfsbufmetacnt++;
+                       }
                        NFSBUFCNTCHK();
                        /* init nfsbuf */
                        bzero(bp, sizeof(*bp));
+                       os_ref_init(&bp->nb_refs, NULL);
+
                        bp->nb_free.tqe_next = NFSNOLIST;
                        bp->nb_validoff = bp->nb_validend = -1;
                        FSDBG(545, np, blkno, bp, 0);
@@ -847,18 +982,18 @@ loop:
                        nfs_buf_delwri_push(1);
 
                        nfsneedbuffer = 1;
-                       error = msleep(&nfsneedbuffer, nfs_buf_mutex, PCATCH|PDROP, "nfsbufget", NULL);
+                       msleep(&nfsneedbuffer, nfs_buf_mutex, PCATCH | PDROP, "nfsbufget", NULL);
                        FSDBG_BOT(546, np, blkno, nfsbufcnt, nfsbufmax);
-                       if (error || ((error = nfs_sigintr(VTONMP(vp), NULL, thd, 0)))) {
+                       if ((error = nfs_sigintr(VTONMP(vp), NULL, thd, 0))) {
                                FSDBG_BOT(541, np, blkno, 0, error);
-                               return (error);
+                               return error;
                        }
                        goto loop;
                }
        }
 
-       /* setup nfsbuf */
-       bp->nb_lflags = NBL_BUSY;
+       /* set up nfsbuf */
+       SET(bp->nb_lflags, NBL_BUSY);
        bp->nb_flags = 0;
        bp->nb_lblkno = blkno;
        /* insert buf in hash */
@@ -876,16 +1011,18 @@ buffer_setup:
        case NBLK_META:
                SET(bp->nb_flags, NB_META);
                if ((bp->nb_bufsize != bufsize) && bp->nb_data) {
-                       kfree(bp->nb_data, bp->nb_bufsize);
+                       kheap_free(KHEAP_DATA_BUFFERS, bp->nb_data, bp->nb_bufsize);
                        bp->nb_data = NULL;
                        bp->nb_validoff = bp->nb_validend = -1;
                        bp->nb_dirtyoff = bp->nb_dirtyend = 0;
-                       bp->nb_valid = 0;
-                       bp->nb_dirty = 0;
+                       NBPGS_ERASE(&bp->nb_valid);
+                       NBPGS_ERASE(&bp->nb_dirty);
                        CLR(bp->nb_flags, NB_CACHE);
                }
-               if (!bp->nb_data)
-                       bp->nb_data = kalloc(bufsize);
+               if (!bp->nb_data) {
+                       bp->nb_data = kheap_alloc(KHEAP_DATA_BUFFERS,
+                           bufsize, Z_WAITOK);
+               }
                if (!bp->nb_data) {
                        /* Ack! couldn't allocate the data buffer! */
                        /* clean up buffer and return error */
@@ -895,13 +1032,14 @@ buffer_setup:
                        bp->nb_np = NULL;
                        /* invalidate usage timestamp to allow immediate freeing */
                        NBUFSTAMPINVALIDATE(bp);
-                       if (bp->nb_free.tqe_next != NFSNOLIST)
+                       if (bp->nb_free.tqe_next != NFSNOLIST) {
                                panic("nfsbuf on freelist");
+                       }
                        TAILQ_INSERT_HEAD(&nfsbuffree, bp, nb_free);
                        nfsbuffreecnt++;
                        lck_mtx_unlock(nfs_buf_mutex);
                        FSDBG_BOT(541, np, blkno, 0xb00, ENOMEM);
-                       return (ENOMEM);
+                       return ENOMEM;
                }
                bp->nb_bufsize = bufsize;
                break;
@@ -917,8 +1055,9 @@ buffer_setup:
                } else {
                        CLR(bp->nb_flags, NB_READ);
                }
-               if (bufsize < PAGE_SIZE)
+               if (bufsize < PAGE_SIZE) {
                        bufsize = PAGE_SIZE;
+               }
                bp->nb_bufsize = bufsize;
                bp->nb_validoff = bp->nb_validend = -1;
 
@@ -934,13 +1073,14 @@ buffer_setup:
                                bp->nb_np = NULL;
                                /* invalidate usage timestamp to allow immediate freeing */
                                NBUFSTAMPINVALIDATE(bp);
-                               if (bp->nb_free.tqe_next != NFSNOLIST)
+                               if (bp->nb_free.tqe_next != NFSNOLIST) {
                                        panic("nfsbuf on freelist");
+                               }
                                TAILQ_INSERT_HEAD(&nfsbuffree, bp, nb_free);
                                nfsbuffreecnt++;
                                lck_mtx_unlock(nfs_buf_mutex);
                                FSDBG_BOT(541, np, blkno, 0x2bc, EIO);
-                               return (EIO);
+                               return EIO;
                        }
                        nfs_buf_upl_check(bp);
                }
@@ -954,7 +1094,7 @@ buffer_setup:
 
        FSDBG_BOT(541, np, blkno, bp, bp->nb_flags);
 
-       return (0);
+       return 0;
 }
 
 void
@@ -971,66 +1111,79 @@ nfs_buf_release(struct nfsbuf *bp, int freeup)
 
        vp = np ? NFSTOV(np) : NULL;
        if (vp && UBCINFOEXISTS(vp) && bp->nb_bufsize) {
-               int upl_flags;
+               int upl_flags, rv;
                upl_t upl;
-               int i, rv;
+               uint32_t i;
 
                if (!ISSET(bp->nb_flags, NB_PAGELIST) && !ISSET(bp->nb_flags, NB_INVAL)) {
                        rv = nfs_buf_upl_setup(bp);
-                       if (rv)
+                       if (rv) {
                                printf("nfs_buf_release: upl create failed %d\n", rv);
-                       else
+                       } else {
                                nfs_buf_upl_check(bp);
+                       }
                }
                upl = bp->nb_pagelist;
-               if (!upl)
+               if (!upl) {
                        goto pagelist_cleanup_done;
+               }
                if (bp->nb_data) {
-                       if (ubc_upl_unmap(upl) != KERN_SUCCESS)
+                       if (ubc_upl_unmap(upl) != KERN_SUCCESS) {
                                panic("ubc_upl_unmap failed");
+                       }
                        bp->nb_data = NULL;
                }
                /*
                 * Abort the pages on error or: if this is an invalid or
                 * non-needcommit nocache buffer AND no pages are dirty.
                 */
-               if (ISSET(bp->nb_flags, NB_ERROR) || (!bp->nb_dirty && (ISSET(bp->nb_flags, NB_INVAL) ||
+               if (ISSET(bp->nb_flags, NB_ERROR) || (!nfs_buf_pgs_is_set(&bp->nb_dirty) && (ISSET(bp->nb_flags, NB_INVAL) ||
                    (ISSET(bp->nb_flags, NB_NOCACHE) && !ISSET(bp->nb_flags, (NB_NEEDCOMMIT | NB_DELWRI)))))) {
-                       if (ISSET(bp->nb_flags, (NB_READ | NB_INVAL | NB_NOCACHE)))
+                       if (ISSET(bp->nb_flags, (NB_READ | NB_INVAL | NB_NOCACHE))) {
                                upl_flags = UPL_ABORT_DUMP_PAGES;
-                       else
+                       } else {
                                upl_flags = 0;
+                       }
                        ubc_upl_abort(upl, upl_flags);
                        goto pagelist_cleanup_done;
                }
-               for (i=0; i <= (bp->nb_bufsize - 1)/PAGE_SIZE; i++) {
-                       if (!NBPGVALID(bp,i))
+               for (i = 0; i <= (bp->nb_bufsize - 1) / PAGE_SIZE; i++) {
+                       if (!NBPGVALID(bp, i)) {
                                ubc_upl_abort_range(upl,
-                                       i*PAGE_SIZE, PAGE_SIZE,
-                                       UPL_ABORT_DUMP_PAGES |
-                                       UPL_ABORT_FREE_ON_EMPTY);
-                       else {
-                               if (NBPGDIRTY(bp,i))
+                                   i * PAGE_SIZE, PAGE_SIZE,
+                                   UPL_ABORT_DUMP_PAGES |
+                                   UPL_ABORT_FREE_ON_EMPTY);
+                       } else {
+                               if (NBPGDIRTY(bp, i)) {
                                        upl_flags = UPL_COMMIT_SET_DIRTY;
-                               else
+                               } else {
                                        upl_flags = UPL_COMMIT_CLEAR_DIRTY;
+                               }
+
+                               if (!ISSET(bp->nb_flags, (NB_NEEDCOMMIT | NB_DELWRI))) {
+                                       upl_flags |= UPL_COMMIT_CLEAR_PRECIOUS;
+                               }
+
                                ubc_upl_commit_range(upl,
-                                       i*PAGE_SIZE, PAGE_SIZE,
-                                       upl_flags |
-                                       UPL_COMMIT_INACTIVATE |
-                                       UPL_COMMIT_FREE_ON_EMPTY);
+                                   i * PAGE_SIZE, PAGE_SIZE,
+                                   upl_flags |
+                                   UPL_COMMIT_INACTIVATE |
+                                   UPL_COMMIT_FREE_ON_EMPTY);
                        }
                }
 pagelist_cleanup_done:
-               /* was this the last buffer in the file? */
+               /* invalidate any pages past EOF */
                if (NBOFF(bp) + bp->nb_bufsize > (off_t)(np->n_size)) {
-                       /* if so, invalidate all pages of last buffer past EOF */
                        off_t start, end;
                        start = trunc_page_64(np->n_size) + PAGE_SIZE_64;
                        end = trunc_page_64(NBOFF(bp) + bp->nb_bufsize);
+                       if (start < NBOFF(bp)) {
+                               start = NBOFF(bp);
+                       }
                        if (end > start) {
-                               if (!(rv = ubc_sync_range(vp, start, end, UBC_INVALIDATE)))
-                                       printf("nfs_buf_release(): ubc_sync_range failed!\n");
+                               if ((rv = ubc_msync(vp, start, end, NULL, UBC_INVALIDATE))) {
+                                       printf("nfs_buf_release(): ubc_msync failed!, error %d\n", rv);
+                               }
                        }
                }
                CLR(bp->nb_flags, NB_PAGELIST);
@@ -1054,8 +1207,9 @@ pagelist_cleanup_done:
 
        /* If it's non-needcommit nocache, or an error, mark it invalid. */
        if (ISSET(bp->nb_flags, NB_ERROR) ||
-           (ISSET(bp->nb_flags, NB_NOCACHE) && !ISSET(bp->nb_flags, (NB_NEEDCOMMIT | NB_DELWRI))))
+           (ISSET(bp->nb_flags, NB_NOCACHE) && !ISSET(bp->nb_flags, (NB_NEEDCOMMIT | NB_DELWRI)))) {
                SET(bp->nb_flags, NB_INVAL);
+       }
 
        if ((bp->nb_bufsize <= 0) || ISSET(bp->nb_flags, NB_INVAL)) {
                /* If it's invalid or empty, dissociate it from its nfsnode */
@@ -1075,8 +1229,9 @@ pagelist_cleanup_done:
                /* invalidate usage timestamp to allow immediate freeing */
                NBUFSTAMPINVALIDATE(bp);
                /* put buffer at head of free list */
-               if (bp->nb_free.tqe_next != NFSNOLIST)
+               if (bp->nb_free.tqe_next != NFSNOLIST) {
                        panic("nfsbuf on freelist");
+               }
                SET(bp->nb_flags, NB_INVAL);
                if (ISSET(bp->nb_flags, NB_META)) {
                        TAILQ_INSERT_HEAD(&nfsbuffreemeta, bp, nb_free);
@@ -1087,8 +1242,9 @@ pagelist_cleanup_done:
                }
        } else if (ISSET(bp->nb_flags, NB_DELWRI)) {
                /* put buffer at end of delwri list */
-               if (bp->nb_free.tqe_next != NFSNOLIST)
+               if (bp->nb_free.tqe_next != NFSNOLIST) {
                        panic("nfsbuf on freelist");
+               }
                TAILQ_INSERT_TAIL(&nfsbufdelwri, bp, nb_free);
                nfsbufdelwricnt++;
                freeup = 0;
@@ -1097,8 +1253,9 @@ pagelist_cleanup_done:
                microuptime(&now);
                bp->nb_timestamp = now.tv_sec;
                /* put buffer at end of free list */
-               if (bp->nb_free.tqe_next != NFSNOLIST)
+               if (bp->nb_free.tqe_next != NFSNOLIST) {
                        panic("nfsbuf on freelist");
+               }
                if (ISSET(bp->nb_flags, NB_META)) {
                        TAILQ_INSERT_TAIL(&nfsbuffreemeta, bp, nb_free);
                        nfsbuffreemetacnt++;
@@ -1118,14 +1275,18 @@ pagelist_cleanup_done:
 
        lck_mtx_unlock(nfs_buf_mutex);
 
-       if (wakeup_needbuffer)
+       if (wakeup_needbuffer) {
                wakeup(&nfsneedbuffer);
-       if (wakeup_buffer)
+       }
+       if (wakeup_buffer) {
                wakeup(bp);
-       if (wakeup_nbdwrite)
+       }
+       if (wakeup_nbdwrite) {
                wakeup(&nfs_nbdwrite);
-       if (freeup)
+       }
+       if (freeup) {
                NFS_BUF_FREEUP();
+       }
 }
 
 /*
@@ -1139,8 +1300,9 @@ nfs_buf_iowait(struct nfsbuf *bp)
 
        lck_mtx_lock(nfs_buf_mutex);
 
-       while (!ISSET(bp->nb_flags, NB_DONE))
+       while (!ISSET(bp->nb_flags, NB_DONE)) {
                msleep(bp, nfs_buf_mutex, PRIBIO + 1, "nfs_buf_iowait", NULL);
+       }
 
        lck_mtx_unlock(nfs_buf_mutex);
 
@@ -1149,10 +1311,11 @@ nfs_buf_iowait(struct nfsbuf *bp)
        /* check for interruption of I/O, then errors. */
        if (ISSET(bp->nb_flags, NB_EINTR)) {
                CLR(bp->nb_flags, NB_EINTR);
-               return (EINTR);
-       } else if (ISSET(bp->nb_flags, NB_ERROR))
-               return (bp->nb_error ? bp->nb_error : EIO);
-       return (0);
+               return EINTR;
+       } else if (ISSET(bp->nb_flags, NB_ERROR)) {
+               return bp->nb_error ? bp->nb_error : EIO;
+       }
+       return 0;
 }
 
 /*
@@ -1161,11 +1324,11 @@ nfs_buf_iowait(struct nfsbuf *bp)
 void
 nfs_buf_iodone(struct nfsbuf *bp)
 {
-
        FSDBG_TOP(550, bp, NBOFF(bp), bp->nb_flags, bp->nb_error);
 
-       if (ISSET(bp->nb_flags, NB_DONE))
+       if (ISSET(bp->nb_flags, NB_DONE)) {
                panic("nfs_buf_iodone already");
+       }
 
        if (!ISSET(bp->nb_flags, NB_READ)) {
                CLR(bp->nb_flags, NB_WRITEINPROG);
@@ -1174,15 +1337,18 @@ nfs_buf_iodone(struct nfsbuf *bp)
                 * any throttled write operations
                 */
                vnode_writedone(NFSTOV(bp->nb_np));
+               nfs_node_lock_force(bp->nb_np);
+               bp->nb_np->n_numoutput--;
+               nfs_node_unlock(bp->nb_np);
        }
-       if (ISSET(bp->nb_flags, NB_ASYNC)) {    /* if async, release it */
-               SET(bp->nb_flags, NB_DONE);             /* note that it's done */
+       if (ISSET(bp->nb_flags, NB_ASYNC)) {    /* if async, release it */
+               SET(bp->nb_flags, NB_DONE);             /* note that it's done */
                nfs_buf_release(bp, 1);
-       } else {                                        /* or just wakeup the buffer */ 
-               lck_mtx_lock(nfs_buf_mutex);
-               SET(bp->nb_flags, NB_DONE);             /* note that it's done */
+       } else {                                        /* or just wakeup the buffer */
+               lck_mtx_lock(nfs_buf_mutex);
+               SET(bp->nb_flags, NB_DONE);             /* note that it's done */
                CLR(bp->nb_lflags, NBL_WANTED);
-               lck_mtx_unlock(nfs_buf_mutex);
+               lck_mtx_unlock(nfs_buf_mutex);
                wakeup(bp);
        }
 
@@ -1208,8 +1374,9 @@ nfs_buf_write_delayed(struct nfsbuf *bp)
                lck_mtx_lock(nfs_buf_mutex);
                nfs_nbdwrite++;
                NFSBUFCNTCHK();
-               if (bp->nb_vnbufs.le_next != NFSNOLIST)
+               if (bp->nb_vnbufs.le_next != NFSNOLIST) {
                        LIST_REMOVE(bp, nb_vnbufs);
+               }
                LIST_INSERT_HEAD(&np->n_dirtyblkhd, bp, nb_vnbufs);
                lck_mtx_unlock(nfs_buf_mutex);
        }
@@ -1221,16 +1388,17 @@ nfs_buf_write_delayed(struct nfsbuf *bp)
        vnode_waitforwrites(NFSTOV(np), VNODE_ASYNC_THROTTLE, 0, 0, "nfs_buf_write_delayed");
 
        /* the file is in a modified state, so make sure the flag's set */
-       nfs_lock(np, NFS_NODE_LOCK_FORCE);
+       nfs_node_lock_force(np);
        np->n_flag |= NMODIFIED;
-       nfs_unlock(np);
+       nfs_node_unlock(np);
 
        /*
         * If we have too many delayed write buffers,
         * just fall back to doing the async write.
         */
-       if (nfs_nbdwrite < 0)
+       if (nfs_nbdwrite < 0) {
                panic("nfs_buf_write_delayed: Negative nfs_nbdwrite");
+       }
        if (nfs_nbdwrite > NFS_A_LOT_OF_DELAYED_WRITES) {
                /* issue async write */
                SET(bp->nb_flags, NB_ASYNC);
@@ -1256,22 +1424,25 @@ nfs_buf_check_write_verifier(nfsnode_t np, struct nfsbuf *bp)
 {
        struct nfsmount *nmp;
 
-       if (!ISSET(bp->nb_flags, NB_NEEDCOMMIT))
+       if (!ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
                return;
+       }
 
        nmp = NFSTONMP(np);
-       if (!nmp)
+       if (nfs_mount_gone(nmp)) {
                return;
-       if (!ISSET(bp->nb_flags, NB_STALEWVERF) && (bp->nb_verf == nmp->nm_verf))
+       }
+       if (!ISSET(bp->nb_flags, NB_STALEWVERF) && (bp->nb_verf == nmp->nm_verf)) {
                return;
+       }
 
        /* write verifier changed, clear commit/wverf flags */
        CLR(bp->nb_flags, (NB_NEEDCOMMIT | NB_STALEWVERF));
        bp->nb_verf = 0;
-       nfs_lock(np, NFS_NODE_LOCK_FORCE);
+       nfs_node_lock_force(np);
        np->n_needcommitcnt--;
        CHECK_NEEDCOMMITCNT(np);
-       nfs_unlock(np);
+       nfs_node_unlock(np);
 }
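
[Review note] For unstable (NB_NEEDCOMMIT) writes, the server returns a write
verifier that changes across server reboots. If bp->nb_verf no longer matches
nmp->nm_verf, the earlier unstable write may have been discarded, so the
buffer cannot simply be COMMITted; the needcommit state is torn down and the
data will be rewritten. In sketch form:

    /* Sketch of the resend decision for an unstable write: */
    if (ISSET(bp->nb_flags, NB_STALEWVERF) || (bp->nb_verf != nmp->nm_verf)) {
            /* server may have rebooted and lost the data: rewrite, don't COMMIT */
    }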
 
 /*
@@ -1281,7 +1452,7 @@ nfs_buf_check_write_verifier(nfsnode_t np, struct nfsbuf *bp)
 void
 nfs_buf_refget(struct nfsbuf *bp)
 {
-       bp->nb_refs++;
+       os_ref_retain_locked(&bp->nb_refs);
 }
 /*
  * release a reference on a buffer
@@ -1290,7 +1461,7 @@ nfs_buf_refget(struct nfsbuf *bp)
 void
 nfs_buf_refrele(struct nfsbuf *bp)
 {
-       bp->nb_refs--;
+       (void) os_ref_release_locked(&bp->nb_refs);
 }
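
[Review note] os_refcnt counts are born at 1 (os_ref_init() in nfs_buf_get),
so "no outstanding iterator references" is now os_ref_get_count() == 1 rather
than the old nb_refs == 0, which is why nfs_buf_freeup() above compares
against > 1. Lifecycle under nfs_buf_mutex:

    os_ref_init(&bp->nb_refs, NULL);             /* count == 1 (the buffer itself) */
    os_ref_retain_locked(&bp->nb_refs);          /* iterator ref, count == 2 */
    (void) os_ref_release_locked(&bp->nb_refs);  /* back to 1: eligible to free */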
 
 /*
@@ -1304,30 +1475,33 @@ nfs_buf_acquire(struct nfsbuf *bp, int flags, int slpflag, int slptimeo)
        struct timespec ts;
 
        if (ISSET(bp->nb_lflags, NBL_BUSY)) {
-               /*      
-                * since the mutex_lock may block, the buffer
+               /*
+                * since the lck_mtx_lock may block, the buffer
                 * may become BUSY, so we need to recheck for
                 * a NOWAIT request
                 */
-               if (flags & NBAC_NOWAIT)
-                       return (EBUSY);
-               SET(bp->nb_lflags, NBL_WANTED);
+               if (flags & NBAC_NOWAIT) {
+                       return EBUSY;
+               }
+               SET(bp->nb_lflags, NBL_WANTED);
 
-               ts.tv_sec = (slptimeo/100);
+               ts.tv_sec = (slptimeo / 100);
                /* the hz value is 100; which leads to 10ms */
                ts.tv_nsec = (slptimeo % 100) * 10  * NSEC_PER_USEC * 1000;
 
                error = msleep(bp, nfs_buf_mutex, slpflag | (PRIBIO + 1),
-                       "nfs_buf_acquire", &ts);
-               if (error)
-                       return (error);
-               return (EAGAIN);
+                   "nfs_buf_acquire", &ts);
+               if (error) {
+                       return error;
+               }
+               return EAGAIN;
+       }
+       if (flags & NBAC_REMOVE) {
+               nfs_buf_remfree(bp);
        }
-       if (flags & NBAC_REMOVE)
-               nfs_buf_remfree(bp);
        SET(bp->nb_lflags, NBL_BUSY);
 
-       return (0);
+       return 0;
 }
 
 /*
@@ -1339,17 +1513,19 @@ nfs_buf_drop(struct nfsbuf *bp)
 {
        int need_wakeup = 0;
 
-       if (!ISSET(bp->nb_lflags, NBL_BUSY))
+       if (!ISSET(bp->nb_lflags, NBL_BUSY)) {
                panic("nfs_buf_drop: buffer not busy!");
+       }
        if (ISSET(bp->nb_lflags, NBL_WANTED)) {
-               /* delay the actual wakeup until after we clear NBL_BUSY */
+               /* delay the actual wakeup until after we clear NBL_BUSY */
                need_wakeup = 1;
        }
        /* Unlock the buffer. */
        CLR(bp->nb_lflags, (NBL_BUSY | NBL_WANTED));
 
-       if (need_wakeup)
-               wakeup(bp);
+       if (need_wakeup) {
+               wakeup(bp);
+       }
 }
 
 /*
@@ -1362,31 +1538,32 @@ nfs_buf_iterprepare(nfsnode_t np, struct nfsbuflists *iterheadp, int flags)
 {
        struct nfsbuflists *listheadp;
 
-       if (flags & NBI_DIRTY)
+       if (flags & NBI_DIRTY) {
                listheadp = &np->n_dirtyblkhd;
-       else
+       } else {
                listheadp = &np->n_cleanblkhd;
+       }
 
        if ((flags & NBI_NOWAIT) && (np->n_bufiterflags & NBI_ITER)) {
-               LIST_INIT(iterheadp);
-               return(EWOULDBLOCK);
+               LIST_INIT(iterheadp);
+               return EWOULDBLOCK;
        }
 
-       while (np->n_bufiterflags & NBI_ITER)   {
-               np->n_bufiterflags |= NBI_ITERWANT;
+       while (np->n_bufiterflags & NBI_ITER) {
+               np->n_bufiterflags |= NBI_ITERWANT;
                msleep(&np->n_bufiterflags, nfs_buf_mutex, 0, "nfs_buf_iterprepare", NULL);
        }
        if (LIST_EMPTY(listheadp)) {
-               LIST_INIT(iterheadp);
-               return(EINVAL);
+               LIST_INIT(iterheadp);
+               return EINVAL;
        }
        np->n_bufiterflags |= NBI_ITER;
 
        iterheadp->lh_first = listheadp->lh_first;
-       listheadp->lh_first->nb_vnbufs.le_prev = &iterheadp->lh_first;  
+       listheadp->lh_first->nb_vnbufs.le_prev = &iterheadp->lh_first;
        LIST_INIT(listheadp);
 
-       return(0);
+       return 0;
 }
 
 /*
@@ -1400,10 +1577,11 @@ nfs_buf_itercomplete(nfsnode_t np, struct nfsbuflists *iterheadp, int flags)
        struct nfsbuflists * listheadp;
        struct nfsbuf *bp;
 
-       if (flags & NBI_DIRTY)
+       if (flags & NBI_DIRTY) {
                listheadp = &np->n_dirtyblkhd;
-       else
+       } else {
                listheadp = &np->n_cleanblkhd;
+       }
 
        while (!LIST_EMPTY(iterheadp)) {
                bp = LIST_FIRST(iterheadp);
@@ -1432,19 +1610,22 @@ nfs_buf_read(struct nfsbuf *bp)
 
        np = bp->nb_np;
        cred = bp->nb_rcred;
-       if (IS_VALID_CRED(cred))
+       if (IS_VALID_CRED(cred)) {
                kauth_cred_ref(cred);
+       }
        thd = ISSET(bp->nb_flags, NB_ASYNC) ? NULL : current_thread();
 
        /* sanity checks */
-       if (!ISSET(bp->nb_flags, NB_READ))
+       if (!ISSET(bp->nb_flags, NB_READ)) {
                panic("nfs_buf_read: !NB_READ");
-       if (ISSET(bp->nb_flags, NB_DONE))
+       }
+       if (ISSET(bp->nb_flags, NB_DONE)) {
                CLR(bp->nb_flags, NB_DONE);
+       }
 
        NFS_BUF_MAP(bp);
 
-       OSAddAtomic(1, (SInt32 *)&nfsstats.read_bios);
+       OSAddAtomic64(1, &nfsstats.read_bios);
 
        error = nfs_buf_read_rpc(bp, thd, cred);
        /*
@@ -1452,9 +1633,10 @@ nfs_buf_read(struct nfsbuf *bp)
         * read.  Otherwise, the read has already been finished.
         */
 
-       if (IS_VALID_CRED(cred))
+       if (IS_VALID_CRED(cred)) {
                kauth_cred_unref(&cred);
-       return (error);
+       }
+       return error;
 }
 
 /*
@@ -1470,7 +1652,7 @@ nfs_buf_read_finish(struct nfsbuf *bp)
                /* update valid range */
                bp->nb_validoff = 0;
                bp->nb_validend = bp->nb_endio;
-               if (bp->nb_endio < bp->nb_bufsize) { 
+               if (bp->nb_endio < bp->nb_bufsize) {
                        /*
                         * The read may be short because we have unflushed writes
                         * that are extending the file size and the reads hit the
@@ -1480,20 +1662,22 @@ nfs_buf_read_finish(struct nfsbuf *bp)
                         * in nfs_buf_read_rpc_finish().
                         */
                        off_t boff = NBOFF(bp);
-                       if ((off_t)np->n_size >= (boff + bp->nb_bufsize))
+                       if ((off_t)np->n_size >= (boff + bp->nb_bufsize)) {
                                bp->nb_validend = bp->nb_bufsize;
-                       else if ((off_t)np->n_size >= boff)
+                       } else if ((off_t)np->n_size >= boff) {
                                bp->nb_validend = np->n_size - boff;
-                       else
+                       } else {
                                bp->nb_validend = 0;
+                       }
                }
                if ((nmp = NFSTONMP(np)) && (nmp->nm_vers == NFS_VER2) &&
-                   ((NBOFF(bp) + bp->nb_validend) > 0x100000000LL))
+                   ((NBOFF(bp) + bp->nb_validend) > 0x100000000LL)) {
                        bp->nb_validend = 0x100000000LL - NBOFF(bp);
-               bp->nb_valid = (1 << (round_page_32(bp->nb_validend) / PAGE_SIZE)) - 1;
+               }
+               nfs_buf_pgs_get_page_mask(&bp->nb_valid, round_page_64(bp->nb_validend) / PAGE_SIZE);
                if (bp->nb_validend & PAGE_MASK) {
                        /* zero-fill remainder of last page */
-                       bzero(bp->nb_data + bp->nb_validend, bp->nb_bufsize - bp->nb_validend);
+                       bzero(bp->nb_data + bp->nb_validend, PAGE_SIZE - (bp->nb_validend & PAGE_MASK));
                }
        }
        nfs_buf_iodone(bp);
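
Two details above are easy to miss: the byte-granular nb_validend is converted into a page bitmap, and the rewritten bzero() zeroes only to the end of the last partial page rather than all the way out to nb_bufsize. A worked sketch of the arithmetic, assuming 4 KB pages and a single-word stand-in for nfs_buf_pgs_get_page_mask():

    #include <assert.h>
    #include <stdint.h>

    #define PG_SIZE 4096ULL
    #define PG_MASK (PG_SIZE - 1)

    /* Illustrative single-word page mask; the real helper fills a
     * multi-word nfsbufpgs bitmap. */
    uint64_t
    page_mask_upto(uint64_t npages)
    {
            return (npages >= 64) ? ~0ULL : ((1ULL << npages) - 1);
    }

    int
    main(void)
    {
            uint64_t validend = 20000;                  /* valid bytes */
            uint64_t npages = ((validend + PG_MASK) & ~PG_MASK) / PG_SIZE;

            assert(npages == 5);                        /* pages 0..4 valid */
            assert(page_mask_upto(npages) == 0x1f);
            /* zero-fill only the tail of the last partial page: */
            assert(PG_SIZE - (validend & PG_MASK) == 480);
            return 0;
    }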
@@ -1508,17 +1692,20 @@ nfs_buf_read_rpc(struct nfsbuf *bp, thread_t thd, kauth_cred_t cred)
        struct nfsmount *nmp;
        nfsnode_t np = bp->nb_np;
        int error = 0, nfsvers, async;
-       int offset, length, nmrsize, nrpcs, len;
+       int offset;
+       uint64_t length, nrpcs;
+       uint32_t nmrsize;
+       size_t len;
        off_t boff;
        struct nfsreq *req;
        struct nfsreq_cbinfo cb;
 
        nmp = NFSTONMP(np);
-       if (!nmp) {
+       if (nfs_mount_gone(nmp)) {
                bp->nb_error = error = ENXIO;
                SET(bp->nb_flags, NB_ERROR);
                nfs_buf_iodone(bp);
-               return (error);
+               return error;
        }
        nfsvers = nmp->nm_vers;
        nmrsize = nmp->nm_rsize;
@@ -1532,10 +1719,11 @@ nfs_buf_read_rpc(struct nfsbuf *bp, thread_t thd, kauth_cred_t cred)
                        bp->nb_error = error = EFBIG;
                        SET(bp->nb_flags, NB_ERROR);
                        nfs_buf_iodone(bp);
-                       return (error);
+                       return error;
                }
-               if ((boff + length - 1) > 0xffffffffLL)
+               if ((boff + length - 1) > 0xffffffffLL) {
                        length = 0x100000000LL - boff;
+               }
        }
 
        /* Note: Can only do async I/O if nfsiods are configured. */
@@ -1556,17 +1744,24 @@ nfs_buf_read_rpc(struct nfsbuf *bp, thread_t thd, kauth_cred_t cred)
                        error = bp->nb_error;
                        break;
                }
-               len = (length > nmrsize) ? nmrsize : length;
-               cb.rcb_args[0] = offset;
-               cb.rcb_args[1] = len;
+               len = (length > nmrsize) ? nmrsize : (uint32_t)length;
+               cb.rcb_args.offset = offset;
+               cb.rcb_args.length = len;
+#if CONFIG_NFS4
+               if (nmp->nm_vers >= NFS_VER4) {
+                       cb.rcb_args.stategenid = nmp->nm_stategenid;
+               }
+#endif
                req = NULL;
                error = nmp->nm_funcs->nf_read_rpc_async(np, boff + offset, len, thd, cred, &cb, &req);
-               if (error)
+               if (error) {
                        break;
+               }
                offset += len;
                length -= len;
-               if (async)
+               if (async) {
                        continue;
+               }
                nfs_buf_read_rpc_finish(req);
                if (ISSET(bp->nb_flags, NB_ERROR)) {
                        error = bp->nb_error;
@@ -1591,9 +1786,10 @@ nfs_buf_read_rpc(struct nfsbuf *bp, thread_t thd, kauth_cred_t cred)
                                nfs_buf_iodone(bp);
                        } else {
                                /* wait for the last RPC to mark it done */
-                               while (bp->nb_rpcs > 0)
+                               while (bp->nb_rpcs > 0) {
                                        msleep(&bp->nb_rpcs, nfs_buf_mutex, 0,
-                                               "nfs_buf_read_rpc_cancel", NULL);
+                                           "nfs_buf_read_rpc_cancel", NULL);
+                               }
                                lck_mtx_unlock(nfs_buf_mutex);
                        }
                } else {
@@ -1601,7 +1797,7 @@ nfs_buf_read_rpc(struct nfsbuf *bp, thread_t thd, kauth_cred_t cred)
                }
        }
 
-       return (error);
+       return error;
 }
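
The loop above carves one buffer into rsize-limited RPCs, stamping each chunk's offset and length into cb.rcb_args so the finish routine knows where its reply belongs (and, for NFSv4, which state generation it was issued under). A minimal sketch of the chunking itself, with a hypothetical issue_chunk() standing in for nf_read_rpc_async():

    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical stand-in for the async read dispatch. */
    int
    issue_chunk(uint64_t file_off, size_t len)
    {
            printf("READ off=%llu len=%zu\n",
                (unsigned long long)file_off, len);
            return 0;
    }

    /* Split [boff, boff+length) into reads of at most rsize bytes. */
    int
    read_in_chunks(uint64_t boff, uint64_t length, uint32_t rsize)
    {
            uint64_t offset = 0;
            int error = 0;

            while (length > 0) {
                    size_t len = (length > rsize) ? rsize : (size_t)length;
                    if ((error = issue_chunk(boff + offset, len))) {
                            break;
                    }
                    offset += len;
                    length -= len;
            }
            return error;
    }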
 
 /*
@@ -1611,29 +1807,34 @@ void
 nfs_buf_read_rpc_finish(struct nfsreq *req)
 {
        struct nfsmount *nmp;
-       size_t rlen;
+       size_t rlen, length;
        struct nfsreq_cbinfo cb;
        struct nfsbuf *bp;
-       int error = 0, nfsvers, offset, length, eof = 0, multasyncrpc, finished;
+       int error = 0, nfsvers, eof = 0, multasyncrpc, finished;
+       off_t offset;
        void *wakeme = NULL;
        struct nfsreq *rreq = NULL;
        nfsnode_t np;
        thread_t thd;
        kauth_cred_t cred;
-       struct uio uio;
-       struct iovec_32 io;
+       uio_t auio;
+       char uio_buf[UIO_SIZEOF(1)];
 
 finish:
        np = req->r_np;
        thd = req->r_thread;
        cred = req->r_cred;
-       if (IS_VALID_CRED(cred))
+       if (IS_VALID_CRED(cred)) {
                kauth_cred_ref(cred);
+       }
        cb = req->r_callback;
        bp = cb.rcb_bp;
+       if (cb.rcb_func) { /* take an extra reference on the nfsreq in case we want to resend it later due to grace error */
+               nfs_request_ref(req, 0);
+       }
 
        nmp = NFSTONMP(np);
-       if (!nmp) {
+       if (nfs_mount_gone(nmp)) {
                SET(bp->nb_flags, NB_ERROR);
                bp->nb_error = error = ENXIO;
        }
@@ -1644,39 +1845,82 @@ finish:
        }
 
        nfsvers = nmp->nm_vers;
-       offset = cb.rcb_args[0];
-       rlen = length = cb.rcb_args[1];
-
-       uio.uio_iovs.iov32p = &io;
-       uio.uio_iovcnt = 1;
-       uio.uio_rw = UIO_READ;
-#if 1  /* LP64todo - can't use new segment flags until the drivers are ready */
-       uio.uio_segflg = UIO_SYSSPACE;
-#else
-       uio.uio_segflg = UIO_SYSSPACE32;
-#endif
-       io.iov_len = length;
-       uio_uio_resid_set(&uio, io.iov_len);
-       uio.uio_offset = NBOFF(bp) + offset;
-       io.iov_base = (uintptr_t) bp->nb_data + offset;
+       offset = cb.rcb_args.offset;
+       rlen = length = cb.rcb_args.length;
+
+       auio = uio_createwithbuffer(1, NBOFF(bp) + offset, UIO_SYSSPACE,
+           UIO_READ, &uio_buf, sizeof(uio_buf));
+       uio_addiov(auio, CAST_USER_ADDR_T(bp->nb_data + offset), length);
 
        /* finish the RPC */
-       error = nmp->nm_funcs->nf_read_rpc_async_finish(np, req, &uio, &rlen, &eof);
+       error = nmp->nm_funcs->nf_read_rpc_async_finish(np, req, auio, &rlen, &eof);
        if ((error == EINPROGRESS) && cb.rcb_func) {
                /* async request restarted */
-               if (IS_VALID_CRED(cred))
+               if (cb.rcb_func) {
+                       nfs_request_rele(req);
+               }
+               if (IS_VALID_CRED(cred)) {
                        kauth_cred_unref(&cred);
+               }
                return;
        }
-
+#if CONFIG_NFS4
+       if ((nmp->nm_vers >= NFS_VER4) && nfs_mount_state_error_should_restart(error) && !ISSET(bp->nb_flags, NB_ERROR)) {
+               lck_mtx_lock(&nmp->nm_lock);
+               if ((error != NFSERR_OLD_STATEID) && (error != NFSERR_GRACE) && (cb.rcb_args.stategenid == nmp->nm_stategenid)) {
+                       NP(np, "nfs_buf_read_rpc_finish: error %d @ 0x%llx, 0x%x 0x%x, initiating recovery",
+                           error, NBOFF(bp) + offset, cb.rcb_args.stategenid, nmp->nm_stategenid);
+                       nfs_need_recover(nmp, error);
+               }
+               lck_mtx_unlock(&nmp->nm_lock);
+               if (np->n_flag & NREVOKE) {
+                       error = EIO;
+               } else {
+                       if (error == NFSERR_GRACE) {
+                               if (cb.rcb_func) {
+                                       /*
+                                        * For an async I/O request, handle a grace delay just like
+                                        * jukebox errors.  Set the resend time and queue it up.
+                                        */
+                                       struct timeval now;
+                                       if (req->r_nmrep.nmc_mhead) {
+                                               mbuf_freem(req->r_nmrep.nmc_mhead);
+                                               req->r_nmrep.nmc_mhead = NULL;
+                                       }
+                                       req->r_error = 0;
+                                       microuptime(&now);
+                                       lck_mtx_lock(&req->r_mtx);
+                                       req->r_resendtime = now.tv_sec + 2;
+                                       req->r_xid = 0;                 // get a new XID
+                                       req->r_flags |= R_RESTART;
+                                       req->r_start = 0;
+                                       nfs_asyncio_resend(req);
+                                       lck_mtx_unlock(&req->r_mtx);
+                                       if (IS_VALID_CRED(cred)) {
+                                               kauth_cred_unref(&cred);
+                                       }
+                                       /* Note: nfsreq reference taken will be dropped later when finished */
+                                       return;
+                               }
+                               /* otherwise, just pause a couple seconds and retry */
+                               tsleep(&nmp->nm_state, (PZERO - 1), "nfsgrace", 2 * hz);
+                       }
+                       if (!(error = nfs_mount_state_wait_for_recovery(nmp))) {
+                               rlen = 0;
+                               goto readagain;
+                       }
+               }
+       }
+#endif
        if (error) {
                SET(bp->nb_flags, NB_ERROR);
                bp->nb_error = error;
                goto out;
        }
 
-       if ((rlen > 0) && (bp->nb_endio < (offset + (int)rlen)))
+       if ((rlen > 0) && (bp->nb_endio < (offset + (int)rlen))) {
                bp->nb_endio = offset + rlen;
+       }
 
        if ((nfsvers == NFS_VER2) || eof || (rlen == 0)) {
                /* zero out the remaining data (up to EOF) */
@@ -1684,9 +1928,10 @@ finish:
                rpcrem = (length - rlen);
                eofrem = np->n_size - (NBOFF(bp) + offset + rlen);
                rem = (rpcrem < eofrem) ? rpcrem : eofrem;
-               if (rem > 0)
-                       bzero(bp->nb_data + offset + rlen, rem);
-       } else if (((int)rlen < length) && !ISSET(bp->nb_flags, NB_ERROR)) {
+               if (rem > 0) {
+                       NFS_BZERO(bp->nb_data + offset + rlen, rem);
+               }
+       } else if ((rlen < length) && !ISSET(bp->nb_flags, NB_ERROR)) {
                /*
                 * short read
                 *
@@ -1694,19 +1939,30 @@ finish:
                 * requested, so we need to issue another read for the rest.
                 * (Don't bother if the buffer already hit an error.)
                 */
+#if CONFIG_NFS4
+readagain:
+#endif
                offset += rlen;
                length -= rlen;
-               cb.rcb_args[0] = offset;
-               cb.rcb_args[1] = length;
-               error = nmp->nm_funcs->nf_read_rpc_async(np, offset, length, thd, cred, &cb, &rreq);
+               cb.rcb_args.offset = offset;
+               cb.rcb_args.length = length;
+#if CONFIG_NFS4
+               if (nmp->nm_vers >= NFS_VER4) {
+                       cb.rcb_args.stategenid = nmp->nm_stategenid;
+               }
+#endif
+               error = nmp->nm_funcs->nf_read_rpc_async(np, NBOFF(bp) + offset, length, thd, cred, &cb, &rreq);
                if (!error) {
-                       if (IS_VALID_CRED(cred))
+                       if (IS_VALID_CRED(cred)) {
                                kauth_cred_unref(&cred);
+                       }
                        if (!cb.rcb_func) {
                                /* if !async we'll need to wait for this RPC to finish */
                                req = rreq;
+                               rreq = NULL;
                                goto finish;
                        }
+                       nfs_request_rele(req);
                        /*
                         * We're done here.
                         * Outstanding RPC count is unchanged.
@@ -1719,8 +1975,12 @@ finish:
        }
 
 out:
-       if (IS_VALID_CRED(cred))
+       if (cb.rcb_func) {
+               nfs_request_rele(req);
+       }
+       if (IS_VALID_CRED(cred)) {
                kauth_cred_unref(&cred);
+       }
 
        /*
         * Decrement outstanding RPC count on buffer
@@ -1732,21 +1992,25 @@ out:
         */
 
        multasyncrpc = ISSET(bp->nb_flags, NB_MULTASYNCRPC);
-       if (multasyncrpc)
+       if (multasyncrpc) {
                lck_mtx_lock(nfs_buf_mutex);
+       }
 
        bp->nb_rpcs--;
        finished = (bp->nb_rpcs == 0);
 
-       if (multasyncrpc)
+       if (multasyncrpc) {
                lck_mtx_unlock(nfs_buf_mutex);
+       }
 
        if (finished) {
-               if (multasyncrpc)
+               if (multasyncrpc) {
                        wakeme = &bp->nb_rpcs;
+               }
                nfs_buf_read_finish(bp);
-               if (wakeme)
+               if (wakeme) {
                        wakeup(wakeme);
+               }
        }
 }
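
The tail of this function is the only place a multi-RPC buffer gets retired: each finishing RPC decrements nb_rpcs under nfs_buf_mutex, and whichever one drops the count to zero runs nfs_buf_read_finish() and wakes any canceller sleeping on &bp->nb_rpcs. (The NFSv4 grace path earlier behaves differently on purpose: an async request hit by NFSERR_GRACE is re-queued with a resend time two seconds out instead of failing, which is why the function takes the extra nfs_request_ref() up front.) A user-space sketch of the last-one-out pattern, using pthreads in place of msleep/wakeup:

    #include <pthread.h>

    struct buf_state {
            pthread_mutex_t lock;
            pthread_cond_t  cv;
            int             rpcs;   /* outstanding RPCs for this buffer */
    };

    /* Called by every finishing RPC; the last one retires the buffer. */
    void
    rpc_done(struct buf_state *b, void (*finish)(struct buf_state *))
    {
            int last;

            pthread_mutex_lock(&b->lock);
            last = (--b->rpcs == 0);
            pthread_mutex_unlock(&b->lock);

            if (last) {
                    finish(b);                      /* read_finish analogue */
                    pthread_mutex_lock(&b->lock);
                    pthread_cond_broadcast(&b->cv); /* wake cancellers */
                    pthread_mutex_unlock(&b->lock);
            }
    }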
 
@@ -1754,277 +2018,248 @@ out:
  * Do buffer readahead.
  * Initiate async I/O to read buffers not in cache.
  */
-static int
+int
 nfs_buf_readahead(nfsnode_t np, int ioflag, daddr64_t *rabnp, daddr64_t lastrabn, thread_t thd, kauth_cred_t cred)
 {
        struct nfsmount *nmp = NFSTONMP(np);
        struct nfsbuf *bp;
-       int error = 0, nra;
+       int error = 0;
+       uint32_t nra;
 
-       if (!nmp)
-               return (ENXIO);
-       if (nmp->nm_readahead <= 0)
-               return (0);
-       if (*rabnp > lastrabn)
-               return (0);
+       if (nfs_mount_gone(nmp)) {
+               return ENXIO;
+       }
+       if (nmp->nm_readahead <= 0) {
+               return 0;
+       }
+       if (*rabnp > lastrabn) {
+               return 0;
+       }
 
        for (nra = 0; (nra < nmp->nm_readahead) && (*rabnp <= lastrabn); nra++, *rabnp = *rabnp + 1) {
                /* check if block exists and is valid. */
-               error = nfs_buf_get(np, *rabnp, nmp->nm_biosize, thd, NBLK_READ|NBLK_NOWAIT, &bp);
-               if (error)
+               if ((*rabnp * nmp->nm_biosize) >= (off_t)np->n_size) {
+                       /* stop reading ahead if we're beyond EOF */
+                       *rabnp = lastrabn;
                        break;
-               if (!bp)
+               }
+               error = nfs_buf_get(np, *rabnp, nmp->nm_biosize, thd, NBLK_READ | NBLK_NOWAIT, &bp);
+               if (error) {
+                       break;
+               }
+               nfs_node_lock_force(np);
+               np->n_lastrahead = *rabnp;
+               nfs_node_unlock(np);
+               if (!bp) {
                        continue;
+               }
                if ((ioflag & IO_NOCACHE) && ISSET(bp->nb_flags, NB_CACHE) &&
-                   !bp->nb_dirty && !ISSET(bp->nb_flags, (NB_DELWRI|NB_NCRDAHEAD))) {
+                   !nfs_buf_pgs_is_set(&bp->nb_dirty) && !ISSET(bp->nb_flags, (NB_DELWRI | NB_NCRDAHEAD))) {
                        CLR(bp->nb_flags, NB_CACHE);
-                       bp->nb_valid = 0;
+                       NBPGS_ERASE(&bp->nb_valid);
                        bp->nb_validoff = bp->nb_validend = -1;
                }
-               if ((bp->nb_dirtyend <= 0) && !bp->nb_dirty &&
-                   !ISSET(bp->nb_flags, (NB_CACHE|NB_DELWRI))) {
-                       SET(bp->nb_flags, (NB_READ|NB_ASYNC));
-                       if (ioflag & IO_NOCACHE)
+               if ((bp->nb_dirtyend <= 0) && !nfs_buf_pgs_is_set(&bp->nb_dirty) &&
+                   !ISSET(bp->nb_flags, (NB_CACHE | NB_DELWRI))) {
+                       SET(bp->nb_flags, (NB_READ | NB_ASYNC));
+                       if (ioflag & IO_NOCACHE) {
                                SET(bp->nb_flags, NB_NCRDAHEAD);
+                       }
                        if (!IS_VALID_CRED(bp->nb_rcred) && IS_VALID_CRED(cred)) {
                                kauth_cred_ref(cred);
                                bp->nb_rcred = cred;
                        }
-                       if ((error = nfs_buf_read(bp)))
+                       if ((error = nfs_buf_read(bp))) {
                                break;
+                       }
                        continue;
                }
                nfs_buf_release(bp, 1);
        }
-       return (error);
+       return error;
 }
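
Two behavioral changes stand out in this hunk: readahead now stops at EOF (setting *rabnp = lastrabn so the caller sees the window as exhausted), and n_lastrahead records how far ahead we have gone so sequential readers don't re-issue the same blocks. Worked block arithmetic for the EOF clamp, assuming a 32 KB biosize:

    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
            const uint64_t biosize  = 32768;    /* assumed nm_biosize */
            const uint64_t n_size   = 100000;   /* file covers blocks 0..3 */
            const uint64_t lastrabn = 8;        /* requested window end */

            for (uint64_t rabn = 1; rabn <= lastrabn; rabn++) {
                    if (rabn * biosize >= n_size) {
                            /* block 4 starts at 131072 >= 100000: stop */
                            break;
                    }
                    printf("readahead block %llu\n",
                        (unsigned long long)rabn);
            }
            /* only blocks 1..3 are issued */
            return 0;
    }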
 
 /*
- * NFS buffer I/O for reading files/directories.
+ * NFS buffer I/O for reading files.
  */
 int
-nfs_bioread(nfsnode_t np, struct uio *uio, int ioflag, int *eofflag, vfs_context_t ctx)
+nfs_bioread(nfsnode_t np, uio_t uio, int ioflag, vfs_context_t ctx)
 {
        vnode_t vp = NFSTOV(np);
        struct nfsbuf *bp = NULL;
-       struct nfs_vattr nvattr;
        struct nfsmount *nmp = VTONMP(vp);
-       daddr64_t lbn, rabn = 0, lastrabn, maxrabn = -1, tlbn;
-       off_t diff;
-       int error = 0, n = 0, on = 0;
-       int nfsvers, biosize;
-       caddr_t dp;
-       struct dirent *direntp = NULL;
-       enum vtype vtype;
+       daddr64_t lbn, rabn = 0, lastrabn, maxrabn = -1;
+       off_t diff, on = 0, n = 0;
+       int error = 0, n32;
+       int nfsvers, biosize, modified, readaheads = 0;
        thread_t thd;
        kauth_cred_t cred;
+       int64_t io_resid;
 
-       FSDBG_TOP(514, np, uio->uio_offset, uio_uio_resid(uio), ioflag);
-
-       if (uio_uio_resid(uio) == 0) {
-               FSDBG_BOT(514, np, 0xd1e0001, 0, 0);
-               return (0);
-       }
-       if (uio->uio_offset < 0) {
-               FSDBG_BOT(514, np, 0xd1e0002, 0, EINVAL);
-               return (EINVAL);
-       }
+       FSDBG_TOP(514, np, uio_offset(uio), uio_resid(uio), ioflag);
 
        nfsvers = nmp->nm_vers;
        biosize = nmp->nm_biosize;
        thd = vfs_context_thread(ctx);
        cred = vfs_context_ucred(ctx);
 
-       vtype = vnode_vtype(vp);
-       if ((vtype != VREG) && (vtype != VDIR)) {
-               printf("nfs_bioread: type %x unexpected\n", vtype);
+       if (vnode_vtype(vp) != VREG) {
+               printf("nfs_bioread: type %x unexpected\n", vnode_vtype(vp));
                FSDBG_BOT(514, np, 0xd1e0016, 0, EINVAL);
-               return (EINVAL);
+               return EINVAL;
        }
 
        /*
-        * For nfs, cache consistency can only be maintained approximately.
+        * For NFS, cache consistency can only be maintained approximately.
         * Although RFC1094 does not specify the criteria, the following is
         * believed to be compatible with the reference port.
-        * For nfs:
-        * If the file's modify time on the server has changed since the
-        * last read rpc or you have written to the file,
-        * you may have lost data cache consistency with the
-        * server, so flush all of the file's data out of the cache.
-        * Then force a getattr rpc to ensure that you have up to date
-        * attributes.
+        *
+        * If the file has changed since the last read RPC or you have
+        * written to the file, you may have lost data cache consistency
+        * with the server.  So, check for a change, and flush all of the
+        * file's data out of the cache.
         * NB: This implies that cache data can be read when up to
-        * NFS_MAXATTRTIMEO seconds out of date. If you find that you need
-        * current attributes this could be forced by calling
-        * NATTRINVALIDATE() before the nfs_getattr() call.
+        * NFS_MAXATTRTIMO seconds out of date. If you find that you
+        * need current attributes, nfs_getattr() can be forced to fetch
+        * new attributes (via NATTRINVALIDATE() or NGA_UNCACHED).
         */
 
-       if (ISSET(np->n_flag, NUPDATESIZE))
+       if (ISSET(np->n_flag, NUPDATESIZE)) {
                nfs_data_update_size(np, 0);
+       }
 
-       if ((error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE))) {
+       if ((error = nfs_node_lock(np))) {
                FSDBG_BOT(514, np, 0xd1e0222, 0, error);
-               return (error);
+               return error;
        }
 
        if (np->n_flag & NNEEDINVALIDATE) {
                np->n_flag &= ~NNEEDINVALIDATE;
-               nfs_unlock(np);
-               nfs_vinvalbuf(vp, V_SAVE|V_IGNORE_WRITEERR, ctx, 1);
-               if ((error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE))) {
+               nfs_node_unlock(np);
+               error = nfs_vinvalbuf(vp, V_SAVE | V_IGNORE_WRITEERR, ctx, 1);
+               if (!error) {
+                       error = nfs_node_lock(np);
+               }
+               if (error) {
                        FSDBG_BOT(514, np, 0xd1e0322, 0, error);
-                       return (error);
+                       return error;
                }
        }
 
-       if (np->n_flag & NMODIFIED) {
-               if (vtype == VDIR) {
-                       nfs_invaldir(np);
-                       nfs_unlock(np);
-                       error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1);
-                       if (!error)
-                               error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE);
-                       if (error) {
-                               FSDBG_BOT(514, np, 0xd1e0003, 0, error);
-                               return (error);
+       modified = (np->n_flag & NMODIFIED);
+       nfs_node_unlock(np);
+       /* nfs_getattr() will check changed and purge caches */
+       error = nfs_getattr(np, NULL, ctx, modified ? NGA_UNCACHED : NGA_CACHED);
+       if (error) {
+               FSDBG_BOT(514, np, 0xd1e0004, 0, error);
+               return error;
+       }
+
+       if (uio_resid(uio) == 0) {
+               FSDBG_BOT(514, np, 0xd1e0001, 0, 0);
+               return 0;
+       }
+       if (uio_offset(uio) < 0) {
+               FSDBG_BOT(514, np, 0xd1e0002, 0, EINVAL);
+               return EINVAL;
+       }
+
+       /*
+        * set up readahead - which may be limited by:
+        * + current request length (for IO_NOCACHE)
+        * + readahead setting
+        * + file size
+        */
+       if (nmp->nm_readahead > 0) {
+               off_t end = uio_offset(uio) + uio_resid(uio);
+               if (end > (off_t)np->n_size) {
+                       end = np->n_size;
+               }
+               rabn = uio_offset(uio) / biosize;
+               maxrabn = (end - 1) / biosize;
+               nfs_node_lock_force(np);
+               if (!(ioflag & IO_NOCACHE) &&
+                   (!rabn || (rabn == np->n_lastread) || (rabn == (np->n_lastread + 1)))) {
+                       maxrabn += nmp->nm_readahead;
+                       if ((maxrabn * biosize) >= (off_t)np->n_size) {
+                               maxrabn = ((off_t)np->n_size - 1) / biosize;
                        }
                }
-               NATTRINVALIDATE(np);
-               error = nfs_getattr(np, &nvattr, ctx, 1);
-               if (error) {
-                       nfs_unlock(np);
-                       FSDBG_BOT(514, np, 0xd1e0004, 0, error);
-                       return (error);
+               if (maxrabn < np->n_lastrahead) {
+                       np->n_lastrahead = -1;
                }
-               if (vtype == VDIR) {
-                       /* if directory changed, purge any name cache entries */
-                       if (NFS_CHANGED_NC(nfsvers, np, &nvattr))
-                               cache_purge(vp);
-                       NFS_CHANGED_UPDATE_NC(nfsvers, np, &nvattr);
+               if (rabn < np->n_lastrahead) {
+                       rabn = np->n_lastrahead + 1;
                }
-               NFS_CHANGED_UPDATE(nfsvers, np, &nvattr);
+               nfs_node_unlock(np);
        } else {
-               error = nfs_getattr(np, &nvattr, ctx, 1);
-               if (error) {
-                       nfs_unlock(np);
-                       FSDBG_BOT(514, np, 0xd1e0005, 0, error);
-                       return (error);
-               }
-               if (NFS_CHANGED(nfsvers, np, &nvattr)) {
-                       if (vtype == VDIR) {
-                               nfs_invaldir(np);
-                               /* purge name cache entries */
-                               if (NFS_CHANGED_NC(nfsvers, np, &nvattr))
-                                       cache_purge(vp);
-                       }
-                       nfs_unlock(np);
-                       error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1);
-                       if (!error)
-                               error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE);
-                       if (error) {
-                               FSDBG_BOT(514, np, 0xd1e0006, 0, error);
-                               return (error);
-                       }
-                       if (vtype == VDIR)
-                               NFS_CHANGED_UPDATE_NC(nfsvers, np, &nvattr);
-                       NFS_CHANGED_UPDATE(nfsvers, np, &nvattr);
-               }
+               rabn = maxrabn = 0;
        }
 
-       nfs_unlock(np);
-
-       if (vtype == VREG) {
-               if ((ioflag & IO_NOCACHE) && (uio_uio_resid(uio) < (2*biosize))) {
-                       /* We have only a block or so to read, just do the rpc directly. */
-                       error = nfs_read_rpc(np, uio, ctx);
-                       FSDBG_BOT(514, np, uio->uio_offset, uio_uio_resid(uio), error);
-                       return (error);
-               }
-               /*
-                * set up readahead - which may be limited by:
-                * + current request length (for IO_NOCACHE)
-                * + readahead setting
-                * + file size
-                */
-               if (nmp->nm_readahead > 0) {
-                       off_t end = uio->uio_offset + uio_uio_resid(uio);
-                       if (end > (off_t)np->n_size)
-                               end = np->n_size;
-                       rabn = uio->uio_offset / biosize;
-                       maxrabn = (end - 1) / biosize;
-                       if (!(ioflag & IO_NOCACHE) &&
-                           (!rabn || (rabn == np->n_lastread) || (rabn == (np->n_lastread+1)))) {
-                               maxrabn += nmp->nm_readahead;
-                               if ((maxrabn * biosize) >= (off_t)np->n_size)
-                                       maxrabn = ((off_t)np->n_size - 1)/biosize;
-                       }
-               } else {
-                       rabn = maxrabn = 0;
-               }
-       }
-
-       do {
-
-           if (vtype == VREG) {
-               nfs_data_lock(np, NFS_NODE_LOCK_SHARED);
-               lbn = uio->uio_offset / biosize;
+       do {
+               nfs_data_lock(np, NFS_DATA_LOCK_SHARED);
+               lbn = uio_offset(uio) / biosize;
 
                /*
                 * Copy directly from any cached pages without grabbing the bufs.
-                *
-                * Note: for "nocache" reads, we don't copy directly from UBC
-                * because any cached pages will be for readahead buffers that
-                * need to be invalidated anyway before we finish this request.
+                * (If we are NOCACHE and we've issued readahead requests, we need
+                * to grab the NB_NCRDAHEAD bufs to drop them.)
                 */
-               if (!(ioflag & IO_NOCACHE) &&
-                       (uio->uio_segflg == UIO_USERSPACE32 ||
-                        uio->uio_segflg == UIO_USERSPACE64 ||
-                        uio->uio_segflg == UIO_USERSPACE)) {
-                       // LP64todo - fix this!
-                       int io_resid = uio_uio_resid(uio);
-                       diff = np->n_size - uio->uio_offset;
-                       if (diff < io_resid)
+               if ((!(ioflag & IO_NOCACHE) || !readaheads) &&
+                   ((uio->uio_segflg == UIO_USERSPACE32 ||
+                   uio->uio_segflg == UIO_USERSPACE64 ||
+                   uio->uio_segflg == UIO_USERSPACE))) {
+                       io_resid = uio_resid(uio);
+                       diff = np->n_size - uio_offset(uio);
+                       if (diff < io_resid) {
                                io_resid = diff;
+                       }
                        if (io_resid > 0) {
-                               error = cluster_copy_ubc_data(vp, uio, &io_resid, 0);
+                               int count = (io_resid > INT_MAX) ? INT_MAX : (int)io_resid;
+                               error = cluster_copy_ubc_data(vp, uio, &count, 0);
                                if (error) {
                                        nfs_data_unlock(np);
-                                       FSDBG_BOT(514, np, uio->uio_offset, 0xcacefeed, error);
-                                       return (error);
+                                       FSDBG_BOT(514, np, uio_offset(uio), 0xcacefeed, error);
+                                       return error;
                                }
                        }
                        /* count any biocache reads that we just copied directly */
-                       if (lbn != (uio->uio_offset / biosize)) {
-                               OSAddAtomic((uio->uio_offset / biosize) - lbn, (SInt32*)&nfsstats.biocache_reads);
-                               FSDBG(514, np, 0xcacefeed, uio->uio_offset, error);
+                       if (lbn != (uio_offset(uio) / biosize)) {
+                               OSAddAtomic64(NFS_ROUND_BLOCK(uio_offset(uio), biosize) - lbn, &nfsstats.biocache_reads);
+                               FSDBG(514, np, 0xcacefeed, uio_offset(uio), error);
                        }
                }
 
-               lbn = uio->uio_offset / biosize;
-               on = uio->uio_offset % biosize;
-               np->n_lastread = (uio->uio_offset - 1) / biosize;
+               lbn = uio_offset(uio) / biosize;
+               on = uio_offset(uio) % biosize;
+               nfs_node_lock_force(np);
+               np->n_lastread = (uio_offset(uio) - 1) / biosize;
+               nfs_node_unlock(np);
+
+               if ((uio_resid(uio) <= 0) || (uio_offset(uio) >= (off_t)np->n_size)) {
+                       nfs_data_unlock(np);
+                       FSDBG_BOT(514, np, uio_offset(uio), uio_resid(uio), 0xaaaaaaaa);
+                       return 0;
+               }
 
                /* adjust readahead block number, if necessary */
-               if (rabn < lbn)
+               if (rabn < lbn) {
                        rabn = lbn;
+               }
                lastrabn = MIN(maxrabn, lbn + nmp->nm_readahead);
                if (rabn <= lastrabn) { /* start readaheads */
                        error = nfs_buf_readahead(np, ioflag, &rabn, lastrabn, thd, cred);
                        if (error) {
                                nfs_data_unlock(np);
                                FSDBG_BOT(514, np, 0xd1e000b, 1, error);
-                               return (error);
+                               return error;
                        }
+                       readaheads = 1;
+                       OSAddAtomic64(rabn - lbn, &nfsstats.biocache_reads);
+               } else {
+                       OSAddAtomic64(1, &nfsstats.biocache_reads);
                }
 
-               if ((uio_uio_resid(uio) <= 0) || (uio->uio_offset >= (off_t)np->n_size)) {
-                       nfs_data_unlock(np);
-                       FSDBG_BOT(514, np, uio->uio_offset, uio_uio_resid(uio), 0xaaaaaaaa);
-                       return (0);
-               }
-
-               OSAddAtomic(1, (SInt32*)&nfsstats.biocache_reads);
-
                /*
                 * If the block is in the cache and has the required data
                 * in a valid region, just copy it out.
@@ -2032,17 +2267,18 @@ nfs_bioread(nfsnode_t np, struct uio *uio, int ioflag, int *eofflag, vfs_context_t ctx)
                 * as required.
                 */
 again:
-               // LP64todo - fix this!
-               n = min((unsigned)(biosize - on), uio_uio_resid(uio));
-               diff = np->n_size - uio->uio_offset;
-               if (diff < n)
+               io_resid = uio_resid(uio);
+               n = (io_resid > (biosize - on)) ? (biosize - on) : io_resid;
+               diff = np->n_size - uio_offset(uio);
+               if (diff < n) {
                        n = diff;
+               }
 
                error = nfs_buf_get(np, lbn, biosize, thd, NBLK_READ, &bp);
                if (error) {
                        nfs_data_unlock(np);
                        FSDBG_BOT(514, np, 0xd1e000c, 0, error);
-                       return (error);
+                       return error;
                }
 
                if ((ioflag & IO_NOCACHE) && ISSET(bp->nb_flags, NB_CACHE)) {
@@ -2052,32 +2288,32 @@ again:
                         * Invalidate the data if it wasn't just read
                         * in as part of a "nocache readahead".
                         */
-                       if (bp->nb_dirty || (bp->nb_dirtyend > 0)) {
+                       if (nfs_buf_pgs_is_set(&bp->nb_dirty) || (bp->nb_dirtyend > 0)) {
                                /* so write the buffer out and try again */
                                SET(bp->nb_flags, NB_NOCACHE);
                                goto flushbuffer;
                        }
-                       if (!ISSET(bp->nb_flags, NB_NCRDAHEAD)) {
-                               CLR(bp->nb_flags, NB_CACHE);
-                               bp->nb_valid = 0;
-                       } else {
+                       if (ISSET(bp->nb_flags, NB_NCRDAHEAD)) {
                                CLR(bp->nb_flags, NB_NCRDAHEAD);
+                               SET(bp->nb_flags, NB_NOCACHE);
                        }
                }
 
                /* if any pages are valid... */
-               if (bp->nb_valid) {
+               if (nfs_buf_pgs_is_set(&bp->nb_valid)) {
                        /* ...check for any invalid pages in the read range */
-                       int pg, firstpg, lastpg, dirtypg;
+                       off_t pg, firstpg, lastpg, dirtypg;
                        dirtypg = firstpg = lastpg = -1;
-                       pg = on/PAGE_SIZE;
-                       while (pg <= (on + n - 1)/PAGE_SIZE) {
-                               if (!NBPGVALID(bp,pg)) {
-                                       if (firstpg < 0)
+                       pg = on / PAGE_SIZE;
+                       while (pg <= (on + n - 1) / PAGE_SIZE) {
+                               if (!NBPGVALID(bp, pg)) {
+                                       if (firstpg < 0) {
                                                firstpg = pg;
+                                       }
                                        lastpg = pg;
-                               } else if (firstpg >= 0 && dirtypg < 0 && NBPGDIRTY(bp,pg))
+                               } else if (firstpg >= 0 && dirtypg < 0 && NBPGDIRTY(bp, pg)) {
                                        dirtypg = pg;
+                               }
                                pg++;
                        }
 
@@ -2086,8 +2322,8 @@ again:
                                if (bp->nb_validoff < 0) {
                                        /* valid range isn't set up, so */
                                        /* set it to what we know is valid */
-                                       bp->nb_validoff = trunc_page(on);
-                                       bp->nb_validend = round_page(on+n);
+                                       bp->nb_validoff = trunc_page_64(on);
+                                       bp->nb_validend = round_page_64(on + n);
                                        nfs_buf_normalize_valid_range(np, bp);
                                }
                                goto buffer_ready;
@@ -2095,7 +2331,7 @@ again:
 
                        /* there are invalid pages in the read range */
                        if (((dirtypg > firstpg) && (dirtypg < lastpg)) ||
-                           (((firstpg*PAGE_SIZE) < bp->nb_dirtyend) && (((lastpg+1)*PAGE_SIZE) > bp->nb_dirtyoff))) {
+                           (((firstpg * PAGE_SIZE) < bp->nb_dirtyend) && (((lastpg + 1) * PAGE_SIZE) > bp->nb_dirtyoff))) {
                                /* there are also dirty page(s) (or range) in the read range, */
                                /* so write the buffer out and try again */
 flushbuffer:
@@ -2109,41 +2345,42 @@ flushbuffer:
                                if (error) {
                                        nfs_data_unlock(np);
                                        FSDBG_BOT(514, np, 0xd1e000d, 0, error);
-                                       return (error);
+                                       return error;
                                }
                                goto again;
                        }
-                       if (!bp->nb_dirty && bp->nb_dirtyend <= 0 &&
-                           (lastpg - firstpg + 1) > (biosize/PAGE_SIZE)/2) {
+                       if (!nfs_buf_pgs_is_set(&bp->nb_dirty) && bp->nb_dirtyend <= 0 &&
+                           (lastpg - firstpg + 1) > (biosize / PAGE_SIZE) / 2) {
                                /* we need to read in more than half the buffer and the */
                                /* buffer's not dirty, so just fetch the whole buffer */
-                               bp->nb_valid = 0;
+                               NBPGS_ERASE(&bp->nb_valid);
                        } else {
                                /* read the page range in */
                                uio_t auio;
-                               char uio_buf[ UIO_SIZEOF(1) ];
-                               
+                               char uio_buf[UIO_SIZEOF(1)];
+
                                NFS_BUF_MAP(bp);
                                auio = uio_createwithbuffer(1, (NBOFF(bp) + firstpg * PAGE_SIZE_64),
-                                               UIO_SYSSPACE, UIO_READ, &uio_buf[0], sizeof(uio_buf));
+                                   UIO_SYSSPACE, UIO_READ, &uio_buf[0], sizeof(uio_buf));
                                if (!auio) {
                                        error = ENOMEM;
                                } else {
-                                       uio_addiov(auio, CAST_USER_ADDR_T((bp->nb_data + firstpg * PAGE_SIZE)),
-                                                       ((lastpg - firstpg + 1) * PAGE_SIZE));
+                                       NFS_UIO_ADDIOV(auio, CAST_USER_ADDR_T(bp->nb_data + (firstpg * PAGE_SIZE)),
+                                           ((lastpg - firstpg + 1) * PAGE_SIZE));
                                        error = nfs_read_rpc(np, auio, ctx);
                                }
                                if (error) {
-                                       if (ioflag & IO_NOCACHE)
+                                       if (ioflag & IO_NOCACHE) {
                                                SET(bp->nb_flags, NB_NOCACHE);
+                                       }
                                        nfs_buf_release(bp, 1);
                                        nfs_data_unlock(np);
                                        FSDBG_BOT(514, np, 0xd1e000e, 0, error);
-                                       return (error);
+                                       return error;
                                }
                                /* Make sure that the valid range is set to cover this read. */
-                               bp->nb_validoff = trunc_page_32(on);
-                               bp->nb_validend = round_page_32(on+n);
+                               bp->nb_validoff = trunc_page_64(on);
+                               bp->nb_validend = round_page_64(on + n);
                                nfs_buf_normalize_valid_range(np, bp);
                                if (uio_resid(auio) > 0) {
                                        /* if short read, must have hit EOF, */
@@ -2151,12 +2388,13 @@ flushbuffer:
                                        bzero(CAST_DOWN(caddr_t, uio_curriovbase(auio)), uio_resid(auio));
                                }
                                /* mark the pages (successfully read) as valid */
-                               for (pg=firstpg; pg <= lastpg; pg++)
-                                       NBPGVALID_SET(bp,pg);
+                               for (pg = firstpg; pg <= lastpg; pg++) {
+                                       NBPGVALID_SET(bp, pg);
+                               }
                        }
                }
                /* if no pages are valid, read the whole block */
-               if (!bp->nb_valid) {
+               if (!nfs_buf_pgs_is_set(&bp->nb_valid)) {
                        if (!IS_VALID_CRED(bp->nb_rcred) && IS_VALID_CRED(cred)) {
                                kauth_cred_ref(cred);
                                bp->nb_rcred = cred;
@@ -2164,196 +2402,80 @@ flushbuffer:
                        SET(bp->nb_flags, NB_READ);
                        CLR(bp->nb_flags, (NB_DONE | NB_ERROR | NB_INVAL));
                        error = nfs_buf_read(bp);
+                       if (ioflag & IO_NOCACHE) {
+                               SET(bp->nb_flags, NB_NOCACHE);
+                       }
                        if (error) {
                                nfs_data_unlock(np);
                                nfs_buf_release(bp, 1);
                                FSDBG_BOT(514, np, 0xd1e000f, 0, error);
-                               return (error);
+                               return error;
                        }
                }
 buffer_ready:
                /* validate read range against valid range and clip */
                if (bp->nb_validend > 0) {
                        diff = (on >= bp->nb_validend) ? 0 : (bp->nb_validend - on);
-                       if (diff < n)
+                       if (diff < n) {
                                n = diff;
-               }
-               if (n > 0)
-                       NFS_BUF_MAP(bp);
-           } else if (vtype == VDIR) {
-               OSAddAtomic(1, (SInt32*)&nfsstats.biocache_readdirs);
-               error = nfs_lock(np, NFS_NODE_LOCK_SHARED);
-               if (error || (np->n_direofoffset && (uio->uio_offset >= np->n_direofoffset))) {
-                       if (!error)
-                               nfs_unlock(np);
-                       if (eofflag)
-                               *eofflag = 1;
-                       FSDBG_BOT(514, np, 0xde0f0001, 0, 0);
-                       return (0);
-               }
-               nfs_unlock(np);
-               lbn = uio->uio_offset / NFS_DIRBLKSIZ;
-               on = uio->uio_offset & (NFS_DIRBLKSIZ - 1);
-               error = nfs_buf_get(np, lbn, NFS_DIRBLKSIZ, thd, NBLK_READ, &bp);
-               if (error) {
-                       FSDBG_BOT(514, np, 0xd1e0012, 0, error);
-                       return (error);
-               }
-               if (!ISSET(bp->nb_flags, NB_CACHE)) {
-                   SET(bp->nb_flags, NB_READ);
-                   error = nfs_buf_readdir(bp, ctx);
-                   if (error)
-                       nfs_buf_release(bp, 1);
-                   while (error == NFSERR_BAD_COOKIE) {
-                       error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE);
-                       if (!error) {
-                               nfs_invaldir(np);
-                               nfs_unlock(np);
                        }
-                       error = nfs_vinvalbuf(vp, 0, ctx, 1);
-                       /*
-                        * Yuck! The directory has been modified on the
-                        * server. The only way to get the block is by
-                        * reading from the beginning to get all the
-                        * offset cookies.
-                        */
-                       for (tlbn = 0; tlbn <= lbn && !error; tlbn++) {
-                           if ((error = nfs_lock(np, NFS_NODE_LOCK_SHARED)))
-                                   break;
-                           if (np->n_direofoffset
-                               && (tlbn * NFS_DIRBLKSIZ) >= np->n_direofoffset) {
-                                   nfs_unlock(np);
-                                   if (eofflag)
-                                           *eofflag = 1;
-                                   FSDBG_BOT(514, np, 0xde0f0002, 0, 0);
-                                   return (0);
-                           }
-                           nfs_unlock(np);
-                           error = nfs_buf_get(np, tlbn, NFS_DIRBLKSIZ, thd, NBLK_READ, &bp);
-                           if (error) {
-                                   FSDBG_BOT(514, np, 0xd1e0013, 0, error);
-                                   return (error);
-                           }
-                           if (!ISSET(bp->nb_flags, NB_CACHE)) {
-                                   SET(bp->nb_flags, NB_READ);
-                                   error = nfs_buf_readdir(bp, ctx);
-                                   /*
-                                    * no error + NB_INVAL == directory EOF,
-                                    * use the block.
-                                    */
-                                   if (error == 0 && ISSET(bp->nb_flags, NB_INVAL)) {
-                                           if (eofflag)
-                                                   *eofflag = 1;
-                                           break;
-                                   }
-                           }
-                           /*
-                            * An error will throw away the block and the
-                            * for loop will break out.  If no error and this
-                            * is not the block we want, we throw away the
-                            * block and go for the next one via the for loop.
-                            */
-                           if (error || (tlbn < lbn))
-                                   nfs_buf_release(bp, 1);
-                       }
-                   }
-                   /*
-                    * The above while is repeated if we hit another cookie
-                    * error.  If we hit an error and it wasn't a cookie error,
-                    * we give up.
-                    */
-                   if (error) {
-                       FSDBG_BOT(514, np, 0xd1e0014, 0, error);
-                       return (error);
-                   }
                }
-               /*
-                * Make sure we use a signed variant of min() since
-                * the second term may be negative.
-                */
-               // LP64todo - fix this!
-               n = lmin(uio_uio_resid(uio), bp->nb_validend - on);
-               /*
-                * We keep track of the directory eof in
-                * np->n_direofoffset and chop it off as an
-                * extra step right here.
-                */
-               if ((error = nfs_lock(np, NFS_NODE_LOCK_SHARED))) {
-                       FSDBG_BOT(514, np, 0xd1e0115, 0, error);
-                       return (error);
-               }
-               if (np->n_direofoffset &&
-                   n > np->n_direofoffset - uio->uio_offset)
-                       n = np->n_direofoffset - uio->uio_offset;
-               nfs_unlock(np);
-               /*
-                * Make sure that we return an integral number of entries so
-                * that any subsequent calls will start copying from the start
-                * of the next entry.
-                *
-                * If the current value of n has the last entry cut short,
-                * set n to copy everything up to the last entry instead.
-                */
                if (n > 0) {
-                       dp = bp->nb_data + on;
-                       while (dp < (bp->nb_data + on + n)) {
-                               direntp = (struct dirent *)dp;
-                               dp += direntp->d_reclen;
+                       NFS_BUF_MAP(bp);
+                       n32 = n > INT_MAX ? INT_MAX : (int)n;
+                       error = uiomove(bp->nb_data + on, n32, uio);
+                       if (!error && n > n32) {
+                               error = uiomove(bp->nb_data + on + n32, (int)(n - n32), uio);
                        }
-                       if (dp > (bp->nb_data + on + n))
-                               n = (dp - direntp->d_reclen) - (bp->nb_data + on);
                }
-           }
 
-           if (n > 0)
-               error = uiomove(bp->nb_data + on, (int)n, uio);
 
-           if (vtype == VREG) {
-               if (ioflag & IO_NOCACHE)
-                       SET(bp->nb_flags, NB_NOCACHE);
                nfs_buf_release(bp, 1);
                nfs_data_unlock(np);
-               np->n_lastread = (uio->uio_offset - 1) / biosize;
-           } else {
-               nfs_buf_release(bp, 1);
-           }
-       } while (error == 0 && uio_uio_resid(uio) > 0 && n > 0);
-       FSDBG_BOT(514, np, uio->uio_offset, uio_uio_resid(uio), error);
-       return (error);
+               nfs_node_lock_force(np);
+               np->n_lastread = (uio_offset(uio) - 1) / biosize;
+               nfs_node_unlock(np);
+       } while (error == 0 && uio_resid(uio) > 0 && n > 0);
+       FSDBG_BOT(514, np, uio_offset(uio), uio_resid(uio), error);
+       return error;
 }
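
Each pass of the loop above maps the current uio position to a (block, offset-in-block) pair, then clips the copy length first to the remainder of the block and then to EOF. Worked numbers, assuming biosize = 32 KB:

    #include <assert.h>
    #include <stdint.h>

    int
    main(void)
    {
            const int64_t biosize = 32768;      /* assumed nm_biosize */
            int64_t off    = 40000;             /* uio_offset(uio) */
            int64_t resid  = 100000;            /* uio_resid(uio) */
            int64_t n_size = 50000;             /* file size */

            int64_t lbn = off / biosize;        /* block 1 */
            int64_t on  = off % biosize;        /* 7232 bytes in */

            /* clip to the rest of this block ... */
            int64_t n = (resid > biosize - on) ? biosize - on : resid;
            /* ... then to EOF */
            int64_t diff = n_size - off;
            if (diff < n) {
                    n = diff;
            }
            assert(lbn == 1 && on == 7232 && n == 10000);
            return 0;
    }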
 
 /*
  * limit the number of outstanding async I/O writes
  */
-static int
+int
 nfs_async_write_start(struct nfsmount *nmp)
 {
-       int error = 0, slpflag = (nmp->nm_flag & NFSMNT_INT) ? PCATCH : 0;
-       struct timespec ts = {1, 0};
+       int error = 0, slpflag = NMFLAG(nmp, INTR) ? PCATCH : 0;
+       struct timespec ts = { .tv_sec = 1, .tv_nsec = 0 };
 
-       if (nfs_max_async_writes <= 0)
-               return (0);
+       if (nfs_max_async_writes <= 0) {
+               return 0;
+       }
        lck_mtx_lock(&nmp->nm_lock);
-       while (!error && (nfs_max_async_writes > 0) && (nmp->nm_asyncwrites >= nfs_max_async_writes)) {
-               if ((error = nfs_sigintr(nmp, NULL, current_thread(), 1)))
+       while ((nfs_max_async_writes > 0) && (nmp->nm_asyncwrites >= nfs_max_async_writes)) {
+               if ((error = nfs_sigintr(nmp, NULL, current_thread(), 1))) {
                        break;
-               error = msleep(&nmp->nm_asyncwrites, &nmp->nm_lock, slpflag|(PZERO-1), "nfsasyncwrites", &ts);
-               if (error == EWOULDBLOCK)
-                       error = 0;
+               }
+               msleep(&nmp->nm_asyncwrites, &nmp->nm_lock, slpflag | (PZERO - 1), "nfsasyncwrites", &ts);
+               slpflag = 0;
        }
-       if (!error)
+       if (!error) {
                nmp->nm_asyncwrites++;
+       }
        lck_mtx_unlock(&nmp->nm_lock);
-       return (error);
+       return error;
 }
-static void
+void
 nfs_async_write_done(struct nfsmount *nmp)
 {
-       if (nmp->nm_asyncwrites <= 0)
+       if (nmp->nm_asyncwrites <= 0) {
                return;
+       }
        lck_mtx_lock(&nmp->nm_lock);
-       if (nmp->nm_asyncwrites-- >= nfs_max_async_writes)
+       if (nmp->nm_asyncwrites-- >= nfs_max_async_writes) {
                wakeup(&nmp->nm_asyncwrites);
+       }
        lck_mtx_unlock(&nmp->nm_lock);
 }
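
This pair throttles the number of in-flight async writes per mount: start() sleeps (interruptibly, re-checking about once a second) while the count is at the limit, and done() wakes the sleepers as the count drops back below it. A user-space analogue with a condition variable standing in for msleep/wakeup:

    #include <pthread.h>

    struct write_gate {
            pthread_mutex_t lock;
            pthread_cond_t  cv;
            int             inflight;
            int             limit;      /* nfs_max_async_writes analogue */
    };

    void
    gate_start(struct write_gate *g)
    {
            pthread_mutex_lock(&g->lock);
            while (g->limit > 0 && g->inflight >= g->limit) {
                    pthread_cond_wait(&g->cv, &g->lock);
            }
            g->inflight++;
            pthread_mutex_unlock(&g->lock);
    }

    void
    gate_done(struct write_gate *g)
    {
            pthread_mutex_lock(&g->lock);
            if (g->inflight > 0) {
                    g->inflight--;
                    pthread_cond_broadcast(&g->cv);
            }
            pthread_mutex_unlock(&g->lock);
    }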
 
@@ -2376,19 +2498,20 @@ nfs_buf_write(struct nfsbuf *bp)
        thread_t thd;
        kauth_cred_t cred;
        proc_t p = current_proc();
-       int iomode, doff, dend, firstpg, lastpg;
-       uint32_t pagemask;
+       int iomode;
+       off_t doff, dend, firstpg, lastpg;
 
        FSDBG_TOP(553, bp, NBOFF(bp), bp->nb_flags, 0);
 
-       if (!ISSET(bp->nb_lflags, NBL_BUSY))
+       if (!ISSET(bp->nb_lflags, NBL_BUSY)) {
                panic("nfs_buf_write: buffer is not busy???");
+       }
 
        np = bp->nb_np;
        async = ISSET(bp->nb_flags, NB_ASYNC);
        oldflags = bp->nb_flags;
 
-       CLR(bp->nb_flags, (NB_READ|NB_DONE|NB_ERROR|NB_DELWRI));
+       CLR(bp->nb_flags, (NB_READ | NB_DONE | NB_ERROR | NB_DELWRI));
        if (ISSET(oldflags, NB_DELWRI)) {
                lck_mtx_lock(nfs_buf_mutex);
                nfs_nbdwrite--;
@@ -2398,46 +2521,63 @@ nfs_buf_write(struct nfsbuf *bp)
        }
 
        /* move to clean list */
-       if (ISSET(oldflags, (NB_ASYNC|NB_DELWRI))) {
+       if (ISSET(oldflags, (NB_ASYNC | NB_DELWRI))) {
                lck_mtx_lock(nfs_buf_mutex);
-               if (bp->nb_vnbufs.le_next != NFSNOLIST)
+               if (bp->nb_vnbufs.le_next != NFSNOLIST) {
                        LIST_REMOVE(bp, nb_vnbufs);
+               }
                LIST_INSERT_HEAD(&np->n_cleanblkhd, bp, nb_vnbufs);
                lck_mtx_unlock(nfs_buf_mutex);
        }
+       nfs_node_lock_force(np);
+       np->n_numoutput++;
+       nfs_node_unlock(np);
        vnode_startwrite(NFSTOV(np));
 
-       if (p && p->p_stats)
-               OSIncrementAtomic(&p->p_stats->p_ru.ru_oublock);
+       if (p && p->p_stats) {
+               OSIncrementAtomicLong(&p->p_stats->p_ru.ru_oublock);
+       }
 
        cred = bp->nb_wcred;
-       if (!IS_VALID_CRED(cred) && ISSET(bp->nb_flags, NB_READ))
+       if (!IS_VALID_CRED(cred) && ISSET(bp->nb_flags, NB_READ)) {
                cred = bp->nb_rcred;  /* shouldn't really happen, but... */
-       if (IS_VALID_CRED(cred))
+       }
+       if (IS_VALID_CRED(cred)) {
                kauth_cred_ref(cred);
+       }
        thd = async ? NULL : current_thread();
 
        /* We need to make sure the pages are locked before doing I/O.  */
-       if (!ISSET(bp->nb_flags, NB_META) && UBCINFOEXISTS(NFSTOV(np))) {
-               if (!ISSET(bp->nb_flags, NB_PAGELIST)) {
-                       error = nfs_buf_upl_setup(bp);
-                       if (error) {
-                               printf("nfs_buf_write: upl create failed %d\n", error);
-                               SET(bp->nb_flags, NB_ERROR);
-                               bp->nb_error = error = EIO;
-                               nfs_buf_iodone(bp);
-                               goto out;
+       if (!ISSET(bp->nb_flags, NB_META)) {
+               if (UBCINFOEXISTS(NFSTOV(np))) {
+                       if (!ISSET(bp->nb_flags, NB_PAGELIST)) {
+                               error = nfs_buf_upl_setup(bp);
+                               if (error) {
+                                       printf("nfs_buf_write: upl create failed %d\n", error);
+                                       SET(bp->nb_flags, NB_ERROR);
+                                       bp->nb_error = error = EIO;
+                                       nfs_buf_iodone(bp);
+                                       goto out;
+                               }
+                               nfs_buf_upl_check(bp);
                        }
-                       nfs_buf_upl_check(bp);
+               } else {
+                       /* We should never be in nfs_buf_write() with no UBCINFO. */
+                       printf("nfs_buf_write: ubcinfo already gone\n");
+                       SET(bp->nb_flags, NB_ERROR);
+                       bp->nb_error = error = EIO;
+                       nfs_buf_iodone(bp);
+                       goto out;
                }
        }
 
        /* If NB_NEEDCOMMIT is set, a commit RPC may do the trick. */
-       if (ISSET(bp->nb_flags, NB_NEEDCOMMIT))
+       if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
                nfs_buf_check_write_verifier(np, bp);
+       }
        if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
                struct nfsmount *nmp = NFSTONMP(np);
-               if (!nmp) {
+               if (nfs_mount_gone(nmp)) {
                        SET(bp->nb_flags, NB_ERROR);
                        bp->nb_error = error = EIO;
                        nfs_buf_iodone(bp);
@@ -2445,7 +2585,7 @@ nfs_buf_write(struct nfsbuf *bp)
                }
                SET(bp->nb_flags, NB_WRITEINPROG);
                error = nmp->nm_funcs->nf_commit_rpc(np, NBOFF(bp) + bp->nb_dirtyoff,
-                               bp->nb_dirtyend - bp->nb_dirtyoff, bp->nb_wcred);
+                   bp->nb_dirtyend - bp->nb_dirtyoff, bp->nb_wcred, bp->nb_verf);
                CLR(bp->nb_flags, NB_WRITEINPROG);
                if (error) {
                        if (error != NFSERR_STALEWRITEVERF) {
@@ -2457,66 +2597,82 @@ nfs_buf_write(struct nfsbuf *bp)
                }
                bp->nb_dirtyoff = bp->nb_dirtyend = 0;
                CLR(bp->nb_flags, NB_NEEDCOMMIT);
-               nfs_lock(np, NFS_NODE_LOCK_FORCE);
+               nfs_node_lock_force(np);
                np->n_needcommitcnt--;
                CHECK_NEEDCOMMITCNT(np);
-               nfs_unlock(np);
+               nfs_node_unlock(np);
        }
        if (!error && (bp->nb_dirtyend > 0)) {
                /* sanity check the dirty range */
                if (NBOFF(bp) + bp->nb_dirtyend > (off_t) np->n_size) {
                        bp->nb_dirtyend = np->n_size - NBOFF(bp);
-                       if (bp->nb_dirtyoff >= bp->nb_dirtyend)
+                       if (bp->nb_dirtyoff >= bp->nb_dirtyend) {
                                bp->nb_dirtyoff = bp->nb_dirtyend = 0;
+                       }
                }
        }
        if (!error && (bp->nb_dirtyend > 0)) {
                /* there's a dirty range that needs to be written out */
+               nfsbufpgs pagemask, pagemaskand;
                NFS_BUF_MAP(bp);
 
                doff = bp->nb_dirtyoff;
                dend = bp->nb_dirtyend;
 
                /* if doff page is dirty, move doff to start of page */
-               if (NBPGDIRTY(bp, doff / PAGE_SIZE))
+               if (NBPGDIRTY(bp, doff / PAGE_SIZE)) {
                        doff -= doff & PAGE_MASK;
+               }
                /* try to expand write range to include preceding dirty pages */
-               if (!(doff & PAGE_MASK))
-                       while ((doff > 0) && NBPGDIRTY(bp, (doff - 1) / PAGE_SIZE))
+               if (!(doff & PAGE_MASK)) {
+                       while ((doff > 0) && NBPGDIRTY(bp, (doff - 1) / PAGE_SIZE)) {
                                doff -= PAGE_SIZE;
+                       }
+               }
                /* if dend page is dirty, move dend to start of next page */
-               if ((dend & PAGE_MASK) && NBPGDIRTY(bp, dend / PAGE_SIZE))
-                       dend = round_page_32(dend);
+               if ((dend & PAGE_MASK) && NBPGDIRTY(bp, dend / PAGE_SIZE)) {
+                       dend = round_page_64(dend);
+               }
                /* try to expand write range to include trailing dirty pages */
-               if (!(dend & PAGE_MASK))
-                       while ((dend < bp->nb_bufsize) && NBPGDIRTY(bp, dend / PAGE_SIZE))
+               if (!(dend & PAGE_MASK)) {
+                       while ((dend < (int)bp->nb_bufsize) && NBPGDIRTY(bp, dend / PAGE_SIZE)) {
                                dend += PAGE_SIZE;
+                       }
+               }
                /* make sure to keep dend clipped to EOF */
-               if ((NBOFF(bp) + dend) > (off_t) np->n_size)
+               if ((NBOFF(bp) + dend) > (off_t) np->n_size) {
                        dend = np->n_size - NBOFF(bp);
+               }
                /* calculate range of complete pages being written */
-               firstpg = round_page_32(doff) / PAGE_SIZE;
-               lastpg = (trunc_page_32(dend) - 1) / PAGE_SIZE;
-               /* calculate mask for that page range */
-               pagemask = ((1 << (lastpg + 1)) - 1) & ~((1 << firstpg) - 1);
+               if (dend > doff) {
+                       firstpg = doff / PAGE_SIZE;
+                       lastpg = (dend - 1) / PAGE_SIZE;
+                       /* calculate mask for that page range */
+                       nfs_buf_pgs_set_pages_between(&pagemask, firstpg, lastpg + 1);
+               } else {
+                       NBPGS_ERASE(&pagemask);
+               }
 
                /*
                 * compare page mask to nb_dirty; if there are other dirty pages
                 * then write FILESYNC; otherwise, write UNSTABLE if async and
                 * not needcommit/stable; otherwise write FILESYNC
                 */
-               if (bp->nb_dirty & ~pagemask)
+               nfs_buf_pgs_bit_not(&pagemask);
+               nfs_buf_pgs_bit_and(&bp->nb_dirty, &pagemask, &pagemaskand);
+               if (nfs_buf_pgs_is_set(&pagemaskand)) {
                        iomode = NFS_WRITE_FILESYNC;
-               else if ((bp->nb_flags & (NB_ASYNC | NB_NEEDCOMMIT | NB_STABLE)) == NB_ASYNC)
+               } else if ((bp->nb_flags & (NB_ASYNC | NB_NEEDCOMMIT | NB_STABLE)) == NB_ASYNC) {
                        iomode = NFS_WRITE_UNSTABLE;
-               else
+               } else {
                        iomode = NFS_WRITE_FILESYNC;
+               }
 
                /* write the whole contiguous dirty range */
                bp->nb_offio = doff;
                bp->nb_endio = dend;
 
-               OSAddAtomic(1, (SInt32 *)&nfsstats.write_bios);
+               OSAddAtomic64(1, &nfsstats.write_bios);
 
                SET(bp->nb_flags, NB_WRITEINPROG);
                error = nfs_buf_write_rpc(bp, iomode, thd, cred);
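
A note on the page-mask machinery in the hunk above: the old code kept
per-page dirty state in a single 32-bit pagemask, which caps a buffer at
32 pages, while the new nfsbufpgs type and its nfs_buf_pgs_*() helpers
lift that limit by operating on a multi-word bitmap.  The following
user-space sketch shows one plausible shape for such helpers, assuming
nfsbufpgs wraps a fixed array of 64-bit words; the word count, field
name, and sketch_* identifiers are illustrative assumptions, not the
actual xnu definitions.

#include <stdint.h>
#include <string.h>

#define SKETCH_NPGWORDS 2       /* assumed: enough bits for a buffer's pages */
typedef struct { uint64_t pages[SKETCH_NPGWORDS]; } sketch_pgs;

static void
sketch_pgs_erase(sketch_pgs *p)         /* cf. NBPGS_ERASE() */
{
	memset(p->pages, 0, sizeof(p->pages));
}

static void
sketch_pgs_set_pages_between(sketch_pgs *p, int firstpg, int lastpg)
{
	/* set bits [firstpg, lastpg), cf. nfs_buf_pgs_set_pages_between() */
	sketch_pgs_erase(p);
	for (int pg = firstpg; pg < lastpg; pg++) {
		p->pages[pg / 64] |= 1ULL << (pg % 64);
	}
}

static void
sketch_pgs_bit_not(sketch_pgs *p)
{
	for (int i = 0; i < SKETCH_NPGWORDS; i++) {
		p->pages[i] = ~p->pages[i];
	}
}

static void
sketch_pgs_bit_and(const sketch_pgs *a, const sketch_pgs *b, sketch_pgs *out)
{
	for (int i = 0; i < SKETCH_NPGWORDS; i++) {
		out->pages[i] = a->pages[i] & b->pages[i];
	}
}

static int
sketch_pgs_is_set(const sketch_pgs *p)
{
	for (int i = 0; i < SKETCH_NPGWORDS; i++) {
		if (p->pages[i]) {
			return 1;
		}
	}
	return 0;
}

With helpers of this shape, the FILESYNC/UNSTABLE decision above is just
"are any dirty bits set outside the pages being written", i.e. the old
(nb_dirty & ~pagemask) test spelled out as bit_not + bit_and + is_set.
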
@@ -2527,8 +2683,9 @@ nfs_buf_write(struct nfsbuf *bp)
                 * pages pushed out.
                 */
        } else {
-               if (!error && bp->nb_dirty) /* write out any dirty pages */
+               if (!error && nfs_buf_pgs_is_set(&bp->nb_dirty)) { /* write out any dirty pages */
                        error = nfs_buf_write_dirty_pages(bp, thd, cred);
+               }
                nfs_buf_iodone(bp);
        }
        /* note: bp is still valid only for !async case */
@@ -2538,8 +2695,9 @@ out:
                /* move to clean list */
                if (oldflags & NB_DELWRI) {
                        lck_mtx_lock(nfs_buf_mutex);
-                       if (bp->nb_vnbufs.le_next != NFSNOLIST)
+                       if (bp->nb_vnbufs.le_next != NFSNOLIST) {
                                LIST_REMOVE(bp, nb_vnbufs);
+                       }
                        LIST_INSERT_HEAD(&np->n_cleanblkhd, bp, nb_vnbufs);
                        lck_mtx_unlock(nfs_buf_mutex);
                }
@@ -2547,14 +2705,14 @@ out:
                nfs_buf_release(bp, 1);
                /* check if we need to invalidate (and we can) */
                if ((np->n_flag & NNEEDINVALIDATE) &&
-                   !(np->n_bflag & (NBINVALINPROG|NBFLUSHINPROG))) {
+                   !(np->n_bflag & (NBINVALINPROG | NBFLUSHINPROG))) {
                        int invalidate = 0;
-                       nfs_lock(np, NFS_NODE_LOCK_FORCE);
+                       nfs_node_lock_force(np);
                        if (np->n_flag & NNEEDINVALIDATE) {
                                invalidate = 1;
                                np->n_flag &= ~NNEEDINVALIDATE;
                        }
-                       nfs_unlock(np);
+                       nfs_node_unlock(np);
                        if (invalidate) {
                                /*
                                 * There was a write error and we need to
@@ -2567,14 +2725,15 @@ out:
                                 * the buffer busy.  So we call vinvalbuf() after
                                 * releasing the buffer.
                                 */
-                               nfs_vinvalbuf2(NFSTOV(np), V_SAVE|V_IGNORE_WRITEERR, thd, cred, 1);
+                               nfs_vinvalbuf2(NFSTOV(np), V_SAVE | V_IGNORE_WRITEERR, thd, cred, 1);
                        }
                }
        }
 
-       if (IS_VALID_CRED(cred))
+       if (IS_VALID_CRED(cred)) {
                kauth_cred_unref(&cred);
-       return (error);
+       }
+       return error;
 }
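
The NB_NEEDCOMMIT handling near the top of nfs_buf_write() deserves a
gloss: data already transferred by an UNSTABLE write does not have to be
retransmitted, only committed, unless the server rebooted and its write
verifier changed.  A hedged sketch of that decision; commit_rpc(),
write_rpc(), and the error constant are hypothetical stand-ins, not the
xnu interfaces.

#define SK_NFSERR_STALEWRITEVERF 10025  /* assumed placeholder value */

extern int commit_rpc(long long off, long long len);    /* hypothetical */
extern int write_rpc(long long off, long long len);     /* hypothetical */

static int
sketch_push_dirty_range(long long off, long long len, int needcommit)
{
	if (needcommit) {
		int error = commit_rpc(off, len);
		if (error == 0) {
			return 0;       /* committed; nothing to rewrite */
		}
		if (error != SK_NFSERR_STALEWRITEVERF) {
			return error;   /* commit genuinely failed */
		}
		/* stale verifier: the data may be lost, fall back to rewriting */
	}
	return write_rpc(off, len);
}
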
 
 /*
@@ -2585,8 +2744,7 @@ nfs_buf_write_finish(struct nfsbuf *bp, thread_t thd, kauth_cred_t cred)
 {
        nfsnode_t np = bp->nb_np;
        int error = (bp->nb_flags & NB_ERROR) ? bp->nb_error : 0;
-       int firstpg, lastpg;
-       uint32_t pagemask;
+       off_t firstpg, lastpg;
 
        if ((error == EINTR) || (error == ERESTART)) {
                CLR(bp->nb_flags, NB_ERROR);
@@ -2594,31 +2752,37 @@ nfs_buf_write_finish(struct nfsbuf *bp, thread_t thd, kauth_cred_t cred)
        }
 
        if (!error) {
+               nfsbufpgs pagemask;
                /* calculate range of complete pages being written */
-               firstpg = round_page_32(bp->nb_offio) / PAGE_SIZE;
-               lastpg = (trunc_page_32(bp->nb_endio) - 1) / PAGE_SIZE;
-               /* calculate mask for that page range written */
-               pagemask = ((1 << (lastpg + 1)) - 1) & ~((1 << firstpg) - 1);
+               if (bp->nb_endio > bp->nb_offio) {
+                       firstpg = bp->nb_offio / PAGE_SIZE;
+                       lastpg = (bp->nb_endio - 1) / PAGE_SIZE;
+                       /* calculate mask for that page range written */
+                       nfs_buf_pgs_set_pages_between(&pagemask, firstpg, lastpg + 1);
+               } else {
+                       NBPGS_ERASE(&pagemask);
+               }
                /* clear dirty bits for pages we've written */
-               bp->nb_dirty &= ~pagemask;
+               nfs_buf_pgs_bit_not(&pagemask);
+               nfs_buf_pgs_bit_and(&bp->nb_dirty, &pagemask, &bp->nb_dirty);
        }
 
        /* manage needcommit state */
        if (!error && (bp->nb_commitlevel == NFS_WRITE_UNSTABLE)) {
                if (!ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
-                       nfs_lock(np, NFS_NODE_LOCK_FORCE);
+                       nfs_node_lock_force(np);
                        np->n_needcommitcnt++;
-                       nfs_unlock(np);
+                       nfs_node_unlock(np);
                        SET(bp->nb_flags, NB_NEEDCOMMIT);
                }
                /* make sure nb_dirtyoff/nb_dirtyend reflect actual range written */
                bp->nb_dirtyoff = bp->nb_offio;
                bp->nb_dirtyend = bp->nb_endio;
        } else if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
-               nfs_lock(np, NFS_NODE_LOCK_FORCE);
+               nfs_node_lock_force(np);
                np->n_needcommitcnt--;
                CHECK_NEEDCOMMITCNT(np);
-               nfs_unlock(np);
+               nfs_node_unlock(np);
                CLR(bp->nb_flags, NB_NEEDCOMMIT);
        }
 
@@ -2649,8 +2813,9 @@ nfs_buf_write_finish(struct nfsbuf *bp, thread_t thd, kauth_cred_t cred)
                if (ISSET(bp->nb_flags, NB_ASYNC)) {
                        /* move to dirty list */
                        lck_mtx_lock(nfs_buf_mutex);
-                       if (bp->nb_vnbufs.le_next != NFSNOLIST)
+                       if (bp->nb_vnbufs.le_next != NFSNOLIST) {
                                LIST_REMOVE(bp, nb_vnbufs);
+                       }
                        LIST_INSERT_HEAD(&np->n_dirtyblkhd, bp, nb_vnbufs);
                        lck_mtx_unlock(nfs_buf_mutex);
                }
@@ -2667,18 +2832,19 @@ nfs_buf_write_finish(struct nfsbuf *bp, thread_t thd, kauth_cred_t cred)
                         * buffer busy.  Set a flag to do it after releasing
                         * the buffer.
                         */
-                       nfs_lock(np, NFS_NODE_LOCK_FORCE);
+                       nfs_node_lock_force(np);
                        np->n_error = error;
                        np->n_flag |= (NWRITEERR | NNEEDINVALIDATE);
                        NATTRINVALIDATE(np);
-                       nfs_unlock(np);
+                       nfs_node_unlock(np);
                }
                /* clear the dirty range */
                bp->nb_dirtyoff = bp->nb_dirtyend = 0;
        }
 
-       if (!error && bp->nb_dirty)
+       if (!error && nfs_buf_pgs_is_set(&bp->nb_dirty)) {
                nfs_buf_write_dirty_pages(bp, thd, cred);
+       }
        nfs_buf_iodone(bp);
 }
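
The needcommit management in nfs_buf_write_finish() hinges on the
ordering of NFS write stability levels (UNSTABLE < DATA_SYNC < FILE_SYNC
in RFC 1813): a buffer whose weakest reply was UNSTABLE still needs a
COMMIT, which is why the RPC-completion code later in this diff saves
the lowest commit level returned.  A minimal sketch of that bookkeeping;
the struct and names are illustrative, not the nfsbuf fields.

enum sk_stable { SK_UNSTABLE = 0, SK_DATASYNC = 1, SK_FILESYNC = 2 };

struct sk_wbuf {
	enum sk_stable commitlevel;     /* weakest stability seen so far */
	int needcommit;                 /* needs a later COMMIT RPC */
};

static void
sketch_note_write_reply(struct sk_wbuf *bp, enum sk_stable committed)
{
	if (committed < bp->commitlevel) {
		bp->commitlevel = committed;    /* keep the weakest guarantee */
	}
	/* UNSTABLE means the data may still only be in the server's RAM */
	bp->needcommit = (bp->commitlevel == SK_UNSTABLE);
}
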
 
@@ -2695,57 +2861,55 @@ nfs_buf_write_dirty_pages(struct nfsbuf *bp, thread_t thd, kauth_cred_t cred)
        nfsnode_t np = bp->nb_np;
        struct nfsmount *nmp = NFSTONMP(np);
        int error = 0, commit, iomode, iomode2, len, pg, count, npages, off;
-       uint32_t dirty = bp->nb_dirty;
+       nfsbufpgs dirty;
        uint64_t wverf;
-       struct uio uio;
-       struct iovec_32 io;
+       uio_t auio;
+       char uio_buf[UIO_SIZEOF(1)];
 
-       if (!bp->nb_dirty)
-               return (0);
+       if (!nfs_buf_pgs_is_set(&bp->nb_dirty)) {
+               return 0;
+       }
 
        /* there are pages marked dirty that need to be written out */
-       OSAddAtomic(1, (SInt32 *)&nfsstats.write_bios);
+       OSAddAtomic64(1, &nfsstats.write_bios);
        NFS_BUF_MAP(bp);
        SET(bp->nb_flags, NB_WRITEINPROG);
        npages = bp->nb_bufsize / PAGE_SIZE;
        iomode = NFS_WRITE_UNSTABLE;
 
-       uio.uio_iovs.iov32p = &io;
-       uio.uio_iovcnt = 1;
-       uio.uio_rw = UIO_WRITE;
-#if 1   /* LP64todo - can't use new segment flags until the drivers are ready */
-       uio.uio_segflg = UIO_SYSSPACE;
-#else
-       uio.uio_segflg = UIO_SYSSPACE32;
-#endif
+       auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_WRITE,
+           &uio_buf, sizeof(uio_buf));
 
 again:
-       dirty = bp->nb_dirty;
+       NBPGS_COPY(&dirty, &bp->nb_dirty);
        wverf = bp->nb_verf;
        commit = NFS_WRITE_FILESYNC;
        for (pg = 0; pg < npages; pg++) {
-               if (!NBPGDIRTY(bp, pg))
+               if (!NBPGDIRTY(bp, pg)) {
                        continue;
+               }
                count = 1;
-               while (((pg + count) < npages) && NBPGDIRTY(bp, pg + count))
+               while (((pg + count) < npages) && NBPGDIRTY(bp, pg + count)) {
                        count++;
+               }
                /* write count pages starting with page pg */
                off = pg * PAGE_SIZE;
                len = count * PAGE_SIZE;
                /* clip writes to EOF */
-               if (NBOFF(bp) + off + len > (off_t) np->n_size)
+               if (NBOFF(bp) + off + len > (off_t) np->n_size) {
                        len -= (NBOFF(bp) + off + len) - np->n_size;
+               }
                if (len > 0) {
                        iomode2 = iomode;
-                       io.iov_len = len;
-                       uio_uio_resid_set(&uio, io.iov_len);
-                       uio.uio_offset = NBOFF(bp) + off;
-                       io.iov_base = (uintptr_t) bp->nb_data + off;
-                       error = nfs_write_rpc2(np, &uio, thd, cred, &iomode2, &bp->nb_verf);
-                       if (error)
+                       uio_reset(auio, NBOFF(bp) + off, UIO_SYSSPACE, UIO_WRITE);
+                       uio_addiov(auio, CAST_USER_ADDR_T(bp->nb_data + off), len);
+                       error = nfs_write_rpc2(np, auio, thd, cred, &iomode2, &bp->nb_verf);
+                       if (error) {
                                break;
-                       if (iomode2 < commit) /* Retain the lowest commitment level returned. */
+                       }
+                       if (iomode2 < commit) { /* Retain the lowest commitment level returned. */
                                commit = iomode2;
+                       }
                        if ((commit != NFS_WRITE_FILESYNC) && (wverf != bp->nb_verf)) {
                                /* verifier changed, redo all the writes filesync */
                                iomode = NFS_WRITE_FILESYNC;
@@ -2754,15 +2918,16 @@ again:
                }
                /* clear dirty bits */
                while (count--) {
-                       dirty &= ~(1 << pg);
-                       if (count) /* leave pg on last page */
+                       NBPGS_UNSET(&dirty, pg);
+                       if (count) { /* leave pg on last page */
                                pg++;
+                       }
                }
        }
        CLR(bp->nb_flags, NB_WRITEINPROG);
 
        if (!error && (commit != NFS_WRITE_FILESYNC)) {
-               error = nmp->nm_funcs->nf_commit_rpc(np, NBOFF(bp), bp->nb_bufsize, cred);
+               error = nmp->nm_funcs->nf_commit_rpc(np, NBOFF(bp), bp->nb_bufsize, cred, wverf);
                if (error == NFSERR_STALEWRITEVERF) {
                        /* verifier changed, so we need to restart all the writes */
                        iomode = NFS_WRITE_FILESYNC;
@@ -2770,12 +2935,12 @@ again:
                }
        }
        if (!error) {
-               bp->nb_dirty = dirty;
+               NBPGS_COPY(&bp->nb_dirty, &dirty);
        } else {
                SET(bp->nb_flags, NB_ERROR);
                bp->nb_error = error;
        }
-       return (error);
+       return error;
 }
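
The loop in nfs_buf_write_dirty_pages() coalesces adjacent dirty pages
into a single write and clips each run to EOF.  A self-contained sketch
of the same walk, restructured slightly for clarity; sketch_pg_dirty()
and sketch_write() are hypothetical stand-ins.

#define SK_PAGE_SIZE 4096

extern int sketch_pg_dirty(int pg);                     /* hypothetical */
extern int sketch_write(long long off, long long len);  /* hypothetical */

static int
sketch_write_dirty_runs(long long bufoff, int npages, long long file_size)
{
	for (int pg = 0; pg < npages; pg++) {
		if (!sketch_pg_dirty(pg)) {
			continue;
		}
		/* extend the run over all following dirty pages */
		int count = 1;
		while ((pg + count) < npages && sketch_pg_dirty(pg + count)) {
			count++;
		}
		long long off = (long long)pg * SK_PAGE_SIZE;
		long long len = (long long)count * SK_PAGE_SIZE;
		/* clip the run so we never write past EOF */
		if (bufoff + off + len > file_size) {
			len = file_size - (bufoff + off);
		}
		if (len > 0) {
			int error = sketch_write(bufoff + off, len);
			if (error) {
				return error;
			}
		}
		pg += count - 1;        /* the for-loop increment skips past the run */
	}
	return 0;
}
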
 
 /*
@@ -2787,18 +2952,21 @@ nfs_buf_write_rpc(struct nfsbuf *bp, int iomode, thread_t thd, kauth_cred_t cred
        struct nfsmount *nmp;
        nfsnode_t np = bp->nb_np;
        int error = 0, nfsvers, async;
-       int offset, length, nmwsize, nrpcs, len;
+       int64_t nrpcs;
+       size_t len;
+       uint32_t nmwsize;
        struct nfsreq *req;
        struct nfsreq_cbinfo cb;
-       struct uio uio;
-       struct iovec_32 io;
+       uio_t auio;
+       char uio_buf[UIO_SIZEOF(1)];
+       off_t offset, length;
 
        nmp = NFSTONMP(np);
-       if (!nmp) {
+       if (nfs_mount_gone(nmp)) {
                bp->nb_error = error = ENXIO;
                SET(bp->nb_flags, NB_ERROR);
                nfs_buf_iodone(bp);
-               return (error);
+               return error;
        }
        nfsvers = nmp->nm_vers;
        nmwsize = nmp->nm_wsize;
@@ -2816,21 +2984,23 @@ nfs_buf_write_rpc(struct nfsbuf *bp, int iomode, thread_t thd, kauth_cred_t cred
                bp->nb_error = error = EFBIG;
                SET(bp->nb_flags, NB_ERROR);
                nfs_buf_iodone(bp);
-               return (error);
+               return error;
        }
 
-       uio.uio_iovs.iov32p = &io;
-       uio.uio_iovcnt = 1;
-       uio.uio_rw = UIO_WRITE;
-#if 1   /* LP64todo - can't use new segment flags until the drivers are ready */
-       uio.uio_segflg = UIO_SYSSPACE;
+       if (length == 0) {
+               /* We should never get here  */
+#if DEVELOPMENT
+               printf("nfs_buf_write_rpc: Got request with zero length. np %p, bp %p, offset %lld\n", np, bp, offset);
 #else
-       uio.uio_segflg = UIO_SYSSPACE32;
-#endif
-       io.iov_len = length;
-       uio_uio_resid_set(&uio, io.iov_len);
-       uio.uio_offset = NBOFF(bp) + offset;
-       io.iov_base = (uintptr_t) bp->nb_data + offset;
+               printf("nfs_buf_write_rpc: Got request with zero length.\n");
+#endif /* DEVELOPMENT */
+               nfs_buf_iodone(bp);
+               return 0;
+       }
+
+       auio = uio_createwithbuffer(1, NBOFF(bp) + offset, UIO_SYSSPACE,
+           UIO_WRITE, &uio_buf, sizeof(uio_buf));
+       NFS_UIO_ADDIOV(auio, CAST_USER_ADDR_T(bp->nb_data + offset), length);
 
        bp->nb_rpcs = nrpcs = (length + nmwsize - 1) / nmwsize;
        if (async && (nrpcs > 1)) {
@@ -2844,23 +3014,31 @@ nfs_buf_write_rpc(struct nfsbuf *bp, int iomode, thread_t thd, kauth_cred_t cred
                        error = bp->nb_error;
                        break;
                }
-               len = (length > nmwsize) ? nmwsize : length;
-               cb.rcb_args[0] = offset;
-               cb.rcb_args[1] = len;
-               if (async && ((error = nfs_async_write_start(nmp))))
+               len = (length > nmwsize) ? nmwsize : (uint32_t)length;
+               cb.rcb_args.offset = offset;
+               cb.rcb_args.length = len;
+#if CONFIG_NFS4
+               if (nmp->nm_vers >= NFS_VER4) {
+                       cb.rcb_args.stategenid = nmp->nm_stategenid;
+               }
+#endif
+               if (async && ((error = nfs_async_write_start(nmp)))) {
                        break;
+               }
                req = NULL;
-               error = nmp->nm_funcs->nf_write_rpc_async(np, &uio, len, thd, cred,
-                               iomode, &cb, &req);
+               error = nmp->nm_funcs->nf_write_rpc_async(np, auio, len, thd, cred,
+                   iomode, &cb, &req);
                if (error) {
-                       if (async)
+                       if (async) {
                                nfs_async_write_done(nmp);
+                       }
                        break;
                }
                offset += len;
                length -= len;
-               if (async)
+               if (async) {
                        continue;
+               }
                nfs_buf_write_rpc_finish(req);
        }
 
@@ -2881,17 +3059,22 @@ nfs_buf_write_rpc(struct nfsbuf *bp, int iomode, thread_t thd, kauth_cred_t cred
                                nfs_buf_write_finish(bp, thd, cred);
                        } else {
                                /* wait for the last RPC to mark it done */
-                               while (bp->nb_rpcs > 0)
+                               while (bp->nb_rpcs > 0) {
                                        msleep(&bp->nb_rpcs, nfs_buf_mutex, 0,
-                                               "nfs_buf_write_rpc_cancel", NULL);
+                                           "nfs_buf_write_rpc_cancel", NULL);
+                               }
                                lck_mtx_unlock(nfs_buf_mutex);
                        }
                } else {
                        nfs_buf_write_finish(bp, thd, cred);
                }
+               /* It may have just been an interrupt... that's OK */
+               if (!ISSET(bp->nb_flags, NB_ERROR)) {
+                       error = 0;
+               }
        }
 
-       return (error);
+       return error;
 }
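
nfs_buf_write_rpc() splits a dirty range into nm_wsize-sized WRITE RPCs;
the RPC count is a ceiling division and each iteration peels off the
next slice.  A sketch of just that arithmetic, with a hypothetical
sketch_send_write() standing in for nf_write_rpc_async():

#include <stdint.h>

extern int sketch_send_write(int64_t off, uint32_t len);  /* hypothetical */

static int
sketch_split_writes(int64_t offset, int64_t length, uint32_t wsize)
{
	/* matches bp->nb_rpcs = (length + nmwsize - 1) / nmwsize above */
	int64_t nrpcs = (length + wsize - 1) / wsize;
	(void)nrpcs;    /* the real code uses this to detect the last reply */

	while (length > 0) {
		uint32_t len = (length > wsize) ? wsize : (uint32_t)length;
		int error = sketch_send_write(offset, len);
		if (error) {
			return error;
		}
		offset += len;
		length -= len;
	}
	return 0;
}
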
 
 /*
@@ -2900,10 +3083,11 @@ nfs_buf_write_rpc(struct nfsbuf *bp, int iomode, thread_t thd, kauth_cred_t cred
 void
 nfs_buf_write_rpc_finish(struct nfsreq *req)
 {
-       int error = 0, nfsvers, offset, length, multasyncrpc, finished;
+       int error = 0, nfsvers, multasyncrpc, finished;
        int committed = NFS_WRITE_FILESYNC;
        uint64_t wverf = 0;
-       size_t rlen;
+       off_t offset;
+       size_t rlen, length;
        void *wakeme = NULL;
        struct nfsreq_cbinfo cb;
        struct nfsreq *wreq = NULL;
@@ -2912,20 +3096,24 @@ nfs_buf_write_rpc_finish(struct nfsreq *req)
        nfsnode_t np;
        thread_t thd;
        kauth_cred_t cred;
-       struct uio uio;
-       struct iovec_32 io;
+       uio_t auio;
+       char uio_buf[UIO_SIZEOF(1)];
 
 finish:
        np = req->r_np;
        thd = req->r_thread;
        cred = req->r_cred;
-       if (IS_VALID_CRED(cred))
+       if (IS_VALID_CRED(cred)) {
                kauth_cred_ref(cred);
+       }
        cb = req->r_callback;
        bp = cb.rcb_bp;
+       if (cb.rcb_func) { /* take an extra reference on the nfsreq in case we want to resend it later due to grace error */
+               nfs_request_ref(req, 0);
+       }
 
        nmp = NFSTONMP(np);
-       if (!nmp) {
+       if (nfs_mount_gone(nmp)) {
                SET(bp->nb_flags, NB_ERROR);
                bp->nb_error = error = ENXIO;
        }
@@ -2936,24 +3124,76 @@ finish:
        }
        nfsvers = nmp->nm_vers;
 
-       offset = cb.rcb_args[0];
-       rlen = length = cb.rcb_args[1];
+       offset = cb.rcb_args.offset;
+       rlen = length = cb.rcb_args.length;
 
        /* finish the RPC */
        error = nmp->nm_funcs->nf_write_rpc_async_finish(np, req, &committed, &rlen, &wverf);
        if ((error == EINPROGRESS) && cb.rcb_func) {
                /* async request restarted */
-               if (IS_VALID_CRED(cred))
+               if (cb.rcb_func) {
+                       nfs_request_rele(req);
+               }
+               if (IS_VALID_CRED(cred)) {
                        kauth_cred_unref(&cred);
+               }
                return;
        }
-
+#if CONFIG_NFS4
+       if ((nmp->nm_vers >= NFS_VER4) && nfs_mount_state_error_should_restart(error) && !ISSET(bp->nb_flags, NB_ERROR)) {
+               lck_mtx_lock(&nmp->nm_lock);
+               if ((error != NFSERR_OLD_STATEID) && (error != NFSERR_GRACE) && (cb.rcb_args.stategenid == nmp->nm_stategenid)) {
+                       NP(np, "nfs_buf_write_rpc_finish: error %d @ 0x%llx, 0x%x 0x%x, initiating recovery",
+                           error, NBOFF(bp) + offset, cb.rcb_args.stategenid, nmp->nm_stategenid);
+                       nfs_need_recover(nmp, error);
+               }
+               lck_mtx_unlock(&nmp->nm_lock);
+               if (np->n_flag & NREVOKE) {
+                       error = EIO;
+               } else {
+                       if (error == NFSERR_GRACE) {
+                               if (cb.rcb_func) {
+                                       /*
+                                        * For an async I/O request, handle a grace delay just like
+                                        * jukebox errors.  Set the resend time and queue it up.
+                                        */
+                                       struct timeval now;
+                                       if (req->r_nmrep.nmc_mhead) {
+                                               mbuf_freem(req->r_nmrep.nmc_mhead);
+                                               req->r_nmrep.nmc_mhead = NULL;
+                                       }
+                                       req->r_error = 0;
+                                       microuptime(&now);
+                                       lck_mtx_lock(&req->r_mtx);
+                                       req->r_resendtime = now.tv_sec + 2;
+                                       req->r_xid = 0;                 // get a new XID
+                                       req->r_flags |= R_RESTART;
+                                       req->r_start = 0;
+                                       nfs_asyncio_resend(req);
+                                       lck_mtx_unlock(&req->r_mtx);
+                                       if (IS_VALID_CRED(cred)) {
+                                               kauth_cred_unref(&cred);
+                                       }
+                                       /* Note: nfsreq reference taken will be dropped later when finished */
+                                       return;
+                               }
+                               /* otherwise, just pause a couple seconds and retry */
+                               tsleep(&nmp->nm_state, (PZERO - 1), "nfsgrace", 2 * hz);
+                       }
+                       if (!(error = nfs_mount_state_wait_for_recovery(nmp))) {
+                               rlen = 0;
+                               goto writeagain;
+                       }
+               }
+       }
+#endif
        if (error) {
                SET(bp->nb_flags, NB_ERROR);
                bp->nb_error = error;
        }
-       if (error || (nfsvers == NFS_VER2))
+       if (error || (nfsvers == NFS_VER2)) {
                goto out;
+       }
        if (rlen <= 0) {
                SET(bp->nb_flags, NB_ERROR);
                bp->nb_error = error = EIO;
@@ -2961,8 +3201,9 @@ finish:
        }
 
        /* save lowest commit level returned */
-       if (committed < bp->nb_commitlevel)
+       if (committed < bp->nb_commitlevel) {
                bp->nb_commitlevel = committed;
+       }
 
        /* check the write verifier */
        if (!bp->nb_verf) {
@@ -2974,6 +3215,10 @@ finish:
                bp->nb_verf = wverf;
        }
 
+       if (!ISSET(bp->nb_flags, NB_STALEWVERF) && rlen > 0 && (bp->nb_offio < (offset + (int)rlen))) {
+               bp->nb_offio = offset + rlen;
+       }
+
        /*
         * check for a short write
         *
@@ -2981,36 +3226,38 @@ finish:
         * need to issue another write for the rest of it.
         * (Don't bother if the buffer hit an error or stale wverf.)
         */
-       if (((int)rlen < length) && !(bp->nb_flags & (NB_STALEWVERF|NB_ERROR))) {
+       if ((rlen < length) && !(bp->nb_flags & (NB_STALEWVERF | NB_ERROR))) {
+#if CONFIG_NFS4
+writeagain:
+#endif
                offset += rlen;
                length -= rlen;
 
-               uio.uio_iovs.iov32p = &io;
-               uio.uio_iovcnt = 1;
-               uio.uio_rw = UIO_WRITE;
-#if 1   /* LP64todo - can't use new segment flags until the drivers are ready */
-               uio.uio_segflg = UIO_SYSSPACE;
-#else
-               uio.uio_segflg = UIO_SYSSPACE32;
-#endif
-               io.iov_len = length;
-               uio_uio_resid_set(&uio, io.iov_len);
-               uio.uio_offset = NBOFF(bp) + offset;
-               io.iov_base = (uintptr_t) bp->nb_data + offset;
-
-               cb.rcb_args[0] = offset;
-               cb.rcb_args[1] = length;
+               auio = uio_createwithbuffer(1, NBOFF(bp) + offset, UIO_SYSSPACE,
+                   UIO_WRITE, &uio_buf, sizeof(uio_buf));
+               uio_addiov(auio, CAST_USER_ADDR_T(bp->nb_data + offset), length);
 
-               error = nmp->nm_funcs->nf_write_rpc_async(np, &uio, length, thd, cred,
-                               NFS_WRITE_FILESYNC, &cb, &wreq);
+               cb.rcb_args.offset = offset;
+               cb.rcb_args.length = length;
+#if CONFIG_NFS4
+               if (nmp->nm_vers >= NFS_VER4) {
+                       cb.rcb_args.stategenid = nmp->nm_stategenid;
+               }
+#endif
+               // XXX iomode should really match the original request
+               error = nmp->nm_funcs->nf_write_rpc_async(np, auio, length, thd, cred,
+                   NFS_WRITE_FILESYNC, &cb, &wreq);
                if (!error) {
-                       if (IS_VALID_CRED(cred))
+                       if (IS_VALID_CRED(cred)) {
                                kauth_cred_unref(&cred);
+                       }
                        if (!cb.rcb_func) {
                                /* if !async we'll need to wait for this RPC to finish */
                                req = wreq;
+                               wreq = NULL;
                                goto finish;
                        }
+                       nfs_request_rele(req);
                        /*
                         * We're done here.
                         * Outstanding RPC count is unchanged.
@@ -3023,8 +3270,10 @@ finish:
        }
 
 out:
-       if (cb.rcb_func)
+       if (cb.rcb_func) {
                nfs_async_write_done(nmp);
+               nfs_request_rele(req);
+       }
        /*
         * Decrement outstanding RPC count on buffer
         * and call nfs_buf_write_finish on last RPC.
@@ -3034,40 +3283,50 @@ out:
         * aborting a partially-initiated set of RPCs)
         */
        multasyncrpc = ISSET(bp->nb_flags, NB_MULTASYNCRPC);
-       if (multasyncrpc)
+       if (multasyncrpc) {
                lck_mtx_lock(nfs_buf_mutex);
+       }
 
        bp->nb_rpcs--;
        finished = (bp->nb_rpcs == 0);
 
-       if (multasyncrpc)
+       if (multasyncrpc) {
                lck_mtx_unlock(nfs_buf_mutex);
+       }
 
        if (finished) {
-               if (multasyncrpc)
+               if (multasyncrpc) {
                        wakeme = &bp->nb_rpcs;
+               }
                nfs_buf_write_finish(bp, thd, cred);
-               if (wakeme)
+               if (wakeme) {
                        wakeup(wakeme);
+               }
        }
 
-       if (IS_VALID_CRED(cred))
+       if (IS_VALID_CRED(cred)) {
                kauth_cred_unref(&cred);
+       }
+
+       if (cb.rcb_func && np->n_needcommitcnt >= NFS_A_LOT_OF_NEEDCOMMITS) {
+               nfs_flushcommits(np, 1);
+       }
 }
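
The "short write" logic at the end of nfs_buf_write_rpc_finish() exists
because an NFS WRITE may legally return a count smaller than requested;
the remainder is reissued (as FILESYNC in the code above) until the
range is covered.  A user-space sketch of the same loop;
sketch_write_rpc() is a hypothetical stand-in that returns the number of
bytes the server accepted, or -1 on error.

#include <sys/types.h>

extern ssize_t sketch_write_rpc(off_t off, size_t len);  /* hypothetical */

static int
sketch_write_fully(off_t offset, size_t length)
{
	while (length > 0) {
		ssize_t rlen = sketch_write_rpc(offset, length);
		if (rlen <= 0) {
			/* error, or no progress: cf. the rlen <= 0 EIO case above */
			return -1;
		}
		offset += rlen;
		length -= (size_t)rlen;
	}
	return 0;
}
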
 
 /*
- * Send commit(s) for the given node's "needcommit" buffers 
+ * Send commit(s) for the given node's "needcommit" buffers
  */
 int
 nfs_flushcommits(nfsnode_t np, int nowait)
 {
        struct nfsmount *nmp;
-       struct nfsbuf *bp;
+       struct nfsbuf *bp, *prevlbp, *lbp;
        struct nfsbuflists blist, commitlist;
-       int error = 0, retv, wcred_set, flags, dirty;
+       int error = 0, retv, wcred_set, flags;
        u_quad_t off, endoff, toff;
-       u_int32_t count;
+       uint64_t wverf, count;
        kauth_cred_t wcred = NULL;
+       nfsbufpgs dirty;
 
        FSDBG_TOP(557, np, 0, 0, 0);
 
@@ -3078,11 +3337,12 @@ nfs_flushcommits(nfsnode_t np, int nowait)
         * and the commit rpc is done.
         */
        if (!LIST_EMPTY(&np->n_dirtyblkhd)) {
-               error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE);
-               if (error)
+               error = nfs_node_lock(np);
+               if (error) {
                        goto done;
+               }
                np->n_flag |= NMODIFIED;
-               nfs_unlock(np);
+               nfs_node_unlock(np);
        }
 
        off = (u_quad_t)-1;
@@ -3091,7 +3351,7 @@ nfs_flushcommits(nfsnode_t np, int nowait)
        LIST_INIT(&commitlist);
 
        nmp = NFSTONMP(np);
-       if (!nmp) {
+       if (nfs_mount_gone(nmp)) {
                error = ENXIO;
                goto done;
        }
@@ -3101,57 +3361,34 @@ nfs_flushcommits(nfsnode_t np, int nowait)
        }
 
        flags = NBI_DIRTY;
-       if (nowait)
+       if (nowait) {
                flags |= NBI_NOWAIT;
+       }
        lck_mtx_lock(nfs_buf_mutex);
+       wverf = nmp->nm_verf;
        if (!nfs_buf_iterprepare(np, &blist, flags)) {
                while ((bp = LIST_FIRST(&blist))) {
                        LIST_REMOVE(bp, nb_vnbufs);
                        LIST_INSERT_HEAD(&np->n_dirtyblkhd, bp, nb_vnbufs);
                        error = nfs_buf_acquire(bp, NBAC_NOWAIT, 0, 0);
-                       if (error)
+                       if (error) {
                                continue;
-                       if (ISSET(bp->nb_flags, NB_NEEDCOMMIT))
+                       }
+                       if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
                                nfs_buf_check_write_verifier(np, bp);
-                       if (((bp->nb_flags & (NB_DELWRI | NB_NEEDCOMMIT))
-                               != (NB_DELWRI | NB_NEEDCOMMIT))) {
+                       }
+                       if (((bp->nb_flags & (NB_DELWRI | NB_NEEDCOMMIT)) != (NB_DELWRI | NB_NEEDCOMMIT)) ||
+                           (bp->nb_verf != wverf)) {
                                nfs_buf_drop(bp);
                                continue;
                        }
                        nfs_buf_remfree(bp);
-                       lck_mtx_unlock(nfs_buf_mutex);
-                       /*
-                        * we need a upl to see if the page has been
-                        * dirtied (think mmap) since the unstable write, and
-                        * also to prevent vm from paging it during our commit rpc
-                        */
-                       if (!ISSET(bp->nb_flags, NB_PAGELIST)) {
-                               retv = nfs_buf_upl_setup(bp);
-                               if (retv) {
-                                       /* unable to create upl */
-                                       /* vm object must no longer exist */
-                                       /* this could be fatal if we need */
-                                       /* to write the data again, we'll see...  */
-                                       printf("nfs_flushcommits: upl create failed %d\n", retv);
-                                       bp->nb_valid = bp->nb_dirty = 0;
-                               }
-                       }
-                       nfs_buf_upl_check(bp);
-                       lck_mtx_lock(nfs_buf_mutex);
+
+                       /* buffer UPLs will be grabbed *in order* below */
 
                        FSDBG(557, bp, bp->nb_flags, bp->nb_valid, bp->nb_dirty);
                        FSDBG(557, bp->nb_validoff, bp->nb_validend,
-                             bp->nb_dirtyoff, bp->nb_dirtyend);
-
-                       /*
-                        * We used to check for dirty pages here; if there were any
-                        * we'd abort the commit and force the entire buffer to be
-                        * written again.
-                        *
-                        * Instead of doing that, we now go ahead and commit the dirty
-                        * range, and then leave the buffer around with dirty pages
-                        * that will be written out later.
-                        */
+                           bp->nb_dirtyoff, bp->nb_dirtyend);
 
                        /*
                         * Work out if all buffers are using the same cred
@@ -3162,8 +3399,9 @@ nfs_flushcommits(nfsnode_t np, int nowait)
                         */
                        if (wcred_set == 0) {
                                wcred = bp->nb_wcred;
-                               if (!IS_VALID_CRED(wcred))
+                               if (!IS_VALID_CRED(wcred)) {
                                        panic("nfs: needcommit w/out wcred");
+                               }
                                wcred_set = 1;
                        } else if ((wcred_set == 1) && wcred != bp->nb_wcred) {
                                wcred_set = -1;
@@ -3171,20 +3409,33 @@ nfs_flushcommits(nfsnode_t np, int nowait)
                        SET(bp->nb_flags, NB_WRITEINPROG);
 
                        /*
-                        * A list of these buffers is kept so that the
-                        * second loop knows which buffers have actually
-                        * been committed. This is necessary, since there
-                        * may be a race between the commit rpc and new
-                        * uncommitted writes on the file.
+                        * Add this buffer to the list of buffers we are committing.
+                        * Buffers are inserted into the list in ascending order so that
+                        * we can take the UPLs in order after the list is complete.
                         */
+                       prevlbp = NULL;
+                       LIST_FOREACH(lbp, &commitlist, nb_vnbufs) {
+                               if (bp->nb_lblkno < lbp->nb_lblkno) {
+                                       break;
+                               }
+                               prevlbp = lbp;
+                       }
                        LIST_REMOVE(bp, nb_vnbufs);
-                       LIST_INSERT_HEAD(&commitlist, bp, nb_vnbufs);
+                       if (prevlbp) {
+                               LIST_INSERT_AFTER(prevlbp, bp, nb_vnbufs);
+                       } else {
+                               LIST_INSERT_HEAD(&commitlist, bp, nb_vnbufs);
+                       }
+
+                       /* update commit range start, end */
                        toff = NBOFF(bp) + bp->nb_dirtyoff;
-                       if (toff < off)
+                       if (toff < off) {
                                off = toff;
+                       }
                        toff += (u_quad_t)(bp->nb_dirtyend - bp->nb_dirtyoff);
-                       if (toff > endoff)
+                       if (toff > endoff) {
                                endoff = toff;
+                       }
                }
                nfs_buf_itercomplete(np, &blist, NBI_DIRTY);
        }
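
The ordered insert above keeps commitlist sorted by logical block number
so the buffer UPLs can be taken in ascending order afterwards.  A
self-contained sketch using the same <sys/queue.h> LIST macros; the
sk_* names are illustrative.

#include <sys/queue.h>
#include <stdint.h>

struct sk_cbuf {
	uint64_t lblkno;                /* logical block number */
	LIST_ENTRY(sk_cbuf) vnbufs;
};
LIST_HEAD(sk_cbuflist, sk_cbuf);

static void
sketch_insert_sorted(struct sk_cbuflist *list, struct sk_cbuf *bp)
{
	struct sk_cbuf *lbp, *prevlbp = NULL;

	/* find the last entry with a smaller (or equal) block number */
	LIST_FOREACH(lbp, list, vnbufs) {
		if (bp->lblkno < lbp->lblkno) {
			break;
		}
		prevlbp = lbp;
	}
	if (prevlbp) {
		LIST_INSERT_AFTER(prevlbp, bp, vnbufs);
	} else {
		LIST_INSERT_HEAD(list, bp, vnbufs);
	}
}
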
@@ -3195,6 +3446,29 @@ nfs_flushcommits(nfsnode_t np, int nowait)
                goto done;
        }
 
+       /*
+        * We need a UPL to prevent others from accessing the buffers during
+        * our commit RPC(s).
+        *
+        * We used to also check for dirty pages here; if there were any we'd
+        * abort the commit and force the entire buffer to be written again.
+        * Instead of doing that, we just go ahead and commit the dirty range,
+        * and then leave the buffer around with dirty pages that will be
+        * written out later.
+        */
+       LIST_FOREACH(bp, &commitlist, nb_vnbufs) {
+               if (!ISSET(bp->nb_flags, NB_PAGELIST)) {
+                       retv = nfs_buf_upl_setup(bp);
+                       if (retv) {
+                               /* Unable to create the UPL, the VM object probably no longer exists. */
+                               printf("nfs_flushcommits: upl create failed %d\n", retv);
+                               NBPGS_ERASE(&bp->nb_valid);
+                               NBPGS_ERASE(&bp->nb_dirty);
+                       }
+               }
+               nfs_buf_upl_check(bp);
+       }
+
        /*
         * Commit data on the server, as required.
         * If all bufs are using the same wcred, then use that with
@@ -3206,19 +3480,21 @@ nfs_flushcommits(nfsnode_t np, int nowait)
                 * Note, it's possible the commit range could be >2^32-1.
                 * If it is, we'll send one commit that covers the whole file.
                 */
-               if ((endoff - off) > 0xffffffff)
+               if ((endoff - off) > 0xffffffff) {
                        count = 0;
-               else
+               } else {
                        count = (endoff - off);
-               retv = nmp->nm_funcs->nf_commit_rpc(np, off, count, wcred);
+               }
+               retv = nmp->nm_funcs->nf_commit_rpc(np, off, count, wcred, wverf);
        } else {
                retv = 0;
                LIST_FOREACH(bp, &commitlist, nb_vnbufs) {
                        toff = NBOFF(bp) + bp->nb_dirtyoff;
                        count = bp->nb_dirtyend - bp->nb_dirtyoff;
-                       retv = nmp->nm_funcs->nf_commit_rpc(np, toff, count, bp->nb_wcred);
-                       if (retv)
+                       retv = nmp->nm_funcs->nf_commit_rpc(np, toff, count, bp->nb_wcred, wverf);
+                       if (retv) {
                                break;
+                       }
                }
        }
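
The clamping above reflects the NFSv3 COMMIT arguments: the count field
is 32 bits wide and a count of zero means "commit from offset to the end
of the file" (RFC 1813), so a range wider than 0xffffffff collapses to
count 0.  A sketch of that computation:

#include <stdint.h>

static void
sketch_commit_args(uint64_t off, uint64_t endoff,
    uint64_t *commit_off, uint32_t *commit_count)
{
	*commit_off = off;
	if ((endoff - off) > 0xffffffffULL) {
		*commit_count = 0;      /* 0 == commit through end of file */
	} else {
		*commit_count = (uint32_t)(endoff - off);
	}
}
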
 
@@ -3230,11 +3506,11 @@ nfs_flushcommits(nfsnode_t np, int nowait)
        while ((bp = LIST_FIRST(&commitlist))) {
                LIST_REMOVE(bp, nb_vnbufs);
                FSDBG(557, bp, retv, bp->nb_flags, bp->nb_dirty);
-               nfs_lock(np, NFS_NODE_LOCK_FORCE);
+               nfs_node_lock_force(np);
                CLR(bp->nb_flags, (NB_NEEDCOMMIT | NB_WRITEINPROG));
                np->n_needcommitcnt--;
                CHECK_NEEDCOMMITCNT(np);
-               nfs_unlock(np);
+               nfs_node_unlock(np);
 
                if (retv) {
                        /* move back to dirty list */
@@ -3245,6 +3521,9 @@ nfs_flushcommits(nfsnode_t np, int nowait)
                        continue;
                }
 
+               nfs_node_lock_force(np);
+               np->n_numoutput++;
+               nfs_node_unlock(np);
                vnode_startwrite(NFSTOV(np));
                if (ISSET(bp->nb_flags, NB_DELWRI)) {
                        lck_mtx_lock(nfs_buf_mutex);
@@ -3253,13 +3532,15 @@ nfs_flushcommits(nfsnode_t np, int nowait)
                        lck_mtx_unlock(nfs_buf_mutex);
                        wakeup(&nfs_nbdwrite);
                }
-               CLR(bp->nb_flags, (NB_READ|NB_DONE|NB_ERROR|NB_DELWRI));
+               CLR(bp->nb_flags, (NB_READ | NB_DONE | NB_ERROR | NB_DELWRI));
                /* if block still has dirty pages, we don't want it to */
                /* be released in nfs_buf_iodone().  So, don't set NB_ASYNC. */
-               if (!(dirty = bp->nb_dirty))
+               NBPGS_COPY(&dirty, &bp->nb_dirty);
+               if (!nfs_buf_pgs_is_set(&dirty)) {
                        SET(bp->nb_flags, NB_ASYNC);
-               else
+               } else {
                        CLR(bp->nb_flags, NB_ASYNC);
+               }
 
                /* move to clean list */
                lck_mtx_lock(nfs_buf_mutex);
@@ -3269,7 +3550,7 @@ nfs_flushcommits(nfsnode_t np, int nowait)
                bp->nb_dirtyoff = bp->nb_dirtyend = 0;
 
                nfs_buf_iodone(bp);
-               if (dirty) {
+               if (nfs_buf_pgs_is_set(&dirty)) {
                        /* throw it back in as a delayed write buffer */
                        CLR(bp->nb_flags, NB_DONE);
                        nfs_buf_write_delayed(bp);
@@ -3278,12 +3559,12 @@ nfs_flushcommits(nfsnode_t np, int nowait)
 
 done:
        FSDBG_BOT(557, np, 0, 0, error);
-       return (error);
+       return error;
 }
 
 /*
  * Flush all the blocks associated with a vnode.
- *     Walk through the buffer pool and push any dirty pages
+ *      Walk through the buffer pool and push any dirty pages
  *     associated with the vnode.
  */
 int
@@ -3297,25 +3578,27 @@ nfs_flush(nfsnode_t np, int waitfor, thread_t thd, int ignore_writeerr)
 
        FSDBG_TOP(517, np, waitfor, ignore_writeerr, 0);
 
-       if (!nmp) {
+       if (nfs_mount_gone(nmp)) {
                error = ENXIO;
                goto out;
        }
        nfsvers = nmp->nm_vers;
-       if (nmp->nm_flag & NFSMNT_INT)
+       if (NMFLAG(nmp, INTR)) {
                slpflag = PCATCH;
+       }
 
        if (!LIST_EMPTY(&np->n_dirtyblkhd)) {
-               nfs_lock(np, NFS_NODE_LOCK_FORCE);
+               nfs_node_lock_force(np);
                np->n_flag |= NMODIFIED;
-               nfs_unlock(np);
+               nfs_node_unlock(np);
        }
 
        lck_mtx_lock(nfs_buf_mutex);
        while (np->n_bflag & NBFLUSHINPROG) {
                np->n_bflag |= NBFLUSHWANT;
                error = msleep(&np->n_bflag, nfs_buf_mutex, slpflag, "nfs_flush", NULL);
-               if (error) {
+               if ((error && (error != EWOULDBLOCK)) ||
+                   ((error = nfs_sigintr(NFSTONMP(np), NULL, thd, 0)))) {
                        lck_mtx_unlock(nfs_buf_mutex);
                        goto out;
                }
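
The NBFLUSHINPROG/NBFLUSHWANT dance above is a classic single-owner
gate: one flusher runs per node, later callers mark that they are
waiting and sleep until the owner clears the in-progress flag and wakes
them (the clearing and wakeup happen at the done: label further down).
The kernel uses msleep()/wakeup() under nfs_buf_mutex; a user-space
analogue with pthreads (illustrative only) looks like this:

#include <pthread.h>

struct sk_node {
	pthread_mutex_t mtx;
	pthread_cond_t cv;
	int flush_inprog;       /* cf. NBFLUSHINPROG */
	int flush_want;         /* cf. NBFLUSHWANT */
};

static void
sketch_flush_enter(struct sk_node *np)
{
	pthread_mutex_lock(&np->mtx);
	while (np->flush_inprog) {
		np->flush_want = 1;
		pthread_cond_wait(&np->cv, &np->mtx);
	}
	np->flush_inprog = 1;
	pthread_mutex_unlock(&np->mtx);
}

static void
sketch_flush_exit(struct sk_node *np)
{
	pthread_mutex_lock(&np->mtx);
	np->flush_inprog = 0;
	if (np->flush_want) {
		np->flush_want = 0;
		pthread_cond_broadcast(&np->cv);        /* cf. wakeup(&np->n_bflag) */
	}
	pthread_mutex_unlock(&np->mtx);
}
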
@@ -3342,18 +3625,21 @@ again:
                while ((bp = LIST_FIRST(&blist))) {
                        LIST_REMOVE(bp, nb_vnbufs);
                        LIST_INSERT_HEAD(&np->n_dirtyblkhd, bp, nb_vnbufs);
-                       flags = (passone || (waitfor != MNT_WAIT)) ? NBAC_NOWAIT : 0;
-                       if (flags != NBAC_NOWAIT)
+                       flags = (passone || !(waitfor == MNT_WAIT || waitfor == MNT_DWAIT)) ? NBAC_NOWAIT : 0;
+                       if (flags != NBAC_NOWAIT) {
                                nfs_buf_refget(bp);
+                       }
                        while ((error = nfs_buf_acquire(bp, flags, slpflag, slptimeo))) {
                                FSDBG(524, bp, flags, bp->nb_lflags, bp->nb_flags);
-                               if (error == EBUSY)
+                               if (error == EBUSY) {
                                        break;
+                               }
                                if (error) {
                                        error2 = nfs_sigintr(NFSTONMP(np), NULL, thd, 0);
                                        if (error2) {
-                                               if (flags != NBAC_NOWAIT)
+                                               if (flags != NBAC_NOWAIT) {
                                                        nfs_buf_refrele(bp);
+                                               }
                                                nfs_buf_itercomplete(np, &blist, NBI_DIRTY);
                                                lck_mtx_unlock(nfs_buf_mutex);
                                                error = error2;
@@ -3365,24 +3651,27 @@ again:
                                        }
                                }
                        }
-                       if (flags != NBAC_NOWAIT)
+                       if (flags != NBAC_NOWAIT) {
                                nfs_buf_refrele(bp);
-                       if (error == EBUSY)
+                       }
+                       if (error == EBUSY) {
                                continue;
+                       }
                        if (!bp->nb_np) {
                                /* buffer is no longer valid */
                                nfs_buf_drop(bp);
                                continue;
                        }
-                       if (ISSET(bp->nb_flags, NB_NEEDCOMMIT))
+                       if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
                                nfs_buf_check_write_verifier(np, bp);
+                       }
                        if (!ISSET(bp->nb_flags, NB_DELWRI)) {
                                /* buffer is no longer dirty */
                                nfs_buf_drop(bp);
                                continue;
                        }
                        FSDBG(525, bp, passone, bp->nb_lflags, bp->nb_flags);
-                       if ((passone || (waitfor != MNT_WAIT)) &&
+                       if ((passone || !(waitfor == MNT_WAIT || waitfor == MNT_DWAIT)) &&
                            ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
                                nfs_buf_drop(bp);
                                continue;
@@ -3390,10 +3679,10 @@ again:
                        nfs_buf_remfree(bp);
                        lck_mtx_unlock(nfs_buf_mutex);
                        if (ISSET(bp->nb_flags, NB_ERROR)) {
-                               nfs_lock(np, NFS_NODE_LOCK_FORCE);
+                               nfs_node_lock_force(np);
                                np->n_error = bp->nb_error ? bp->nb_error : EIO;
                                np->n_flag |= NWRITEERR;
-                               nfs_unlock(np);
+                               nfs_node_unlock(np);
                                nfs_buf_release(bp, 1);
                                lck_mtx_lock(nfs_buf_mutex);
                                continue;
@@ -3410,11 +3699,11 @@ again:
        }
        lck_mtx_unlock(nfs_buf_mutex);
 
-       if (waitfor == MNT_WAIT) {
-               while ((error = vnode_waitforwrites(NFSTOV(np), 0, slpflag, slptimeo, "nfsflush"))) {
-                       error2 = nfs_sigintr(NFSTONMP(np), NULL, thd, 0);
+       if (waitfor == MNT_WAIT || waitfor == MNT_DWAIT) {
+               while ((error = vnode_waitforwrites(NFSTOV(np), 0, slpflag, slptimeo, "nfsflush"))) {
+                       error2 = nfs_sigintr(NFSTONMP(np), NULL, thd, 0);
                        if (error2) {
-                               error = error2;
+                               error = error2;
                                goto done;
                        }
                        if (slpflag == PCATCH) {
@@ -3427,38 +3716,56 @@ again:
        if (nfsvers != NFS_VER2) {
                /* loop while it looks like there are still buffers to be */
                /* committed and nfs_flushcommits() seems to be handling them. */
-               while (np->n_needcommitcnt)
-                       if (nfs_flushcommits(np, 0))
+               while (np->n_needcommitcnt) {
+                       if (nfs_flushcommits(np, 0)) {
                                break;
+                       }
+               }
        }
 
        if (passone) {
                passone = 0;
                if (!LIST_EMPTY(&np->n_dirtyblkhd)) {
-                       nfs_lock(np, NFS_NODE_LOCK_FORCE);
+                       nfs_node_lock_force(np);
                        np->n_flag |= NMODIFIED;
-                       nfs_unlock(np);
+                       nfs_node_unlock(np);
                }
                lck_mtx_lock(nfs_buf_mutex);
                goto again;
        }
 
-       if (waitfor == MNT_WAIT) {
+       if (waitfor == MNT_WAIT || waitfor == MNT_DWAIT) {
                if (!LIST_EMPTY(&np->n_dirtyblkhd)) {
-                       nfs_lock(np, NFS_NODE_LOCK_FORCE);
+                       nfs_node_lock_force(np);
                        np->n_flag |= NMODIFIED;
-                       nfs_unlock(np);
+                       nfs_node_unlock(np);
                }
                lck_mtx_lock(nfs_buf_mutex);
-               if (!LIST_EMPTY(&np->n_dirtyblkhd))
+               if (!LIST_EMPTY(&np->n_dirtyblkhd)) {
                        goto again;
+               }
                lck_mtx_unlock(nfs_buf_mutex);
-               nfs_lock(np, NFS_NODE_LOCK_FORCE);
-               /* if we have no dirty blocks, we can clear the modified flag */
-               if (!np->n_wrbusy)
+               nfs_node_lock_force(np);
+               /*
+                * OK, it looks like there are no dirty blocks.  If we have no
+                * writes in flight and no one in the write code, we can clear
+                * the modified flag.  In order to make sure we see the latest
+                * attributes and size, we also invalidate the attributes and
+                * advance the attribute cache XID to guarantee that attributes
+                * newer than our clearing of NMODIFIED will get loaded next.
+                * (If we don't do this, it's possible for the flush's final
+                * write/commit (xid1) to be executed in parallel with a subsequent
+                * getattr request (xid2).  The getattr could return attributes
+                * from *before* the write/commit completed but the stale attributes
+                * would be preferred because of the xid ordering.)
+                */
+               if (!np->n_wrbusy && !np->n_numoutput) {
                        np->n_flag &= ~NMODIFIED;
+                       NATTRINVALIDATE(np);
+                       nfs_get_xid(&np->n_xid);
+               }
        } else {
-               nfs_lock(np, NFS_NODE_LOCK_FORCE);
+               nfs_node_lock_force(np);
        }
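
The comment above leans on the attribute cache's XID ordering: cached
attributes remember the transaction ID of the reply that produced them,
and only a strictly newer reply may update the cache.  Advancing n_xid
after clearing NMODIFIED therefore guarantees the next attributes come
from a request issued after the flush.  A hedged sketch of that rule;
the field names are illustrative, not the xnu nfsnode layout.

#include <stdint.h>

struct sk_attrcache {
	uint64_t xid;   /* XID of the reply these attributes came from */
	/* ... cached attributes would live here ... */
};

static int
sketch_attrcache_update(struct sk_attrcache *cache, uint64_t reply_xid)
{
	if (reply_xid <= cache->xid) {
		return 0;       /* older (or equal) reply: keep what we have */
	}
	cache->xid = reply_xid;
	/* ... copy the new attributes into the cache ... */
	return 1;
}
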
 
        FSDBG(526, np->n_flag, np->n_error, 0, 0);
@@ -3466,24 +3773,25 @@ again:
                error = np->n_error;
                np->n_flag &= ~NWRITEERR;
        }
-       nfs_unlock(np);
+       nfs_node_unlock(np);
 done:
        lck_mtx_lock(nfs_buf_mutex);
        flags = np->n_bflag;
-       np->n_bflag &= ~(NBFLUSHINPROG|NBFLUSHWANT);
+       np->n_bflag &= ~(NBFLUSHINPROG | NBFLUSHWANT);
        lck_mtx_unlock(nfs_buf_mutex);
-       if (flags & NBFLUSHWANT)
+       if (flags & NBFLUSHWANT) {
                wakeup(&np->n_bflag);
+       }
 out:
        FSDBG_BOT(517, np, error, ignore_writeerr, 0);
-       return (error);
+       return error;
 }
 
 /*
  * Flush out and invalidate all buffers associated with a vnode.
  * Called with the underlying object locked.
  */
-static int
+int
 nfs_vinvalbuf_internal(
        nfsnode_t np,
        int flags,
@@ -3497,8 +3805,9 @@ nfs_vinvalbuf_internal(
        int list, error = 0;
 
        if (flags & V_SAVE) {
-               if ((error = nfs_flush(np, MNT_WAIT, thd, (flags & V_IGNORE_WRITEERR))))
-                       return (error);
+               if ((error = nfs_flush(np, MNT_WAIT, thd, (flags & V_IGNORE_WRITEERR)))) {
+                       return error;
+               }
        }
 
        lck_mtx_lock(nfs_buf_mutex);
@@ -3506,15 +3815,17 @@ nfs_vinvalbuf_internal(
                list = NBI_CLEAN;
                if (nfs_buf_iterprepare(np, &blist, list)) {
                        list = NBI_DIRTY;
-                       if (nfs_buf_iterprepare(np, &blist, list))
+                       if (nfs_buf_iterprepare(np, &blist, list)) {
                                break;
+                       }
                }
                while ((bp = LIST_FIRST(&blist))) {
                        LIST_REMOVE(bp, nb_vnbufs);
-                       if (list == NBI_CLEAN)
+                       if (list == NBI_CLEAN) {
                                LIST_INSERT_HEAD(&np->n_cleanblkhd, bp, nb_vnbufs);
-                       else
+                       } else {
                                LIST_INSERT_HEAD(&np->n_dirtyblkhd, bp, nb_vnbufs);
+                       }
                        nfs_buf_refget(bp);
                        while ((error = nfs_buf_acquire(bp, NBAC_REMOVE, slpflag, slptimeo))) {
                                FSDBG(556, np, bp, NBOFF(bp), bp->nb_flags);
@@ -3523,7 +3834,7 @@ nfs_vinvalbuf_internal(
                                        nfs_buf_refrele(bp);
                                        nfs_buf_itercomplete(np, &blist, list);
                                        lck_mtx_unlock(nfs_buf_mutex);
-                                       return (error);
+                                       return error;
                                }
                        }
                        nfs_buf_refrele(bp);
@@ -3533,18 +3844,21 @@ nfs_vinvalbuf_internal(
                            (NBOFF(bp) < (off_t)np->n_size)) {
                                /* extra paranoia: make sure we're not */
                                /* somehow leaving any dirty data around */
+                               nfsbufpgs pagemask;
                                int mustwrite = 0;
-                               int end = (NBOFF(bp) + bp->nb_bufsize > (off_t)np->n_size) ?
-                                   ((off_t)np->n_size - NBOFF(bp)) : bp->nb_bufsize;
+                               off_t end = (NBOFF(bp) + bp->nb_bufsize > (off_t)np->n_size) ?
+                                   (np->n_size - NBOFF(bp)) : bp->nb_bufsize;
                                if (!ISSET(bp->nb_flags, NB_PAGELIST)) {
                                        error = nfs_buf_upl_setup(bp);
                                        if (error == EINVAL) {
                                                /* vm object must no longer exist */
                                                /* hopefully we don't need to do */
                                                /* anything for this buffer */
-                                       } else if (error)
+                                       } else if (error) {
                                                printf("nfs_vinvalbuf: upl setup failed %d\n", error);
-                                       bp->nb_valid = bp->nb_dirty = 0;
+                                       }
+                                       NBPGS_ERASE(&bp->nb_valid);
+                                       NBPGS_ERASE(&bp->nb_dirty);
                                }
                                nfs_buf_upl_check(bp);
                                /* check for any dirty data before the EOF */
@@ -3552,15 +3866,19 @@ nfs_vinvalbuf_internal(
                                        /* clip dirty range to EOF */
                                        if (bp->nb_dirtyend > end) {
                                                bp->nb_dirtyend = end;
-                                               if (bp->nb_dirtyoff >= bp->nb_dirtyend)
+                                               if (bp->nb_dirtyoff >= bp->nb_dirtyend) {
                                                        bp->nb_dirtyoff = bp->nb_dirtyend = 0;
+                                               }
                                        }
-                                       if ((bp->nb_dirtyend > 0) && (bp->nb_dirtyoff < end))
+                                       if ((bp->nb_dirtyend > 0) && (bp->nb_dirtyoff < end)) {
                                                mustwrite++;
+                                       }
                                }
-                               bp->nb_dirty &= (1 << (round_page_32(end)/PAGE_SIZE)) - 1;
-                               if (bp->nb_dirty)
+                               nfs_buf_pgs_get_page_mask(&pagemask, round_page_64(end) / PAGE_SIZE);
+                               nfs_buf_pgs_bit_and(&bp->nb_dirty, &pagemask, &bp->nb_dirty);
+                               if (nfs_buf_pgs_is_set(&bp->nb_dirty)) {
                                        mustwrite++;
+                               }
                                /* also make sure we'll have a credential to do the write */
                                if (mustwrite && !IS_VALID_CRED(bp->nb_wcred) && !IS_VALID_CRED(cred)) {
                                        printf("nfs_vinvalbuf: found dirty buffer with no write creds\n");
@@ -3568,8 +3886,9 @@ nfs_vinvalbuf_internal(
                                }
                                if (mustwrite) {
                                        FSDBG(554, np, bp, 0xd00dee, bp->nb_flags);
-                                       if (!ISSET(bp->nb_flags, NB_PAGELIST))
+                                       if (!ISSET(bp->nb_flags, NB_PAGELIST)) {
                                                panic("nfs_vinvalbuf: dirty buffer without upl");
+                                       }
                                        /* gotta write out dirty data before invalidating */
                                        /* (NB_STABLE indicates that data writes should be FILESYNC) */
                                        /* (NB_NOCACHE indicates buffer should be discarded) */
@@ -3583,10 +3902,11 @@ nfs_vinvalbuf_internal(
                                        // Note: bp has been released
                                        if (error) {
                                                FSDBG(554, bp, 0xd00dee, 0xbad, error);
-                                               nfs_lock(np, NFS_NODE_LOCK_FORCE);
-                                               np->n_error = error;
-                                               np->n_flag |= NWRITEERR;
-                                               nfs_unlock(np);
+                                               nfs_node_lock_force(np);
+                                               if ((error != EINTR) && (error != ERESTART)) {
+                                                       np->n_error = error;
+                                                       np->n_flag |= NWRITEERR;
+                                               }
                                                /*
                                                 * There was a write error and we need to
                                                 * invalidate attrs to sync with server.
@@ -3594,6 +3914,18 @@ nfs_vinvalbuf_internal(
                                                 * we may no longer know the correct size)
                                                 */
                                                NATTRINVALIDATE(np);
+                                               nfs_node_unlock(np);
+                                               if ((error == EINTR) || (error == ERESTART)) {
+                                                       /*
+                                                        * Abort on EINTR.  If we don't, we could
+                                                        * be stuck in this loop forever because
+                                                        * the buffer will remain dirty.
+                                                        */
+                                                       lck_mtx_lock(nfs_buf_mutex);
+                                                       nfs_buf_itercomplete(np, &blist, list);
+                                                       lck_mtx_unlock(nfs_buf_mutex);
+                                                       return error;
+                                               }
                                                error = 0;
                                        }
                                        lck_mtx_lock(nfs_buf_mutex);
@@ -3607,16 +3939,20 @@ nfs_vinvalbuf_internal(
                }
                nfs_buf_itercomplete(np, &blist, list);
        }
-       if (!LIST_EMPTY(&(np)->n_dirtyblkhd) || !LIST_EMPTY(&(np)->n_cleanblkhd))
+       if (!LIST_EMPTY(&(np)->n_dirtyblkhd) || !LIST_EMPTY(&(np)->n_cleanblkhd)) {
                panic("nfs_vinvalbuf: flush/inval failed");
+       }
        lck_mtx_unlock(nfs_buf_mutex);
+       nfs_node_lock_force(np);
        if (!(flags & V_SAVE)) {
-               nfs_lock(np, NFS_NODE_LOCK_FORCE);
                np->n_flag &= ~NMODIFIED;
-               nfs_unlock(np);
        }
+       if (vnode_vtype(NFSTOV(np)) == VREG) {
+               np->n_lastrahead = -1;
+       }
+       nfs_node_unlock(np);
        NFS_BUF_FREEUP();
-       return (0);
+       return 0;
 }
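The nb_dirty handling in the function above is the substantive change: the old `bp->nb_dirty &= (1 << (round_page_32(end)/PAGE_SIZE)) - 1` kept the dirty-page mask in a single 32-bit word, which cannot describe a buffer of more than 32 pages, while the nfsbufpgs/NBPGS helpers operate on a multi-word bitmap. A minimal sketch of such a bitmap follows, assuming illustrative names (pgbitmap, pg_mask, pg_and, pg_any) rather than xnu's:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PG_WORDS 4                      /* room for 256 pages with 64-bit words */

typedef struct {
	uint64_t w[PG_WORDS];
} pgbitmap;

/* Build a mask with bits [0, npages) set, like nfs_buf_pgs_get_page_mask(). */
static void
pg_mask(pgbitmap *m, unsigned npages)
{
	for (unsigned i = 0; i < PG_WORDS; i++) {
		unsigned base = i * 64;
		if (npages >= base + 64) {
			m->w[i] = ~0ULL;                         /* word fully inside range */
		} else if (npages > base) {
			m->w[i] = (1ULL << (npages - base)) - 1; /* partial word */
		} else {
			m->w[i] = 0;
		}
	}
}

/* out = a & b, like nfs_buf_pgs_bit_and(). */
static void
pg_and(const pgbitmap *a, const pgbitmap *b, pgbitmap *out)
{
	for (unsigned i = 0; i < PG_WORDS; i++) {
		out->w[i] = a->w[i] & b->w[i];
	}
}

/* Any page bit set?  Like nfs_buf_pgs_is_set(). */
static bool
pg_any(const pgbitmap *m)
{
	for (unsigned i = 0; i < PG_WORDS; i++) {
		if (m->w[i] != 0) {
			return true;
		}
	}
	return false;
}

int
main(void)
{
	pgbitmap dirty = { .w = { 0, 0x8, 0, 0 } };     /* page 67 is dirty */
	pgbitmap mask;

	pg_mask(&mask, 64);            /* keep only pages before a 64-page EOF */
	pg_and(&dirty, &mask, &dirty);
	printf("dirty pages remain: %s\n", pg_any(&dirty) ? "yes" : "no");
	return 0;
}

The multi-word form keeps the same clip-to-EOF logic working for buffers larger than 32 pages; note also the matching move from round_page_32() to round_page_64() in the diff above.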
 
 
@@ -3635,13 +3971,25 @@ nfs_vinvalbuf2(vnode_t vp, int flags, thread_t thd, kauth_cred_t cred, int intrf
 {
        nfsnode_t np = VTONFS(vp);
        struct nfsmount *nmp = VTONMP(vp);
-       int error, rv, slpflag, slptimeo, nflags;
+       int error, slpflag, slptimeo, nflags, retry = 0;
+       int ubcflags = UBC_PUSHALL | UBC_SYNC | UBC_INVALIDATE;
+       struct timespec ts = { .tv_sec = 2, .tv_nsec = 0 };
        off_t size;
 
        FSDBG_TOP(554, np, flags, intrflg, 0);
 
-       if (nmp && !(nmp->nm_flag & NFSMNT_INT))
+       /*
+        * If the mount is gone, there is no sense in trying to write
+        * anything and hanging while doing I/O.
+        */
+       if (nfs_mount_gone(nmp)) {
+               flags &= ~V_SAVE;
+               ubcflags &= ~UBC_PUSHALL;
+       }
+
+       if (nmp && !NMFLAG(nmp, INTR)) {
                intrflg = 0;
+       }
        if (intrflg) {
                slpflag = PCATCH;
                slptimeo = 2 * hz;
@@ -3654,40 +4002,109 @@ nfs_vinvalbuf2(vnode_t vp, int flags, thread_t thd, kauth_cred_t cred, int intrf
        lck_mtx_lock(nfs_buf_mutex);
        while (np->n_bflag & NBINVALINPROG) {
                np->n_bflag |= NBINVALWANT;
-               error = msleep(&np->n_bflag, nfs_buf_mutex, slpflag, "nfs_vinvalbuf", NULL);
-               if (error) {
+               msleep(&np->n_bflag, nfs_buf_mutex, slpflag, "nfs_vinvalbuf", &ts);
+               if ((error = nfs_sigintr(VTONMP(vp), NULL, thd, 0))) {
                        lck_mtx_unlock(nfs_buf_mutex);
-                       return (error);
+                       return error;
+               }
+               if (np->n_bflag & NBINVALINPROG) {
+                       slpflag = 0;
                }
        }
        np->n_bflag |= NBINVALINPROG;
        lck_mtx_unlock(nfs_buf_mutex);
 
        /* Now, flush as required.  */
+again:
        error = nfs_vinvalbuf_internal(np, flags, thd, cred, slpflag, 0);
        while (error) {
                FSDBG(554, np, 0, 0, error);
-               if ((error = nfs_sigintr(VTONMP(vp), NULL, thd, 0)))
+               if ((error = nfs_sigintr(VTONMP(vp), NULL, thd, 0))) {
                        goto done;
+               }
                error = nfs_vinvalbuf_internal(np, flags, thd, cred, 0, slptimeo);
        }
 
        /* get the pages out of vm also */
-       if (UBCINFOEXISTS(vp) && (size = ubc_getsize(vp)))
-               if (!(rv = ubc_sync_range(vp, 0, size, UBC_PUSHALL | UBC_SYNC | UBC_INVALIDATE)))
-                       panic("nfs_vinvalbuf(): ubc_sync_range failed!");
+       if (UBCINFOEXISTS(vp) && (size = ubc_getsize(vp))) {
+               if ((error = ubc_msync(vp, 0, size, NULL, ubcflags))) {
+                       if (error == EINVAL) {
+                               panic("nfs_vinvalbuf(): ubc_msync failed! error %d", error);
+                       }
+                       if (retry++ < 10) { /* retry invalidating a few times */
+                               if (retry > 1 || error == ENXIO) {
+                                       ubcflags &= ~UBC_PUSHALL;
+                               }
+                               goto again;
+                       }
+                       /* give up */
+                       printf("nfs_vinvalbuf(): ubc_msync failed! error %d\n", error);
+               }
+       }
 done:
        lck_mtx_lock(nfs_buf_mutex);
        nflags = np->n_bflag;
-       np->n_bflag &= ~(NBINVALINPROG|NBINVALWANT);
+       np->n_bflag &= ~(NBINVALINPROG | NBINVALWANT);
        lck_mtx_unlock(nfs_buf_mutex);
-       if (nflags & NBINVALWANT)
+       if (nflags & NBINVALWANT) {
                wakeup(&np->n_bflag);
+       }
 
        FSDBG_BOT(554, np, flags, intrflg, error);
-       return (error);
+       return error;
 }
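The ubc_msync() recovery above follows a retry-then-degrade shape: attempt the full push-and-invalidate up to ten times, and drop UBC_PUSHALL after the first failed retry (or immediately on ENXIO, when the object is gone) so later attempts only invalidate. A minimal user-space sketch of that control flow, with a hypothetical do_sync() standing in for ubc_msync():

#include <errno.h>
#include <stdio.h>

#define SYNC_PUSH        0x1            /* cf. UBC_PUSHALL */
#define SYNC_INVALIDATE  0x2            /* cf. UBC_INVALIDATE */

/* Hypothetical stand-in for ubc_msync(); here it fails twice, then works. */
static int
do_sync(int flags)
{
	static int calls;
	(void)flags;
	return (++calls < 3) ? EIO : 0;
}

int
main(void)
{
	int flags = SYNC_PUSH | SYNC_INVALIDATE;
	int retry = 0, error;

	while ((error = do_sync(flags))) {
		if (retry++ >= 10) {            /* give up, as nfs_vinvalbuf2() does */
			fprintf(stderr, "sync failed: %d\n", error);
			break;
		}
		/* After the first failed retry, or if the object is gone,
		 * stop pushing dirty pages and only invalidate. */
		if (retry > 1 || error == ENXIO) {
			flags &= ~SYNC_PUSH;
		}
	}
	return error;
}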
 
+/*
+ * Wait for any busy buffers to complete.
+ */
+void
+nfs_wait_bufs(nfsnode_t np)
+{
+       struct nfsbuf *bp;
+       struct nfsbuflists blist;
+       int error = 0;
+
+       lck_mtx_lock(nfs_buf_mutex);
+       if (!nfs_buf_iterprepare(np, &blist, NBI_CLEAN)) {
+               while ((bp = LIST_FIRST(&blist))) {
+                       LIST_REMOVE(bp, nb_vnbufs);
+                       LIST_INSERT_HEAD(&np->n_cleanblkhd, bp, nb_vnbufs);
+                       nfs_buf_refget(bp);
+                       while ((error = nfs_buf_acquire(bp, 0, 0, 0))) {
+                               if (error != EAGAIN) {
+                                       nfs_buf_refrele(bp);
+                                       nfs_buf_itercomplete(np, &blist, NBI_CLEAN);
+                                       lck_mtx_unlock(nfs_buf_mutex);
+                                       return;
+                               }
+                       }
+                       nfs_buf_refrele(bp);
+                       nfs_buf_drop(bp);
+               }
+               nfs_buf_itercomplete(np, &blist, NBI_CLEAN);
+       }
+       if (!nfs_buf_iterprepare(np, &blist, NBI_DIRTY)) {
+               while ((bp = LIST_FIRST(&blist))) {
+                       LIST_REMOVE(bp, nb_vnbufs);
+                       LIST_INSERT_HEAD(&np->n_dirtyblkhd, bp, nb_vnbufs);
+                       nfs_buf_refget(bp);
+                       while ((error = nfs_buf_acquire(bp, 0, 0, 0))) {
+                               if (error != EAGAIN) {
+                                       nfs_buf_refrele(bp);
+                                       nfs_buf_itercomplete(np, &blist, NBI_DIRTY);
+                                       lck_mtx_unlock(nfs_buf_mutex);
+                                       return;
+                               }
+                       }
+                       nfs_buf_refrele(bp);
+                       nfs_buf_drop(bp);
+               }
+               nfs_buf_itercomplete(np, &blist, NBI_DIRTY);
+       }
+       lck_mtx_unlock(nfs_buf_mutex);
+}
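nfs_wait_bufs() drains in-flight I/O by acquiring and immediately dropping every buffer on both lists; a busy buffer blocks the acquire until its I/O completes. Stripped of the refcounting and list-requeueing bookkeeping, the wait reduces to roughly this sketch (struct item and wait_items are illustrative names, not xnu's):

#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>

/* Illustrative buffer stand-in. */
struct item {
	pthread_mutex_t mtx;
	pthread_cond_t  cv;
	bool busy;                      /* cf. a buffer owned by in-flight I/O */
	struct item *next;
};

/*
 * Block until every item on the list has been observed non-busy once,
 * the way nfs_wait_bufs() acquires and immediately drops each buffer.
 */
static void
wait_items(struct item *head)
{
	for (struct item *it = head; it != NULL; it = it->next) {
		pthread_mutex_lock(&it->mtx);
		while (it->busy) {                        /* cf. nfs_buf_acquire() */
			pthread_cond_wait(&it->cv, &it->mtx);
		}
		pthread_mutex_unlock(&it->mtx);           /* cf. nfs_buf_drop() */
	}
}

int
main(void)
{
	static struct item a = { PTHREAD_MUTEX_INITIALIZER,
	    PTHREAD_COND_INITIALIZER, false, NULL };

	wait_items(&a);                 /* returns at once: nothing is busy */
	return 0;
}

The real function additionally holds a reference on each buffer across the sleep (nfs_buf_refget/nfs_buf_refrele) so the buffer cannot be freed while waited on.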
+
+
 /*
  * Add an async I/O request to the mount's async I/O queue and make
  * sure that an nfsiod will service it.
@@ -3701,8 +4118,12 @@ nfs_asyncio_finish(struct nfsreq *req)
 
        FSDBG_TOP(552, nmp, 0, 0, 0);
 again:
-       if (((nmp = req->r_nmp)) == NULL)
+       nmp = req->r_nmp;
+
+       if (nmp == NULL) {
                return;
+       }
+
        lck_mtx_lock(nfsiod_mutex);
        niod = nmp->nm_niod;
 
@@ -3721,14 +4142,38 @@ again:
                         */
                        lck_mtx_unlock(nfsiod_mutex);
                        started++;
-                       if (!nfsiod_start())
+                       if (!nfsiod_start()) {
                                goto again;
+                       }
                        lck_mtx_lock(nfsiod_mutex);
                }
        }
 
-       if (req->r_achain.tqe_next == NFSREQNOLIST)
+       /*
+        * If we got here while on the resendq, we need to get off it.  This
+        * happens when the timer fires and errors out requests via nfs_sigintr,
+        * or when we receive a reply (UDP case) while still on the resend
+        * queue; either way we're just finishing up and are not going to be
+        * resent.
+        */
+       lck_mtx_lock(&req->r_mtx);
+       if (req->r_flags & R_RESENDQ) {
+               lck_mtx_lock(&nmp->nm_lock);
+               if ((req->r_flags & R_RESENDQ) && req->r_rchain.tqe_next != NFSREQNOLIST) {
+                       NFS_BIO_DBG("Processing async request on resendq. Removing");
+                       TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain);
+                       req->r_flags &= ~R_RESENDQ;
+                       req->r_rchain.tqe_next = NFSREQNOLIST;
+                       assert(req->r_refs > 1);
+                       /* Remove resendq reference */
+                       req->r_refs--;
+               }
+               lck_mtx_unlock(&nmp->nm_lock);
+       }
+       lck_mtx_unlock(&req->r_mtx);
+
+       if (req->r_achain.tqe_next == NFSREQNOLIST) {
                TAILQ_INSERT_TAIL(&nmp->nm_iodq, req, r_achain);
+       }
 
        /* If this mount doesn't already have an nfsiod working on it... */
        if (!nmp->nm_niod) {
@@ -3737,8 +4182,10 @@ again:
                        lck_mtx_unlock(nfsiod_mutex);
                        wakeup(niod);
                } else if (nfsiod_thread_count > 0) {
-                       /* just queue it up on nfsiod mounts queue */
-                       TAILQ_INSERT_TAIL(&nfsiodmounts, nmp, nm_iodlink);
+                       /* just queue it up on the nfsiod mounts queue if needed */
+                       if (nmp->nm_iodlink.tqe_next == NFSNOLIST) {
+                               TAILQ_INSERT_TAIL(&nfsiodmounts, nmp, nm_iodlink);
+                       }
                        lck_mtx_unlock(nfsiod_mutex);
                } else {
                        printf("nfs_asyncio(): no nfsiods? %d %d (%d)\n", nfsiod_thread_count, NFSIOD_MAX, started);
@@ -3756,83 +4203,64 @@ again:
 
 /*
  * queue up async I/O request for resend
+ * Must be called with req->r_mtx locked.
  */
 void
 nfs_asyncio_resend(struct nfsreq *req)
 {
        struct nfsmount *nmp = req->r_nmp;
 
-       if (!nmp)
+       if (nfs_mount_gone(nmp)) {
                return;
+       }
+
+#if CONFIG_NFS_GSS
        nfs_gss_clnt_rpcdone(req);
+#endif
        lck_mtx_lock(&nmp->nm_lock);
-       if (req->r_rchain.tqe_next == NFSREQNOLIST) {
+       if (!(req->r_flags & R_RESENDQ)) {
                TAILQ_INSERT_TAIL(&nmp->nm_resendq, req, r_rchain);
                req->r_flags |= R_RESENDQ;
+               /*
+                * We take a reference on this request so that it can't be
+                * destroyed while a resend is queued or in progress.
+                */
+               nfs_request_ref(req, 1);
        }
        nfs_mount_sock_thread_wake(nmp);
        lck_mtx_unlock(&nmp->nm_lock);
 }
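The R_RESENDQ change pairs queue membership with a reference: nfs_asyncio_resend() takes one via nfs_request_ref() when it inserts the request, and whoever removes the request from nm_resendq (as nfs_asyncio_finish() does above) drops it, so a queued request cannot be destroyed while a resend is pending. A minimal sketch of that pairing, assuming illustrative names (struct req, resendq_add, resendq_remove) rather than xnu's:

#include <pthread.h>
#include <stdbool.h>
#include <stdlib.h>

/* Illustrative request with a hold count, loosely modelled on
 * struct nfsreq's r_refs and the R_RESENDQ flag. */
struct req {
	pthread_mutex_t mtx;
	int  refs;
	bool on_resendq;
};

/* Queueing for resend takes a hold, as nfs_request_ref() does, so a
 * concurrent completion cannot free the request out from under us. */
static void
resendq_add(struct req *r)
{
	pthread_mutex_lock(&r->mtx);
	if (!r->on_resendq) {
		r->on_resendq = true;
		r->refs++;
	}
	pthread_mutex_unlock(&r->mtx);
}

/* Whoever dequeues the request drops that hold; the last hold frees it. */
static void
resendq_remove(struct req *r)
{
	bool last = false;

	pthread_mutex_lock(&r->mtx);
	if (r->on_resendq) {
		r->on_resendq = false;
		last = (--r->refs == 0);
	}
	pthread_mutex_unlock(&r->mtx);
	if (last) {
		free(r);
	}
}

int
main(void)
{
	struct req *r = calloc(1, sizeof(*r));
	bool last;

	pthread_mutex_init(&r->mtx, NULL);
	r->refs = 1;                   /* creation reference */
	resendq_add(r);                /* refs: 2, queued */
	resendq_remove(r);             /* refs: 1, dequeued but still alive */
	pthread_mutex_lock(&r->mtx);
	last = (--r->refs == 0);       /* drop the creation reference */
	pthread_mutex_unlock(&r->mtx);
	if (last) {
		free(r);
	}
	return 0;
}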
 
 /*
- * Read an NFS buffer for a directory.
+ * Read directory data into a buffer.
+ *
+ * The buffer will be filled (unless EOF is hit).
+ * Buffers after this one may also be completely or partially filled.
  */
 int
 nfs_buf_readdir(struct nfsbuf *bp, vfs_context_t ctx)
 {
-       nfsnode_t np;
-       vnode_t vp;
-       struct nfsmount *nmp;
-       int error = 0, nfsvers;
-       struct uio uio;
-       struct iovec_32 io;
-
-       np = bp->nb_np;
-       vp = NFSTOV(np);
-       nmp = VTONMP(vp);
-       nfsvers = nmp->nm_vers;
-       uio.uio_iovs.iov32p = &io;
-       uio.uio_iovcnt = 1;
-#if 1   /* LP64todo - can't use new segment flags until the drivers are ready */
-       uio.uio_segflg = UIO_SYSSPACE;
-#else
-       uio.uio_segflg = UIO_SYSSPACE32;
-#endif
-
-       /* sanity check */
-       if (ISSET(bp->nb_flags, NB_DONE))
-               CLR(bp->nb_flags, NB_DONE);
+       nfsnode_t np = bp->nb_np;
+       struct nfsmount *nmp = NFSTONMP(np);
+       int error = 0;
 
-       uio.uio_rw = UIO_READ;
-       io.iov_len = bp->nb_bufsize;
-       uio_uio_resid_set(&uio, io.iov_len);
-       io.iov_base = (uintptr_t) bp->nb_data;
-       uio.uio_offset = NBOFF(bp);
+       if (nfs_mount_gone(nmp)) {
+               return ENXIO;
+       }
 
-       OSAddAtomic(1, (SInt32*)&nfsstats.readdir_bios);
-       if (nfsvers < NFS_VER4) {
-               if (nmp->nm_flag & NFSMNT_RDIRPLUS) {
-                       error = nfs3_readdirplus_rpc(np, &uio, ctx);
-                       if (error == NFSERR_NOTSUPP) {
-                               lck_mtx_lock(&nmp->nm_lock);
-                               nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
-                               lck_mtx_unlock(&nmp->nm_lock);
-                       }
-               }
-               if (!(nmp->nm_flag & NFSMNT_RDIRPLUS))
-                       error = nfs3_readdir_rpc(np, &uio, ctx);
-       } else {
-               error = nfs4_readdir_rpc(np, &uio, ctx);
+       if (nmp->nm_vers < NFS_VER4) {
+               error = nfs3_readdir_rpc(np, bp, ctx);
        }
-       if (error) {
+#if CONFIG_NFS4
+       else {
+               error = nfs4_readdir_rpc(np, bp, ctx);
+       }
+#endif
+       if (error && (error != NFSERR_DIRBUFDROPPED)) {
                SET(bp->nb_flags, NB_ERROR);
                bp->nb_error = error;
-       } else {
-               bp->nb_validoff = 0;
-               bp->nb_validend = uio.uio_offset - NBOFF(bp);
-               bp->nb_valid = (1 << (round_page_32(bp->nb_validend)/PAGE_SIZE)) - 1;
        }
-
-       nfs_buf_iodone(bp);
-       return (error);
+       return error;
 }
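The rewritten nfs_buf_readdir() is now a thin version dispatch, with the NFSv4 branch compiled out when CONFIG_NFS4 is unset and NFSERR_DIRBUFDROPPED treated as non-fatal (the buffer is not marked NB_ERROR). A minimal sketch of the dispatch shape, assuming a hypothetical WITH_V4 build flag and stand-in RPC functions in place of the real ones:

#include <errno.h>
#include <stdio.h>

/* Hypothetical stand-ins: WITH_V4 plays the role of CONFIG_NFS4, and
 * readdir_v3/readdir_v4 stand in for the per-version readdir RPCs. */
static int readdir_v3(void) { return 0; }
#if WITH_V4
static int readdir_v4(void) { return 0; }
#endif

static int
readdir_dispatch(int vers)
{
	if (vers < 4) {
		return readdir_v3();            /* cf. nfs3_readdir_rpc() */
	}
#if WITH_V4
	return readdir_v4();                    /* cf. nfs4_readdir_rpc() */
#else
	return ENOTSUP;                         /* v4 support compiled out */
#endif
}

int
main(void)
{
	printf("v3 readdir: %d\n", readdir_dispatch(3));
	return 0;
}

The sketch returns ENOTSUP in the compiled-out case to make it explicit; the kernel code can instead rely on v4 mounts never existing when CONFIG_NFS4 is unset.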
+
+#endif /* CONFIG_NFS_CLIENT */