X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/2d21ac55c334faf3a56e5634905ed6987fc787d4..060df5ea7c632b1ac8cc8aac1fb59758165c2084:/bsd/nfs/nfs4_vnops.c?ds=sidebyside diff --git a/bsd/nfs/nfs4_vnops.c b/bsd/nfs/nfs4_vnops.c index d8247eafc..ffd12d88f 100644 --- a/bsd/nfs/nfs4_vnops.c +++ b/bsd/nfs/nfs4_vnops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006-2007 Apple Inc. All rights reserved. + * Copyright (c) 2006-2009 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -46,7 +46,7 @@ #include #include #include -#include +#include #include @@ -77,15 +77,14 @@ #include #include - int -nfs4_access_rpc(nfsnode_t np, u_long *mode, vfs_context_t ctx) +nfs4_access_rpc(nfsnode_t np, u_int32_t *mode, vfs_context_t ctx) { - int error = 0, status, numops, slot; + int error = 0, lockerror = ENOENT, status, numops, slot; u_int64_t xid; struct nfsm_chain nmreq, nmrep; struct timeval now; - uint32_t access, supported = 0, missing; + uint32_t access = 0, supported = 0, missing; struct nfsmount *nmp = NFSTONMP(np); int nfsvers = nmp->nm_vers; uid_t uid; @@ -93,7 +92,8 @@ nfs4_access_rpc(nfsnode_t np, u_long *mode, vfs_context_t ctx) nfsm_chain_null(&nmreq); nfsm_chain_null(&nmrep); - numops = 3; // PUTFH + ACCESS + GETATTR + // PUTFH, ACCESS, GETATTR + numops = 3; nfsm_chain_build_alloc_init(error, &nmreq, 17 * NFSX_UNSIGNED); nfsm_chain_add_compound_header(error, &nmreq, "access", numops); numops--; @@ -111,6 +111,8 @@ nfs4_access_rpc(nfsnode_t np, u_long *mode, vfs_context_t ctx) nfsmout_if(error); error = nfs_request(np, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &nmrep, &xid, &status); + if ((lockerror = nfs_node_lock(np))) + error = lockerror; nfsm_chain_skip_tag(error, &nmrep); nfsm_chain_get_32(error, &nmrep, numops); nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH); @@ -130,6 +132,9 @@ nfs4_access_rpc(nfsnode_t np, u_long *mode, vfs_context_t ctx) access |= NFS_ACCESS_DELETE; } } + /* Some servers report DELETE support but erroneously give a denied answer. 
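+	 * The fixup below (gated by the nfs_access_delete global) adds the bit
+	 * back locally so delete operations are attempted rather than rejected
+	 * out of hand; the server still gives the authoritative answer at
+	 * REMOVE time.  A worked example, assuming the standard bit value
+	 * NFS_ACCESS_DELETE == 0x10: with *mode == 0x1f and the server
+	 * answering access == 0x0f despite claiming DELETE support, the
+	 * override restores access to 0x1f.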
*/ + if ((*mode & NFS_ACCESS_DELETE) && nfs_access_delete && !(access & NFS_ACCESS_DELETE)) + access |= NFS_ACCESS_DELETE; nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR); nfsm_chain_loadattr(error, &nmrep, np, nfsvers, NULL, &xid); nfsmout_if(error); @@ -144,6 +149,8 @@ nfs4_access_rpc(nfsnode_t np, u_long *mode, vfs_context_t ctx) /* pass back the mode returned with this request */ *mode = np->n_mode[slot]; nfsmout: + if (!lockerror) + nfs_node_unlock(np); nfsm_chain_cleanup(&nmreq); nfsm_chain_cleanup(&nmrep); return (error); @@ -170,7 +177,8 @@ nfs4_getattr_rpc( nfsm_chain_null(&nmreq); nfsm_chain_null(&nmrep); - numops = 2; // PUTFH + GETATTR + // PUTFH, GETATTR + numops = 2; nfsm_chain_build_alloc_init(error, &nmreq, 15 * NFSX_UNSIGNED); nfsm_chain_add_compound_header(error, &nmreq, "getattr", numops); numops--; @@ -213,7 +221,8 @@ nfs4_readlink_rpc(nfsnode_t np, char *buf, uint32_t *buflenp, vfs_context_t ctx) nfsm_chain_null(&nmreq); nfsm_chain_null(&nmrep); - numops = 3; // PUTFH + GETATTR + READLINK + // PUTFH, GETATTR, READLINK + numops = 3; nfsm_chain_build_alloc_init(error, &nmreq, 16 * NFSX_UNSIGNED); nfsm_chain_add_compound_header(error, &nmreq, "readlink", numops); numops--; @@ -230,7 +239,7 @@ nfs4_readlink_rpc(nfsnode_t np, char *buf, uint32_t *buflenp, vfs_context_t ctx) nfsmout_if(error); error = nfs_request(np, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &nmrep, &xid, &status); - if ((lockerror = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE))) + if ((lockerror = nfs_node_lock(np))) error = lockerror; nfsm_chain_skip_tag(error, &nmrep); nfsm_chain_get_32(error, &nmrep, numops); @@ -251,7 +260,7 @@ nfs4_readlink_rpc(nfsnode_t np, char *buf, uint32_t *buflenp, vfs_context_t ctx) *buflenp = len; nfsmout: if (!lockerror) - nfs_unlock(np); + nfs_node_unlock(np); nfsm_chain_cleanup(&nmreq); nfsm_chain_cleanup(&nmrep); return (error); @@ -269,6 +278,7 @@ nfs4_read_rpc_async( { struct nfsmount *nmp; int error = 0, nfsvers, numops; + nfs_stateid stateid; struct nfsm_chain nmreq; nmp = NFSTONMP(np); @@ -278,7 +288,7 @@ nfs4_read_rpc_async( nfsm_chain_null(&nmreq); - // PUTFH + READ + GETATTR + // PUTFH, READ, GETATTR numops = 3; nfsm_chain_build_alloc_init(error, &nmreq, 22 * NFSX_UNSIGNED); nfsm_chain_add_compound_header(error, &nmreq, "read", numops); @@ -287,13 +297,8 @@ nfs4_read_rpc_async( nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize); numops--; nfsm_chain_add_32(error, &nmreq, NFS_OP_READ); - - /* XXX use special stateid for now */ - nfsm_chain_add_32(error, &nmreq, 0xffffffff); - nfsm_chain_add_32(error, &nmreq, 0xffffffff); - nfsm_chain_add_32(error, &nmreq, 0xffffffff); - nfsm_chain_add_32(error, &nmreq, 0xffffffff); - + nfs_get_stateid(np, thd, cred, &stateid); + nfsm_chain_add_stateid(error, &nmreq, &stateid); nfsm_chain_add_64(error, &nmreq, offset); nfsm_chain_add_32(error, &nmreq, len); numops--; @@ -313,7 +318,7 @@ int nfs4_read_rpc_async_finish( nfsnode_t np, struct nfsreq *req, - struct uio *uiop, + uio_t uio, size_t *lenp, int *eofp) { @@ -336,7 +341,7 @@ nfs4_read_rpc_async_finish( if (error == EINPROGRESS) /* async request restarted */ return (error); - if ((lockerror = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE))) + if ((lockerror = nfs_node_lock(np))) error = lockerror; nfsm_chain_skip_tag(error, &nmrep); nfsm_chain_get_32(error, &nmrep, numops); @@ -346,12 +351,12 @@ nfs4_read_rpc_async_finish( nfsm_chain_get_32(error, &nmrep, retlen); if (!error) { *lenp = MIN(retlen, *lenp); - error = nfsm_chain_get_uio(&nmrep, *lenp, uiop); + error = 
nfsm_chain_get_uio(&nmrep, *lenp, uio); } nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR); nfsm_chain_loadattr(error, &nmrep, np, nfsvers, NULL, &xid); if (!lockerror) - nfs_unlock(np); + nfs_node_unlock(np); if (eofp) { if (!eof && !retlen) eof = 1; @@ -364,7 +369,7 @@ nfs4_read_rpc_async_finish( int nfs4_write_rpc_async( nfsnode_t np, - struct uio *uiop, + uio_t uio, size_t len, thread_t thd, kauth_cred_t cred, @@ -374,7 +379,7 @@ nfs4_write_rpc_async( { struct nfsmount *nmp; int error = 0, nfsvers, numops; - off_t offset; + nfs_stateid stateid; struct nfsm_chain nmreq; nmp = NFSTONMP(np); @@ -382,11 +387,9 @@ nfs4_write_rpc_async( return (ENXIO); nfsvers = nmp->nm_vers; - offset = uiop->uio_offset; - nfsm_chain_null(&nmreq); - // PUTFH + WRITE + GETATTR + // PUTFH, WRITE, GETATTR numops = 3; nfsm_chain_build_alloc_init(error, &nmreq, 25 * NFSX_UNSIGNED + len); nfsm_chain_add_compound_header(error, &nmreq, "write", numops); @@ -395,18 +398,13 @@ nfs4_write_rpc_async( nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize); numops--; nfsm_chain_add_32(error, &nmreq, NFS_OP_WRITE); - - /* XXX use special stateid for now */ - nfsm_chain_add_32(error, &nmreq, 0xffffffff); - nfsm_chain_add_32(error, &nmreq, 0xffffffff); - nfsm_chain_add_32(error, &nmreq, 0xffffffff); - nfsm_chain_add_32(error, &nmreq, 0xffffffff); - - nfsm_chain_add_64(error, &nmreq, uiop->uio_offset); + nfs_get_stateid(np, thd, cred, &stateid); + nfsm_chain_add_stateid(error, &nmreq, &stateid); + nfsm_chain_add_64(error, &nmreq, uio_offset(uio)); nfsm_chain_add_32(error, &nmreq, iomode); nfsm_chain_add_32(error, &nmreq, len); if (!error) - error = nfsm_chain_add_uio(&nmreq, uiop, len); + error = nfsm_chain_add_uio(&nmreq, uio, len); numops--; nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR); nfsm_chain_add_bitmap_masked(error, &nmreq, nfs_getattr_bitmap, @@ -452,7 +450,7 @@ nfs4_write_rpc_async_finish( nmp = NFSTONMP(np); if (!nmp) error = ENXIO; - if (!error && (lockerror = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE))) + if (!error && (lockerror = nfs_node_lock(np))) error = lockerror; nfsm_chain_skip_tag(error, &nmrep); nfsm_chain_get_32(error, &nmrep, numops); @@ -480,7 +478,7 @@ nfs4_write_rpc_async_finish( nfsm_chain_loadattr(error, &nmrep, np, nfsvers, NULL, &xid); nfsmout: if (!lockerror) - nfs_unlock(np); + nfs_node_unlock(np); nfsm_chain_cleanup(&nmrep); if ((committed != NFS_WRITE_FILESYNC) && nfs_allow_async && ((mp = NFSTOMP(np))) && (vfs_flags(mp) & MNT_ASYNC)) @@ -497,7 +495,7 @@ nfs4_remove_rpc( thread_t thd, kauth_cred_t cred) { - int error = 0, remove_error = 0, status; + int error = 0, lockerror = ENOENT, remove_error = 0, status; struct nfsmount *nmp; int nfsvers, numops; u_int64_t xid; @@ -507,7 +505,7 @@ nfs4_remove_rpc( if (!nmp) return (ENXIO); nfsvers = nmp->nm_vers; - +restart: nfsm_chain_null(&nmreq); nfsm_chain_null(&nmrep); @@ -531,6 +529,8 @@ nfs4_remove_rpc( error = nfs_request2(dnp, NULL, &nmreq, NFSPROC4_COMPOUND, thd, cred, 0, &nmrep, &xid, &status); + if ((lockerror = nfs_node_lock(dnp))) + error = lockerror; nfsm_chain_skip_tag(error, &nmrep); nfsm_chain_get_32(error, &nmrep, numops); nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH); @@ -539,13 +539,20 @@ nfs4_remove_rpc( nfsm_chain_check_change_info(error, &nmrep, dnp); nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR); nfsm_chain_loadattr(error, &nmrep, dnp, nfsvers, NULL, &xid); - if (error) + if (error && !lockerror) NATTRINVALIDATE(dnp); nfsmout: nfsm_chain_cleanup(&nmreq); nfsm_chain_cleanup(&nmrep); - dnp->n_flag |= 
NMODIFIED; + if (!lockerror) { + dnp->n_flag |= NMODIFIED; + nfs_node_unlock(dnp); + } + if (error == NFSERR_GRACE) { + tsleep(&nmp->nm_state, (PZERO-1), "nfsgrace", 2*hz); + goto restart; + } return (remove_error); } @@ -560,7 +567,7 @@ nfs4_rename_rpc( int tnamelen, vfs_context_t ctx) { - int error = 0, status, nfsvers, numops; + int error = 0, lockerror = ENOENT, status, nfsvers, numops; struct nfsmount *nmp; u_int64_t xid, savedxid; struct nfsm_chain nmreq, nmrep; @@ -605,6 +612,8 @@ nfs4_rename_rpc( error = nfs_request(fdnp, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &nmrep, &xid, &status); + if ((lockerror = nfs_node_lock2(fdnp, tdnp))) + error = lockerror; nfsm_chain_skip_tag(error, &nmrep); nfsm_chain_get_32(error, &nmrep, numops); nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH); @@ -617,19 +626,22 @@ nfs4_rename_rpc( nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR); savedxid = xid; nfsm_chain_loadattr(error, &nmrep, tdnp, nfsvers, NULL, &xid); - if (error) + if (error && !lockerror) NATTRINVALIDATE(tdnp); nfsm_chain_op_check(error, &nmrep, NFS_OP_RESTOREFH); nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR); xid = savedxid; nfsm_chain_loadattr(error, &nmrep, fdnp, nfsvers, NULL, &xid); - if (error) + if (error && !lockerror) NATTRINVALIDATE(fdnp); nfsmout: nfsm_chain_cleanup(&nmreq); nfsm_chain_cleanup(&nmrep); - fdnp->n_flag |= NMODIFIED; - tdnp->n_flag |= NMODIFIED; + if (!lockerror) { + fdnp->n_flag |= NMODIFIED; + tdnp->n_flag |= NMODIFIED; + nfs_node_unlock2(fdnp, tdnp); + } /* Kludge: Map EEXIST => 0 assuming that it is a reply to a retry. */ if (error == EEXIST) error = 0; @@ -639,59 +651,48 @@ nfsmout: /* * NFS V4 readdir RPC. */ -#define DIRHDSIZ ((int)(sizeof(struct dirent) - (MAXNAMLEN + 1))) int -nfs4_readdir_rpc(nfsnode_t dnp, struct uio *uiop, vfs_context_t ctx) -{ - size_t len, tlen, skiplen, left; - struct dirent *dp = NULL; - vnode_t newvp; - nfsuint64 *cookiep; - struct componentname cn, *cnp = &cn; - nfsuint64 cookie; +nfs4_readdir_rpc(nfsnode_t dnp, struct nfsbuf *bp, vfs_context_t ctx) +{ struct nfsmount *nmp; - nfsnode_t np; - int error = 0, lockerror, status, more_entries = 1, blksiz = 0, bigenough = 1; - int nfsvers, rdirplus, nmreaddirsize, nmrsize, eof, i, numops; - u_int64_t xid, savexid; - struct nfs_vattr nvattr; - struct nfsm_chain nmreq, nmrep; - char *cp; + int error = 0, lockerror, nfsvers, rdirplus, bigcookies, numops; + int i, status, more_entries = 1, eof, bp_dropped = 0; + uint32_t nmreaddirsize, nmrsize; + uint32_t namlen, skiplen, fhlen, xlen, attrlen, reclen, space_free, space_needed; + uint64_t cookie, lastcookie, xid, savedxid; + struct nfsm_chain nmreq, nmrep, nmrepsave; + fhandle_t fh; + struct nfs_vattr nvattr, *nvattrp; + struct nfs_dir_buf_header *ndbhp; + struct direntry *dp; + char *padstart, padlen; const char *tag; uint32_t entry_attrs[NFS_ATTR_BITMAP_LEN]; - fhandle_t fh; + struct timeval now; -#if DIAGNOSTIC - /* XXX limitation based on need to adjust uio */ - if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) || - (uio_uio_resid(uiop) & (DIRBLKSIZ - 1))) - panic("nfs4_readdir_rpc: bad uio"); -#endif nmp = NFSTONMP(dnp); if (!nmp) return (ENXIO); nfsvers = nmp->nm_vers; nmreaddirsize = nmp->nm_readdirsize; nmrsize = nmp->nm_rsize; - rdirplus = (nmp->nm_flag & NFSMNT_RDIRPLUS) ? 1 : 0; - - bzero(cnp, sizeof(*cnp)); - newvp = NULLVP; + bigcookies = nmp->nm_state & NFSSTA_BIGCOOKIES; + rdirplus = ((nfsvers > NFS_VER2) && (nmp->nm_flag & NFSMNT_RDIRPLUS)) ? 1 : 0; /* * Set up attribute request for entries. 
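 	 * The request is expressed as an NFSv4 attribute bitmap, an array of
 	 * 32-bit words indexed by attribute number; a minimal sketch of the
 	 * accessor macros assumed here (bit I lives in word I/32):
 	 *
 	 *	#define NFS_BITMAP_SET(B, I)	(((uint32_t *)(B))[(I)/32] |= 1<<((I)%32))
 	 *	#define NFS_BITMAP_ISSET(B, I)	(((uint32_t *)(B))[(I)/32] & (1<<((I)%32)))
 	 *
 	 * Which attributes get requested depends on the mode: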
* For READDIRPLUS functionality, get everything. - * Otherwise, just get what we need for struct dirent. + * Otherwise, just get what we need for struct direntry. */ if (rdirplus) { - tag = "READDIRPLUS"; + tag = "readdirplus"; for (i=0; i < NFS_ATTR_BITMAP_LEN; i++) entry_attrs[i] = nfs_getattr_bitmap[i] & nmp->nm_fsattr.nfsa_supp_attr[i]; NFS_BITMAP_SET(entry_attrs, NFS_FATTR_FILEHANDLE); } else { - tag = "READDIR"; + tag = "readdir"; NFS_CLEAR_ATTRIBUTES(entry_attrs); NFS_BITMAP_SET(entry_attrs, NFS_FATTR_TYPE); NFS_BITMAP_SET(entry_attrs, NFS_FATTR_FILEID); @@ -699,78 +700,89 @@ nfs4_readdir_rpc(nfsnode_t dnp, struct uio *uiop, vfs_context_t ctx) /* XXX NFS_BITMAP_SET(entry_attrs, NFS_FATTR_MOUNTED_ON_FILEID); */ NFS_BITMAP_SET(entry_attrs, NFS_FATTR_RDATTR_ERROR); - if ((lockerror = nfs_lock(dnp, NFS_NODE_LOCK_SHARED))) + /* lock to protect access to cookie verifier */ + if ((lockerror = nfs_node_lock(dnp))) return (lockerror); - /* - * If there is no cookie, assume directory was stale. - */ - cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0); - if (cookiep) - cookie = *cookiep; - else { - nfs_unlock(dnp); - return (NFSERR_BAD_COOKIE); + /* determine cookie to use, and move dp to the right offset */ + ndbhp = (struct nfs_dir_buf_header*)bp->nb_data; + dp = NFS_DIR_BUF_FIRST_DIRENTRY(bp); + if (ndbhp->ndbh_count) { + for (i=0; i < ndbhp->ndbh_count-1; i++) + dp = NFS_DIRENTRY_NEXT(dp); + cookie = dp->d_seekoff; + dp = NFS_DIRENTRY_NEXT(dp); + } else { + cookie = bp->nb_lblkno; + /* increment with every buffer read */ + OSAddAtomic(1, &nfsstats.readdir_bios); } + lastcookie = cookie; /* - * The NFS client is responsible for the "." and ".." - * entries in the directory. So, we put them at the top. + * The NFS client is responsible for the "." and ".." entries in the + * directory. So, we put them at the start of the first buffer. */ - if ((uiop->uio_offset == 0) && - ((2*(4 + DIRHDSIZ)) <= uio_uio_resid(uiop))) { - /* add "." entry */ - len = 2; - tlen = nfsm_rndup(len); - // LP64todo - fix this! - dp = (struct dirent *) CAST_DOWN(caddr_t, uio_iov_base(uiop)); + if ((bp->nb_lblkno == 0) && (ndbhp->ndbh_count == 0)) { + fh.fh_len = 0; + fhlen = rdirplus ? fh.fh_len + 1 : 0; + xlen = rdirplus ? (fhlen + sizeof(time_t)) : 0; + /* "." */ + namlen = 1; + reclen = NFS_DIRENTRY_LEN(namlen + xlen); + if (xlen) + bzero(&dp->d_name[namlen+1], xlen); + dp->d_namlen = namlen; + strlcpy(dp->d_name, ".", namlen+1); dp->d_fileno = dnp->n_vattr.nva_fileid; - dp->d_namlen = len; - dp->d_reclen = tlen + DIRHDSIZ; dp->d_type = DT_DIR; - strlcpy(dp->d_name, ".", len); - blksiz += dp->d_reclen; - if (blksiz == DIRBLKSIZ) - blksiz = 0; - uiop->uio_offset += DIRHDSIZ + tlen; - uio_iov_base_add(uiop, DIRHDSIZ + tlen); - uio_uio_resid_add(uiop, -(DIRHDSIZ + tlen)); - uio_iov_len_add(uiop, -(DIRHDSIZ + tlen)); - /* add ".." entry */ - len = 3; - tlen = nfsm_rndup(len); - // LP64todo - fix this! - dp = (struct dirent *) CAST_DOWN(caddr_t, uio_iov_base(uiop)); + dp->d_reclen = reclen; + dp->d_seekoff = 1; + padstart = dp->d_name + dp->d_namlen + 1 + xlen; + dp = NFS_DIRENTRY_NEXT(dp); + padlen = (char*)dp - padstart; + if (padlen > 0) + bzero(padstart, padlen); + if (rdirplus) /* zero out attributes */ + bzero(NFS_DIR_BUF_NVATTR(bp, 0), sizeof(struct nfs_vattr)); + + /* ".." 
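+	 * entry: synthesized by the client just like "." above; note that
+	 * d_fileno falls back to the directory's own fileid below when the
+	 * parent vnode (dnp->n_parent) is not known.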
*/ + namlen = 2; + reclen = NFS_DIRENTRY_LEN(namlen + xlen); + if (xlen) + bzero(&dp->d_name[namlen+1], xlen); + dp->d_namlen = namlen; + strlcpy(dp->d_name, "..", namlen+1); if (dnp->n_parent) dp->d_fileno = VTONFS(dnp->n_parent)->n_vattr.nva_fileid; else dp->d_fileno = dnp->n_vattr.nva_fileid; - dp->d_namlen = len; - dp->d_reclen = tlen + DIRHDSIZ; dp->d_type = DT_DIR; - strlcpy(dp->d_name, "..", len); - blksiz += dp->d_reclen; - if (blksiz == DIRBLKSIZ) - blksiz = 0; - uiop->uio_offset += DIRHDSIZ + tlen; - uio_iov_base_add(uiop, DIRHDSIZ + tlen); - uio_uio_resid_add(uiop, -(DIRHDSIZ + tlen)); - uio_iov_len_add(uiop, -(DIRHDSIZ + tlen)); - cookie.nfsuquad[0] = 0; - cookie.nfsuquad[1] = 2; + dp->d_reclen = reclen; + dp->d_seekoff = 2; + padstart = dp->d_name + dp->d_namlen + 1 + xlen; + dp = NFS_DIRENTRY_NEXT(dp); + padlen = (char*)dp - padstart; + if (padlen > 0) + bzero(padstart, padlen); + if (rdirplus) /* zero out attributes */ + bzero(NFS_DIR_BUF_NVATTR(bp, 1), sizeof(struct nfs_vattr)); + + ndbhp->ndbh_entry_end = (char*)dp - bp->nb_data; + ndbhp->ndbh_count = 2; } /* - * Loop around doing readdir rpc's of size nm_readdirsize - * truncated to a multiple of DIRBLKSIZ. - * The stopping criteria is EOF or buffer full. + * Loop around doing readdir(plus) RPCs of size nm_readdirsize until + * the buffer is full (or we hit EOF). Then put the remainder of the + * results in the next buffer(s). */ - while (more_entries && bigenough) { - nfsm_chain_null(&nmreq); - nfsm_chain_null(&nmrep); - nfsm_assert(error, NFSTONMP(dnp), ENXIO); + nfsm_chain_null(&nmreq); + nfsm_chain_null(&nmrep); + while (nfs_dir_buf_freespace(bp, rdirplus) && !(ndbhp->ndbh_flags & NDB_FULL)) { - numops = 3; // PUTFH + GETATTR + READDIR + // PUTFH, GETATTR, READDIR + numops = 3; nfsm_chain_build_alloc_init(error, &nmreq, 26 * NFSX_UNSIGNED); nfsm_chain_add_compound_header(error, &nmreq, tag, numops); numops--; @@ -782,195 +794,203 @@ nfs4_readdir_rpc(nfsnode_t dnp, struct uio *uiop, vfs_context_t ctx) NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr); numops--; nfsm_chain_add_32(error, &nmreq, NFS_OP_READDIR); - /* opaque values don't need swapping, but as long */ - /* as we are consistent about it, it should be ok */ - nfsm_chain_add_32(error, &nmreq, cookie.nfsuquad[0]); - if ((cookie.nfsuquad[0] == 0) && (cookie.nfsuquad[1] <= 2)) - nfsm_chain_add_32(error, &nmreq, 0); - else - nfsm_chain_add_32(error, &nmreq, cookie.nfsuquad[1]); - nfsm_chain_add_32(error, &nmreq, dnp->n_cookieverf.nfsuquad[0]); - nfsm_chain_add_32(error, &nmreq, dnp->n_cookieverf.nfsuquad[1]); + nfsm_chain_add_64(error, &nmreq, (cookie <= 2) ? 
0 : cookie); + nfsm_chain_add_64(error, &nmreq, dnp->n_cookieverf); nfsm_chain_add_32(error, &nmreq, nmreaddirsize); nfsm_chain_add_32(error, &nmreq, nmrsize); nfsm_chain_add_bitmap(error, &nmreq, entry_attrs, NFS_ATTR_BITMAP_LEN); nfsm_chain_build_done(error, &nmreq); nfsm_assert(error, (numops == 0), EPROTO); - nfs_unlock(dnp); + nfs_node_unlock(dnp); nfsmout_if(error); error = nfs_request(dnp, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &nmrep, &xid, &status); - if ((lockerror = nfs_lock(dnp, NFS_NODE_LOCK_EXCLUSIVE))) + if ((lockerror = nfs_node_lock(dnp))) error = lockerror; - savexid = xid; + + savedxid = xid; nfsm_chain_skip_tag(error, &nmrep); nfsm_chain_get_32(error, &nmrep, numops); nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH); nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR); nfsm_chain_loadattr(error, &nmrep, dnp, nfsvers, NULL, &xid); nfsm_chain_op_check(error, &nmrep, NFS_OP_READDIR); - nfsm_chain_get_32(error, &nmrep, dnp->n_cookieverf.nfsuquad[0]); - nfsm_chain_get_32(error, &nmrep, dnp->n_cookieverf.nfsuquad[1]); + nfsm_chain_get_64(error, &nmrep, dnp->n_cookieverf); nfsm_chain_get_32(error, &nmrep, more_entries); - nfs_unlock(dnp); + + if (!lockerror) { + nfs_node_unlock(dnp); + lockerror = ENOENT; + } nfsmout_if(error); - /* Loop through the entries, massaging them into "dirent" form. */ - /* If READDIRPLUS, also create the vnodes. */ - while (more_entries && bigenough) { + if (rdirplus) + microuptime(&now); + + /* loop through the entries packing them into the buffer */ + while (more_entries) { /* Entry: COOKIE, NAME, FATTR */ - nfsm_chain_get_32(error, &nmrep, cookie.nfsuquad[0]); - nfsm_chain_get_32(error, &nmrep, cookie.nfsuquad[1]); - nfsm_chain_get_32(error, &nmrep, len); + nfsm_chain_get_64(error, &nmrep, cookie); + nfsm_chain_get_32(error, &nmrep, namlen); nfsmout_if(error); - /* Note: NFS supports longer names, but struct dirent doesn't */ - /* so we just truncate the names to fit */ - if (len <= 0) { + if (!bigcookies && (cookie >> 32) && (nmp == NFSTONMP(dnp))) { + /* we've got a big cookie, make sure flag is set */ + lck_mtx_lock(&nmp->nm_lock); + nmp->nm_state |= NFSSTA_BIGCOOKIES; + lck_mtx_unlock(&nmp->nm_lock); + bigcookies = 1; + } + /* just truncate names that don't fit in direntry.d_name */ + if (namlen <= 0) { error = EBADRPC; goto nfsmout; } - if (len > MAXNAMLEN) { - skiplen = len - MAXNAMLEN; - len = MAXNAMLEN; + if (namlen > (sizeof(dp->d_name)-1)) { + skiplen = namlen - sizeof(dp->d_name) + 1; + namlen = sizeof(dp->d_name) - 1; } else { skiplen = 0; } - tlen = nfsm_rndup(len); - if (tlen == len) - tlen += 4; /* To ensure null termination */ - left = DIRBLKSIZ - blksiz; - if ((tlen + DIRHDSIZ) > left) { - dp->d_reclen += left; - uio_iov_base_add(uiop, left); - uio_iov_len_add(uiop, -left); - uiop->uio_offset += left; - uio_uio_resid_add(uiop, -left); - blksiz = 0; - } - if ((tlen + DIRHDSIZ) > uio_uio_resid(uiop)) { - bigenough = 0; - break; + /* guess that fh size will be same as parent */ + fhlen = rdirplus ? (1 + dnp->n_fhsize) : 0; + xlen = rdirplus ? (fhlen + sizeof(time_t)) : 0; + attrlen = rdirplus ? sizeof(struct nfs_vattr) : 0; + reclen = NFS_DIRENTRY_LEN(namlen + xlen); + space_needed = reclen + attrlen; + space_free = nfs_dir_buf_freespace(bp, rdirplus); + if (space_needed > space_free) { + /* + * We still have entries to pack, but we've + * run out of room in the current buffer. + * So we need to move to the next buffer. + * The block# for the next buffer is the + * last cookie in the current buffer. 
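+			 * (Buffer 0 is special: it holds the synthesized "." and ".."
+			 * entries and corresponds to cookie 0.  Every later buffer's
+			 * block number equals the d_seekoff of the last entry packed
+			 * into the previous buffer, so any buffer can be re-read by
+			 * issuing READDIR with cookie = bp->nb_lblkno.)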
+ */ +nextbuffer: + ndbhp->ndbh_flags |= NDB_FULL; + nfs_buf_release(bp, 0); + bp_dropped = 1; + bp = NULL; + error = nfs_buf_get(dnp, lastcookie, NFS_DIRBLKSIZ, vfs_context_thread(ctx), NBLK_READ, &bp); + nfsmout_if(error); + /* initialize buffer */ + ndbhp = (struct nfs_dir_buf_header*)bp->nb_data; + ndbhp->ndbh_flags = 0; + ndbhp->ndbh_count = 0; + ndbhp->ndbh_entry_end = sizeof(*ndbhp); + ndbhp->ndbh_ncgen = dnp->n_ncgen; + space_free = nfs_dir_buf_freespace(bp, rdirplus); + dp = NFS_DIR_BUF_FIRST_DIRENTRY(bp); + /* increment with every buffer read */ + OSAddAtomic(1, &nfsstats.readdir_bios); } - // LP64todo - fix this! - dp = (struct dirent *) CAST_DOWN(caddr_t, uio_iov_base(uiop)); - dp->d_fileno = 0; - dp->d_namlen = len; - dp->d_reclen = tlen + DIRHDSIZ; + nmrepsave = nmrep; + dp->d_fileno = cookie; /* placeholder */ + dp->d_seekoff = cookie; + dp->d_namlen = namlen; + dp->d_reclen = reclen; dp->d_type = DT_UNKNOWN; - blksiz += dp->d_reclen; - if (blksiz == DIRBLKSIZ) - blksiz = 0; - uiop->uio_offset += DIRHDSIZ; -#if LP64KERN - uio_uio_resid_add(uiop, -((int64_t)DIRHDSIZ)); - uio_iov_len_add(uiop, -((int64_t)DIRHDSIZ)); -#else - uio_uio_resid_add(uiop, -((int)DIRHDSIZ)); - uio_iov_len_add(uiop, -((int)DIRHDSIZ)); -#endif - uio_iov_base_add(uiop, DIRHDSIZ); - // LP64todo - fix this! - cnp->cn_nameptr = CAST_DOWN(caddr_t, uio_iov_base(uiop)); - cnp->cn_namelen = len; - error = nfsm_chain_get_uio(&nmrep, len, uiop); + nfsm_chain_get_opaque(error, &nmrep, namlen, dp->d_name); + nfsmout_if(error); + dp->d_name[namlen] = '\0'; if (skiplen) nfsm_chain_adv(error, &nmrep, - nfsm_rndup(len + skiplen) - nfsm_rndup(len)); + nfsm_rndup(namlen + skiplen) - nfsm_rndup(namlen)); nfsmout_if(error); - NFS_CLEAR_ATTRIBUTES(nvattr.nva_bitmap); - error = nfs4_parsefattr(&nmrep, NULL, &nvattr, &fh, NULL); - if (error && NFS_BITMAP_ISSET(nvattr.nva_bitmap, NFS_FATTR_RDATTR_ERROR)) { + nvattrp = rdirplus ? NFS_DIR_BUF_NVATTR(bp, ndbhp->ndbh_count) : &nvattr; + NFS_CLEAR_ATTRIBUTES(nvattrp->nva_bitmap); + error = nfs4_parsefattr(&nmrep, NULL, nvattrp, &fh, NULL); + if (error && NFS_BITMAP_ISSET(nvattrp->nva_bitmap, NFS_FATTR_RDATTR_ERROR)) { /* OK, we didn't get attributes, whatever... */ - NFS_CLEAR_ATTRIBUTES(nvattr.nva_bitmap); + if (rdirplus) /* mark the attributes invalid */ + bzero(nvattrp, sizeof(struct nfs_vattr)); + else + NFS_CLEAR_ATTRIBUTES(nvattrp->nva_bitmap); error = 0; } + /* check for more entries after this one */ nfsm_chain_get_32(error, &nmrep, more_entries); nfsmout_if(error); - cp = CAST_DOWN(caddr_t, uio_iov_base(uiop)); - tlen -= len; - *cp = '\0'; - uio_iov_base_add(uiop, tlen); - uio_iov_len_add(uiop, -tlen); - uiop->uio_offset += tlen; - uio_uio_resid_add(uiop, -tlen); - - /* - * Skip any "." and ".." entries returned from server. - * (Actually, just leave it in place with d_fileno == 0.) - */ - if ((cnp->cn_nameptr[0] == '.') && - ((len == 1) || ((len == 2) && (cnp->cn_nameptr[1] == '.')))) { - /* clear the name too */ - dp->d_namlen = 0; - dp->d_name[0] = '\0'; + /* Skip any "." and ".." entries returned from server. 
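+			 * Both were already synthesized at the front of buffer 0, so
+			 * duplicates would throw off the cookie bookkeeping.  The test
+			 * below is equivalent to, in plain form:
+			 *
+			 *	!strcmp(dp->d_name, ".") || !strcmp(dp->d_name, "..")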
*/ + if ((dp->d_name[0] == '.') && ((namlen == 1) || ((namlen == 2) && (dp->d_name[1] == '.')))) { + lastcookie = cookie; continue; } - if (NFS_BITMAP_ISSET(nvattr.nva_bitmap, NFS_FATTR_TYPE)) - dp->d_type = IFTODT(VTTOIF(nvattr.nva_type)); - if (NFS_BITMAP_ISSET(nvattr.nva_bitmap, NFS_FATTR_FILEID)) - dp->d_fileno = (int)nvattr.nva_fileid; - if (rdirplus && NFS_BITMAP_ISSET(nvattr.nva_bitmap, NFS_FATTR_FILEHANDLE) && - !NFS_CMPFH(dnp, fh.fh_data, fh.fh_len)) { - cnp->cn_hash = 0; - error = nfs_nget(NFSTOMP(dnp), dnp, cnp, - fh.fh_data, fh.fh_len, &nvattr, &xid, NG_MAKEENTRY, &np); - if (!error) { - nfs_unlock(np); - vnode_put(NFSTOV(np)); + if (NFS_BITMAP_ISSET(nvattrp->nva_bitmap, NFS_FATTR_TYPE)) + dp->d_type = IFTODT(VTTOIF(nvattrp->nva_type)); + if (NFS_BITMAP_ISSET(nvattrp->nva_bitmap, NFS_FATTR_FILEID)) + dp->d_fileno = nvattrp->nva_fileid; + if (rdirplus) { + /* fileid is already in d_fileno, so stash xid in attrs */ + nvattrp->nva_fileid = savedxid; + if (NFS_BITMAP_ISSET(nvattrp->nva_bitmap, NFS_FATTR_FILEHANDLE)) { + fhlen = fh.fh_len + 1; + xlen = fhlen + sizeof(time_t); + reclen = NFS_DIRENTRY_LEN(namlen + xlen); + space_needed = reclen + attrlen; + if (space_needed > space_free) { + /* didn't actually have the room... move on to next buffer */ + nmrep = nmrepsave; + goto nextbuffer; + } + /* pack the file handle into the record */ + dp->d_name[dp->d_namlen+1] = fh.fh_len; + bcopy(fh.fh_data, &dp->d_name[dp->d_namlen+2], fh.fh_len); + } else { + /* mark the file handle invalid */ + fh.fh_len = 0; + fhlen = fh.fh_len + 1; + xlen = fhlen + sizeof(time_t); + reclen = NFS_DIRENTRY_LEN(namlen + xlen); + bzero(&dp->d_name[dp->d_namlen+1], fhlen); } + *(time_t*)(&dp->d_name[dp->d_namlen+1+fhlen]) = now.tv_sec; + dp->d_reclen = reclen; } - nfsmout_if(error); + padstart = dp->d_name + dp->d_namlen + 1 + xlen; + ndbhp->ndbh_count++; + lastcookie = cookie; + + /* advance to next direntry in buffer */ + dp = NFS_DIRENTRY_NEXT(dp); + ndbhp->ndbh_entry_end = (char*)dp - bp->nb_data; + /* zero out the pad bytes */ + padlen = (char*)dp - padstart; + if (padlen > 0) + bzero(padstart, padlen); + } + /* Finally, get the eof boolean */ + nfsm_chain_get_32(error, &nmrep, eof); + nfsmout_if(error); + if (eof) { + ndbhp->ndbh_flags |= (NDB_FULL|NDB_EOF); + nfs_node_lock_force(dnp); + dnp->n_eofcookie = lastcookie; + nfs_node_unlock(dnp); + } else { + more_entries = 1; } - /* If at end of rpc data, get the eof boolean */ - if (!more_entries) { - nfsm_chain_get_32(error, &nmrep, eof); - if (!error) - more_entries = (eof == 0); + if (bp_dropped) { + nfs_buf_release(bp, 0); + bp = NULL; + break; } - if ((lockerror = nfs_lock(dnp, NFS_NODE_LOCK_SHARED))) + if ((lockerror = nfs_node_lock(dnp))) error = lockerror; nfsmout_if(error); nfsm_chain_cleanup(&nmrep); + nfsm_chain_null(&nmreq); } - nfs_unlock(dnp); - /* - * Fill last record, iff any, out to a multiple of DIRBLKSIZ - * by increasing d_reclen for the last record. - */ - if (blksiz > 0) { - left = DIRBLKSIZ - blksiz; - dp->d_reclen += left; - uio_iov_base_add(uiop, left); - uio_iov_len_add(uiop, -left); - uiop->uio_offset += left; - uio_uio_resid_add(uiop, -left); - } - - if ((lockerror = nfs_lock(dnp, NFS_NODE_LOCK_EXCLUSIVE))) - error = lockerror; - nfsmout_if(error); - - /* - * We are now either at the end of the directory or have filled the - * block. - */ - if (bigenough) - dnp->n_direofoffset = uiop->uio_offset; - else { - if (uio_uio_resid(uiop) > 0) - printf("EEK! 
nfs4_readdir_rpc resid > 0\n"); - cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1); - if (cookiep) - *cookiep = cookie; - } - - nfs_unlock(dnp); nfsmout: + if (bp_dropped && bp) + nfs_buf_release(bp, 0); + if (!lockerror) + nfs_node_unlock(dnp); nfsm_chain_cleanup(&nmreq); nfsm_chain_cleanup(&nmrep); - return (error); + return (bp_dropped ? NFSERR_DIRBUFDROPPED : error); } int @@ -1041,7 +1061,7 @@ nfs4_lookup_rpc_async_finish( fhandle_t *fhp, struct nfs_vattr *nvap) { - int error = 0, status, nfsvers, numops; + int error = 0, lockerror = ENOENT, status, nfsvers, numops; uint32_t val = 0; u_int64_t xid; struct nfsmount *nmp; @@ -1054,6 +1074,8 @@ nfs4_lookup_rpc_async_finish( error = nfs_request_async_finish(req, &nmrep, &xid, &status); + if ((lockerror = nfs_node_lock(dnp))) + error = lockerror; nfsm_chain_skip_tag(error, &nmrep); nfsm_chain_get_32(error, &nmrep, numops); nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH); @@ -1078,6 +1100,8 @@ nfs4_lookup_rpc_async_finish( goto nfsmout; } nfsmout: + if (!lockerror) + nfs_node_unlock(dnp); nfsm_chain_cleanup(&nmrep); return (error); } @@ -1132,7 +1156,7 @@ nfs4_commit_rpc( error = nfs_request2(np, NULL, &nmreq, NFSPROC4_COMPOUND, current_thread(), cred, 0, &nmrep, &xid, &status); - if ((lockerror = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE))) + if ((lockerror = nfs_node_lock(np))) error = lockerror; nfsm_chain_skip_tag(error, &nmrep); nfsm_chain_get_32(error, &nmrep, numops); @@ -1142,7 +1166,7 @@ nfs4_commit_rpc( nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR); nfsm_chain_loadattr(error, &nmrep, np, nfsvers, NULL, &xid); if (!lockerror) - nfs_unlock(np); + nfs_node_unlock(np); nfsmout_if(error); lck_mtx_lock(&nmp->nm_lock); if (nmp->nm_verf != wverf) { @@ -1177,7 +1201,8 @@ nfs4_pathconf_rpc( nfsm_chain_null(&nmrep); /* NFSv4: fetch "pathconf" info for this node */ - numops = 2; // PUTFH + GETATTR + // PUTFH, GETATTR + numops = 2; nfsm_chain_build_alloc_init(error, &nmreq, 16 * NFSX_UNSIGNED); nfsm_chain_add_compound_header(error, &nmreq, "pathconf", numops); numops--; @@ -1207,11 +1232,12 @@ nfs4_pathconf_rpc( NFS_CLEAR_ATTRIBUTES(nvattr.nva_bitmap); error = nfs4_parsefattr(&nmrep, nfsap, &nvattr, NULL, NULL); nfsmout_if(error); - if ((lockerror = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE))) + if ((lockerror = nfs_node_lock(np))) error = lockerror; - nfs_loadattrcache(np, &nvattr, &xid, 0); + if (!error) + nfs_loadattrcache(np, &nvattr, &xid, 0); if (!lockerror) - nfs_unlock(np); + nfs_node_unlock(np); nfsmout: nfsm_chain_cleanup(&nmreq); nfsm_chain_cleanup(&nmrep); @@ -1231,7 +1257,7 @@ nfs4_vnop_getattr( struct nfs_vattr nva; int error; - error = nfs_getattr(VTONFS(ap->a_vp), &nva, ap->a_context, 0); + error = nfs_getattr(VTONFS(ap->a_vp), &nva, ap->a_context, NGA_CACHED); if (error) return (error); @@ -1308,14 +1334,14 @@ int nfs4_setattr_rpc( nfsnode_t np, struct vnode_attr *vap, - vfs_context_t ctx, - int alreadylocked) + vfs_context_t ctx) { struct nfsmount *nmp = NFSTONMP(np); int error = 0, lockerror = ENOENT, status, nfsvers, numops; - u_int64_t xid; + u_int64_t xid, nextxid; struct nfsm_chain nmreq, nmrep; - uint32_t bitmap[NFS_ATTR_BITMAP_LEN], bmlen, stateid; + uint32_t bitmap[NFS_ATTR_BITMAP_LEN], bmlen; + nfs_stateid stateid; if (!nmp) return (ENXIO); @@ -1342,13 +1368,10 @@ nfs4_setattr_rpc( numops--; nfsm_chain_add_32(error, &nmreq, NFS_OP_SETATTR); if (VATTR_IS_ACTIVE(vap, va_data_size)) - stateid = 0xffffffff; /* XXX use the special stateid for now */ + nfs_get_stateid(np, vfs_context_thread(ctx), vfs_context_ucred(ctx), 
&stateid); else - stateid = 0; - nfsm_chain_add_32(error, &nmreq, stateid); - nfsm_chain_add_32(error, &nmreq, stateid); - nfsm_chain_add_32(error, &nmreq, stateid); - nfsm_chain_add_32(error, &nmreq, stateid); + stateid.seqid = stateid.other[0] = stateid.other[1] = stateid.other[2] = 0; + nfsm_chain_add_stateid(error, &nmreq, &stateid); nfsm_chain_add_fattr4(error, &nmreq, vap, nmp); numops--; nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR); @@ -1359,7 +1382,7 @@ nfs4_setattr_rpc( nfsmout_if(error); error = nfs_request(np, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &nmrep, &xid, &status); - if (!alreadylocked && ((lockerror = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE)))) + if ((lockerror = nfs_node_lock(np))) error = lockerror; nfsm_chain_skip_tag(error, &nmrep); nfsm_chain_get_32(error, &nmrep, numops); @@ -1373,333 +1396,3618 @@ nfs4_setattr_rpc( nfsm_chain_loadattr(error, &nmrep, np, nfsvers, NULL, &xid); if (error) NATTRINVALIDATE(np); + /* + * We just changed the attributes and we want to make sure that we + * see the latest attributes. Get the next XID. If it's not the + * next XID after the SETATTR XID, then it's possible that another + * RPC was in flight at the same time and it might put stale attributes + * in the cache. In that case, we invalidate the attributes and set + * the attribute cache XID to guarantee that newer attributes will + * get loaded next. + */ + nextxid = 0; + nfs_get_xid(&nextxid); + if (nextxid != (xid + 1)) { + np->n_xid = nextxid; + NATTRINVALIDATE(np); + } nfsmout: - if (!alreadylocked && !lockerror) - nfs_unlock(np); + if (!lockerror) + nfs_node_unlock(np); nfsm_chain_cleanup(&nmreq); nfsm_chain_cleanup(&nmrep); return (error); } +/* + * Wait for any pending recovery to complete. + */ int -nfs4_vnop_open(struct vnop_open_args *ap) +nfs_mount_state_wait_for_recovery(struct nfsmount *nmp) { - return nfs3_vnop_open(ap); + struct timespec ts = { 1, 0 }; + int error = 0, slpflag = (nmp->nm_flag & NFSMNT_INT) ? PCATCH : 0; + + lck_mtx_lock(&nmp->nm_lock); + while (nmp->nm_state & NFSSTA_RECOVER) { + if ((error = nfs_sigintr(nmp, NULL, current_thread(), 1))) + break; + nfs_mount_sock_thread_wake(nmp); + msleep(&nmp->nm_state, &nmp->nm_lock, slpflag|(PZERO-1), "nfsrecoverwait", &ts); + } + lck_mtx_unlock(&nmp->nm_lock); + + return (error); } +/* + * We're about to use/manipulate NFS mount's open/lock state. + * Wait for any pending state recovery to complete, then + * mark the state as being in use (which will hold off + * the recovery thread until we're done). + */ int -nfs4_vnop_close(struct vnop_close_args *ap) +nfs_mount_state_in_use_start(struct nfsmount *nmp) { - return nfs3_vnop_close(ap); + struct timespec ts = { 1, 0 }; + int error = 0, slpflag = (nmp->nm_flag & NFSMNT_INT) ? PCATCH : 0; + + if (!nmp) + return (ENXIO); + lck_mtx_lock(&nmp->nm_lock); + while (nmp->nm_state & NFSSTA_RECOVER) { + if ((error = nfs_sigintr(nmp, NULL, current_thread(), 1))) + break; + nfs_mount_sock_thread_wake(nmp); + msleep(&nmp->nm_state, &nmp->nm_lock, slpflag|(PZERO-1), "nfsrecoverwait", &ts); + } + if (!error) + nmp->nm_stateinuse++; + lck_mtx_unlock(&nmp->nm_lock); + + return (error); } +/* + * We're done using/manipulating the NFS mount's open/lock + * state. If the given error indicates that recovery should + * be performed, we'll initiate recovery. 
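+ * The expected caller pattern is a restart loop built from these two
+ * helpers (a minimal sketch; the open/close paths below use exactly this):
+ *
+ *	restart:
+ *		if ((error = nfs_mount_state_in_use_start(nmp)))
+ *			return (error);
+ *		error = ...state-dependent RPC(s)...;
+ *		if (nfs_mount_state_in_use_end(nmp, error))
+ *			goto restart;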
+ */ int -nfs4_vnop_advlock(__unused struct vnop_advlock_args *ap) +nfs_mount_state_in_use_end(struct nfsmount *nmp, int error) { - return (ENOSYS); + int restart = nfs_mount_state_error_should_restart(error); + + if (!nmp) + return (restart); + lck_mtx_lock(&nmp->nm_lock); + if (restart && (error != NFSERR_OLD_STATEID) && (error != NFSERR_GRACE)) { + if (!(nmp->nm_state & NFSSTA_RECOVER)) { + printf("nfs_mount_state_in_use_end: error %d, initiating recovery\n", error); + nmp->nm_state |= NFSSTA_RECOVER; + nfs_mount_sock_thread_wake(nmp); + } + } + if (nmp->nm_stateinuse > 0) + nmp->nm_stateinuse--; + else + panic("NFS mount state in use count underrun"); + if (!nmp->nm_stateinuse && (nmp->nm_state & NFSSTA_RECOVER)) + wakeup(&nmp->nm_stateinuse); + lck_mtx_unlock(&nmp->nm_lock); + if (error == NFSERR_GRACE) + tsleep(&nmp->nm_state, (PZERO-1), "nfsgrace", 2*hz); + + return (restart); } /* - * Note: the NFSv4 CREATE RPC is for everything EXCEPT regular files. - * Files are created using the NFSv4 OPEN RPC. So we must open the - * file to create it and then close it immediately. + * Does the error mean we should restart/redo a state-related operation? */ int -nfs4_vnop_create( - struct vnop_create_args /* { - struct vnodeop_desc *a_desc; - vnode_t a_dvp; - vnode_t *a_vpp; - struct componentname *a_cnp; - struct vnode_attr *a_vap; - vfs_context_t a_context; - } */ *ap) +nfs_mount_state_error_should_restart(int error) { - vfs_context_t ctx = ap->a_context; - struct componentname *cnp = ap->a_cnp; - struct vnode_attr *vap = ap->a_vap; - vnode_t dvp = ap->a_dvp; - vnode_t *vpp = ap->a_vpp; - struct nfsmount *nmp; - struct nfs_vattr nvattr, dnvattr; - int error = 0, create_error = EIO, lockerror = ENOENT, status; - int nfsvers, numops; - u_int64_t xid, savedxid = 0; - nfsnode_t dnp = VTONFS(dvp); - nfsnode_t np = NULL; - vnode_t newvp = NULL; - struct nfsm_chain nmreq, nmrep; - uint32_t bitmap[NFS_ATTR_BITMAP_LEN], bmlen; - uint32_t seqid, stateid[4], rflags, delegation, val; - fhandle_t fh; - struct nfsreq *req = NULL; - struct nfs_dulookup dul; + switch (error) { + case NFSERR_STALE_STATEID: + case NFSERR_STALE_CLIENTID: + case NFSERR_ADMIN_REVOKED: + case NFSERR_EXPIRED: + case NFSERR_OLD_STATEID: + case NFSERR_BAD_STATEID: + case NFSERR_GRACE: + return (1); + } + return (0); +} - static uint32_t nfs4_open_owner_hack = 0; +/* + * In some cases we may want to limit how many times we restart a + * state-related operation - e.g. we're repeatedly getting NFSERR_GRACE. + * Base the limit on the lease (as long as it's not too short). + */ +uint +nfs_mount_state_max_restarts(struct nfsmount *nmp) +{ + return (MAX(nmp->nm_fsattr.nfsa_lease, 60)); +} - nmp = VTONMP(dvp); + +/* + * Mark an NFS node's open state as busy. + */ +int +nfs_open_state_set_busy(nfsnode_t np, vfs_context_t ctx) +{ + struct nfsmount *nmp; + thread_t thd = vfs_context_thread(ctx); + struct timespec ts = {2, 0}; + int error = 0, slpflag; + + nmp = NFSTONMP(np); if (!nmp) return (ENXIO); - nfsvers = nmp->nm_vers; + slpflag = (nmp->nm_flag & NFSMNT_INT) ? 
PCATCH : 0; - seqid = stateid[0] = stateid[1] = stateid[2] = stateid[3] = 0; - rflags = 0; + lck_mtx_lock(&np->n_openlock); + while (np->n_openflags & N_OPENBUSY) { + if ((error = nfs_sigintr(nmp, NULL, thd, 0))) + break; + np->n_openflags |= N_OPENWANT; + msleep(&np->n_openflags, &np->n_openlock, slpflag, "nfs_open_state_set_busy", &ts); + } + if (!error) + np->n_openflags |= N_OPENBUSY; + lck_mtx_unlock(&np->n_openlock); - nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen); + return (error); +} - nfsm_chain_null(&nmreq); - nfsm_chain_null(&nmrep); +/* + * Clear an NFS node's open state busy flag and wake up + * anyone wanting it. + */ +void +nfs_open_state_clear_busy(nfsnode_t np) +{ + int wanted; + + lck_mtx_lock(&np->n_openlock); + if (!(np->n_openflags & N_OPENBUSY)) + panic("nfs_open_state_clear_busy"); + wanted = (np->n_openflags & N_OPENWANT); + np->n_openflags &= ~(N_OPENBUSY|N_OPENWANT); + lck_mtx_unlock(&np->n_openlock); + if (wanted) + wakeup(&np->n_openflags); +} - // PUTFH, SAVEFH, OPEN(CREATE), GETATTR(FH), RESTOREFH, GETATTR - numops = 6; - nfsm_chain_build_alloc_init(error, &nmreq, 53 * NFSX_UNSIGNED + cnp->cn_namelen); - nfsm_chain_add_compound_header(error, &nmreq, "create", numops); - numops--; - nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH); - nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize); - numops--; - nfsm_chain_add_32(error, &nmreq, NFS_OP_SAVEFH); - numops--; - nfsm_chain_add_32(error, &nmreq, NFS_OP_OPEN); - nfsm_chain_add_32(error, &nmreq, seqid); - seqid++; - nfsm_chain_add_32(error, &nmreq, NFS_OPEN_SHARE_ACCESS_BOTH); - nfsm_chain_add_32(error, &nmreq, NFS_OPEN_SHARE_DENY_NONE); - nfsm_chain_add_64(error, &nmreq, nmp->nm_clientid); // open_owner4.clientid - OSAddAtomic(1, (SInt32*)&nfs4_open_owner_hack); - nfsm_chain_add_32(error, &nmreq, sizeof(nfs4_open_owner_hack)); - nfsm_chain_add_opaque(error, &nmreq, &nfs4_open_owner_hack, sizeof(nfs4_open_owner_hack)); // open_owner4.owner - // openflag4 - nfsm_chain_add_32(error, &nmreq, NFS_OPEN_CREATE); - nfsm_chain_add_32(error, &nmreq, NFS_CREATE_UNCHECKED); // XXX exclusive/guarded - nfsm_chain_add_fattr4(error, &nmreq, vap, nmp); - // open_claim4 - nfsm_chain_add_32(error, &nmreq, NFS_CLAIM_NULL); - nfsm_chain_add_string(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen); - numops--; - nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR); - NFS_COPY_ATTRIBUTES(nfs_getattr_bitmap, bitmap); - NFS_BITMAP_SET(bitmap, NFS_FATTR_FILEHANDLE); - nfsm_chain_add_bitmap_masked(error, &nmreq, bitmap, - NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr); - numops--; - nfsm_chain_add_32(error, &nmreq, NFS_OP_RESTOREFH); - numops--; - nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR); - nfsm_chain_add_bitmap_masked(error, &nmreq, nfs_getattr_bitmap, - NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr); - nfsm_chain_build_done(error, &nmreq); - nfsm_assert(error, (numops == 0), EPROTO); - nfsmout_if(error); - if ((lockerror = nfs_lock(dnp, NFS_NODE_LOCK_EXCLUSIVE))) - error = lockerror; - nfsmout_if(error); +/* + * Search a mount's open owner list for the owner for this credential. + * If not found and "alloc" is set, then allocate a new one. 
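+ * The owner handed back holds a reference; callers are expected to drop
+ * it with nfs_open_owner_rele() when done, e.g. (sketch, mirroring the
+ * open path below):
+ *
+ *	noop = nfs_open_owner_find(nmp, vfs_context_ucred(ctx), 1);
+ *	if (!noop)
+ *		return (ENOMEM);
+ *	...use noop...
+ *	nfs_open_owner_rele(noop);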
+ */ +struct nfs_open_owner * +nfs_open_owner_find(struct nfsmount *nmp, kauth_cred_t cred, int alloc) +{ + uid_t uid = kauth_cred_getuid(cred); + struct nfs_open_owner *noop, *newnoop = NULL; - error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC4_COMPOUND, - vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, &req); - if (!error) { - nfs_dulookup_start(&dul, dnp, ctx); - error = nfs_request_async_finish(req, &nmrep, &xid, &status); +tryagain: + lck_mtx_lock(&nmp->nm_lock); + TAILQ_FOREACH(noop, &nmp->nm_open_owners, noo_link) { + if (kauth_cred_getuid(noop->noo_cred) == uid) + break; } - savedxid = xid; - nfsm_chain_skip_tag(error, &nmrep); - nfsm_chain_get_32(error, &nmrep, numops); - nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH); - nfsm_chain_op_check(error, &nmrep, NFS_OP_SAVEFH); - nfsm_chain_op_check(error, &nmrep, NFS_OP_OPEN); - nfsm_chain_get_32(error, &nmrep, stateid[0]); - nfsm_chain_get_32(error, &nmrep, stateid[1]); - nfsm_chain_get_32(error, &nmrep, stateid[2]); - nfsm_chain_get_32(error, &nmrep, stateid[3]); - nfsm_chain_check_change_info(error, &nmrep, dnp); - nfsm_chain_get_32(error, &nmrep, rflags); - bmlen = NFS_ATTR_BITMAP_LEN; - nfsm_chain_get_bitmap(error, &nmrep, bitmap, bmlen); - nfsm_chain_get_32(error, &nmrep, delegation); - if (!error) - switch (delegation) { - case NFS_OPEN_DELEGATE_NONE: - break; - case NFS_OPEN_DELEGATE_READ: - printf("nfs4_vnop_create: read delegation?\n"); - nfsm_chain_adv(error, &nmrep, 5*NFSX_UNSIGNED); - // ACE: - nfsm_chain_adv(error, &nmrep, 3 * NFSX_UNSIGNED); - nfsm_chain_get_32(error, &nmrep, val); /* string length */ - nfsm_chain_adv(error, &nmrep, nfsm_rndup(val)); - break; - case NFS_OPEN_DELEGATE_WRITE: - printf("nfs4_vnop_create: write delegation?\n"); - nfsm_chain_adv(error, &nmrep, 5*NFSX_UNSIGNED); - nfsm_chain_adv(error, &nmrep, 3*NFSX_UNSIGNED); - // ACE: - nfsm_chain_adv(error, &nmrep, 3 * NFSX_UNSIGNED); - nfsm_chain_get_32(error, &nmrep, val); /* string length */ - nfsm_chain_adv(error, &nmrep, nfsm_rndup(val)); - break; - default: - error = EBADRPC; - break; - } - /* At this point if we have no error, the object was created. */ - /* if we don't get attributes, then we should lookitup. 
*/ - create_error = error; - nfsmout_if(error); - nfs_vattr_set_supported(bitmap, vap); - nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR); - nfsmout_if(error); - NFS_CLEAR_ATTRIBUTES(nvattr.nva_bitmap); - error = nfs4_parsefattr(&nmrep, NULL, &nvattr, &fh, NULL); - nfsmout_if(error); - if (!NFS_BITMAP_ISSET(nvattr.nva_bitmap, NFS_FATTR_FILEHANDLE)) { - printf("nfs: open/create didn't return filehandle?\n"); + if (!noop && !newnoop && alloc) { + lck_mtx_unlock(&nmp->nm_lock); + MALLOC(newnoop, struct nfs_open_owner *, sizeof(struct nfs_open_owner), M_TEMP, M_WAITOK); + if (!newnoop) + return (NULL); + bzero(newnoop, sizeof(*newnoop)); + lck_mtx_init(&newnoop->noo_lock, nfs_open_grp, LCK_ATTR_NULL); + newnoop->noo_mount = nmp; + kauth_cred_ref(cred); + newnoop->noo_cred = cred; + newnoop->noo_name = OSAddAtomic(1, &nfs_open_owner_seqnum); + TAILQ_INIT(&newnoop->noo_opens); + goto tryagain; + } + if (!noop && newnoop) { + newnoop->noo_flags |= NFS_OPEN_OWNER_LINK; + TAILQ_INSERT_HEAD(&nmp->nm_open_owners, newnoop, noo_link); + noop = newnoop; + } + lck_mtx_unlock(&nmp->nm_lock); + + if (newnoop && (noop != newnoop)) + nfs_open_owner_destroy(newnoop); + + if (noop) + nfs_open_owner_ref(noop); + + return (noop); +} + +/* + * destroy an open owner that's no longer needed + */ +void +nfs_open_owner_destroy(struct nfs_open_owner *noop) +{ + if (noop->noo_cred) + kauth_cred_unref(&noop->noo_cred); + lck_mtx_destroy(&noop->noo_lock, nfs_open_grp); + FREE(noop, M_TEMP); +} + +/* + * acquire a reference count on an open owner + */ +void +nfs_open_owner_ref(struct nfs_open_owner *noop) +{ + lck_mtx_lock(&noop->noo_lock); + noop->noo_refcnt++; + lck_mtx_unlock(&noop->noo_lock); +} + +/* + * drop a reference count on an open owner and destroy it if + * it is no longer referenced and no longer on the mount's list. + */ +void +nfs_open_owner_rele(struct nfs_open_owner *noop) +{ + lck_mtx_lock(&noop->noo_lock); + if (noop->noo_refcnt < 1) + panic("nfs_open_owner_rele: no refcnt"); + noop->noo_refcnt--; + if (!noop->noo_refcnt && (noop->noo_flags & NFS_OPEN_OWNER_BUSY)) + panic("nfs_open_owner_rele: busy"); + /* XXX we may potentially want to clean up idle/unused open owner structures */ + if (noop->noo_refcnt || (noop->noo_flags & NFS_OPEN_OWNER_LINK)) { + lck_mtx_unlock(&noop->noo_lock); + return; + } + /* owner is no longer referenced or linked to mount, so destroy it */ + lck_mtx_unlock(&noop->noo_lock); + nfs_open_owner_destroy(noop); +} + +/* + * Mark an open owner as busy because we are about to + * start an operation that uses and updates open owner state. + */ +int +nfs_open_owner_set_busy(struct nfs_open_owner *noop, thread_t thd) +{ + struct nfsmount *nmp; + struct timespec ts = {2, 0}; + int error = 0, slpflag; + + nmp = noop->noo_mount; + if (!nmp) + return (ENXIO); + slpflag = (nmp->nm_flag & NFSMNT_INT) ? PCATCH : 0; + + lck_mtx_lock(&noop->noo_lock); + while (noop->noo_flags & NFS_OPEN_OWNER_BUSY) { + if ((error = nfs_sigintr(nmp, NULL, thd, 0))) + break; + noop->noo_flags |= NFS_OPEN_OWNER_WANT; + msleep(noop, &noop->noo_lock, slpflag, "nfs_open_owner_set_busy", &ts); + } + if (!error) + noop->noo_flags |= NFS_OPEN_OWNER_BUSY; + lck_mtx_unlock(&noop->noo_lock); + + return (error); +} + +/* + * Clear the busy flag on an open owner and wake up anyone waiting + * to mark it busy. 
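+ * Always paired with nfs_open_owner_set_busy() (sketch):
+ *
+ *	if ((error = nfs_open_owner_set_busy(noop, thd)))
+ *		return (error);
+ *	...use/update open owner state...
+ *	nfs_open_owner_clear_busy(noop);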
+ */ +void +nfs_open_owner_clear_busy(struct nfs_open_owner *noop) +{ + int wanted; + + lck_mtx_lock(&noop->noo_lock); + if (!(noop->noo_flags & NFS_OPEN_OWNER_BUSY)) + panic("nfs_open_owner_clear_busy"); + wanted = (noop->noo_flags & NFS_OPEN_OWNER_WANT); + noop->noo_flags &= ~(NFS_OPEN_OWNER_BUSY|NFS_OPEN_OWNER_WANT); + lck_mtx_unlock(&noop->noo_lock); + if (wanted) + wakeup(noop); +} + +/* + * Given an open/lock owner and an error code, increment the + * sequence ID if appropriate. + */ +void +nfs_owner_seqid_increment(struct nfs_open_owner *noop, struct nfs_lock_owner *nlop, int error) +{ + switch (error) { + case NFSERR_STALE_CLIENTID: + case NFSERR_STALE_STATEID: + case NFSERR_OLD_STATEID: + case NFSERR_BAD_STATEID: + case NFSERR_BAD_SEQID: + case NFSERR_BADXDR: + case NFSERR_RESOURCE: + case NFSERR_NOFILEHANDLE: + /* do not increment the open seqid on these errors */ + return; + } + if (noop) + noop->noo_seqid++; + if (nlop) + nlop->nlo_seqid++; +} + +/* + * Search a node's open file list for any conflicts with this request. + * Also find this open owner's open file structure. + * If not found and "alloc" is set, then allocate one. + */ +int +nfs_open_file_find( + nfsnode_t np, + struct nfs_open_owner *noop, + struct nfs_open_file **nofpp, + uint32_t accessMode, + uint32_t denyMode, + int alloc) +{ + struct nfs_open_file *nofp = NULL, *nofp2, *newnofp = NULL; + + if (!np) + goto alloc; +tryagain: + lck_mtx_lock(&np->n_openlock); + TAILQ_FOREACH(nofp2, &np->n_opens, nof_link) { + if (nofp2->nof_owner == noop) { + nofp = nofp2; + if (!accessMode) + break; + } + if ((accessMode & nofp2->nof_deny) || (denyMode & nofp2->nof_access)) { + /* This request conflicts with an existing open on this client. */ + lck_mtx_unlock(&np->n_openlock); + *nofpp = NULL; + return (EACCES); + } + } + + /* + * If this open owner doesn't have an open + * file structure yet, we create one for it. + */ + if (!nofp && !newnofp && alloc) { + lck_mtx_unlock(&np->n_openlock); +alloc: + MALLOC(newnofp, struct nfs_open_file *, sizeof(struct nfs_open_file), M_TEMP, M_WAITOK); + if (!newnofp) { + *nofpp = NULL; + return (ENOMEM); + } + bzero(newnofp, sizeof(*newnofp)); + lck_mtx_init(&newnofp->nof_lock, nfs_open_grp, LCK_ATTR_NULL); + newnofp->nof_owner = noop; + nfs_open_owner_ref(noop); + newnofp->nof_np = np; + lck_mtx_lock(&noop->noo_lock); + TAILQ_INSERT_HEAD(&noop->noo_opens, newnofp, nof_oolink); + lck_mtx_unlock(&noop->noo_lock); + if (np) + goto tryagain; + } + if (!nofp && newnofp) { + if (np) + TAILQ_INSERT_HEAD(&np->n_opens, newnofp, nof_link); + nofp = newnofp; + } + if (np) + lck_mtx_unlock(&np->n_openlock); + + if (newnofp && (nofp != newnofp)) + nfs_open_file_destroy(newnofp); + + *nofpp = nofp; + return (nofp ? 0 : ESRCH); +} + +/* + * Destroy an open file structure. + */ +void +nfs_open_file_destroy(struct nfs_open_file *nofp) +{ + lck_mtx_lock(&nofp->nof_owner->noo_lock); + TAILQ_REMOVE(&nofp->nof_owner->noo_opens, nofp, nof_oolink); + lck_mtx_unlock(&nofp->nof_owner->noo_lock); + nfs_open_owner_rele(nofp->nof_owner); + lck_mtx_destroy(&nofp->nof_lock, nfs_open_grp); + FREE(nofp, M_TEMP); +} + +/* + * Mark an open file as busy because we are about to + * start an operation that uses and updates open file state. + */ +int +nfs_open_file_set_busy(struct nfs_open_file *nofp, thread_t thd) +{ + struct nfsmount *nmp; + struct timespec ts = {2, 0}; + int error = 0, slpflag; + + nmp = nofp->nof_owner->noo_mount; + if (!nmp) + return (ENXIO); + slpflag = (nmp->nm_flag & NFSMNT_INT) ? 
PCATCH : 0; + + lck_mtx_lock(&nofp->nof_lock); + while (nofp->nof_flags & NFS_OPEN_FILE_BUSY) { + if ((error = nfs_sigintr(nmp, NULL, thd, 0))) + break; + nofp->nof_flags |= NFS_OPEN_FILE_WANT; + msleep(nofp, &nofp->nof_lock, slpflag, "nfs_open_file_set_busy", &ts); + } + if (!error) + nofp->nof_flags |= NFS_OPEN_FILE_BUSY; + lck_mtx_unlock(&nofp->nof_lock); + + return (error); +} + +/* + * Clear the busy flag on an open file and wake up anyone waiting + * to mark it busy. + */ +void +nfs_open_file_clear_busy(struct nfs_open_file *nofp) +{ + int wanted; + + lck_mtx_lock(&nofp->nof_lock); + if (!(nofp->nof_flags & NFS_OPEN_FILE_BUSY)) + panic("nfs_open_file_clear_busy"); + wanted = (nofp->nof_flags & NFS_OPEN_FILE_WANT); + nofp->nof_flags &= ~(NFS_OPEN_FILE_BUSY|NFS_OPEN_FILE_WANT); + lck_mtx_unlock(&nofp->nof_lock); + if (wanted) + wakeup(nofp); +} + +/* + * Get the current (delegation, lock, open, default) stateid for this node. + * If node has a delegation, use that stateid. + * If pid has a lock, use the lockowner's stateid. + * Or use the open file's stateid. + * If no open file, use a default stateid of all ones. + */ +void +nfs_get_stateid(nfsnode_t np, thread_t thd, kauth_cred_t cred, nfs_stateid *sid) +{ + struct nfsmount *nmp = NFSTONMP(np); + proc_t p = thd ? get_bsdthreadtask_info(thd) : current_thread(); // XXX async I/O requests don't have a thread + struct nfs_open_owner *noop = NULL; + struct nfs_open_file *nofp = NULL; + struct nfs_lock_owner *nlop = NULL; + nfs_stateid *s = NULL; + + if (np->n_openflags & N_DELEG_MASK) + s = &np->n_dstateid; + else if (p) + nlop = nfs_lock_owner_find(np, p, 0); + if (nlop && !TAILQ_EMPTY(&nlop->nlo_locks)) { + /* we hold locks, use lock stateid */ + s = &nlop->nlo_stateid; + } else if (((noop = nfs_open_owner_find(nmp, cred, 0))) && + (nfs_open_file_find(np, noop, &nofp, 0, 0, 0) == 0) && + !(nofp->nof_flags & NFS_OPEN_FILE_LOST) && + nofp->nof_access) { + /* we (should) have the file open, use open stateid */ + if (nofp->nof_flags & NFS_OPEN_FILE_REOPEN) + nfs4_reopen(nofp, thd); + if (!(nofp->nof_flags & NFS_OPEN_FILE_LOST)) + s = &nofp->nof_stateid; + } + + if (s) { + sid->seqid = s->seqid; + sid->other[0] = s->other[0]; + sid->other[1] = s->other[1]; + sid->other[2] = s->other[2]; + } else { + const char *vname = vnode_getname(NFSTOV(np)); + printf("nfs_get_stateid: no stateid for %s\n", vname ? vname : "???"); + vnode_putname(vname); + sid->seqid = sid->other[0] = sid->other[1] = sid->other[2] = 0xffffffff; + } + if (nlop) + nfs_lock_owner_rele(nlop); + if (noop) + nfs_open_owner_rele(noop); +} + +/* + * We always send the open RPC even if this open's mode is a subset of all + * the existing opens. This makes sure that we will always be able to do a + * downgrade to any of the open modes. + * + * Note: local conflicts should have already been checked. 
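+ * A typical call sequence (sketch, mirroring nfs4_vnop_open below):
+ *
+ *	error = nfs_open_file_find(np, noop, &nofp, accessMode, denyMode, 1);
+ *	if (!error)
+ *		error = nfs4_open(np, nofp, accessMode, denyMode, ctx);
+ *
+ * where the local share-mode conflict check happens in the find call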
(nfs_open_file_find) + */ +int +nfs4_open( + nfsnode_t np, + struct nfs_open_file *nofp, + uint32_t accessMode, + uint32_t denyMode, + vfs_context_t ctx) +{ + vnode_t vp = NFSTOV(np); + vnode_t dvp = NULL; + struct componentname cn; + const char *vname = NULL; + size_t namelen; + char smallname[128]; + char *filename = NULL; + int error = 0, readtoo = 0; + + dvp = vnode_getparent(vp); + vname = vnode_getname(vp); + if (!dvp || !vname) { + error = EIO; + goto out; + } + filename = &smallname[0]; + namelen = snprintf(filename, sizeof(smallname), "%s", vname); + if (namelen >= sizeof(smallname)) { + namelen++; /* snprintf result doesn't include '\0' */ + MALLOC(filename, char *, namelen, M_TEMP, M_WAITOK); + if (!filename) { + error = ENOMEM; + goto out; + } + snprintf(filename, namelen, "%s", vname); + } + bzero(&cn, sizeof(cn)); + cn.cn_nameptr = filename; + cn.cn_namelen = namelen; + + if (!(accessMode & NFS_OPEN_SHARE_ACCESS_READ)) { + /* + * Try to open it for read access too, + * so the buffer cache can read data. + */ + readtoo = 1; + accessMode |= NFS_OPEN_SHARE_ACCESS_READ; + } +tryagain: + error = nfs4_open_rpc(nofp, ctx, &cn, NULL, dvp, &vp, NFS_OPEN_NOCREATE, accessMode, denyMode); + if (error) { + if (!nfs_mount_state_error_should_restart(error) && readtoo) { + /* try again without the extra read access */ + accessMode &= ~NFS_OPEN_SHARE_ACCESS_READ; + readtoo = 0; + goto tryagain; + } + goto out; + } + nofp->nof_access |= accessMode; + nofp->nof_deny |= denyMode; + + if (denyMode == NFS_OPEN_SHARE_DENY_NONE) { + if (accessMode == NFS_OPEN_SHARE_ACCESS_READ) + nofp->nof_r++; + else if (accessMode == NFS_OPEN_SHARE_ACCESS_WRITE) + nofp->nof_w++; + else if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH) + nofp->nof_rw++; + } else if (denyMode == NFS_OPEN_SHARE_DENY_WRITE) { + if (accessMode == NFS_OPEN_SHARE_ACCESS_READ) + nofp->nof_r_dw++; + else if (accessMode == NFS_OPEN_SHARE_ACCESS_WRITE) + nofp->nof_w_dw++; + else if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH) + nofp->nof_rw_dw++; + } else { /* NFS_OPEN_SHARE_DENY_BOTH */ + if (accessMode == NFS_OPEN_SHARE_ACCESS_READ) + nofp->nof_r_drw++; + else if (accessMode == NFS_OPEN_SHARE_ACCESS_WRITE) + nofp->nof_w_drw++; + else if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH) + nofp->nof_rw_drw++; + } + nofp->nof_opencnt++; +out: + if (filename && (filename != &smallname[0])) + FREE(filename, M_TEMP); + if (vname) + vnode_putname(vname); + if (dvp != NULLVP) + vnode_put(dvp); + return (error); +} + + +int +nfs4_vnop_open( + struct vnop_open_args /* { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + int a_mode; + vfs_context_t a_context; + } */ *ap) +{ + vfs_context_t ctx = ap->a_context; + vnode_t vp = ap->a_vp; + nfsnode_t np = VTONFS(vp); + struct nfsmount *nmp; + int error, accessMode, denyMode, opened = 0; + struct nfs_open_owner *noop = NULL; + struct nfs_open_file *nofp = NULL; + + if (!(ap->a_mode & (FREAD|FWRITE))) + return (EINVAL); + + nmp = VTONMP(vp); + if (!nmp) + return (ENXIO); + + /* First, call the common code */ + if ((error = nfs3_vnop_open(ap))) + return (error); + + if (!vnode_isreg(vp)) { + /* Just mark that it was opened */ + lck_mtx_lock(&np->n_openlock); + np->n_openrefcnt++; + lck_mtx_unlock(&np->n_openlock); + return (0); + } + + /* mode contains some combination of: FREAD, FWRITE, O_SHLOCK, O_EXLOCK */ + accessMode = 0; + if (ap->a_mode & FREAD) + accessMode |= NFS_OPEN_SHARE_ACCESS_READ; + if (ap->a_mode & FWRITE) + accessMode |= NFS_OPEN_SHARE_ACCESS_WRITE; + if (ap->a_mode & O_EXLOCK) + denyMode = 
NFS_OPEN_SHARE_DENY_BOTH; + else if (ap->a_mode & O_SHLOCK) + denyMode = NFS_OPEN_SHARE_DENY_WRITE; + else + denyMode = NFS_OPEN_SHARE_DENY_NONE; + + noop = nfs_open_owner_find(nmp, vfs_context_ucred(ctx), 1); + if (!noop) + return (ENOMEM); + +restart: + error = nfs_mount_state_in_use_start(nmp); + if (error) { + nfs_open_owner_rele(noop); + return (error); + } + + error = nfs_open_file_find(np, noop, &nofp, accessMode, denyMode, 1); + if (!error && (nofp->nof_flags & NFS_OPEN_FILE_LOST)) { + const char *vname = vnode_getname(NFSTOV(np)); + printf("nfs_vnop_open: LOST %s\n", vname); + vnode_putname(vname); + error = EIO; + } + if (!error && (nofp->nof_flags & NFS_OPEN_FILE_REOPEN)) { + nfs_mount_state_in_use_end(nmp, 0); + nfs4_reopen(nofp, vfs_context_thread(ctx)); + nofp = NULL; + goto restart; + } + if (!error) + error = nfs_open_file_set_busy(nofp, vfs_context_thread(ctx)); + if (error) { + nofp = NULL; + goto out; + } + + /* + * If we just created the file and the modes match, then we simply use + * the open performed in the create. Otherwise, send the request. + */ + if ((nofp->nof_flags & NFS_OPEN_FILE_CREATE) && + (nofp->nof_creator == current_thread()) && + (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH) && + (denyMode == NFS_OPEN_SHARE_DENY_NONE)) { + nofp->nof_flags &= ~NFS_OPEN_FILE_CREATE; + nofp->nof_creator = NULL; + } else { + if (!opened) + error = nfs4_open(np, nofp, accessMode, denyMode, ctx); + if ((error == EACCES) && (nofp->nof_flags & NFS_OPEN_FILE_CREATE) && + (nofp->nof_creator == current_thread())) { + /* + * Ugh. This can happen if we just created the file with read-only + * perms and we're trying to open it for real with different modes + * (e.g. write-only or with a deny mode) and the server decides to + * not allow the second open because of the read-only perms. + * The best we can do is to just use the create's open. + * We may have access we don't need or we may not have a requested + * deny mode. We may log complaints later, but we'll try to avoid it. + */ + if (denyMode != NFS_OPEN_SHARE_DENY_NONE) { + const char *vname = vnode_getname(NFSTOV(np)); + printf("nfs4_vnop_open: deny mode foregone on create, %s\n", vname); + vnode_putname(vname); + } + nofp->nof_creator = NULL; + error = 0; + } + if (error) + goto out; + opened = 1; + /* + * If we had just created the file, we already had it open. + * If the actual open mode is less than what we grabbed at + * create time, then we'll downgrade the open here. 
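+ *
+ * For example (an illustrative scenario): the create grabbed
+ * ACCESS_BOTH/DENY_NONE, so a caller that only asked for FREAD is now
+ * holding more access than it requested; the nfs4_close() below
+ * releases the surplus create-time open, leaving just the open sent
+ * above for the requested mode.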
+ */
+ if ((nofp->nof_flags & NFS_OPEN_FILE_CREATE) &&
+ (nofp->nof_creator == current_thread())) {
+ error = nfs4_close(np, nofp, NFS_OPEN_SHARE_ACCESS_BOTH, NFS_OPEN_SHARE_DENY_NONE, ctx);
+ if (error) {
+ const char *vname = vnode_getname(NFSTOV(np));
+ printf("nfs_vnop_open: create close error %d, %s\n", error, vname);
+ vnode_putname(vname);
+ }
+ if (!nfs_mount_state_error_should_restart(error)) {
+ error = 0;
+ nofp->nof_flags &= ~NFS_OPEN_FILE_CREATE;
+ }
+ }
+ }
+
+out:
+ if (nofp)
+ nfs_open_file_clear_busy(nofp);
+ if (nfs_mount_state_in_use_end(nmp, error)) {
+ nofp = NULL;
+ goto restart;
+ }
+ if (noop)
+ nfs_open_owner_rele(noop);
+ if (error) {
+ const char *vname = vnode_getname(NFSTOV(np));
+ printf("nfs_vnop_open: error %d, %s\n", error, vname);
+ vnode_putname(vname);
+ }
+ return (error);
+}
+
+int
+nfs4_close(
+ nfsnode_t np,
+ struct nfs_open_file *nofp,
+ uint32_t accessMode,
+ uint32_t denyMode,
+ vfs_context_t ctx)
+{
+ struct nfs_lock_owner *nlop;
+ int error = 0, changed = 0, closed = 0;
+ uint32_t newAccessMode, newDenyMode;
+
+ /* warn if modes don't match current state */
+ if (((accessMode & nofp->nof_access) != accessMode) || ((denyMode & nofp->nof_deny) != denyMode)) {
+ const char *vname = vnode_getname(NFSTOV(np));
+ printf("nfs4_close: mode mismatch %d %d, current %d %d, %s\n",
+ accessMode, denyMode, nofp->nof_access, nofp->nof_deny, vname);
+ vnode_putname(vname);
+ }
+
+ /*
+ * If we're closing a write-only open, we may not have a write-only count
+ * if we also grabbed read access. So, check the read-write count.
+ */
+ if (denyMode == NFS_OPEN_SHARE_DENY_NONE) {
+ if ((accessMode == NFS_OPEN_SHARE_ACCESS_WRITE) &&
+ (nofp->nof_w == 0) && nofp->nof_rw)
+ accessMode = NFS_OPEN_SHARE_ACCESS_BOTH;
+ } else if (denyMode == NFS_OPEN_SHARE_DENY_WRITE) {
+ if ((accessMode == NFS_OPEN_SHARE_ACCESS_WRITE) &&
+ (nofp->nof_w_dw == 0) && nofp->nof_rw_dw)
+ accessMode = NFS_OPEN_SHARE_ACCESS_BOTH;
+ } else { /* NFS_OPEN_SHARE_DENY_BOTH */
+ if ((accessMode == NFS_OPEN_SHARE_ACCESS_WRITE) &&
+ (nofp->nof_w_drw == 0) && nofp->nof_rw_drw)
+ accessMode = NFS_OPEN_SHARE_ACCESS_BOTH;
+ }
+
+ /*
+ * Calculate new modes: a mode bit gets removed when only one count
+ * remains across all the corresponding counters.
+ */
+ newAccessMode = nofp->nof_access;
+ newDenyMode = nofp->nof_deny;
+ if ((accessMode & NFS_OPEN_SHARE_ACCESS_READ) &&
+ (newAccessMode & NFS_OPEN_SHARE_ACCESS_READ) &&
+ ((nofp->nof_r + nofp->nof_rw + nofp->nof_r_dw +
+ nofp->nof_rw_dw + nofp->nof_r_drw + nofp->nof_rw_drw) == 1)) {
+ newAccessMode &= ~NFS_OPEN_SHARE_ACCESS_READ;
+ changed = 1;
+ }
+ if ((accessMode & NFS_OPEN_SHARE_ACCESS_WRITE) &&
+ (newAccessMode & NFS_OPEN_SHARE_ACCESS_WRITE) &&
+ ((nofp->nof_w + nofp->nof_rw + nofp->nof_w_dw +
+ nofp->nof_rw_dw + nofp->nof_w_drw + nofp->nof_rw_drw) == 1)) {
+ newAccessMode &= ~NFS_OPEN_SHARE_ACCESS_WRITE;
+ changed = 1;
+ }
+ if ((denyMode & NFS_OPEN_SHARE_DENY_READ) &&
+ (newDenyMode & NFS_OPEN_SHARE_DENY_READ) &&
+ ((nofp->nof_r_drw + nofp->nof_w_drw + nofp->nof_rw_drw) == 1)) {
+ newDenyMode &= ~NFS_OPEN_SHARE_DENY_READ;
+ changed = 1;
+ }
+ if ((denyMode & NFS_OPEN_SHARE_DENY_WRITE) &&
+ (newDenyMode & NFS_OPEN_SHARE_DENY_WRITE) &&
+ ((nofp->nof_r_drw + nofp->nof_w_drw + nofp->nof_rw_drw +
+ nofp->nof_r_dw + nofp->nof_w_dw + nofp->nof_rw_dw) == 1)) {
+ newDenyMode &= ~NFS_OPEN_SHARE_DENY_WRITE;
+ changed = 1;
+ }
+
+
+ if ((newAccessMode == 0) || (nofp->nof_opencnt == 1)) {
+ /*
+ * No more access after this close, so clean up and close it.
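+ *
+ * For example (an illustrative scenario): a file opened exactly once
+ * with O_RDONLY has nof_r == 1 and all other counters zero, so
+ * newAccessMode drops to 0 here and a CLOSE RPC is sent; with two such
+ * opens, the first close just decrements nof_r and no RPC is needed.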
+ */ + closed = 1; + if (!(nofp->nof_flags & NFS_OPEN_FILE_LOST)) + error = nfs4_close_rpc(np, nofp, vfs_context_thread(ctx), vfs_context_ucred(ctx), 0); + if (error == NFSERR_LOCKS_HELD) { + /* + * Hmm... the server says we have locks we need to release first + * Find the lock owner and try to unlock everything. + */ + nlop = nfs_lock_owner_find(np, vfs_context_proc(ctx), 0); + if (nlop) { + nfs4_unlock_rpc(np, nlop, F_WRLCK, 0, UINT64_MAX, ctx); + nfs_lock_owner_rele(nlop); + } + error = nfs4_close_rpc(np, nofp, vfs_context_thread(ctx), vfs_context_ucred(ctx), 0); + } + } else if (changed) { + /* + * File is still open but with less access, so downgrade the open. + */ + if (!(nofp->nof_flags & NFS_OPEN_FILE_LOST)) + error = nfs4_open_downgrade_rpc(np, nofp, ctx); + } + + if (error) { + const char *vname = vnode_getname(NFSTOV(np)); + printf("nfs4_close: error %d, %s\n", error, vname); + vnode_putname(vname); + return (error); + } + + /* Decrement the corresponding open access/deny mode counter. */ + if (denyMode == NFS_OPEN_SHARE_DENY_NONE) { + if (accessMode == NFS_OPEN_SHARE_ACCESS_READ) { + if (nofp->nof_r == 0) + printf("nfs4_close: open(R) count underrun\n"); + else + nofp->nof_r--; + } else if (accessMode == NFS_OPEN_SHARE_ACCESS_WRITE) { + if (nofp->nof_w == 0) + printf("nfs4_close: open(W) count underrun\n"); + else + nofp->nof_w--; + } else if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH) { + if (nofp->nof_rw == 0) + printf("nfs4_close: open(RW) count underrun\n"); + else + nofp->nof_rw--; + } + } else if (denyMode == NFS_OPEN_SHARE_DENY_WRITE) { + if (accessMode == NFS_OPEN_SHARE_ACCESS_READ) { + if (nofp->nof_r_dw == 0) + printf("nfs4_close: open(R,DW) count underrun\n"); + else + nofp->nof_r_dw--; + } else if (accessMode == NFS_OPEN_SHARE_ACCESS_WRITE) { + if (nofp->nof_w_dw == 0) + printf("nfs4_close: open(W,DW) count underrun\n"); + else + nofp->nof_w_dw--; + } else if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH) { + if (nofp->nof_rw_dw == 0) + printf("nfs4_close: open(RW,DW) count underrun\n"); + else + nofp->nof_rw_dw--; + } + } else { /* NFS_OPEN_SHARE_DENY_BOTH */ + if (accessMode == NFS_OPEN_SHARE_ACCESS_READ) { + if (nofp->nof_r_drw == 0) + printf("nfs4_close: open(R,DRW) count underrun\n"); + else + nofp->nof_r_drw--; + } else if (accessMode == NFS_OPEN_SHARE_ACCESS_WRITE) { + if (nofp->nof_w_drw == 0) + printf("nfs4_close: open(W,DRW) count underrun\n"); + else + nofp->nof_w_drw--; + } else if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH) { + if (nofp->nof_rw_drw == 0) + printf("nfs4_close: open(RW,DRW) count underrun\n"); + else + nofp->nof_rw_drw--; + } + } + /* update the modes */ + nofp->nof_access = newAccessMode; + nofp->nof_deny = newDenyMode; + if (closed) { + if (nofp->nof_r || nofp->nof_w || + (nofp->nof_rw && !((nofp->nof_flags & NFS_OPEN_FILE_CREATE) && !nofp->nof_creator && (nofp->nof_rw == 1))) || + nofp->nof_r_dw || nofp->nof_w_dw || nofp->nof_rw_dw || + nofp->nof_r_drw || nofp->nof_w_drw || nofp->nof_rw_drw) + printf("nfs4_close: unexpected count: %u %u %u dw %u %u %u drw %u %u %u flags 0x%x\n", + nofp->nof_r, nofp->nof_w, nofp->nof_rw, + nofp->nof_r_dw, nofp->nof_w_dw, nofp->nof_rw_dw, + nofp->nof_r_drw, nofp->nof_w_drw, nofp->nof_rw_drw, + nofp->nof_flags); + /* clear out all open info, just to be safe */ + nofp->nof_access = nofp->nof_deny = 0; + nofp->nof_mmap_access = nofp->nof_mmap_deny = 0; + nofp->nof_r = nofp->nof_w = nofp->nof_rw = 0; + nofp->nof_r_dw = nofp->nof_w_dw = nofp->nof_rw_dw = 0; + nofp->nof_r_drw = nofp->nof_w_drw = nofp->nof_rw_drw = 
0; + nofp->nof_flags &= ~NFS_OPEN_FILE_CREATE; + /* XXX we may potentially want to clean up idle/unused open file structures */ + } + nofp->nof_opencnt--; + if (nofp->nof_flags & NFS_OPEN_FILE_LOST) { + error = EIO; + if (!nofp->nof_opencnt) + nofp->nof_flags &= ~NFS_OPEN_FILE_LOST; + const char *vname = vnode_getname(NFSTOV(np)); + printf("nfs_close: LOST%s, %s\n", !(nofp->nof_flags & NFS_OPEN_FILE_LOST) ? " (last)" : "", vname); + vnode_putname(vname); + } + return (error); +} + +int +nfs4_vnop_close( + struct vnop_close_args /* { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + int a_fflag; + vfs_context_t a_context; + } */ *ap) +{ + vfs_context_t ctx = ap->a_context; + vnode_t vp = ap->a_vp; + int fflag = ap->a_fflag; + int error, common_error, accessMode, denyMode; + nfsnode_t np = VTONFS(vp); + struct nfsmount *nmp; + struct nfs_open_owner *noop = NULL; + struct nfs_open_file *nofp = NULL; + + nmp = VTONMP(vp); + if (!nmp) + return (ENXIO); + + /* First, call the common code */ + common_error = nfs3_vnop_close(ap); + + if (!vnode_isreg(vp)) { + /* Just mark that it was closed */ + lck_mtx_lock(&np->n_openlock); + np->n_openrefcnt--; + lck_mtx_unlock(&np->n_openlock); + return (common_error); + } + + noop = nfs_open_owner_find(nmp, vfs_context_ucred(ctx), 0); + if (!noop) { + printf("nfs4_vnop_close: can't get open owner!\n"); + return (EIO); + } + +restart: + error = nfs_mount_state_in_use_start(nmp); + if (error) { + nfs_open_owner_rele(noop); + return (error); + } + + error = nfs_open_file_find(np, noop, &nofp, 0, 0, 0); + if (!error && (nofp->nof_flags & NFS_OPEN_FILE_REOPEN)) { + nfs_mount_state_in_use_end(nmp, 0); + nfs4_reopen(nofp, vfs_context_thread(ctx)); + nofp = NULL; + goto restart; + } + if (error) { + const char *vname = vnode_getname(NFSTOV(np)); + printf("nfs4_vnop_close: no open file for owner %d, %s\n", error, vname); + vnode_putname(vname); + error = EBADF; + goto out; + } + error = nfs_open_file_set_busy(nofp, vfs_context_thread(ctx)); + if (error) { + nofp = NULL; + goto out; + } + + /* fflag contains some combination of: FREAD, FWRITE, FHASLOCK */ + accessMode = 0; + if (fflag & FREAD) + accessMode |= NFS_OPEN_SHARE_ACCESS_READ; + if (fflag & FWRITE) + accessMode |= NFS_OPEN_SHARE_ACCESS_WRITE; +// XXX It would be nice if we still had the O_EXLOCK/O_SHLOCK flags that were on the open +// if (fflag & O_EXLOCK) +// denyMode = NFS_OPEN_SHARE_DENY_BOTH; +// else if (fflag & O_SHLOCK) +// denyMode = NFS_OPEN_SHARE_DENY_WRITE; +// else +// denyMode = NFS_OPEN_SHARE_DENY_NONE; + if (fflag & FHASLOCK) { + /* XXX assume FHASLOCK is for the deny mode and not flock */ + /* FHASLOCK flock will be unlocked in the close path, but the flag is not cleared. 
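+ * For example (an illustrative scenario): an open(2) with O_EXLOCK
+ * recorded NFS_OPEN_SHARE_DENY_BOTH, so nof_deny has the DENY_READ bit
+ * set and the code below reconstructs denyMode as DENY_BOTH, while
+ * O_SHLOCK left only DENY_WRITE set, giving DENY_WRITE.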
*/
+ if (nofp->nof_deny & NFS_OPEN_SHARE_DENY_READ)
+ denyMode = NFS_OPEN_SHARE_DENY_BOTH;
+ else if (nofp->nof_deny & NFS_OPEN_SHARE_DENY_WRITE)
+ denyMode = NFS_OPEN_SHARE_DENY_WRITE;
+ else
+ denyMode = NFS_OPEN_SHARE_DENY_NONE;
+ } else {
+ denyMode = NFS_OPEN_SHARE_DENY_NONE;
+ }
+
+ if (!accessMode) {
+ error = EINVAL;
+ goto out;
+ }
+
+ error = nfs4_close(np, nofp, accessMode, denyMode, ctx);
+ if (error) {
+ const char *vname = vnode_getname(NFSTOV(np));
+ printf("nfs_vnop_close: close error %d, %s\n", error, vname);
+ vnode_putname(vname);
+ }
+
+out:
+ if (nofp)
+ nfs_open_file_clear_busy(nofp);
+ if (nfs_mount_state_in_use_end(nmp, error)) {
+ nofp = NULL;
+ goto restart;
+ }
+ if (noop)
+ nfs_open_owner_rele(noop);
+ if (error) {
+ const char *vname = vnode_getname(NFSTOV(np));
+ printf("nfs_vnop_close: error %d, %s\n", error, vname);
+ vnode_putname(vname);
+ }
+ if (!error)
+ error = common_error;
+ return (error);
+}
+
+int
+nfs4_vnop_mmap(
+ struct vnop_mmap_args /* {
+ struct vnodeop_desc *a_desc;
+ vnode_t a_vp;
+ int a_fflags;
+ vfs_context_t a_context;
+ } */ *ap)
+{
+ vfs_context_t ctx = ap->a_context;
+ vnode_t vp = ap->a_vp;
+ nfsnode_t np = VTONFS(vp);
+ int error = 0, accessMode, denyMode;
+ struct nfsmount *nmp;
+ struct nfs_open_owner *noop = NULL;
+ struct nfs_open_file *nofp = NULL;
+
+ nmp = VTONMP(vp);
+ if (!nmp)
+ return (ENXIO);
+
+ if (!vnode_isreg(vp) || !(ap->a_fflags & (PROT_READ|PROT_WRITE)))
+ return (EINVAL);
+
+ /*
+ * fflags contains some combination of: PROT_READ, PROT_WRITE
+ * Since it's not possible to mmap() without having the file open for reading,
+ * read access is always there (regardless of whether PROT_READ is set).
+ */
+ accessMode = NFS_OPEN_SHARE_ACCESS_READ;
+ if (ap->a_fflags & PROT_WRITE)
+ accessMode |= NFS_OPEN_SHARE_ACCESS_WRITE;
+ denyMode = NFS_OPEN_SHARE_DENY_NONE;
+
+ noop = nfs_open_owner_find(nmp, vfs_context_ucred(ctx), 0);
+ if (!noop) {
+ printf("nfs4_vnop_mmap: no open owner\n");
+ return (EPERM);
+ }
+
+restart:
+ error = nfs_mount_state_in_use_start(nmp);
+ if (error) {
+ nfs_open_owner_rele(noop);
+ return (error);
+ }
+
+ error = nfs_open_file_find(np, noop, &nofp, 0, 0, 1);
+ if (error || (!error && (nofp->nof_flags & NFS_OPEN_FILE_LOST))) {
+ printf("nfs4_vnop_mmap: no open file for owner %d\n", error);
+ error = EPERM;
+ }
+ if (!error && (nofp->nof_flags & NFS_OPEN_FILE_REOPEN)) {
+ nfs_mount_state_in_use_end(nmp, 0);
+ nfs4_reopen(nofp, vfs_context_thread(ctx));
+ nofp = NULL;
+ goto restart;
+ }
+ if (!error)
+ error = nfs_open_file_set_busy(nofp, vfs_context_thread(ctx));
+ if (error) {
+ nofp = NULL;
+ goto out;
+ }
+
+ /*
+ * The open reference for mmap must mirror an existing open because
+ * we may need to reclaim it after the file is closed.
+ * So grab another open count matching the accessMode passed in.
+ * If we already had an mmap open, prefer read/write without deny mode.
+ * This means we may have to drop the current mmap open first.
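+ *
+ * For instance (an illustrative scenario): mmap(PROT_READ|PROT_WRITE)
+ * on a file already open read/write with no deny mode finds
+ * nofp->nof_rw != 0, picks DENY_NONE below, bumps nof_rw, and records
+ * nof_mmap_access/nof_mmap_deny so nfs4_vnop_mnomap() can close this
+ * extra open later.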
+ */ + + /* determine deny mode for open */ + if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH) { + if (nofp->nof_rw) + denyMode = NFS_OPEN_SHARE_DENY_NONE; + else if (nofp->nof_rw_dw) + denyMode = NFS_OPEN_SHARE_DENY_WRITE; + else if (nofp->nof_rw_drw) + denyMode = NFS_OPEN_SHARE_DENY_BOTH; + else + error = EPERM; + } else { /* NFS_OPEN_SHARE_ACCESS_READ */ + if (nofp->nof_r) + denyMode = NFS_OPEN_SHARE_DENY_NONE; + else if (nofp->nof_r_dw) + denyMode = NFS_OPEN_SHARE_DENY_WRITE; + else if (nofp->nof_r_drw) + denyMode = NFS_OPEN_SHARE_DENY_BOTH; + else + error = EPERM; + } + if (error) /* mmap mode without proper open mode */ + goto out; + + /* + * If the existing mmap access is more than the new access OR the + * existing access is the same and the existing deny mode is less, + * then we'll stick with the existing mmap open mode. + */ + if ((nofp->nof_mmap_access > accessMode) || + ((nofp->nof_mmap_access == accessMode) && (nofp->nof_mmap_deny <= denyMode))) + goto out; + + /* update mmap open mode */ + if (nofp->nof_mmap_access) { + error = nfs4_close(np, nofp, nofp->nof_mmap_access, nofp->nof_mmap_deny, ctx); + if (error) { + if (!nfs_mount_state_error_should_restart(error)) + printf("nfs_vnop_mmap: close of previous mmap mode failed: %d\n", error); + const char *vname = vnode_getname(NFSTOV(np)); + printf("nfs_vnop_mmap: update, close error %d, %s\n", error, vname); + vnode_putname(vname); + goto out; + } + nofp->nof_mmap_access = nofp->nof_mmap_deny = 0; + } + + if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH) { + if (denyMode == NFS_OPEN_SHARE_DENY_NONE) + nofp->nof_rw++; + else if (denyMode == NFS_OPEN_SHARE_DENY_WRITE) + nofp->nof_rw_dw++; + else /* NFS_OPEN_SHARE_DENY_BOTH */ + nofp->nof_rw_drw++; + } else if (accessMode == NFS_OPEN_SHARE_ACCESS_READ) { + if (denyMode == NFS_OPEN_SHARE_DENY_NONE) + nofp->nof_r++; + else if (denyMode == NFS_OPEN_SHARE_DENY_WRITE) + nofp->nof_r_dw++; + else /* NFS_OPEN_SHARE_DENY_BOTH */ + nofp->nof_r_drw++; + } + nofp->nof_mmap_access = accessMode; + nofp->nof_mmap_deny = denyMode; + nofp->nof_opencnt++; + +out: + if (nofp) + nfs_open_file_clear_busy(nofp); + if (nfs_mount_state_in_use_end(nmp, error)) { + nofp = NULL; + goto restart; + } + if (noop) + nfs_open_owner_rele(noop); + return (error); +} + + +int +nfs4_vnop_mnomap( + struct vnop_mnomap_args /* { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + vfs_context_t a_context; + } */ *ap) +{ + vfs_context_t ctx = ap->a_context; + vnode_t vp = ap->a_vp; + nfsnode_t np = VTONFS(vp); + struct nfsmount *nmp; + struct nfs_open_file *nofp = NULL; + int error; + + nmp = VTONMP(vp); + if (!nmp) + return (ENXIO); + + /* walk all open files and close all mmap opens */ +loop: + error = nfs_mount_state_in_use_start(nmp); + if (error) + return (error); + lck_mtx_lock(&np->n_openlock); + TAILQ_FOREACH(nofp, &np->n_opens, nof_link) { + if (!nofp->nof_mmap_access) + continue; + lck_mtx_unlock(&np->n_openlock); + if (nofp->nof_flags & NFS_OPEN_FILE_REOPEN) { + nfs_mount_state_in_use_end(nmp, 0); + nfs4_reopen(nofp, vfs_context_thread(ctx)); + goto loop; + } + error = nfs_open_file_set_busy(nofp, vfs_context_thread(ctx)); + if (error) { + lck_mtx_lock(&np->n_openlock); + break; + } + if (nofp->nof_mmap_access) { + error = nfs4_close(np, nofp, nofp->nof_mmap_access, nofp->nof_mmap_deny, ctx); + if (!nfs_mount_state_error_should_restart(error)) { + if (error) /* not a state-operation-restarting error, so just clear the access */ + printf("nfs_vnop_mnomap: close of mmap mode failed: %d\n", error); + nofp->nof_mmap_access 
= nofp->nof_mmap_deny = 0; + } + if (error) { + const char *vname = vnode_getname(NFSTOV(np)); + printf("nfs_vnop_mnomap: error %d, %s\n", error, vname); + vnode_putname(vname); + } + } + nfs_open_file_clear_busy(nofp); + nfs_mount_state_in_use_end(nmp, error); + goto loop; + } + lck_mtx_unlock(&np->n_openlock); + nfs_mount_state_in_use_end(nmp, error); + return (error); +} + +/* + * Search a node's lock owner list for the owner for this process. + * If not found and "alloc" is set, then allocate a new one. + */ +struct nfs_lock_owner * +nfs_lock_owner_find(nfsnode_t np, proc_t p, int alloc) +{ + pid_t pid = proc_pid(p); + struct nfs_lock_owner *nlop, *newnlop = NULL; + +tryagain: + lck_mtx_lock(&np->n_openlock); + TAILQ_FOREACH(nlop, &np->n_lock_owners, nlo_link) { + if (nlop->nlo_pid != pid) + continue; + if (timevalcmp(&nlop->nlo_pid_start, &p->p_start, ==)) + break; + /* stale lock owner... reuse it if we can */ + if (nlop->nlo_refcnt) { + TAILQ_REMOVE(&np->n_lock_owners, nlop, nlo_link); + nlop->nlo_flags &= ~NFS_LOCK_OWNER_LINK; + lck_mtx_unlock(&np->n_openlock); + goto tryagain; + } + nlop->nlo_pid_start = p->p_start; + nlop->nlo_seqid = 0; + nlop->nlo_stategenid = 0; + break; + } + + if (!nlop && !newnlop && alloc) { + lck_mtx_unlock(&np->n_openlock); + MALLOC(newnlop, struct nfs_lock_owner *, sizeof(struct nfs_lock_owner), M_TEMP, M_WAITOK); + if (!newnlop) + return (NULL); + bzero(newnlop, sizeof(*newnlop)); + lck_mtx_init(&newnlop->nlo_lock, nfs_open_grp, LCK_ATTR_NULL); + newnlop->nlo_pid = pid; + newnlop->nlo_pid_start = p->p_start; + newnlop->nlo_name = OSAddAtomic(1, &nfs_lock_owner_seqnum); + TAILQ_INIT(&newnlop->nlo_locks); + goto tryagain; + } + if (!nlop && newnlop) { + newnlop->nlo_flags |= NFS_LOCK_OWNER_LINK; + TAILQ_INSERT_HEAD(&np->n_lock_owners, newnlop, nlo_link); + nlop = newnlop; + } + lck_mtx_unlock(&np->n_openlock); + + if (newnlop && (nlop != newnlop)) + nfs_lock_owner_destroy(newnlop); + + if (nlop) + nfs_lock_owner_ref(nlop); + + return (nlop); +} + +/* + * destroy a lock owner that's no longer needed + */ +void +nfs_lock_owner_destroy(struct nfs_lock_owner *nlop) +{ + if (nlop->nlo_open_owner) { + nfs_open_owner_rele(nlop->nlo_open_owner); + nlop->nlo_open_owner = NULL; + } + lck_mtx_destroy(&nlop->nlo_lock, nfs_open_grp); + FREE(nlop, M_TEMP); +} + +/* + * acquire a reference count on a lock owner + */ +void +nfs_lock_owner_ref(struct nfs_lock_owner *nlop) +{ + lck_mtx_lock(&nlop->nlo_lock); + nlop->nlo_refcnt++; + lck_mtx_unlock(&nlop->nlo_lock); +} + +/* + * drop a reference count on a lock owner and destroy it if + * it is no longer referenced and no longer on the mount's list. + */ +void +nfs_lock_owner_rele(struct nfs_lock_owner *nlop) +{ + lck_mtx_lock(&nlop->nlo_lock); + if (nlop->nlo_refcnt < 1) + panic("nfs_lock_owner_rele: no refcnt"); + nlop->nlo_refcnt--; + if (!nlop->nlo_refcnt && (nlop->nlo_flags & NFS_LOCK_OWNER_BUSY)) + panic("nfs_lock_owner_rele: busy"); + /* XXX we may potentially want to clean up idle/unused lock owner structures */ + if (nlop->nlo_refcnt || (nlop->nlo_flags & NFS_LOCK_OWNER_LINK)) { + lck_mtx_unlock(&nlop->nlo_lock); + return; + } + /* owner is no longer referenced or linked to mount, so destroy it */ + lck_mtx_unlock(&nlop->nlo_lock); + nfs_lock_owner_destroy(nlop); +} + +/* + * Mark a lock owner as busy because we are about to + * start an operation that uses and updates lock owner state. 
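+ *
+ * Typical usage (a sketch of the pattern; cf. nfs4_unlock_rpc() below):
+ *
+ *	if ((error = nfs_lock_owner_set_busy(nlop, thd)))
+ *		return (error);
+ *	... issue the RPC that consumes nlop->nlo_seqid ...
+ *	nfs_lock_owner_clear_busy(nlop);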
+ */
+int
+nfs_lock_owner_set_busy(struct nfs_lock_owner *nlop, thread_t thd)
+{
+ struct nfsmount *nmp;
+ struct timespec ts = {2, 0};
+ int error = 0, slpflag;
+
+ nmp = nlop->nlo_open_owner->noo_mount;
+ if (!nmp)
+ return (ENXIO);
+ slpflag = (nmp->nm_flag & NFSMNT_INT) ? PCATCH : 0;
+
+ lck_mtx_lock(&nlop->nlo_lock);
+ while (nlop->nlo_flags & NFS_LOCK_OWNER_BUSY) {
+ if ((error = nfs_sigintr(nmp, NULL, thd, 0)))
+ break;
+ nlop->nlo_flags |= NFS_LOCK_OWNER_WANT;
+ msleep(nlop, &nlop->nlo_lock, slpflag, "nfs_lock_owner_set_busy", &ts);
+ }
+ if (!error)
+ nlop->nlo_flags |= NFS_LOCK_OWNER_BUSY;
+ lck_mtx_unlock(&nlop->nlo_lock);
+
+ return (error);
+}
+
+/*
+ * Clear the busy flag on a lock owner and wake up anyone waiting
+ * to mark it busy.
+ */
+void
+nfs_lock_owner_clear_busy(struct nfs_lock_owner *nlop)
+{
+ int wanted;
+
+ lck_mtx_lock(&nlop->nlo_lock);
+ if (!(nlop->nlo_flags & NFS_LOCK_OWNER_BUSY))
+ panic("nfs_lock_owner_clear_busy");
+ wanted = (nlop->nlo_flags & NFS_LOCK_OWNER_WANT);
+ nlop->nlo_flags &= ~(NFS_LOCK_OWNER_BUSY|NFS_LOCK_OWNER_WANT);
+ lck_mtx_unlock(&nlop->nlo_lock);
+ if (wanted)
+ wakeup(nlop);
+}
+
+/*
+ * Insert a held lock into a lock owner's sorted list.
+ * (flock locks are always inserted at the head of the list)
+ */
+void
+nfs_lock_owner_insert_held_lock(struct nfs_lock_owner *nlop, struct nfs_file_lock *newnflp)
+{
+ struct nfs_file_lock *nflp;
+
+ /* insert new lock in lock owner's held lock list */
+ lck_mtx_lock(&nlop->nlo_lock);
+ if ((newnflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK) == NFS_FILE_LOCK_STYLE_FLOCK) {
+ TAILQ_INSERT_HEAD(&nlop->nlo_locks, newnflp, nfl_lolink);
+ } else {
+ TAILQ_FOREACH(nflp, &nlop->nlo_locks, nfl_lolink) {
+ if (newnflp->nfl_start < nflp->nfl_start)
+ break;
+ }
+ if (nflp)
+ TAILQ_INSERT_BEFORE(nflp, newnflp, nfl_lolink);
+ else
+ TAILQ_INSERT_TAIL(&nlop->nlo_locks, newnflp, nfl_lolink);
+ }
+ lck_mtx_unlock(&nlop->nlo_lock);
+}
+
+/*
+ * Get a file lock structure for this lock owner.
+ */
+struct nfs_file_lock *
+nfs_file_lock_alloc(struct nfs_lock_owner *nlop)
+{
+ struct nfs_file_lock *nflp = NULL;
+
+ lck_mtx_lock(&nlop->nlo_lock);
+ if (!nlop->nlo_alock.nfl_owner) {
+ nflp = &nlop->nlo_alock;
+ nflp->nfl_owner = nlop;
+ }
+ lck_mtx_unlock(&nlop->nlo_lock);
+ if (!nflp) {
+ MALLOC(nflp, struct nfs_file_lock *, sizeof(struct nfs_file_lock), M_TEMP, M_WAITOK);
+ if (!nflp)
+ return (NULL);
+ bzero(nflp, sizeof(*nflp));
+ nflp->nfl_flags |= NFS_FILE_LOCK_ALLOC;
+ nflp->nfl_owner = nlop;
+ }
+ nfs_lock_owner_ref(nlop);
+ return (nflp);
+}
+
+/*
+ * destroy the given NFS file lock structure
+ */
+void
+nfs_file_lock_destroy(struct nfs_file_lock *nflp)
+{
+ struct nfs_lock_owner *nlop = nflp->nfl_owner;
+
+ if (nflp->nfl_flags & NFS_FILE_LOCK_ALLOC) {
+ nflp->nfl_owner = NULL;
+ FREE(nflp, M_TEMP);
+ } else {
+ lck_mtx_lock(&nlop->nlo_lock);
+ bzero(nflp, sizeof(*nflp));
+ lck_mtx_unlock(&nlop->nlo_lock);
+ }
+ nfs_lock_owner_rele(nlop);
+}
+
+/*
+ * Check if one file lock conflicts with another.
+ * (nflp1 is the new lock. nflp2 is the existing lock.)
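+ *
+ * Worked example (hypothetical ranges): if one owner holds a POSIX
+ * write lock over bytes 0-99 and another owner requests a read lock
+ * over 50-149, the ranges overlap and one lock is exclusive, so this
+ * returns 1. If the same owner requests a read lock over 40-59 instead,
+ * there is no conflict, but *willsplit is set because a nested lock of
+ * a different type will split the existing one.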
+ */ +int +nfs_file_lock_conflict(struct nfs_file_lock *nflp1, struct nfs_file_lock *nflp2, int *willsplit) +{ + /* no conflict if lock is dead */ + if ((nflp1->nfl_flags & NFS_FILE_LOCK_DEAD) || (nflp2->nfl_flags & NFS_FILE_LOCK_DEAD)) + return (0); + /* no conflict if it's ours - unless the lock style doesn't match */ + if ((nflp1->nfl_owner == nflp2->nfl_owner) && + ((nflp1->nfl_flags & NFS_FILE_LOCK_STYLE_MASK) == (nflp2->nfl_flags & NFS_FILE_LOCK_STYLE_MASK))) { + if (willsplit && (nflp1->nfl_type != nflp2->nfl_type) && + (nflp1->nfl_start > nflp2->nfl_start) && + (nflp1->nfl_end < nflp2->nfl_end)) + *willsplit = 1; + return (0); + } + /* no conflict if ranges don't overlap */ + if ((nflp1->nfl_start > nflp2->nfl_end) || (nflp1->nfl_end < nflp2->nfl_start)) + return (0); + /* no conflict if neither lock is exclusive */ + if ((nflp1->nfl_type != F_WRLCK) && (nflp2->nfl_type != F_WRLCK)) + return (0); + /* conflict */ + return (1); +} + +/* + * Send an NFSv4 LOCK RPC to the server. + */ +int +nfs4_lock_rpc( + nfsnode_t np, + struct nfs_open_file *nofp, + struct nfs_file_lock *nflp, + int reclaim, + thread_t thd, + kauth_cred_t cred) +{ + struct nfs_lock_owner *nlop = nflp->nfl_owner; + struct nfsmount *nmp; + struct nfsm_chain nmreq, nmrep; + uint64_t xid; + uint32_t locktype; + int error = 0, lockerror = ENOENT, newlocker, numops, status; + + nmp = NFSTONMP(np); + if (!nmp) + return (ENXIO); + + newlocker = (nlop->nlo_stategenid != nmp->nm_stategenid); + locktype = (nflp->nfl_flags & NFS_FILE_LOCK_WAIT) ? + ((nflp->nfl_type == F_WRLCK) ? + NFS_LOCK_TYPE_WRITEW : + NFS_LOCK_TYPE_READW) : + ((nflp->nfl_type == F_WRLCK) ? + NFS_LOCK_TYPE_WRITE : + NFS_LOCK_TYPE_READ); + if (newlocker) { + error = nfs_open_file_set_busy(nofp, thd); + if (error) + return (error); + error = nfs_open_owner_set_busy(nofp->nof_owner, thd); + if (error) { + nfs_open_file_clear_busy(nofp); + return (error); + } + if (!nlop->nlo_open_owner) { + nfs_open_owner_ref(nofp->nof_owner); + nlop->nlo_open_owner = nofp->nof_owner; + } + } + error = nfs_lock_owner_set_busy(nlop, thd); + if (error) { + if (newlocker) { + nfs_open_owner_clear_busy(nofp->nof_owner); + nfs_open_file_clear_busy(nofp); + } + return (error); + } + + nfsm_chain_null(&nmreq); + nfsm_chain_null(&nmrep); + + // PUTFH, GETATTR, LOCK + numops = 3; + nfsm_chain_build_alloc_init(error, &nmreq, 33 * NFSX_UNSIGNED); + nfsm_chain_add_compound_header(error, &nmreq, "lock", numops); + numops--; + nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH); + nfsm_chain_add_fh(error, &nmreq, NFS_VER4, np->n_fhp, np->n_fhsize); + numops--; + nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR); + nfsm_chain_add_bitmap_masked(error, &nmreq, nfs_getattr_bitmap, + NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr); + numops--; + nfsm_chain_add_32(error, &nmreq, NFS_OP_LOCK); + nfsm_chain_add_32(error, &nmreq, locktype); + nfsm_chain_add_32(error, &nmreq, reclaim); + nfsm_chain_add_64(error, &nmreq, nflp->nfl_start); + nfsm_chain_add_64(error, &nmreq, NFS_LOCK_LENGTH(nflp->nfl_start, nflp->nfl_end)); + nfsm_chain_add_32(error, &nmreq, newlocker); + if (newlocker) { + nfsm_chain_add_32(error, &nmreq, nofp->nof_owner->noo_seqid); + nfsm_chain_add_stateid(error, &nmreq, &nofp->nof_stateid); + nfsm_chain_add_32(error, &nmreq, nlop->nlo_seqid); + nfsm_chain_add_lock_owner4(error, &nmreq, nmp, nlop); + } else { + nfsm_chain_add_stateid(error, &nmreq, &nlop->nlo_stateid); + nfsm_chain_add_32(error, &nmreq, nlop->nlo_seqid); + } + nfsm_chain_build_done(error, &nmreq); + nfsm_assert(error, 
(numops == 0), EPROTO); + nfsmout_if(error); + + error = nfs_request2(np, NULL, &nmreq, NFSPROC4_COMPOUND, thd, cred, (reclaim ? R_RECOVER : 0), &nmrep, &xid, &status); + + if ((lockerror = nfs_node_lock(np))) + error = lockerror; + nfsm_chain_skip_tag(error, &nmrep); + nfsm_chain_get_32(error, &nmrep, numops); + nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH); + nfsmout_if(error); + nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR); + nfsm_chain_loadattr(error, &nmrep, np, NFS_VER4, NULL, &xid); + nfsmout_if(error); + nfsm_chain_op_check(error, &nmrep, NFS_OP_LOCK); + nfs_owner_seqid_increment(newlocker ? nofp->nof_owner : NULL, nlop, error); + nfsm_chain_get_stateid(error, &nmrep, &nlop->nlo_stateid); + + /* Update the lock owner's stategenid once it appears the server has state for it. */ + /* We determine this by noting the request was successful (we got a stateid). */ + if (newlocker && !error) + nlop->nlo_stategenid = nmp->nm_stategenid; +nfsmout: + if (!lockerror) + nfs_node_unlock(np); + nfs_lock_owner_clear_busy(nlop); + if (newlocker) { + nfs_open_owner_clear_busy(nofp->nof_owner); + nfs_open_file_clear_busy(nofp); + } + nfsm_chain_cleanup(&nmreq); + nfsm_chain_cleanup(&nmrep); + return (error); +} + +/* + * Send an NFSv4 LOCKU RPC to the server. + */ +int +nfs4_unlock_rpc( + nfsnode_t np, + struct nfs_lock_owner *nlop, + int type, + uint64_t start, + uint64_t end, + vfs_context_t ctx) +{ + struct nfsmount *nmp; + struct nfsm_chain nmreq, nmrep; + uint64_t xid; + int error = 0, lockerror = ENOENT, numops, status; + + nmp = NFSTONMP(np); + if (!nmp) + return (ENXIO); + + error = nfs_lock_owner_set_busy(nlop, vfs_context_thread(ctx)); + if (error) + return (error); + + nfsm_chain_null(&nmreq); + nfsm_chain_null(&nmrep); + + // PUTFH, GETATTR, LOCKU + numops = 3; + nfsm_chain_build_alloc_init(error, &nmreq, 26 * NFSX_UNSIGNED); + nfsm_chain_add_compound_header(error, &nmreq, "unlock", numops); + numops--; + nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH); + nfsm_chain_add_fh(error, &nmreq, NFS_VER4, np->n_fhp, np->n_fhsize); + numops--; + nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR); + nfsm_chain_add_bitmap_masked(error, &nmreq, nfs_getattr_bitmap, + NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr); + numops--; + nfsm_chain_add_32(error, &nmreq, NFS_OP_LOCKU); + nfsm_chain_add_32(error, &nmreq, (type == F_WRLCK) ? 
NFS_LOCK_TYPE_WRITE : NFS_LOCK_TYPE_READ); + nfsm_chain_add_32(error, &nmreq, nlop->nlo_seqid); + nfsm_chain_add_stateid(error, &nmreq, &nlop->nlo_stateid); + nfsm_chain_add_64(error, &nmreq, start); + nfsm_chain_add_64(error, &nmreq, NFS_LOCK_LENGTH(start, end)); + nfsm_chain_build_done(error, &nmreq); + nfsm_assert(error, (numops == 0), EPROTO); + nfsmout_if(error); + + error = nfs_request(np, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &nmrep, &xid, &status); + + if ((lockerror = nfs_node_lock(np))) + error = lockerror; + nfsm_chain_skip_tag(error, &nmrep); + nfsm_chain_get_32(error, &nmrep, numops); + nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH); + nfsmout_if(error); + nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR); + nfsm_chain_loadattr(error, &nmrep, np, NFS_VER4, NULL, &xid); + nfsmout_if(error); + nfsm_chain_op_check(error, &nmrep, NFS_OP_LOCKU); + nfs_owner_seqid_increment(NULL, nlop, error); + nfsm_chain_get_stateid(error, &nmrep, &nlop->nlo_stateid); +nfsmout: + if (!lockerror) + nfs_node_unlock(np); + nfs_lock_owner_clear_busy(nlop); + nfsm_chain_cleanup(&nmreq); + nfsm_chain_cleanup(&nmrep); + return (error); +} + +/* + * Check for any conflicts with the given lock. + * + * Checking for a lock doesn't require the file to be opened. + * So we skip all the open owner, open file, lock owner work + * and just check for a conflicting lock. + */ +int +nfs4_getlock( + nfsnode_t np, + struct nfs_lock_owner *nlop, + struct flock *fl, + uint64_t start, + uint64_t end, + vfs_context_t ctx) +{ + struct nfsmount *nmp; + struct nfs_file_lock *nflp; + struct nfsm_chain nmreq, nmrep; + uint64_t xid, val64 = 0; + uint32_t val = 0; + int error = 0, lockerror = ENOENT, numops, status; + + nmp = NFSTONMP(np); + if (!nmp) + return (ENXIO); + + lck_mtx_lock(&np->n_openlock); + /* scan currently held locks for conflict */ + TAILQ_FOREACH(nflp, &np->n_locks, nfl_link) { + if (nflp->nfl_flags & NFS_FILE_LOCK_BLOCKED) + continue; + if ((start <= nflp->nfl_end) && (end >= nflp->nfl_start) && + ((fl->l_type == F_WRLCK) || (nflp->nfl_type == F_WRLCK))) + break; + } + if (nflp) { + /* found a conflicting lock */ + fl->l_type = nflp->nfl_type; + fl->l_pid = (nflp->nfl_flags & NFS_FILE_LOCK_STYLE_FLOCK) ? -1 : nflp->nfl_owner->nlo_pid; + fl->l_start = nflp->nfl_start; + fl->l_len = NFS_FLOCK_LENGTH(nflp->nfl_start, nflp->nfl_end); + fl->l_whence = SEEK_SET; + } + lck_mtx_unlock(&np->n_openlock); + if (nflp) + return (0); + + /* no conflict found locally, so ask the server */ + + nfsm_chain_null(&nmreq); + nfsm_chain_null(&nmrep); + + // PUTFH, GETATTR, LOCKT + numops = 3; + nfsm_chain_build_alloc_init(error, &nmreq, 26 * NFSX_UNSIGNED); + nfsm_chain_add_compound_header(error, &nmreq, "locktest", numops); + numops--; + nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH); + nfsm_chain_add_fh(error, &nmreq, NFS_VER4, np->n_fhp, np->n_fhsize); + numops--; + nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR); + nfsm_chain_add_bitmap_masked(error, &nmreq, nfs_getattr_bitmap, + NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr); + numops--; + nfsm_chain_add_32(error, &nmreq, NFS_OP_LOCKT); + nfsm_chain_add_32(error, &nmreq, (fl->l_type == F_WRLCK) ? 
NFS_LOCK_TYPE_WRITE : NFS_LOCK_TYPE_READ); + nfsm_chain_add_64(error, &nmreq, start); + nfsm_chain_add_64(error, &nmreq, NFS_LOCK_LENGTH(start, end)); + nfsm_chain_add_lock_owner4(error, &nmreq, nmp, nlop); + nfsm_chain_build_done(error, &nmreq); + nfsm_assert(error, (numops == 0), EPROTO); + nfsmout_if(error); + + error = nfs_request(np, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &nmrep, &xid, &status); + + if ((lockerror = nfs_node_lock(np))) + error = lockerror; + nfsm_chain_skip_tag(error, &nmrep); + nfsm_chain_get_32(error, &nmrep, numops); + nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH); + nfsmout_if(error); + nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR); + nfsm_chain_loadattr(error, &nmrep, np, NFS_VER4, NULL, &xid); + nfsmout_if(error); + nfsm_chain_op_check(error, &nmrep, NFS_OP_LOCKT); + if (error == NFSERR_DENIED) { + error = 0; + nfsm_chain_get_64(error, &nmrep, fl->l_start); + nfsm_chain_get_64(error, &nmrep, val64); + fl->l_len = (val64 == UINT64_MAX) ? 0 : val64; + nfsm_chain_get_32(error, &nmrep, val); + fl->l_type = (val == NFS_LOCK_TYPE_WRITE) ? F_WRLCK : F_RDLCK; + fl->l_pid = 0; + fl->l_whence = SEEK_SET; + } else if (!error) { + fl->l_type = F_UNLCK; + } +nfsmout: + if (!lockerror) + nfs_node_unlock(np); + nfsm_chain_cleanup(&nmreq); + nfsm_chain_cleanup(&nmrep); + return (error); +} + +/* + * Acquire a file lock for the given range. + * + * Add the lock (request) to the lock queue. + * Scan the lock queue for any conflicting locks. + * If a conflict is found, block or return an error. + * Once end of queue is reached, send request to the server. + * If the server grants the lock, scan the lock queue and + * update any existing locks. Then (optionally) scan the + * queue again to coalesce any locks adjacent to the new one. + */ +int +nfs4_setlock( + nfsnode_t np, + struct nfs_open_file *nofp, + struct nfs_lock_owner *nlop, + int op, + uint64_t start, + uint64_t end, + int style, + short type, + vfs_context_t ctx) +{ + struct nfsmount *nmp; + struct nfs_file_lock *newnflp, *nflp, *nflp2 = NULL, *nextnflp, *flocknflp = NULL; + struct nfs_file_lock *coalnflp; + int error = 0, error2, willsplit = 0, delay, slpflag, busy = 0, inuse = 0, restart, inqueue = 0; + struct timespec ts = {1, 0}; + + nmp = NFSTONMP(np); + if (!nmp) + return (ENXIO); + slpflag = (nmp->nm_flag & NFSMNT_INT) ? PCATCH : 0; + + /* allocate a new lock */ + newnflp = nfs_file_lock_alloc(nlop); + if (!newnflp) + return (ENOLCK); + newnflp->nfl_start = start; + newnflp->nfl_end = end; + newnflp->nfl_type = type; + if (op == F_SETLKW) + newnflp->nfl_flags |= NFS_FILE_LOCK_WAIT; + newnflp->nfl_flags |= style; + newnflp->nfl_flags |= NFS_FILE_LOCK_BLOCKED; + + if ((style == NFS_FILE_LOCK_STYLE_FLOCK) && (type == F_WRLCK)) { + /* + * For exclusive flock-style locks, if we block waiting for the + * lock, we need to first release any currently held shared + * flock-style lock. So, the first thing we do is check if we + * have a shared flock-style lock. 
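+ *
+ * For example (an illustrative scenario): a process that did
+ * flock(fd, LOCK_SH) and now asks for flock(fd, LOCK_EX) must not
+ * sleep while still holding the shared lock, or two processes doing
+ * the same upgrade would deadlock; flocknflp remembers the shared
+ * lock so it can be dropped before blocking.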
+ */ + nflp = TAILQ_FIRST(&nlop->nlo_locks); + if (nflp && ((nflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK) != NFS_FILE_LOCK_STYLE_FLOCK)) + nflp = NULL; + if (nflp && (nflp->nfl_type != F_RDLCK)) + nflp = NULL; + flocknflp = nflp; + } + +restart: + restart = 0; + error = nfs_mount_state_in_use_start(nmp); + if (error) + goto error_out; + inuse = 1; + if (nofp->nof_flags & NFS_OPEN_FILE_REOPEN) { + nfs_mount_state_in_use_end(nmp, 0); + inuse = 0; + nfs4_reopen(nofp, vfs_context_thread(ctx)); + goto restart; + } + + lck_mtx_lock(&np->n_openlock); + if (!inqueue) { + /* insert new lock at beginning of list */ + TAILQ_INSERT_HEAD(&np->n_locks, newnflp, nfl_link); + inqueue = 1; + } + + /* scan current list of locks (held and pending) for conflicts */ + for (nflp = TAILQ_NEXT(newnflp, nfl_link); nflp; nflp = TAILQ_NEXT(nflp, nfl_link)) { + if (!nfs_file_lock_conflict(newnflp, nflp, &willsplit)) + continue; + /* Conflict */ + if (!(newnflp->nfl_flags & NFS_FILE_LOCK_WAIT)) { + error = EAGAIN; + break; + } + /* Block until this lock is no longer held. */ + if (nflp->nfl_blockcnt == UINT_MAX) { + error = ENOLCK; + break; + } + nflp->nfl_blockcnt++; + do { + if (flocknflp) { + /* release any currently held shared lock before sleeping */ + lck_mtx_unlock(&np->n_openlock); + nfs_mount_state_in_use_end(nmp, 0); + inuse = 0; + error = nfs4_unlock(np, nofp, nlop, 0, UINT64_MAX, NFS_FILE_LOCK_STYLE_FLOCK, ctx); + flocknflp = NULL; + if (!error) + error = nfs_mount_state_in_use_start(nmp); + if (error) { + lck_mtx_lock(&np->n_openlock); + break; + } + inuse = 1; + lck_mtx_lock(&np->n_openlock); + /* no need to block/sleep if the conflict is gone */ + if (!nfs_file_lock_conflict(newnflp, nflp, NULL)) + break; + } + msleep(nflp, &np->n_openlock, slpflag, "nfs4_setlock_blocked", &ts); + error = nfs_sigintr(NFSTONMP(np), NULL, vfs_context_thread(ctx), 0); + if (!error && (nmp->nm_state & NFSSTA_RECOVER)) { + /* looks like we have a recover pending... restart */ + restart = 1; + lck_mtx_unlock(&np->n_openlock); + nfs_mount_state_in_use_end(nmp, 0); + inuse = 0; + lck_mtx_lock(&np->n_openlock); + break; + } + } while (!error && nfs_file_lock_conflict(newnflp, nflp, NULL)); + nflp->nfl_blockcnt--; + if ((nflp->nfl_flags & NFS_FILE_LOCK_DEAD) && !nflp->nfl_blockcnt) { + TAILQ_REMOVE(&np->n_locks, nflp, nfl_link); + nfs_file_lock_destroy(nflp); + } + if (error || restart) + break; + } + lck_mtx_unlock(&np->n_openlock); + if (restart) + goto restart; + if (error) + goto error_out; + + if (willsplit) { + /* + * It looks like this operation is splitting a lock. + * We allocate a new lock now so we don't have to worry + * about the allocation failing after we've updated some state. 
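+ *
+ * Worked example (hypothetical ranges): changing bytes 40-59 of a held
+ * 0-99 write lock to a read lock leaves 0-39 and 60-99 write-locked,
+ * which requires one extra nfs_file_lock structure (nflp2) for the
+ * upper remainder.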
+ */ + nflp2 = nfs_file_lock_alloc(nlop); + if (!nflp2) { + error = ENOLCK; + goto error_out; + } + } + + /* once scan for local conflicts is clear, send request to server */ + if ((error = nfs_open_state_set_busy(np, ctx))) + goto error_out; + busy = 1; + delay = 0; + do { + error = nfs4_lock_rpc(np, nofp, newnflp, 0, vfs_context_thread(ctx), vfs_context_ucred(ctx)); + if (!error || ((error != NFSERR_DENIED) && (error != NFSERR_GRACE))) + break; + /* request was denied due to either conflict or grace period */ + if ((error != NFSERR_GRACE) && !(newnflp->nfl_flags & NFS_FILE_LOCK_WAIT)) { + error = EAGAIN; + break; + } + if (flocknflp) { + /* release any currently held shared lock before sleeping */ + nfs_open_state_clear_busy(np); + busy = 0; + nfs_mount_state_in_use_end(nmp, 0); + inuse = 0; + error2 = nfs4_unlock(np, nofp, nlop, 0, UINT64_MAX, NFS_FILE_LOCK_STYLE_FLOCK, ctx); + flocknflp = NULL; + if (!error2) + error2 = nfs_mount_state_in_use_start(nmp); + if (!error2) { + inuse = 1; + error2 = nfs_open_state_set_busy(np, ctx); + } + if (error2) { + error = error2; + break; + } + busy = 1; + } + /* wait a little bit and send the request again */ + if (error == NFSERR_GRACE) + delay = 4; + if (delay < 4) + delay++; + tsleep(newnflp, slpflag, "nfs4_setlock_delay", delay * (hz/2)); + error = nfs_sigintr(NFSTONMP(np), NULL, vfs_context_thread(ctx), 0); + if (!error && (nmp->nm_state & NFSSTA_RECOVER)) { + /* looks like we have a recover pending... restart */ + nfs_open_state_clear_busy(np); + busy = 0; + nfs_mount_state_in_use_end(nmp, 0); + inuse = 0; + goto restart; + } + } while (!error); + +error_out: + if (nfs_mount_state_error_should_restart(error)) { + /* looks like we need to restart this operation */ + if (busy) { + nfs_open_state_clear_busy(np); + busy = 0; + } + if (inuse) { + nfs_mount_state_in_use_end(nmp, error); + inuse = 0; + } + goto restart; + } + lck_mtx_lock(&np->n_openlock); + newnflp->nfl_flags &= ~NFS_FILE_LOCK_BLOCKED; + if (error) { + newnflp->nfl_flags |= NFS_FILE_LOCK_DEAD; + if (newnflp->nfl_blockcnt) { + /* wake up anyone blocked on this lock */ + wakeup(newnflp); + } else { + /* remove newnflp from lock list and destroy */ + TAILQ_REMOVE(&np->n_locks, newnflp, nfl_link); + nfs_file_lock_destroy(newnflp); + } + lck_mtx_unlock(&np->n_openlock); + if (busy) + nfs_open_state_clear_busy(np); + if (inuse) + nfs_mount_state_in_use_end(nmp, error); + if (nflp2) + nfs_file_lock_destroy(nflp2); + return (error); + } + + /* server granted the lock */ + + /* + * Scan for locks to update. + * + * Locks completely covered are killed. + * At most two locks may need to be clipped. + * It's possible that a single lock may need to be split. + */ + TAILQ_FOREACH_SAFE(nflp, &np->n_locks, nfl_link, nextnflp) { + if (nflp == newnflp) + continue; + if (nflp->nfl_flags & (NFS_FILE_LOCK_BLOCKED|NFS_FILE_LOCK_DEAD)) + continue; + if (nflp->nfl_owner != nlop) + continue; + if ((newnflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK) != (nflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK)) + continue; + if ((newnflp->nfl_start > nflp->nfl_end) || (newnflp->nfl_end < nflp->nfl_start)) + continue; + /* here's one to update */ + if ((newnflp->nfl_start <= nflp->nfl_start) && (newnflp->nfl_end >= nflp->nfl_end)) { + /* The entire lock is being replaced. 
*/ + nflp->nfl_flags |= NFS_FILE_LOCK_DEAD; + lck_mtx_lock(&nlop->nlo_lock); + TAILQ_REMOVE(&nlop->nlo_locks, nflp, nfl_lolink); + lck_mtx_unlock(&nlop->nlo_lock); + /* lock will be destroyed below, if no waiters */ + } else if ((newnflp->nfl_start > nflp->nfl_start) && (newnflp->nfl_end < nflp->nfl_end)) { + /* We're replacing a range in the middle of a lock. */ + /* The current lock will be split into two locks. */ + /* Update locks and insert new lock after current lock. */ + nflp2->nfl_flags |= (nflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK); + nflp2->nfl_type = nflp->nfl_type; + nflp2->nfl_start = newnflp->nfl_end + 1; + nflp2->nfl_end = nflp->nfl_end; + nflp->nfl_end = newnflp->nfl_start - 1; + TAILQ_INSERT_AFTER(&np->n_locks, nflp, nflp2, nfl_link); + nfs_lock_owner_insert_held_lock(nlop, nflp2); + nextnflp = nflp2; + nflp2 = NULL; + } else if (newnflp->nfl_start > nflp->nfl_start) { + /* We're replacing the end of a lock. */ + nflp->nfl_end = newnflp->nfl_start - 1; + } else if (newnflp->nfl_end < nflp->nfl_end) { + /* We're replacing the start of a lock. */ + nflp->nfl_start = newnflp->nfl_end + 1; + } + if (nflp->nfl_blockcnt) { + /* wake up anyone blocked on this lock */ + wakeup(nflp); + } else if (nflp->nfl_flags & NFS_FILE_LOCK_DEAD) { + /* remove nflp from lock list and destroy */ + TAILQ_REMOVE(&np->n_locks, nflp, nfl_link); + nfs_file_lock_destroy(nflp); + } + } + + nfs_lock_owner_insert_held_lock(nlop, newnflp); + + /* + * POSIX locks should be coalesced when possible. + */ + if ((style == NFS_FILE_LOCK_STYLE_POSIX) && (nofp->nof_flags & NFS_OPEN_FILE_POSIXLOCK)) { + /* + * Walk through the lock queue and check each of our held locks with + * the previous and next locks in the lock owner's "held lock list". + * If the two locks can be coalesced, we merge the current lock into + * the other (previous or next) lock. Merging this way makes sure that + * lock ranges are always merged forward in the lock queue. This is + * important because anyone blocked on the lock being "merged away" + * will still need to block on that range and it will simply continue + * checking locks that are further down the list. 
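+ *
+ * For example (hypothetical ranges): POSIX locks 0-49 and 50-99 of the
+ * same type held by one owner are folded below into a single 0-99
+ * range; the absorbed entry is marked DEAD, and any waiter blocked on
+ * it wakes up and continues its conflict scan further down the queue.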
+ */ + TAILQ_FOREACH_SAFE(nflp, &np->n_locks, nfl_link, nextnflp) { + if (nflp->nfl_flags & (NFS_FILE_LOCK_BLOCKED|NFS_FILE_LOCK_DEAD)) + continue; + if (nflp->nfl_owner != nlop) + continue; + if ((nflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK) != NFS_FILE_LOCK_STYLE_POSIX) + continue; + if (((coalnflp = TAILQ_PREV(nflp, nfs_file_lock_queue, nfl_lolink))) && + ((coalnflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK) == NFS_FILE_LOCK_STYLE_POSIX) && + (coalnflp->nfl_type == nflp->nfl_type) && + (coalnflp->nfl_end == (nflp->nfl_start - 1))) { + coalnflp->nfl_end = nflp->nfl_end; + nflp->nfl_flags |= NFS_FILE_LOCK_DEAD; + lck_mtx_lock(&nlop->nlo_lock); + TAILQ_REMOVE(&nlop->nlo_locks, nflp, nfl_lolink); + lck_mtx_unlock(&nlop->nlo_lock); + } else if (((coalnflp = TAILQ_NEXT(nflp, nfl_lolink))) && + ((coalnflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK) == NFS_FILE_LOCK_STYLE_POSIX) && + (coalnflp->nfl_type == nflp->nfl_type) && + (coalnflp->nfl_start == (nflp->nfl_end + 1))) { + coalnflp->nfl_start = nflp->nfl_start; + nflp->nfl_flags |= NFS_FILE_LOCK_DEAD; + lck_mtx_lock(&nlop->nlo_lock); + TAILQ_REMOVE(&nlop->nlo_locks, nflp, nfl_lolink); + lck_mtx_unlock(&nlop->nlo_lock); + } + if (!(nflp->nfl_flags & NFS_FILE_LOCK_DEAD)) + continue; + if (nflp->nfl_blockcnt) { + /* wake up anyone blocked on this lock */ + wakeup(nflp); + } else { + /* remove nflp from lock list and destroy */ + TAILQ_REMOVE(&np->n_locks, nflp, nfl_link); + nfs_file_lock_destroy(nflp); + } + } + } + + lck_mtx_unlock(&np->n_openlock); + nfs_open_state_clear_busy(np); + nfs_mount_state_in_use_end(nmp, error); + + if (nflp2) + nfs_file_lock_destroy(nflp2); + return (error); +} + +int +nfs4_unlock( + nfsnode_t np, + struct nfs_open_file *nofp, + struct nfs_lock_owner *nlop, + uint64_t start, + uint64_t end, + int style, + vfs_context_t ctx) +{ + struct nfsmount *nmp; + struct nfs_file_lock *nflp, *nextnflp, *newnflp = NULL; + int error = 0, willsplit = 0, send_unlock_rpcs = 1; + + nmp = NFSTONMP(np); + if (!nmp) + return (ENXIO); + +restart: + if ((error = nfs_mount_state_in_use_start(nmp))) + return (error); + if (nofp->nof_flags & NFS_OPEN_FILE_REOPEN) { + nfs_mount_state_in_use_end(nmp, 0); + nfs4_reopen(nofp, vfs_context_thread(ctx)); + goto restart; + } + if ((error = nfs_open_state_set_busy(np, ctx))) { + nfs_mount_state_in_use_end(nmp, error); + return (error); + } + + lck_mtx_lock(&np->n_openlock); + if ((start > 0) && (end < UINT64_MAX) && !willsplit) { + /* + * We may need to allocate a new lock if an existing lock gets split. + * So, we first scan the list to check for a split, and if there's + * going to be one, we'll allocate one now. + */ + TAILQ_FOREACH_SAFE(nflp, &np->n_locks, nfl_link, nextnflp) { + if (nflp->nfl_flags & (NFS_FILE_LOCK_BLOCKED|NFS_FILE_LOCK_DEAD)) + continue; + if (nflp->nfl_owner != nlop) + continue; + if ((nflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK) != style) + continue; + if ((start > nflp->nfl_end) || (end < nflp->nfl_start)) + continue; + if ((start > nflp->nfl_start) && (end < nflp->nfl_end)) { + willsplit = 1; + break; + } + } + if (willsplit) { + lck_mtx_unlock(&np->n_openlock); + nfs_open_state_clear_busy(np); + nfs_mount_state_in_use_end(nmp, 0); + newnflp = nfs_file_lock_alloc(nlop); + if (!newnflp) + return (ENOMEM); + goto restart; + } + } + + /* + * Free all of our locks in the given range. + * + * Note that this process requires sending requests to the server. + * Because of this, we will release the n_openlock while performing + * the unlock RPCs. 
The N_OPENBUSY state keeps the state of *held* + * locks from changing underneath us. However, other entries in the + * list may be removed. So we need to be careful walking the list. + */ + + /* + * Don't unlock ranges that are held by other-style locks. + * If style is posix, don't send any unlock rpcs if flock is held. + * If we unlock an flock, don't send unlock rpcs for any posix-style + * ranges held - instead send unlocks for the ranges not held. + */ + if ((style == NFS_FILE_LOCK_STYLE_POSIX) && + ((nflp = TAILQ_FIRST(&nlop->nlo_locks))) && + ((nflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK) == NFS_FILE_LOCK_STYLE_FLOCK)) + send_unlock_rpcs = 0; + if ((style == NFS_FILE_LOCK_STYLE_FLOCK) && + ((nflp = TAILQ_FIRST(&nlop->nlo_locks))) && + ((nflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK) == NFS_FILE_LOCK_STYLE_FLOCK) && + ((nflp = TAILQ_NEXT(nflp, nfl_lolink))) && + ((nflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK) == NFS_FILE_LOCK_STYLE_POSIX)) { + uint64_t s = 0; + int type = TAILQ_FIRST(&nlop->nlo_locks)->nfl_type; + while (nflp) { + if ((nflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK) == NFS_FILE_LOCK_STYLE_POSIX) { + /* unlock the range preceding this lock */ + lck_mtx_unlock(&np->n_openlock); + error = nfs4_unlock_rpc(np, nlop, type, s, nflp->nfl_start-1, ctx); + if (nfs_mount_state_error_should_restart(error)) { + nfs_open_state_clear_busy(np); + nfs_mount_state_in_use_end(nmp, error); + goto restart; + } + lck_mtx_lock(&np->n_openlock); + if (error) + goto out; + s = nflp->nfl_end+1; + } + nflp = TAILQ_NEXT(nflp, nfl_lolink); + } + lck_mtx_unlock(&np->n_openlock); + error = nfs4_unlock_rpc(np, nlop, type, s, end, ctx); + if (nfs_mount_state_error_should_restart(error)) { + nfs_open_state_clear_busy(np); + nfs_mount_state_in_use_end(nmp, error); + goto restart; + } + lck_mtx_lock(&np->n_openlock); + if (error) + goto out; + send_unlock_rpcs = 0; + } + + TAILQ_FOREACH_SAFE(nflp, &np->n_locks, nfl_link, nextnflp) { + if (nflp->nfl_flags & (NFS_FILE_LOCK_BLOCKED|NFS_FILE_LOCK_DEAD)) + continue; + if (nflp->nfl_owner != nlop) + continue; + if ((nflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK) != style) + continue; + if ((start > nflp->nfl_end) || (end < nflp->nfl_start)) + continue; + /* here's one to unlock */ + if ((start <= nflp->nfl_start) && (end >= nflp->nfl_end)) { + /* The entire lock is being unlocked. */ + if (send_unlock_rpcs) { + lck_mtx_unlock(&np->n_openlock); + error = nfs4_unlock_rpc(np, nlop, nflp->nfl_type, nflp->nfl_start, nflp->nfl_end, ctx); + if (nfs_mount_state_error_should_restart(error)) { + nfs_open_state_clear_busy(np); + nfs_mount_state_in_use_end(nmp, error); + goto restart; + } + lck_mtx_lock(&np->n_openlock); + } + nextnflp = TAILQ_NEXT(nflp, nfl_link); + if (error) + break; + nflp->nfl_flags |= NFS_FILE_LOCK_DEAD; + lck_mtx_lock(&nlop->nlo_lock); + TAILQ_REMOVE(&nlop->nlo_locks, nflp, nfl_lolink); + lck_mtx_unlock(&nlop->nlo_lock); + /* lock will be destroyed below, if no waiters */ + } else if ((start > nflp->nfl_start) && (end < nflp->nfl_end)) { + /* We're unlocking a range in the middle of a lock. */ + /* The current lock will be split into two locks. 
*/ + if (send_unlock_rpcs) { + lck_mtx_unlock(&np->n_openlock); + error = nfs4_unlock_rpc(np, nlop, nflp->nfl_type, start, end, ctx); + if (nfs_mount_state_error_should_restart(error)) { + nfs_open_state_clear_busy(np); + nfs_mount_state_in_use_end(nmp, error); + goto restart; + } + lck_mtx_lock(&np->n_openlock); + } + if (error) + break; + /* update locks and insert new lock after current lock */ + newnflp->nfl_flags |= (nflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK); + newnflp->nfl_type = nflp->nfl_type; + newnflp->nfl_start = end + 1; + newnflp->nfl_end = nflp->nfl_end; + nflp->nfl_end = start - 1; + TAILQ_INSERT_AFTER(&np->n_locks, nflp, newnflp, nfl_link); + nfs_lock_owner_insert_held_lock(nlop, newnflp); + nextnflp = newnflp; + newnflp = NULL; + } else if (start > nflp->nfl_start) { + /* We're unlocking the end of a lock. */ + if (send_unlock_rpcs) { + lck_mtx_unlock(&np->n_openlock); + error = nfs4_unlock_rpc(np, nlop, nflp->nfl_type, start, nflp->nfl_end, ctx); + if (nfs_mount_state_error_should_restart(error)) { + nfs_open_state_clear_busy(np); + nfs_mount_state_in_use_end(nmp, error); + goto restart; + } + lck_mtx_lock(&np->n_openlock); + } + nextnflp = TAILQ_NEXT(nflp, nfl_link); + if (error) + break; + nflp->nfl_end = start - 1; + } else if (end < nflp->nfl_end) { + /* We're unlocking the start of a lock. */ + if (send_unlock_rpcs) { + lck_mtx_unlock(&np->n_openlock); + error = nfs4_unlock_rpc(np, nlop, nflp->nfl_type, nflp->nfl_start, end, ctx); + if (nfs_mount_state_error_should_restart(error)) { + nfs_open_state_clear_busy(np); + nfs_mount_state_in_use_end(nmp, error); + goto restart; + } + lck_mtx_lock(&np->n_openlock); + } + nextnflp = TAILQ_NEXT(nflp, nfl_link); + if (error) + break; + nflp->nfl_start = end + 1; + } + if (nflp->nfl_blockcnt) { + /* wake up anyone blocked on this lock */ + wakeup(nflp); + } else if (nflp->nfl_flags & NFS_FILE_LOCK_DEAD) { + /* remove nflp from lock list and destroy */ + TAILQ_REMOVE(&np->n_locks, nflp, nfl_link); + nfs_file_lock_destroy(nflp); + } + } +out: + lck_mtx_unlock(&np->n_openlock); + nfs_open_state_clear_busy(np); + nfs_mount_state_in_use_end(nmp, 0); + + if (newnflp) + nfs_file_lock_destroy(newnflp); + return (error); +} + +/* + * NFSv4 advisory file locking + */ +int +nfs4_vnop_advlock( + struct vnop_advlock_args /* { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + caddr_t a_id; + int a_op; + struct flock *a_fl; + int a_flags; + vfs_context_t a_context; + } */ *ap) +{ + vnode_t vp = ap->a_vp; + nfsnode_t np = VTONFS(ap->a_vp); + struct flock *fl = ap->a_fl; + int op = ap->a_op; + int flags = ap->a_flags; + vfs_context_t ctx = ap->a_context; + struct nfsmount *nmp; + struct nfs_vattr nvattr; + struct nfs_open_owner *noop = NULL; + struct nfs_open_file *nofp = NULL; + struct nfs_lock_owner *nlop = NULL; + off_t lstart; + uint64_t start, end; + int error = 0, modified, style; +#define OFF_MAX QUAD_MAX + + nmp = VTONMP(ap->a_vp); + if (!nmp) + return (ENXIO); + + switch (fl->l_whence) { + case SEEK_SET: + case SEEK_CUR: + /* + * Caller is responsible for adding any necessary offset + * to fl->l_start when SEEK_CUR is used. 
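+ *
+ * Range math used below (illustrative values): l_start 100 with
+ * l_len 50 locks bytes [100, 149] (end = start - 1 + l_len); l_len 0
+ * means "to end of file", i.e. end = UINT64_MAX; a negative l_len such
+ * as -50 locks the 50 bytes ending just before l_start, i.e. [50, 99].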
+ */ + lstart = fl->l_start; + break; + case SEEK_END: + /* need to flush, and refetch attributes to make */ + /* sure we have the correct end of file offset */ + if ((error = nfs_node_lock(np))) + return (error); + modified = (np->n_flag & NMODIFIED); + nfs_node_unlock(np); + if (modified && ((error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1)))) + return (error); + if ((error = nfs_getattr(np, &nvattr, ctx, NGA_UNCACHED))) + return (error); + nfs_data_lock(np, NFS_DATA_LOCK_SHARED); + if ((np->n_size > OFF_MAX) || + ((fl->l_start > 0) && (np->n_size > (u_quad_t)(OFF_MAX - fl->l_start)))) + error = EOVERFLOW; + lstart = np->n_size + fl->l_start; + nfs_data_unlock(np); + if (error) + return (error); + break; + default: + return (EINVAL); + } + if (lstart < 0) + return (EINVAL); + start = lstart; + if (fl->l_len == 0) { + end = UINT64_MAX; + } else if (fl->l_len > 0) { + if ((fl->l_len - 1) > (OFF_MAX - lstart)) + return (EOVERFLOW); + end = start - 1 + fl->l_len; + } else { /* l_len is negative */ + if ((lstart + fl->l_len) < 0) + return (EINVAL); + end = start - 1; + start += fl->l_len; + } + if (error) + return (error); + + style = (flags & F_FLOCK) ? NFS_FILE_LOCK_STYLE_FLOCK : NFS_FILE_LOCK_STYLE_POSIX; + if ((style == NFS_FILE_LOCK_STYLE_FLOCK) && ((start != 0) || (end != UINT64_MAX))) + return (EINVAL); + + /* find the lock owner, alloc if not unlock */ + nlop = nfs_lock_owner_find(np, vfs_context_proc(ctx), (op != F_UNLCK)); + if (!nlop) { + error = (op == F_UNLCK) ? 0 : ENOMEM; + if (error) + printf("nfs4_vnop_advlock: no lock owner %d\n", error); + goto out; + } + + if (op == F_GETLK) { + error = nfs4_getlock(np, nlop, fl, start, end, ctx); + } else { + /* find the open owner */ + noop = nfs_open_owner_find(nmp, vfs_context_ucred(ctx), 0); + if (!noop) { + printf("nfs4_vnop_advlock: no open owner\n"); + error = EPERM; + goto out; + } + /* find the open file */ +restart: + error = nfs_open_file_find(np, noop, &nofp, 0, 0, 0); + if (error) + error = EBADF; + if (!error && (nofp->nof_flags & NFS_OPEN_FILE_LOST)) { + printf("nfs_vnop_advlock: LOST\n"); + error = EIO; + } + if (!error && (nofp->nof_flags & NFS_OPEN_FILE_REOPEN)) { + nfs4_reopen(nofp, vfs_context_thread(ctx)); + nofp = NULL; + goto restart; + } + if (error) { + printf("nfs4_vnop_advlock: no open file %d\n", error); + goto out; + } + if (op == F_UNLCK) { + error = nfs4_unlock(np, nofp, nlop, start, end, style, ctx); + } else if ((op == F_SETLK) || (op == F_SETLKW)) { + if ((op == F_SETLK) && (flags & F_WAIT)) + op = F_SETLKW; + error = nfs4_setlock(np, nofp, nlop, op, start, end, style, fl->l_type, ctx); + } else { + /* not getlk, unlock or lock? */ + error = EINVAL; + } + } + +out: + if (nlop) + nfs_lock_owner_rele(nlop); + if (noop) + nfs_open_owner_rele(noop); + return (error); +} + +/* + * Check if an open owner holds any locks on a file. + */ +int +nfs4_check_for_locks(struct nfs_open_owner *noop, struct nfs_open_file *nofp) +{ + struct nfs_lock_owner *nlop; + + TAILQ_FOREACH(nlop, &nofp->nof_np->n_lock_owners, nlo_link) { + if (nlop->nlo_open_owner != noop) + continue; + if (!TAILQ_EMPTY(&nlop->nlo_locks)) + break; + } + return (nlop ? 1 : 0); +} + +/* + * Reopen simple (no deny, no locks) open state that was lost. 
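+ *
+ * ("Lost" here means open state the server may no longer have, e.g.
+ * when recovery could not reclaim it. The code below re-sends one
+ * OPEN per held mode: nof_rw != 0 triggers a read/write reopen, then
+ * nof_w and nof_r each get their own reopen, all with deny mode NONE.)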
+ */ +void +nfs4_reopen(struct nfs_open_file *nofp, thread_t thd) +{ + struct nfs_open_owner *noop = nofp->nof_owner; + struct nfsmount *nmp = NFSTONMP(nofp->nof_np); + vnode_t vp = NFSTOV(nofp->nof_np); + vnode_t dvp = NULL; + struct componentname cn; + const char *vname = NULL; + size_t namelen; + char smallname[128]; + char *filename = NULL; + int error = 0, done = 0, slpflag = (nmp->nm_flag & NFSMNT_INT) ? PCATCH : 0; + struct timespec ts = { 1, 0 }; + + lck_mtx_lock(&nofp->nof_lock); + while (nofp->nof_flags & NFS_OPEN_FILE_REOPENING) { + if ((error = nfs_sigintr(nmp, NULL, thd, 0))) + break; + msleep(&nofp->nof_flags, &nofp->nof_lock, slpflag|(PZERO-1), "nfsreopenwait", &ts); + } + if (!(nofp->nof_flags & NFS_OPEN_FILE_REOPEN)) { + lck_mtx_unlock(&nofp->nof_lock); + return; + } + nofp->nof_flags |= NFS_OPEN_FILE_REOPENING; + lck_mtx_unlock(&nofp->nof_lock); + + dvp = vnode_getparent(vp); + vname = vnode_getname(vp); + if (!dvp || !vname) { + error = EIO; + goto out; + } + filename = &smallname[0]; + namelen = snprintf(filename, sizeof(smallname), "%s", vname); + if (namelen >= sizeof(smallname)) { + namelen++; /* snprintf result doesn't include '\0' */ + MALLOC(filename, char *, namelen, M_TEMP, M_WAITOK); + if (!filename) { + error = ENOMEM; + goto out; + } + snprintf(filename, namelen, "%s", vname); + } + bzero(&cn, sizeof(cn)); + cn.cn_nameptr = filename; + cn.cn_namelen = namelen; + +restart: + done = 0; + if ((error = nfs_mount_state_in_use_start(nmp))) + goto out; + + if (nofp->nof_rw) + error = nfs4_open_reopen_rpc(nofp, thd, noop->noo_cred, &cn, dvp, &vp, NFS_OPEN_SHARE_ACCESS_BOTH, NFS_OPEN_SHARE_DENY_NONE); + if (!error && nofp->nof_w) + error = nfs4_open_reopen_rpc(nofp, thd, noop->noo_cred, &cn, dvp, &vp, NFS_OPEN_SHARE_ACCESS_WRITE, NFS_OPEN_SHARE_DENY_NONE); + if (!error && nofp->nof_r) + error = nfs4_open_reopen_rpc(nofp, thd, noop->noo_cred, &cn, dvp, &vp, NFS_OPEN_SHARE_ACCESS_READ, NFS_OPEN_SHARE_DENY_NONE); + + if (nfs_mount_state_in_use_end(nmp, error)) { + if (error == NFSERR_GRACE) + goto restart; + error = 0; + goto out; + } + done = 1; +out: + lck_mtx_lock(&nofp->nof_lock); + nofp->nof_flags &= ~NFS_OPEN_FILE_REOPENING; + if (error) + nofp->nof_flags |= NFS_OPEN_FILE_LOST; + if (done) + nofp->nof_flags &= ~NFS_OPEN_FILE_REOPEN; + else + printf("nfs4_reopen: failed, error %d, lost %d\n", error, (nofp->nof_flags & NFS_OPEN_FILE_LOST) ? 1 : 0); + lck_mtx_unlock(&nofp->nof_lock); + if (filename && (filename != &smallname[0])) + FREE(filename, M_TEMP); + if (vname) + vnode_putname(vname); + if (dvp != NULLVP) + vnode_put(dvp); +} + +/* + * Send a normal OPEN RPC to open/create a file. + */ +int +nfs4_open_rpc( + struct nfs_open_file *nofp, + vfs_context_t ctx, + struct componentname *cnp, + struct vnode_attr *vap, + vnode_t dvp, + vnode_t *vpp, + int create, + int share_access, + int share_deny) +{ + return (nfs4_open_rpc_internal(nofp, ctx, vfs_context_thread(ctx), vfs_context_ucred(ctx), + cnp, vap, dvp, vpp, create, share_access, share_deny)); +} + +/* + * Send an OPEN RPC to reopen a file. + */ +int +nfs4_open_reopen_rpc( + struct nfs_open_file *nofp, + thread_t thd, + kauth_cred_t cred, + struct componentname *cnp, + vnode_t dvp, + vnode_t *vpp, + int share_access, + int share_deny) +{ + return (nfs4_open_rpc_internal(nofp, NULL, thd, cred, cnp, NULL, dvp, vpp, 0, share_access, share_deny)); +} + +/* + * common OPEN RPC code + * + * If create is set, ctx must be passed in. 
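One small idiom from nfs4_reopen() above deserves a note: the name is formatted into a stack buffer first and moved to the heap only when snprintf() reports truncation (its return value excludes the terminating NUL). A self-contained sketch of the same pattern, with hypothetical names:

#include <stdio.h>
#include <stdlib.h>

// copy_name: format into the caller's stack buffer when it fits,
// otherwise allocate exactly what snprintf said was needed (+1 for NUL).
static char *
copy_name(const char *src, char *stackbuf, size_t stacklen)
{
	size_t needed = (size_t)snprintf(stackbuf, stacklen, "%s", src);

	if (needed < stacklen)
		return (stackbuf);		// fit: no allocation
	char *heap = malloc(needed + 1);
	if (heap != NULL)
		snprintf(heap, needed + 1, "%s", src);
	return (heap);				// caller frees if != stackbuf
}

int
main(void)
{
	char small[8];
	char *name = copy_name("a-rather-long-name", small, sizeof(small));

	if (name != NULL) {
		puts(name);
		if (name != small)
			free(name);
	}
	return (0);
}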
+ */ +int +nfs4_open_rpc_internal( + struct nfs_open_file *nofp, + vfs_context_t ctx, + thread_t thd, + kauth_cred_t cred, + struct componentname *cnp, + struct vnode_attr *vap, + vnode_t dvp, + vnode_t *vpp, + int create, + int share_access, + int share_deny) +{ + struct nfsmount *nmp; + struct nfs_open_owner *noop = nofp->nof_owner; + struct nfs_vattr nvattr, dnvattr; + int error = 0, open_error = EIO, lockerror = ENOENT, busyerror = ENOENT, status; + int nfsvers, numops, exclusive = 0, gotuid, gotgid; + u_int64_t xid, savedxid = 0; + nfsnode_t dnp = VTONFS(dvp); + nfsnode_t np, newnp = NULL; + vnode_t newvp = NULL; + struct nfsm_chain nmreq, nmrep; + uint32_t bitmap[NFS_ATTR_BITMAP_LEN], bmlen; + uint32_t rflags, delegation = 0, recall = 0, val; + struct nfs_stateid stateid, dstateid, *sid; + fhandle_t fh; + struct nfsreq *req = NULL; + struct nfs_dulookup dul; + + if (create && !ctx) + return (EINVAL); + + nmp = VTONMP(dvp); + if (!nmp) + return (ENXIO); + nfsvers = nmp->nm_vers; + + np = *vpp ? VTONFS(*vpp) : NULL; + if (create && vap) { + exclusive = (vap->va_vaflags & VA_EXCLUSIVE); + nfs_avoid_needless_id_setting_on_create(dnp, vap, ctx); + gotuid = VATTR_IS_ACTIVE(vap, va_uid); + gotgid = VATTR_IS_ACTIVE(vap, va_gid); + } else { + exclusive = gotuid = gotgid = 0; + } + if (nofp) { + sid = &nofp->nof_stateid; + } else { + stateid.seqid = stateid.other[0] = stateid.other[1] = stateid.other[2] = 0; + sid = &stateid; + } + + if ((error = nfs_open_owner_set_busy(noop, thd))) + return (error); +again: + rflags = 0; + + nfsm_chain_null(&nmreq); + nfsm_chain_null(&nmrep); + + // PUTFH, SAVEFH, OPEN(CREATE?), GETATTR(FH), RESTOREFH, GETATTR + numops = 6; + nfsm_chain_build_alloc_init(error, &nmreq, 53 * NFSX_UNSIGNED + cnp->cn_namelen); + nfsm_chain_add_compound_header(error, &nmreq, create ? 
"create" : "open", numops); + numops--; + nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH); + nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize); + numops--; + nfsm_chain_add_32(error, &nmreq, NFS_OP_SAVEFH); + numops--; + nfsm_chain_add_32(error, &nmreq, NFS_OP_OPEN); + nfsm_chain_add_32(error, &nmreq, noop->noo_seqid); + nfsm_chain_add_32(error, &nmreq, share_access); + nfsm_chain_add_32(error, &nmreq, share_deny); + + // open owner: clientid + uid + nfsm_chain_add_64(error, &nmreq, nmp->nm_clientid); // open_owner4.clientid + nfsm_chain_add_32(error, &nmreq, NFSX_UNSIGNED); + nfsm_chain_add_32(error, &nmreq, kauth_cred_getuid(noop->noo_cred)); // open_owner4.owner + + // openflag4 + nfsm_chain_add_32(error, &nmreq, create); + if (create) { + if (exclusive) { + static uint32_t create_verf; // XXX need a better verifier + create_verf++; + nfsm_chain_add_32(error, &nmreq, NFS_CREATE_EXCLUSIVE); + /* insert 64 bit verifier */ + nfsm_chain_add_32(error, &nmreq, create_verf); + nfsm_chain_add_32(error, &nmreq, create_verf); + } else { + nfsm_chain_add_32(error, &nmreq, NFS_CREATE_UNCHECKED); + nfsm_chain_add_fattr4(error, &nmreq, vap, nmp); + } + } + + // open_claim4 + nfsm_chain_add_32(error, &nmreq, NFS_CLAIM_NULL); + nfsm_chain_add_string(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen); + numops--; + nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR); + NFS_COPY_ATTRIBUTES(nfs_getattr_bitmap, bitmap); + NFS_BITMAP_SET(bitmap, NFS_FATTR_FILEHANDLE); + nfsm_chain_add_bitmap_masked(error, &nmreq, bitmap, + NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr); + numops--; + nfsm_chain_add_32(error, &nmreq, NFS_OP_RESTOREFH); + numops--; + nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR); + nfsm_chain_add_bitmap_masked(error, &nmreq, nfs_getattr_bitmap, + NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr); + nfsm_chain_build_done(error, &nmreq); + nfsm_assert(error, (numops == 0), EPROTO); + if (!error) + error = busyerror = nfs_node_set_busy(dnp, thd); + nfsmout_if(error); + + if (create) + nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx); + + error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC4_COMPOUND, thd, cred, NULL, &req); + if (!error) { + if (create) + nfs_dulookup_start(&dul, dnp, ctx); + error = nfs_request_async_finish(req, &nmrep, &xid, &status); + savedxid = xid; + } + + if (create) + nfs_dulookup_finish(&dul, dnp, ctx); + + if ((lockerror = nfs_node_lock(dnp))) + error = lockerror; + nfsm_chain_skip_tag(error, &nmrep); + nfsm_chain_get_32(error, &nmrep, numops); + nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH); + nfsm_chain_op_check(error, &nmrep, NFS_OP_SAVEFH); + nfsmout_if(error); + nfsm_chain_op_check(error, &nmrep, NFS_OP_OPEN); + nfs_owner_seqid_increment(noop, NULL, error); + nfsm_chain_get_stateid(error, &nmrep, sid); + nfsm_chain_check_change_info(error, &nmrep, dnp); + nfsm_chain_get_32(error, &nmrep, rflags); + bmlen = NFS_ATTR_BITMAP_LEN; + nfsm_chain_get_bitmap(error, &nmrep, bitmap, bmlen); + nfsm_chain_get_32(error, &nmrep, delegation); + if (!error) + switch (delegation) { + case NFS_OPEN_DELEGATE_NONE: + break; + case NFS_OPEN_DELEGATE_READ: + nfsm_chain_get_stateid(error, &nmrep, &dstateid); + nfsm_chain_get_32(error, &nmrep, recall); + // ACE: (skip) XXX + nfsm_chain_adv(error, &nmrep, 3 * NFSX_UNSIGNED); + nfsm_chain_get_32(error, &nmrep, val); /* string length */ + nfsm_chain_adv(error, &nmrep, nfsm_rndup(val)); + break; + case NFS_OPEN_DELEGATE_WRITE: + nfsm_chain_get_stateid(error, &nmrep, &dstateid); + 
nfsm_chain_get_32(error, &nmrep, recall); + // space (skip) XXX + nfsm_chain_adv(error, &nmrep, 3 * NFSX_UNSIGNED); + // ACE: (skip) XXX + nfsm_chain_adv(error, &nmrep, 3 * NFSX_UNSIGNED); + nfsm_chain_get_32(error, &nmrep, val); /* string length */ + nfsm_chain_adv(error, &nmrep, nfsm_rndup(val)); + break; + default: + error = EBADRPC; + break; + } + /* At this point if we have no error, the object was created/opened. */ + /* if we don't get attributes, then we should lookitup. */ + open_error = error; + nfsmout_if(error); + if (create && !exclusive) + nfs_vattr_set_supported(bitmap, vap); + nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR); + nfsmout_if(error); + NFS_CLEAR_ATTRIBUTES(nvattr.nva_bitmap); + error = nfs4_parsefattr(&nmrep, NULL, &nvattr, &fh, NULL); + nfsmout_if(error); + if (!NFS_BITMAP_ISSET(nvattr.nva_bitmap, NFS_FATTR_FILEHANDLE)) { + printf("nfs: open/create didn't return filehandle?\n"); error = EBADRPC; goto nfsmout; } + if (!create && np && !NFS_CMPFH(np, fh.fh_data, fh.fh_len)) { + // XXX for the open case, what if fh doesn't match the vnode we think we're opening? + printf("nfs4_open_rpc: warning: file handle mismatch\n"); + } /* directory attributes: if we don't get them, make sure to invalidate */ nfsm_chain_op_check(error, &nmrep, NFS_OP_RESTOREFH); nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR); nfsm_chain_loadattr(error, &nmrep, dnp, nfsvers, NULL, &xid); if (error) NATTRINVALIDATE(dnp); + nfsmout_if(error); + + if (rflags & NFS_OPEN_RESULT_LOCKTYPE_POSIX) + nofp->nof_flags |= NFS_OPEN_FILE_POSIXLOCK; + + if (rflags & NFS_OPEN_RESULT_CONFIRM) { + nfs_node_unlock(dnp); + lockerror = ENOENT; + nfsm_chain_cleanup(&nmreq); + nfsm_chain_cleanup(&nmrep); + // PUTFH, OPEN_CONFIRM, GETATTR + numops = 3; + nfsm_chain_build_alloc_init(error, &nmreq, 23 * NFSX_UNSIGNED); + nfsm_chain_add_compound_header(error, &nmreq, "open_confirm", numops); + numops--; + nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH); + nfsm_chain_add_fh(error, &nmreq, nfsvers, fh.fh_data, fh.fh_len); + numops--; + nfsm_chain_add_32(error, &nmreq, NFS_OP_OPEN_CONFIRM); + nfsm_chain_add_stateid(error, &nmreq, sid); + nfsm_chain_add_32(error, &nmreq, noop->noo_seqid); + numops--; + nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR); + nfsm_chain_add_bitmap_masked(error, &nmreq, nfs_getattr_bitmap, + NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr); + nfsm_chain_build_done(error, &nmreq); + nfsm_assert(error, (numops == 0), EPROTO); + nfsmout_if(error); + error = nfs_request2(dnp, NULL, &nmreq, NFSPROC4_COMPOUND, thd, cred, 0, &nmrep, &xid, &status); + + nfsm_chain_skip_tag(error, &nmrep); + nfsm_chain_get_32(error, &nmrep, numops); + nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH); + nfsmout_if(error); + nfsm_chain_op_check(error, &nmrep, NFS_OP_OPEN_CONFIRM); + nfs_owner_seqid_increment(noop, NULL, error); + nfsm_chain_get_stateid(error, &nmrep, sid); + nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR); + nfsmout_if(error); + NFS_CLEAR_ATTRIBUTES(nvattr.nva_bitmap); + error = nfs4_parsefattr(&nmrep, NULL, &nvattr, NULL, NULL); + nfsmout_if(error); + savedxid = xid; + if ((lockerror = nfs_node_lock(dnp))) + error = lockerror; + } + +nfsmout: + nfsm_chain_cleanup(&nmreq); + nfsm_chain_cleanup(&nmrep); + + if (!lockerror && create) { + if (!open_error && (dnp->n_flag & NNEGNCENTRIES)) { + dnp->n_flag &= ~NNEGNCENTRIES; + cache_purge_negatives(dvp); + } + dnp->n_flag |= NMODIFIED; + nfs_node_unlock(dnp); + lockerror = ENOENT; + nfs_getattr(dnp, &dnvattr, ctx, NGA_CACHED); + } + if (!lockerror) + 
nfs_node_unlock(dnp); + if (!error && create && fh.fh_len) { + /* create the vnode with the filehandle and attributes */ + xid = savedxid; + error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh.fh_data, fh.fh_len, &nvattr, &xid, NG_MAKEENTRY, &newnp); + if (!error) + newvp = NFSTOV(newnp); + } + if (!busyerror) + nfs_node_clear_busy(dnp); + if ((delegation == NFS_OPEN_DELEGATE_READ) || (delegation == NFS_OPEN_DELEGATE_WRITE)) { + if (!np) + np = newnp; + if (!error && np && !recall) { + /* stuff the delegation state in the node */ + lck_mtx_lock(&np->n_openlock); + np->n_openflags &= ~N_DELEG_MASK; + np->n_openflags |= ((delegation == NFS_OPEN_DELEGATE_READ) ? N_DELEG_READ : N_DELEG_WRITE); + np->n_dstateid = dstateid; + lck_mtx_unlock(&np->n_openlock); + } + if (recall) { + nfs4_delegreturn_rpc(nmp, fh.fh_data, fh.fh_len, &dstateid, thd, cred); + if (np) { + lck_mtx_lock(&np->n_openlock); + np->n_openflags &= ~N_DELEG_MASK; + lck_mtx_unlock(&np->n_openlock); + } + } + } + if (error) { + if (exclusive && (error == NFSERR_NOTSUPP)) { + exclusive = 0; + goto again; + } + if (newvp) { + nfs_node_unlock(newnp); + vnode_put(newvp); + } + } else if (create) { + nfs_node_unlock(newnp); + if (exclusive) { + error = nfs4_setattr_rpc(newnp, vap, ctx); + if (error && (gotuid || gotgid)) { + /* it's possible the server didn't like our attempt to set IDs. */ + /* so, let's try it again without those */ + VATTR_CLEAR_ACTIVE(vap, va_uid); + VATTR_CLEAR_ACTIVE(vap, va_gid); + error = nfs4_setattr_rpc(newnp, vap, ctx); + } + } + if (error) + vnode_put(newvp); + else + *vpp = newvp; + } + nfs_open_owner_clear_busy(noop); + return (error); +} + +/* + * Send an OPEN RPC to reclaim an open file. + */ +int +nfs4_open_reclaim_rpc( + struct nfs_open_file *nofp, + int share_access, + int share_deny) +{ + struct nfsmount *nmp; + struct nfs_open_owner *noop = nofp->nof_owner; + struct nfs_vattr nvattr; + int error = 0, lockerror = ENOENT, status; + int nfsvers, numops; + u_int64_t xid; + nfsnode_t np = nofp->nof_np; + struct nfsm_chain nmreq, nmrep; + uint32_t bitmap[NFS_ATTR_BITMAP_LEN], bmlen; + uint32_t rflags = 0, delegation, recall = 0, val; + fhandle_t fh; + struct nfs_stateid dstateid; + + nmp = NFSTONMP(np); + if (!nmp) + return (ENXIO); + nfsvers = nmp->nm_vers; + + if ((error = nfs_open_owner_set_busy(noop, current_thread()))) + return (error); + + delegation = NFS_OPEN_DELEGATE_NONE; + + nfsm_chain_null(&nmreq); + nfsm_chain_null(&nmrep); + + // PUTFH, OPEN, GETATTR(FH) + numops = 3; + nfsm_chain_build_alloc_init(error, &nmreq, 48 * NFSX_UNSIGNED); + nfsm_chain_add_compound_header(error, &nmreq, "open_reclaim", numops); + numops--; + nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH); + nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize); + numops--; + nfsm_chain_add_32(error, &nmreq, NFS_OP_OPEN); + nfsm_chain_add_32(error, &nmreq, noop->noo_seqid); + nfsm_chain_add_32(error, &nmreq, share_access); + nfsm_chain_add_32(error, &nmreq, share_deny); + // open owner: clientid + uid + nfsm_chain_add_64(error, &nmreq, nmp->nm_clientid); // open_owner4.clientid + nfsm_chain_add_32(error, &nmreq, NFSX_UNSIGNED); + nfsm_chain_add_32(error, &nmreq, kauth_cred_getuid(noop->noo_cred)); // open_owner4.owner + // openflag4 + nfsm_chain_add_32(error, &nmreq, NFS_OPEN_NOCREATE); + // open_claim4 + nfsm_chain_add_32(error, &nmreq, NFS_CLAIM_PREVIOUS); + delegation = (np->n_openflags & N_DELEG_READ) ? NFS_OPEN_DELEGATE_READ : + (np->n_openflags & N_DELEG_WRITE) ? 
NFS_OPEN_DELEGATE_WRITE : + NFS_OPEN_DELEGATE_NONE; + nfsm_chain_add_32(error, &nmreq, delegation); + delegation = NFS_OPEN_DELEGATE_NONE; + numops--; + nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR); + NFS_COPY_ATTRIBUTES(nfs_getattr_bitmap, bitmap); + NFS_BITMAP_SET(bitmap, NFS_FATTR_FILEHANDLE); + nfsm_chain_add_bitmap_masked(error, &nmreq, bitmap, + NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr); + nfsm_chain_build_done(error, &nmreq); + nfsm_assert(error, (numops == 0), EPROTO); + nfsmout_if(error); + + error = nfs_request2(np, nmp->nm_mountp, &nmreq, NFSPROC4_COMPOUND, current_thread(), noop->noo_cred, R_RECOVER, &nmrep, &xid, &status); + + if ((lockerror = nfs_node_lock(np))) + error = lockerror; + nfsm_chain_skip_tag(error, &nmrep); + nfsm_chain_get_32(error, &nmrep, numops); + nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH); + nfsmout_if(error); + nfsm_chain_op_check(error, &nmrep, NFS_OP_OPEN); + nfs_owner_seqid_increment(noop, NULL, error); + nfsm_chain_get_stateid(error, &nmrep, &nofp->nof_stateid); + nfsm_chain_check_change_info(error, &nmrep, np); + nfsm_chain_get_32(error, &nmrep, rflags); + bmlen = NFS_ATTR_BITMAP_LEN; + nfsm_chain_get_bitmap(error, &nmrep, bitmap, bmlen); + nfsm_chain_get_32(error, &nmrep, delegation); + if (!error) + switch (delegation) { + case NFS_OPEN_DELEGATE_NONE: + break; + case NFS_OPEN_DELEGATE_READ: + nfsm_chain_get_stateid(error, &nmrep, &dstateid); + nfsm_chain_get_32(error, &nmrep, recall); + // ACE: (skip) XXX + nfsm_chain_adv(error, &nmrep, 3 * NFSX_UNSIGNED); + nfsm_chain_get_32(error, &nmrep, val); /* string length */ + nfsm_chain_adv(error, &nmrep, nfsm_rndup(val)); + if (!error) { + /* stuff the delegation state in the node */ + lck_mtx_lock(&np->n_openlock); + np->n_openflags &= ~N_DELEG_MASK; + np->n_openflags |= N_DELEG_READ; + np->n_dstateid = dstateid; + lck_mtx_unlock(&np->n_openlock); + } + break; + case NFS_OPEN_DELEGATE_WRITE: + nfsm_chain_get_stateid(error, &nmrep, &dstateid); + nfsm_chain_get_32(error, &nmrep, recall); + // space (skip) XXX + nfsm_chain_adv(error, &nmrep, 3 * NFSX_UNSIGNED); + // ACE: (skip) XXX + nfsm_chain_adv(error, &nmrep, 3 * NFSX_UNSIGNED); + nfsm_chain_get_32(error, &nmrep, val); /* string length */ + nfsm_chain_adv(error, &nmrep, nfsm_rndup(val)); + if (!error) { + /* stuff the delegation state in the node */ + lck_mtx_lock(&np->n_openlock); + np->n_openflags &= ~N_DELEG_MASK; + np->n_openflags |= N_DELEG_WRITE; + np->n_dstateid = dstateid; + lck_mtx_unlock(&np->n_openlock); + } + break; + default: + error = EBADRPC; + break; + } + nfsmout_if(error); + nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR); + NFS_CLEAR_ATTRIBUTES(nvattr.nva_bitmap); + error = nfs4_parsefattr(&nmrep, NULL, &nvattr, &fh, NULL); + nfsmout_if(error); + if (!NFS_BITMAP_ISSET(nvattr.nva_bitmap, NFS_FATTR_FILEHANDLE)) { + printf("nfs: open reclaim didn't return filehandle?\n"); + error = EBADRPC; + goto nfsmout; + } + if (!NFS_CMPFH(np, fh.fh_data, fh.fh_len)) { + // XXX what if fh doesn't match the vnode we think we're re-opening? 
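The comparison behind that question is worth spelling out: an NFS filehandle is opaque to the client, so "same file" can only mean same length and same bytes. A sketch of what the NFS_CMPFH() check amounts to (fh_match is a hypothetical name):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

// Filehandles are opaque byte strings: identity is length + bytes.
static int
fh_match(const uint8_t *fh1, size_t len1, const uint8_t *fh2, size_t len2)
{
	return (len1 == len2 && memcmp(fh1, fh2, len1) == 0);
}

int
main(void)
{
	uint8_t a[] = { 0xde, 0xad, 0xbe, 0xef };
	uint8_t b[] = { 0xde, 0xad, 0xbe, 0xee };

	printf("%d %d\n", fh_match(a, 4, a, 4), fh_match(a, 4, b, 4));
	return (0);
}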
+ printf("nfs4_open_reclaim_rpc: warning: file handle mismatch\n"); + } + error = nfs_loadattrcache(np, &nvattr, &xid, 1); + nfsmout_if(error); + if (rflags & NFS_OPEN_RESULT_LOCKTYPE_POSIX) + nofp->nof_flags |= NFS_OPEN_FILE_POSIXLOCK; +nfsmout: + nfsm_chain_cleanup(&nmreq); + nfsm_chain_cleanup(&nmrep); + if (!lockerror) + nfs_node_unlock(np); + nfs_open_owner_clear_busy(noop); + if ((delegation == NFS_OPEN_DELEGATE_READ) || (delegation == NFS_OPEN_DELEGATE_WRITE)) { + if (recall) { + nfs4_delegreturn_rpc(nmp, fh.fh_data, fh.fh_len, &dstateid, current_thread(), noop->noo_cred); + lck_mtx_lock(&np->n_openlock); + np->n_openflags &= ~N_DELEG_MASK; + lck_mtx_unlock(&np->n_openlock); + } + } + return (error); +} - if (rflags & NFS_OPEN_RESULT_CONFIRM) { - nfsm_chain_cleanup(&nmreq); - nfsm_chain_cleanup(&nmrep); - // PUTFH, OPEN_CONFIRM, GETATTR - numops = 3; - nfsm_chain_build_alloc_init(error, &nmreq, 23 * NFSX_UNSIGNED); - nfsm_chain_add_compound_header(error, &nmreq, "create_confirm", numops); - numops--; - nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH); - nfsm_chain_add_fh(error, &nmreq, nfsvers, fh.fh_data, fh.fh_len); - numops--; - nfsm_chain_add_32(error, &nmreq, NFS_OP_OPEN_CONFIRM); - nfsm_chain_add_32(error, &nmreq, stateid[0]); - nfsm_chain_add_32(error, &nmreq, stateid[1]); - nfsm_chain_add_32(error, &nmreq, stateid[2]); - nfsm_chain_add_32(error, &nmreq, stateid[3]); - nfsm_chain_add_32(error, &nmreq, seqid); - seqid++; - numops--; - nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR); - nfsm_chain_add_bitmap_masked(error, &nmreq, nfs_getattr_bitmap, - NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr); - nfsm_chain_build_done(error, &nmreq); - nfsm_assert(error, (numops == 0), EPROTO); - nfsmout_if(error); - error = nfs_request(dnp, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &nmrep, &xid, &status); +int +nfs4_open_downgrade_rpc( + nfsnode_t np, + struct nfs_open_file *nofp, + vfs_context_t ctx) +{ + struct nfs_open_owner *noop = nofp->nof_owner; + struct nfsmount *nmp; + int error, lockerror = ENOENT, status, nfsvers, numops; + struct nfsm_chain nmreq, nmrep; + u_int64_t xid; - nfsm_chain_skip_tag(error, &nmrep); - nfsm_chain_get_32(error, &nmrep, numops); - nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH); - nfsm_chain_op_check(error, &nmrep, NFS_OP_OPEN_CONFIRM); - nfsm_chain_get_32(error, &nmrep, stateid[0]); - nfsm_chain_get_32(error, &nmrep, stateid[1]); - nfsm_chain_get_32(error, &nmrep, stateid[2]); - nfsm_chain_get_32(error, &nmrep, stateid[3]); - nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR); - nfsmout_if(error); - NFS_CLEAR_ATTRIBUTES(nvattr.nva_bitmap); - error = nfs4_parsefattr(&nmrep, NULL, &nvattr, NULL, NULL); - nfsmout_if(error); - savedxid = xid; - } + nmp = NFSTONMP(np); + if (!nmp) + return (ENXIO); + nfsvers = nmp->nm_vers; + + if ((error = nfs_open_owner_set_busy(noop, vfs_context_thread(ctx)))) + return (error); + + nfsm_chain_null(&nmreq); + nfsm_chain_null(&nmrep); + + // PUTFH, OPEN_DOWNGRADE, GETATTR + numops = 3; + nfsm_chain_build_alloc_init(error, &nmreq, 23 * NFSX_UNSIGNED); + nfsm_chain_add_compound_header(error, &nmreq, "open_downgrd", numops); + numops--; + nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH); + nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize); + numops--; + nfsm_chain_add_32(error, &nmreq, NFS_OP_OPEN_DOWNGRADE); + nfsm_chain_add_stateid(error, &nmreq, &nofp->nof_stateid); + nfsm_chain_add_32(error, &nmreq, noop->noo_seqid); + nfsm_chain_add_32(error, &nmreq, nofp->nof_access); + nfsm_chain_add_32(error, &nmreq, 
nofp->nof_deny); + numops--; + nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR); + nfsm_chain_add_bitmap_masked(error, &nmreq, nfs_getattr_bitmap, + NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr); + nfsm_chain_build_done(error, &nmreq); + nfsm_assert(error, (numops == 0), EPROTO); + nfsmout_if(error); + error = nfs_request(np, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &nmrep, &xid, &status); + + if ((lockerror = nfs_node_lock(np))) + error = lockerror; + nfsm_chain_skip_tag(error, &nmrep); + nfsm_chain_get_32(error, &nmrep, numops); + nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH); nfsmout_if(error); + nfsm_chain_op_check(error, &nmrep, NFS_OP_OPEN_DOWNGRADE); + nfs_owner_seqid_increment(noop, NULL, error); + nfsm_chain_get_stateid(error, &nmrep, &nofp->nof_stateid); + nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR); + nfsm_chain_loadattr(error, &nmrep, np, nfsvers, NULL, &xid); +nfsmout: + if (!lockerror) + nfs_node_unlock(np); + nfs_open_owner_clear_busy(noop); nfsm_chain_cleanup(&nmreq); nfsm_chain_cleanup(&nmrep); + return (error); +} - // PUTFH, CLOSE - numops = 2; - nfsm_chain_build_alloc_init(error, &nmreq, 19 * NFSX_UNSIGNED); - nfsm_chain_add_compound_header(error, &nmreq, "create_close", numops); +int +nfs4_close_rpc( + nfsnode_t np, + struct nfs_open_file *nofp, + thread_t thd, + kauth_cred_t cred, + int flag) +{ + struct nfs_open_owner *noop = nofp->nof_owner; + struct nfsmount *nmp; + int error, lockerror = ENOENT, status, nfsvers, numops; + struct nfsm_chain nmreq, nmrep; + u_int64_t xid; + + nmp = NFSTONMP(np); + if (!nmp) + return (ENXIO); + nfsvers = nmp->nm_vers; + + if ((error = nfs_open_owner_set_busy(noop, thd))) + return (error); + + nfsm_chain_null(&nmreq); + nfsm_chain_null(&nmrep); + + // PUTFH, CLOSE, GETFH + numops = 3; + nfsm_chain_build_alloc_init(error, &nmreq, 23 * NFSX_UNSIGNED); + nfsm_chain_add_compound_header(error, &nmreq, "close", numops); numops--; nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH); - nfsm_chain_add_fh(error, &nmreq, nfsvers, fh.fh_data, fh.fh_len); + nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize); numops--; nfsm_chain_add_32(error, &nmreq, NFS_OP_CLOSE); - nfsm_chain_add_32(error, &nmreq, seqid); - seqid++; - nfsm_chain_add_32(error, &nmreq, stateid[0]); - nfsm_chain_add_32(error, &nmreq, stateid[1]); - nfsm_chain_add_32(error, &nmreq, stateid[2]); - nfsm_chain_add_32(error, &nmreq, stateid[3]); + nfsm_chain_add_32(error, &nmreq, noop->noo_seqid); + nfsm_chain_add_stateid(error, &nmreq, &nofp->nof_stateid); + numops--; + nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR); + nfsm_chain_add_bitmap_masked(error, &nmreq, nfs_getattr_bitmap, + NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr); nfsm_chain_build_done(error, &nmreq); nfsm_assert(error, (numops == 0), EPROTO); nfsmout_if(error); - error = nfs_request(dnp, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &nmrep, &xid, &status); + error = nfs_request2(np, NULL, &nmreq, NFSPROC4_COMPOUND, thd, cred, flag, &nmrep, &xid, &status); + if ((lockerror = nfs_node_lock(np))) + error = lockerror; nfsm_chain_skip_tag(error, &nmrep); nfsm_chain_get_32(error, &nmrep, numops); nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH); + nfsmout_if(error); nfsm_chain_op_check(error, &nmrep, NFS_OP_CLOSE); - nfsm_chain_get_32(error, &nmrep, stateid[0]); - nfsm_chain_get_32(error, &nmrep, stateid[1]); - nfsm_chain_get_32(error, &nmrep, stateid[2]); - nfsm_chain_get_32(error, &nmrep, stateid[3]); - if (error) - printf("nfs4_vnop_create: close error %d\n", error); + 
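The seqid bump just below follows the NFSv4.0 owner-sequencing rule: each open owner tags state-changing operations with a sequence number, and the client advances it once per processed request, on success and on most failures, except for the few errors that mean the server never consumed the request. A simplified model (the error values shown are the on-the-wire NFSv4 codes; the kernel's real logic lives in nfs_owner_seqid_increment(), and the full rule exempts a longer list of errors):

#include <stdint.h>
#include <stdio.h>

enum {
	ERR_STALE_CLIENTID = 10022,	// NFS4ERR_STALE_CLIENTID
	ERR_BAD_SEQID = 10026		// NFS4ERR_BAD_SEQID
};

// Advance the owner's seqid unless the error implies the server
// never consumed this sequence number.
static void
owner_seqid_increment(uint32_t *seqidp, int error)
{
	if (error == ERR_BAD_SEQID || error == ERR_STALE_CLIENTID)
		return;
	(*seqidp)++;
}

int
main(void)
{
	uint32_t seqid = 1;

	owner_seqid_increment(&seqid, 0);		// processed: 2
	owner_seqid_increment(&seqid, ERR_BAD_SEQID);	// not processed: still 2
	printf("seqid = %u\n", seqid);
	return (0);
}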
nfs_owner_seqid_increment(noop, NULL, error); + nfsm_chain_get_stateid(error, &nmrep, &nofp->nof_stateid); + nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR); + nfsm_chain_loadattr(error, &nmrep, np, nfsvers, NULL, &xid); +nfsmout: + if (!lockerror) + nfs_node_unlock(np); + nfs_open_owner_clear_busy(noop); + nfsm_chain_cleanup(&nmreq); + nfsm_chain_cleanup(&nmrep); + return (error); +} + + +int +nfs4_delegreturn_rpc(struct nfsmount *nmp, u_char *fhp, int fhlen, struct nfs_stateid *sid, thread_t thd, kauth_cred_t cred) +{ + int error = 0, status, numops; + uint64_t xid; + struct nfsm_chain nmreq, nmrep; + + nfsm_chain_null(&nmreq); + nfsm_chain_null(&nmrep); + // PUTFH, DELEGRETURN + numops = 2; + nfsm_chain_build_alloc_init(error, &nmreq, 16 * NFSX_UNSIGNED); + nfsm_chain_add_compound_header(error, &nmreq, "delegreturn", numops); + numops--; + nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH); + nfsm_chain_add_fh(error, &nmreq, nmp->nm_vers, fhp, fhlen); + numops--; + nfsm_chain_add_32(error, &nmreq, NFS_OP_DELEGRETURN); + nfsm_chain_add_stateid(error, &nmreq, sid); + nfsm_chain_build_done(error, &nmreq); + nfsm_assert(error, (numops == 0), EPROTO); + nfsmout_if(error); + error = nfs_request2(NULL, nmp->nm_mountp, &nmreq, NFSPROC4_COMPOUND, thd, cred, R_RECOVER, &nmrep, &xid, &status); + nfsm_chain_skip_tag(error, &nmrep); + nfsm_chain_get_32(error, &nmrep, numops); + nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH); + nfsm_chain_op_check(error, &nmrep, NFS_OP_DELEGRETURN); nfsmout: nfsm_chain_cleanup(&nmreq); nfsm_chain_cleanup(&nmrep); + return (error); +} - if (!lockerror) { - if (!create_error && (dnp->n_flag & NNEGNCENTRIES)) { - dnp->n_flag &= ~NNEGNCENTRIES; - cache_purge_negatives(dvp); + +/* + * NFSv4 read call. + * Just call nfs_bioread() to do the work. + * + * Note: the exec code paths have a tendency to call VNOP_READ (and VNOP_MMAP) + * without first calling VNOP_OPEN, so we make sure the file is open here. 
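The guard that follows reduces to a simple open-on-first-use pattern; here is its shape in isolation (all names hypothetical, mount state and locking stripped out):

#include <stdio.h>

enum { ACCESS_NONE = 0, ACCESS_READ = 1 };

struct file_state {
	int access;		// current share access, 0 when not open
	int needclose;		// open was implicit; close on last use
};

static int
do_open(struct file_state *fs, int access)	// stand-in for nfs4_open()
{
	fs->access |= access;
	return (0);
}

// Open for read on first use, and remember that the open was ours so
// a matching close happens later -- mirroring NFS_OPEN_FILE_NEEDCLOSE.
static int
ensure_opened_for_read(struct file_state *fs)
{
	if (fs->access != ACCESS_NONE)
		return (0);
	int error = do_open(fs, ACCESS_READ);
	if (error == 0)
		fs->needclose = 1;
	return (error);
}

int
main(void)
{
	struct file_state fs = { ACCESS_NONE, 0 };

	if (ensure_opened_for_read(&fs) == 0)
		printf("access=%d needclose=%d\n", fs.access, fs.needclose);
	return (0);
}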
+ */ +int +nfs4_vnop_read( + struct vnop_read_args /* { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + struct uio *a_uio; + int a_ioflag; + vfs_context_t a_context; + } */ *ap) +{ + vnode_t vp = ap->a_vp; + vfs_context_t ctx = ap->a_context; + nfsnode_t np; + struct nfsmount *nmp; + struct nfs_open_owner *noop; + struct nfs_open_file *nofp; + int error; + + if (vnode_vtype(ap->a_vp) != VREG) + return (EPERM); + + np = VTONFS(vp); + nmp = NFSTONMP(np); + if (!nmp) + return (ENXIO); + + noop = nfs_open_owner_find(nmp, vfs_context_ucred(ctx), 1); + if (!noop) + return (ENOMEM); +restart: + error = nfs_open_file_find(np, noop, &nofp, 0, 0, 1); + if (!error && (nofp->nof_flags & NFS_OPEN_FILE_LOST)) { + printf("nfs_vnop_read: LOST\n"); + error = EIO; + } + if (!error && (nofp->nof_flags & NFS_OPEN_FILE_REOPEN)) { + nfs4_reopen(nofp, vfs_context_thread(ctx)); + nofp = NULL; + goto restart; + } + if (error) { + nfs_open_owner_rele(noop); + return (error); + } + if (!nofp->nof_access) { + /* we don't have the file open, so open it for read access */ + error = nfs_mount_state_in_use_start(nmp); + if (error) { + nfs_open_owner_rele(noop); + return (error); } - dnp->n_flag |= NMODIFIED; - if (!nfs_getattr(dnp, &dnvattr, ctx, 1)) { - if (NFS_CHANGED_NC(nfsvers, dnp, &dnvattr)) { - dnp->n_flag &= ~NNEGNCENTRIES; - cache_purge(dvp); - NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnvattr); - } + error = nfs_open_file_set_busy(nofp, vfs_context_thread(ctx)); + if (error) + nofp = NULL; + if (!error) + error = nfs4_open(np, nofp, NFS_OPEN_SHARE_ACCESS_READ, NFS_OPEN_SHARE_DENY_NONE, ctx); + if (!error) + nofp->nof_flags |= NFS_OPEN_FILE_NEEDCLOSE; + if (nofp) + nfs_open_file_clear_busy(nofp); + if (nfs_mount_state_in_use_end(nmp, error)) { + nofp = NULL; + goto restart; } } + nfs_open_owner_rele(noop); + if (error) + return (error); + return (nfs_bioread(VTONFS(ap->a_vp), ap->a_uio, ap->a_ioflag, ap->a_context)); +} - if (!error && fh.fh_len) { - /* create the vnode with the filehandle and attributes */ - xid = savedxid; - error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh.fh_data, fh.fh_len, &nvattr, &xid, NG_MAKEENTRY, &np); - if (!error) - newvp = NFSTOV(np); +/* + * Note: the NFSv4 CREATE RPC is for everything EXCEPT regular files. + * Files are created using the NFSv4 OPEN RPC. So we must open the + * file to create it and then close it. 
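From user space both creation flavors funnel into this vnop via open(2): plain O_CREAT becomes an OPEN with CREATE/UNCHECKED4 and a fattr4 payload, while O_CREAT|O_EXCL becomes CREATE/EXCLUSIVE4 carrying the verifier seen in nfs4_open_rpc_internal(). A small demonstration (path name arbitrary):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	// Exclusive create: on NFSv4 this is OPEN(CREATE, EXCLUSIVE4).
	int fd = open("newfile", O_RDWR | O_CREAT | O_EXCL, 0644);

	if (fd < 0) {
		perror("open");		// EEXIST if the file already exists
		return (1);
	}
	if (write(fd, "hi\n", 3) != 3)
		perror("write");
	close(fd);	// last reference dropped: the open gets closed
	return (0);
}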
+ */ +int +nfs4_vnop_create( + struct vnop_create_args /* { + struct vnodeop_desc *a_desc; + vnode_t a_dvp; + vnode_t *a_vpp; + struct componentname *a_cnp; + struct vnode_attr *a_vap; + vfs_context_t a_context; + } */ *ap) +{ + vfs_context_t ctx = ap->a_context; + struct componentname *cnp = ap->a_cnp; + struct vnode_attr *vap = ap->a_vap; + vnode_t dvp = ap->a_dvp; + vnode_t *vpp = ap->a_vpp; + struct nfsmount *nmp; + nfsnode_t np; + int error = 0; + struct nfs_open_owner *noop = NULL; + struct nfs_open_file *nofp = NULL; + + nmp = VTONMP(dvp); + if (!nmp) + return (ENXIO); + + nfs_avoid_needless_id_setting_on_create(VTONFS(dvp), vap, ctx); + + noop = nfs_open_owner_find(nmp, vfs_context_ucred(ctx), 1); + if (!noop) + return (ENOMEM); + +restart: + error = nfs_mount_state_in_use_start(nmp); + if (error) { + nfs_open_owner_rele(noop); + return (error); } - nfs_dulookup_finish(&dul, dnp, ctx); + error = nfs_open_file_find(NULL, noop, &nofp, 0, 0, 1); + if (!error && (nofp->nof_flags & NFS_OPEN_FILE_LOST)) { + printf("nfs_vnop_create: LOST\n"); + error = EIO; + } + if (!error && (nofp->nof_flags & NFS_OPEN_FILE_REOPEN)) { + nfs_mount_state_in_use_end(nmp, 0); + nfs4_reopen(nofp, vfs_context_thread(ctx)); + nofp = NULL; + goto restart; + } + if (!error) + error = nfs_open_file_set_busy(nofp, vfs_context_thread(ctx)); + if (error) { + nofp = NULL; + goto out; + } - /* - * Kludge: Map EEXIST => 0 assuming that you have a reply to a retry - * if we can succeed in looking up the object. - */ - if ((create_error == EEXIST) || (!create_error && !newvp)) { - error = nfs_lookitup(dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx, &np); - if (!error) { - newvp = NFSTOV(np); - if (vnode_vtype(newvp) != VLNK) - error = EEXIST; - } + nofp->nof_opencnt++; + nofp->nof_access = NFS_OPEN_SHARE_ACCESS_BOTH; + nofp->nof_deny = NFS_OPEN_SHARE_DENY_NONE; + nofp->nof_rw++; + + error = nfs4_open_rpc(nofp, ctx, cnp, vap, dvp, vpp, NFS_OPEN_CREATE, + NFS_OPEN_SHARE_ACCESS_BOTH, NFS_OPEN_SHARE_DENY_NONE); + if (!error && !*vpp) { + printf("nfs4_open_rpc returned without a node?\n"); + /* Hmmm... with no node, we have no filehandle and can't close it */ + error = EIO; } - if (!lockerror) - nfs_unlock(dnp); if (error) { - if (newvp) { - nfs_unlock(np); - vnode_put(newvp); + nofp->nof_rw--; + nofp->nof_access = 0; + nofp->nof_deny = 0; + nofp->nof_opencnt--; + } + if (*vpp) { + nofp->nof_np = np = VTONFS(*vpp); + /* insert nofp onto np's open list */ + TAILQ_INSERT_HEAD(&np->n_opens, nofp, nof_link); + if (!error) { + nofp->nof_flags |= NFS_OPEN_FILE_CREATE; + nofp->nof_creator = current_thread(); } - } else { - nfs_unlock(np); - *vpp = newvp; } +out: + if (nofp) + nfs_open_file_clear_busy(nofp); + if (nfs_mount_state_in_use_end(nmp, error)) { + nofp = NULL; + goto restart; + } + if (noop) + nfs_open_owner_rele(noop); return (error); } +void +nfs_avoid_needless_id_setting_on_create(nfsnode_t dnp, struct vnode_attr *vap, vfs_context_t ctx) +{ + /* + * Don't bother setting UID if it's the same as the credential performing the create. + * Don't bother setting GID if it's the same as the directory or credential. + */ + if (VATTR_IS_ACTIVE(vap, va_uid)) { + if (kauth_cred_getuid(vfs_context_ucred(ctx)) == vap->va_uid) + VATTR_CLEAR_ACTIVE(vap, va_uid); + } + if (VATTR_IS_ACTIVE(vap, va_gid)) { + if ((vap->va_gid == dnp->n_vattr.nva_gid) || + (kauth_cred_getgid(vfs_context_ucred(ctx)) == vap->va_gid)) + VATTR_CLEAR_ACTIVE(vap, va_gid); + } +} + /* * Note: the NFSv4 CREATE RPC is for everything EXCEPT regular files. 
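nfs_avoid_needless_id_setting_on_create() above trims attributes rather than semantics: asking the server to set an owner the file would receive anyway can only risk a needless permission error from servers that refuse id-setting. Its decision table, restated as a self-contained sketch (names hypothetical):

#include <stdio.h>
#include <sys/types.h>

struct create_ids {
	int set_uid, set_gid;	// like VATTR_IS_ACTIVE(vap, va_uid/va_gid)
	uid_t uid;
	gid_t gid;
};

// Drop uid/gid requests that the create would satisfy anyway.
static void
drop_needless_ids(struct create_ids *ids, uid_t cred_uid, gid_t cred_gid,
    gid_t dir_gid)
{
	if (ids->set_uid && ids->uid == cred_uid)
		ids->set_uid = 0;	// creator's credential already wins
	if (ids->set_gid && (ids->gid == dir_gid || ids->gid == cred_gid))
		ids->set_gid = 0;	// inherited or credential gid matches
}

int
main(void)
{
	struct create_ids ids = { 1, 1, 501, 20 };

	drop_needless_ids(&ids, 501, 20, 80);
	printf("set_uid=%d set_gid=%d\n", ids.set_uid, ids.set_gid);
	return (0);
}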
*/ -static int +int nfs4_create_rpc( vfs_context_t ctx, nfsnode_t dnp, @@ -1711,7 +5019,7 @@ nfs4_create_rpc( { struct nfsmount *nmp; struct nfs_vattr nvattr, dnvattr; - int error = 0, create_error = EIO, lockerror = ENOENT, status; + int error = 0, create_error = EIO, lockerror = ENOENT, busyerror = ENOENT, status; int nfsvers, numops; u_int64_t xid, savedxid = 0; nfsnode_t np = NULL; @@ -1754,7 +5062,10 @@ nfs4_create_rpc( return (EINVAL); } - nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen); + nfs_avoid_needless_id_setting_on_create(dnp, vap, ctx); + + error = busyerror = nfs_node_set_busy(dnp, vfs_context_thread(ctx)); + nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx); nfsm_chain_null(&nmreq); nfsm_chain_null(&nmrep); @@ -1794,9 +5105,6 @@ nfs4_create_rpc( nfsm_chain_build_done(error, &nmreq); nfsm_assert(error, (numops == 0), EPROTO); nfsmout_if(error); - if ((lockerror = nfs_lock(dnp, NFS_NODE_LOCK_EXCLUSIVE))) - error = lockerror; - nfsmout_if(error); error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC4_COMPOUND, vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, &req); @@ -1805,6 +5113,8 @@ nfs4_create_rpc( error = nfs_request_async_finish(req, &nmrep, &xid, &status); } + if ((lockerror = nfs_node_lock(dnp))) + error = lockerror; nfsm_chain_skip_tag(error, &nmrep); nfsm_chain_get_32(error, &nmrep, numops); nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH); @@ -1847,13 +5157,9 @@ nfsmout: cache_purge_negatives(NFSTOV(dnp)); } dnp->n_flag |= NMODIFIED; - if (!nfs_getattr(dnp, &dnvattr, ctx, 1)) { - if (NFS_CHANGED_NC(nfsvers, dnp, &dnvattr)) { - dnp->n_flag &= ~NNEGNCENTRIES; - cache_purge(NFSTOV(dnp)); - NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnvattr); - } - } + nfs_node_unlock(dnp); + /* nfs_getattr() will check changed and purge caches */ + nfs_getattr(dnp, &dnvattr, ctx, NGA_CACHED); } if (!error && fh.fh_len) { @@ -1878,15 +5184,15 @@ nfsmout: error = EEXIST; } } - if (!lockerror) - nfs_unlock(dnp); + if (!busyerror) + nfs_node_clear_busy(dnp); if (error) { if (newvp) { - nfs_unlock(np); + nfs_node_unlock(np); vnode_put(newvp); } } else { - nfs_unlock(np); + nfs_node_unlock(np); *npp = np; } return (error); @@ -1987,7 +5293,7 @@ nfs4_vnop_link( vnode_t vp = ap->a_vp; vnode_t tdvp = ap->a_tdvp; struct componentname *cnp = ap->a_cnp; - int error = 0, status; + int error = 0, lockerror = ENOENT, status; struct nfsmount *nmp; nfsnode_t np = VTONFS(vp); nfsnode_t tdnp = VTONFS(tdvp); @@ -2010,8 +5316,7 @@ nfs4_vnop_link( */ nfs_flush(np, MNT_WAIT, vfs_context_thread(ctx), V_IGNORE_WRITEERR); - error = nfs_lock2(tdnp, np, NFS_NODE_LOCK_EXCLUSIVE); - if (error) + if ((error = nfs_node_set_busy2(tdnp, np, vfs_context_thread(ctx)))) return (error); nfsm_chain_null(&nmreq); @@ -2047,6 +5352,10 @@ nfs4_vnop_link( nfsmout_if(error); error = nfs_request(tdnp, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &nmrep, &xid, &status); + if ((lockerror = nfs_node_lock2(tdnp, np))) { + error = lockerror; + goto nfsmout; + } nfsm_chain_skip_tag(error, &nmrep); nfsm_chain_get_32(error, &nmrep, numops); nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH); @@ -2070,7 +5379,8 @@ nfs4_vnop_link( nfsmout: nfsm_chain_cleanup(&nmreq); nfsm_chain_cleanup(&nmrep); - tdnp->n_flag |= NMODIFIED; + if (!lockerror) + tdnp->n_flag |= NMODIFIED; /* Kludge: Map EEXIST => 0 assuming that it is a reply to a retry. 
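The rationale for forgiving EEXIST here: LINK is not idempotent, so when a reply is lost and the RPC is retransmitted, the server has already created the link and the retry comes back EEXIST even though the caller's operation succeeded. The rmdir path below applies the same reasoning to ENOENT. As an illustrative one-liner:

#include <errno.h>
#include <stdio.h>

// Collapse the error a duplicate reply would produce back to success.
static int
forgive_retry_error(int error, int retry_error)
{
	return (error == retry_error ? 0 : error);
}

int
main(void)
{
	printf("%d %d\n",
	    forgive_retry_error(EEXIST, EEXIST),	// retried link: 0
	    forgive_retry_error(EACCES, EEXIST));	// real failure: kept
	return (0);
}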
*/ if (error == EEXIST) error = 0; @@ -2078,7 +5388,9 @@ nfsmout: tdnp->n_flag &= ~NNEGNCENTRIES; cache_purge_negatives(tdvp); } - nfs_unlock2(tdnp, np); + if (!lockerror) + nfs_node_unlock2(tdnp, np); + nfs_node_clear_busy2(tdnp, np); return (error); } @@ -2105,27 +5417,20 @@ nfs4_vnop_rmdir( if (vnode_vtype(vp) != VDIR) return (EINVAL); - nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen); - - if ((error = nfs_lock2(dnp, np, NFS_NODE_LOCK_EXCLUSIVE))) + if ((error = nfs_node_set_busy2(dnp, np, vfs_context_thread(ctx)))) return (error); + nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx); nfs_dulookup_start(&dul, dnp, ctx); error = nfs4_remove_rpc(dnp, cnp->cn_nameptr, cnp->cn_namelen, vfs_context_thread(ctx), vfs_context_ucred(ctx)); - cache_purge(vp); - if (!nfs_getattr(dnp, &dnvattr, ctx, 1)) { - if (NFS_CHANGED_NC(NFS_VER4, dnp, &dnvattr)) { - dnp->n_flag &= ~NNEGNCENTRIES; - cache_purge(dvp); - NFS_CHANGED_UPDATE_NC(NFS_VER4, dnp, &dnvattr); - } - } - + nfs_name_cache_purge(dnp, np, cnp, ctx); + /* nfs_getattr() will check changed and purge caches */ + nfs_getattr(dnp, &dnvattr, ctx, NGA_CACHED); nfs_dulookup_finish(&dul, dnp, ctx); - nfs_unlock2(dnp, np); + nfs_node_clear_busy2(dnp, np); /* * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
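A final pattern worth noting from the link and rmdir paths above: nfs_node_set_busy2()/nfs_node_lock2() and their unlock/clear counterparts take the two nodes in one fixed order, so concurrent operations on the same {directory, entry} pair cannot deadlock against each other. A minimal user-space sketch of pairwise ordered locking (ordering by address here is an illustrative assumption, not the kernel's actual rule):

#include <pthread.h>
#include <stdio.h>

static void
lock2(pthread_mutex_t *a, pthread_mutex_t *b)
{
	if (a == b) {
		pthread_mutex_lock(a);		// same node given twice
	} else if (a < b) {
		pthread_mutex_lock(a);
		pthread_mutex_lock(b);
	} else {
		pthread_mutex_lock(b);
		pthread_mutex_lock(a);
	}
}

static void
unlock2(pthread_mutex_t *a, pthread_mutex_t *b)
{
	pthread_mutex_unlock(a);
	if (a != b)
		pthread_mutex_unlock(b);
}

int
main(void)
{
	pthread_mutex_t m1 = PTHREAD_MUTEX_INITIALIZER;
	pthread_mutex_t m2 = PTHREAD_MUTEX_INITIALIZER;

	lock2(&m1, &m2);
	puts("both held");
	unlock2(&m1, &m2);
	return (0);
}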