X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/1c79356b52d46aa6b508fb032f5ae709b1f2897b..55e303ae13a4cf49d70f2294092726f2fffb9ef2:/bsd/nfs/nfs_vnops.c

diff --git a/bsd/nfs/nfs_vnops.c b/bsd/nfs/nfs_vnops.c
index 06f5961e3..882ed59fe 100644
--- a/bsd/nfs/nfs_vnops.c
+++ b/bsd/nfs/nfs_vnops.c
@@ -1,21 +1,24 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_LICENSE_HEADER_START@
  * 
- * The contents of this file constitute Original Code as defined in and
- * are subject to the Apple Public Source License Version 1.1 (the
- * "License").  You may not use this file except in compliance with the
- * License.  Please obtain a copy of the License at
- * http://www.apple.com/publicsource and read it before using this file.
+ * Copyright (c) 1999-2003 Apple Computer, Inc.  All Rights Reserved.
  * 
- * This Original Code and all software distributed under the License are
- * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
- * License for the specific language governing rights and limitations
- * under the License.
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
  * 
  * @APPLE_LICENSE_HEADER_END@
  */
@@ -63,14 +66,12 @@
 /*
  * vnode op calls for Sun NFS version 2 and 3
  */
-
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/systm.h>
 #include <sys/resourcevar.h>
 #include <sys/proc.h>
 #include <sys/mount.h>
-#include <sys/buf.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/conf.h>
@@ -81,7 +82,6 @@
 #include <sys/lockf.h>
 #include <sys/ubc.h>
 
-#include <ufs/ufs/dir.h>
 #include <vfs/vfs_support.h>
 
 #include <sys/vm.h>
@@ -99,6 +99,7 @@
 #include <nfs/nfs.h>
 #include <nfs/nfsnode.h>
 #include <nfs/nfsmount.h>
+#include <nfs/nfs_lock.h>
 #include <nfs/xdr_subs.h>
 #include <nfs/nfsm_subs.h>
 #include <nfs/nqnfs.h>
@@ -106,14 +107,35 @@
 #include <net/if.h>
 #include <netinet/in.h>
 #include <netinet/in_var.h>
-#include <kern/task.h>
 #include <vm/vm_kern.h>
 
+#include <kern/task.h>
+#include <kern/sched_prim.h>
+
 #include <sys/kdebug.h>
 
+#define FSDBG(A, B, C, D, E) \
+	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | DBG_FUNC_NONE, \
+		(int)(B), (int)(C), (int)(D), (int)(E), 0)
+#define FSDBG_TOP(A, B, C, D, E) \
+	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | DBG_FUNC_START, \
+		(int)(B), (int)(C), (int)(D), (int)(E), 0)
+#define FSDBG_BOT(A, B, C, D, E) \
+	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | DBG_FUNC_END, \
+		(int)(B), (int)(C), (int)(D), (int)(E), 0)
+
 #define	TRUE	1
 #define	FALSE	0
 
+#define NFS_FREE_PNBUF(CNP) \
+	do { \
+		char *tmp = (CNP)->cn_pnbuf; \
+		(CNP)->cn_pnbuf = NULL; \
+		(CNP)->cn_flags &= ~HASBUF; \
+		FREE_ZONE(tmp, (CNP)->cn_pnlen, M_NAMEI); \
+	} while (0)
+
+
 static int	nfsspec_read __P((struct vop_read_args *));
 static int	nfsspec_write __P((struct vop_write_args *));
 static int	nfsfifo_read __P((struct vop_read_args *));
@@ -144,7 +166,6 @@ static	int	nfs_rmdir __P((struct vop_rmdir_args *));
 static	int	nfs_symlink __P((struct vop_symlink_args *));
 static	int	nfs_readdir __P((struct vop_readdir_args *));
 static	int	nfs_bmap __P((struct vop_bmap_args *));
-static	int	nfs_strategy __P((struct vop_strategy_args *));
 static	int	nfs_lookitup __P((struct vnode *,char *,int,struct ucred *,struct proc *,struct nfsnode **));
 static	int	nfs_sillyrename __P((struct vnode *,struct vnode *,struct componentname *));
 static int	nfsspec_access __P((struct vop_access_args *));
@@ -153,7 +174,6 @@ static int	nfs_print __P((struct vop_print_args *));
 static int	nfs_pathconf __P((struct vop_pathconf_args *));
 static int	nfs_advlock __P((struct vop_advlock_args *));
 static int	nfs_blkatoff __P((struct vop_blkatoff_args *));
-static int	nfs_bwrite __P((struct vop_bwrite_args *));
 static int	nfs_valloc __P((struct vop_valloc_args *));
 static int	nfs_vfree __P((struct vop_vfree_args *));
 static int	nfs_truncate __P((struct vop_truncate_args *));
@@ -195,13 +215,13 @@ static struct vnodeopv_entry_desc nfsv2_vnodeop_entries[] = {
 	{ &vop_symlink_desc, (vop_t *)nfs_symlink },	/* symlink */
 	{ &vop_readdir_desc, (vop_t *)nfs_readdir },	/* readdir */
 	{ &vop_readlink_desc, (vop_t *)nfs_readlink },	/* readlink */
-	{ &vop_abortop_desc, (vop_t *)nfs_abortop },	/* abortop */
+	{ &vop_abortop_desc, (vop_t *)nop_abortop },	/* abortop */
 	{ &vop_inactive_desc, (vop_t *)nfs_inactive },	/* inactive */
 	{ &vop_reclaim_desc, (vop_t *)nfs_reclaim },	/* reclaim */
 	{ &vop_lock_desc, (vop_t *)nfs_lock },		/* lock */
 	{ &vop_unlock_desc, (vop_t *)nfs_unlock },	/* unlock */
 	{ &vop_bmap_desc, (vop_t *)nfs_bmap },		/* bmap */
-	{ &vop_strategy_desc, (vop_t *)nfs_strategy },	/* strategy */
+	{ &vop_strategy_desc, (vop_t *)err_strategy },	/* strategy */
 	{ &vop_print_desc, (vop_t *)nfs_print },	/* print */
 	{ &vop_islocked_desc, (vop_t *)nfs_islocked },	/* islocked */
 	{ &vop_pathconf_desc, (vop_t *)nfs_pathconf },	/* pathconf */
@@ -212,7 +232,7 @@ static struct vnodeopv_entry_desc nfsv2_vnodeop_entries[] = {
 	{ &vop_vfree_desc, (vop_t *)nfs_vfree },	/* vfree */
 	{ &vop_truncate_desc, (vop_t *)nfs_truncate },	/* truncate */
 	{ &vop_update_desc, (vop_t *)nfs_update },	/* update */
-	{ &vop_bwrite_desc, (vop_t *)nfs_bwrite },	/* bwrite */
+	{ &vop_bwrite_desc, (vop_t *)err_bwrite },	/* bwrite */
 	{ &vop_pagein_desc, (vop_t *)nfs_pagein },		/* Pagein */
 	{ &vop_pageout_desc, (vop_t *)nfs_pageout },		/* Pageout */
 	{ &vop_copyfile_desc, (vop_t *)err_copyfile },		/* Copyfile */
@@ -324,7 +344,7 @@ static struct vnodeopv_entry_desc fifo_nfsv2nodeop_entries[] = {
 	{ &vop_lock_desc, (vop_t *)nfs_lock },		/* lock */
 	{ &vop_unlock_desc, (vop_t *)nfs_unlock },	/* unlock */
 	{ &vop_bmap_desc, (vop_t *)fifo_bmap },		/* bmap */
-	{ &vop_strategy_desc, (vop_t *)fifo_badop },	/* strategy */
+	{ &vop_strategy_desc, (vop_t *)fifo_strategy },	/* strategy */
 	{ &vop_print_desc, (vop_t *)nfs_print },	/* print */
 	{ &vop_islocked_desc, (vop_t *)nfs_islocked },	/* islocked */
 	{ &vop_pathconf_desc, (vop_t *)fifo_pathconf },	/* pathconf */
@@ -349,8 +369,6 @@ struct vnodeopv_desc fifo_nfsv2nodeop_opv_desc =
 VNODEOP_SET(fifo_nfsv2nodeop_opv_desc);
 #endif
 
-static int	nfs_commit __P((struct vnode *vp, u_quad_t offset, int cnt,
-				struct ucred *cred, struct proc *procp));
 static int	nfs_mknodrpc __P((struct vnode *dvp, struct vnode **vpp,
 				  struct componentname *cnp,
 				  struct vattr *vap));
@@ -373,6 +391,7 @@ extern nfstype nfsv3_type[9];
 struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
 struct nfsmount *nfs_iodmount[NFS_MAXASYNCDAEMON];
 int nfs_numasync = 0;
+int nfs_ioddelwri = 0;
 #define	DIRHDSIZ	(sizeof (struct dirent) - (MAXNAMLEN + 1))
 
 static int	nfsaccess_cache_timeout = NFS_MAXATTRTIMO;
@@ -383,41 +402,166 @@ static int	nfsaccess_cache_timeout = NFS_MAXATTRTIMO;
 			 | NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE	\
 			 | NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP)
                          
-                         
+
+/*
+ * The following are needed only by nfs_pageout to decide how to handle errors;
+ * see the nfs_pageout comments for an explanation of the actions.
+ * The errors here are copied from errno.h, and errors returned by servers
+ * are expected to use the same numbers. If they do not, our actions may be
+ * erroneous.
+ */
+enum actiontype {NOACTION, DUMP, DUMPANDLOG, RETRY, RETRYWITHSLEEP, SEVER};
+
+static int errorcount[ELAST+1]; /* better be zeros when initialized */
+
+static const short errortooutcome[ELAST+1] = {	/* indexed by errno value; each entry is an enum actiontype */
+	NOACTION,		/* (no error)	0 */
+	DUMP,			/* EPERM	1	Operation not permitted */
+	DUMP,			/* ENOENT	2	No such file or directory */
+	DUMPANDLOG,		/* ESRCH	3	No such process */
+	RETRY,			/* EINTR 	4	Interrupted system call */
+	DUMP,			/* EIO		5	Input/output error */
+	DUMP,			/* ENXIO	6	Device not configured */
+	DUMPANDLOG,		/* E2BIG	7	Argument list too long */
+	DUMPANDLOG,		/* ENOEXEC	8	Exec format error */
+	DUMPANDLOG,		/* EBADF	9	Bad file descriptor */
+	DUMPANDLOG,		/* ECHILD	10	No child processes */
+	DUMPANDLOG,		/* EDEADLK	11	Resource deadlock avoided - was EAGAIN */
+	RETRY,			/* ENOMEM	12	Cannot allocate memory */
+	DUMP,			/* EACCES	13	Permission denied */
+	DUMPANDLOG,		/* EFAULT	14	Bad address */
+	DUMPANDLOG,		/* ENOTBLK	15	POSIX - Block device required */
+	RETRY,			/* EBUSY	16	Device busy */
+	DUMP,			/* EEXIST	17	File exists */
+	DUMP,			/* EXDEV	18	Cross-device link */
+	DUMP,			/* ENODEV	19	Operation not supported by device */
+	DUMP,			/* ENOTDIR	20	Not a directory */
+	DUMP,			/* EISDIR 	21	Is a directory */
+	DUMP,			/* EINVAL	22	Invalid argument */
+	DUMPANDLOG,		/* ENFILE	23	Too many open files in system */
+	DUMPANDLOG,		/* EMFILE	24	Too many open files */
+	DUMPANDLOG,		/* ENOTTY	25	Inappropriate ioctl for device */
+	DUMPANDLOG,		/* ETXTBSY	26	Text file busy - POSIX */
+	DUMP,			/* EFBIG	27	File too large */
+	DUMP,			/* ENOSPC	28	No space left on device */
+	DUMPANDLOG,		/* ESPIPE	29	Illegal seek */
+	DUMP,			/* EROFS	30	Read-only file system */
+	DUMP,			/* EMLINK	31	Too many links */
+	RETRY,			/* EPIPE	32	Broken pipe */
+	/* math software */
+	DUMPANDLOG,		/* EDOM				33	Numerical argument out of domain */
+	DUMPANDLOG,		/* ERANGE			34	Result too large */
+	RETRY,			/* EAGAIN/EWOULDBLOCK	35	Resource temporarily unavailable */
+	DUMPANDLOG,		/* EINPROGRESS		36	Operation now in progress */
+	DUMPANDLOG,		/* EALREADY			37	Operation already in progress */
+	/* ipc/network software -- argument errors */
+	DUMPANDLOG,		/* ENOTSOCK			38	Socket operation on non-socket */
+	DUMPANDLOG,		/* EDESTADDRREQ		39	Destination address required */
+	DUMPANDLOG,		/* EMSGSIZE			40	Message too long */
+	DUMPANDLOG,		/* EPROTOTYPE		41	Protocol wrong type for socket */
+	DUMPANDLOG,		/* ENOPROTOOPT		42	Protocol not available */
+	DUMPANDLOG,		/* EPROTONOSUPPORT	43	Protocol not supported */
+	DUMPANDLOG,		/* ESOCKTNOSUPPORT	44	Socket type not supported */
+	DUMPANDLOG,		/* ENOTSUP			45	Operation not supported */
+	DUMPANDLOG,		/* EPFNOSUPPORT		46	Protocol family not supported */
+	DUMPANDLOG,		/* EAFNOSUPPORT		47	Address family not supported by protocol family */
+	DUMPANDLOG,		/* EADDRINUSE		48	Address already in use */
+	DUMPANDLOG,		/* EADDRNOTAVAIL	49	Can't assign requested address */
+	/* ipc/network software -- operational errors */
+	RETRY,			/* ENETDOWN			50	Network is down */
+	RETRY,			/* ENETUNREACH		51	Network is unreachable */
+	RETRY,			/* ENETRESET		52	Network dropped connection on reset */
+	RETRY,			/* ECONNABORTED		53	Software caused connection abort */
+	RETRY,			/* ECONNRESET		54	Connection reset by peer */
+	RETRY,			/* ENOBUFS			55	No buffer space available */
+	RETRY,			/* EISCONN			56	Socket is already connected */
+	RETRY,			/* ENOTCONN			57	Socket is not connected */
+	RETRY,			/* ESHUTDOWN		58	Can't send after socket shutdown */
+	RETRY,			/* ETOOMANYREFS		59	Too many references: can't splice */
+	RETRY,			/* ETIMEDOUT		60	Operation timed out */
+	RETRY,			/* ECONNREFUSED		61	Connection refused */
+
+	DUMPANDLOG,		/* ELOOP			62	Too many levels of symbolic links */
+	DUMP,			/* ENAMETOOLONG		63	File name too long */
+	RETRY,			/* EHOSTDOWN		64	Host is down */ 
+	RETRY,			/* EHOSTUNREACH		65	No route to host */
+	DUMP,			/* ENOTEMPTY		66	Directory not empty */
+	/* quotas & mush */     
+	DUMPANDLOG,		/* EPROCLIM			67	Too many processes */
+	DUMPANDLOG,		/* EUSERS			68	Too many users */
+	DUMPANDLOG,		/* EDQUOT			69	Disc quota exceeded */   
+	/* Network File System */
+	DUMP,			/* ESTALE			70	Stale NFS file handle */
+	DUMP,			/* EREMOTE			71	Too many levels of remote in path */
+	DUMPANDLOG,		/* EBADRPC			72	RPC struct is bad */
+	DUMPANDLOG,		/* ERPCMISMATCH		73	RPC version wrong */
+	DUMPANDLOG,		/* EPROGUNAVAIL		74	RPC prog. not avail */
+	DUMPANDLOG,		/* EPROGMISMATCH	75	Program version wrong */
+	DUMPANDLOG,		/* EPROCUNAVAIL		76	Bad procedure for program */
+
+	DUMPANDLOG,		/* ENOLCK			77	No locks available */
+	DUMPANDLOG,		/* ENOSYS			78	Function not implemented */
+	DUMPANDLOG,		/* EFTYPE			79	Inappropriate file type or format */  
+	DUMPANDLOG,		/* EAUTH			80	Authentication error */
+	DUMPANDLOG,		/* ENEEDAUTH		81	Need authenticator */
+	/* Intelligent device errors */
+	DUMPANDLOG,		/* EPWROFF			82	Device power is off */
+	DUMPANDLOG,		/* EDEVERR			83	Device error, e.g. paper out */
+	DUMPANDLOG,		/* EOVERFLOW		84	Value too large to be stored in data type */
+	/* Program loading errors */
+	DUMPANDLOG,		/* EBADEXEC			85	Bad executable */
+	DUMPANDLOG,		/* EBADARCH			86	Bad CPU type in executable */
+	DUMPANDLOG,		/* ESHLIBVERS		87	Shared library version mismatch */
+	DUMPANDLOG,		/* EBADMACHO		88	Malformed Macho file */
+};
+
+
+static short	/* returns the enum actiontype for `error`; DUMP if it is outside the table */
+nfs_pageouterrorhandler(error)
+	int error;	/* errno-style code to classify via errortooutcome[] */
+{
+	/* errortooutcome[] covers only 0..ELAST: reject negative values too, so we never read before the table */
+	if (error < 0 || error > ELAST)
+		return(DUMP);
+	return(errortooutcome[error]);
+}
 
 static int
 nfs3_access_otw(struct vnode *vp,  
-                int wmode,
-                struct proc *p,
-                struct ucred *cred)  
+		int wmode,
+		struct proc *p,
+		struct ucred *cred)  
 {
-        const int v3 = 1;
-        u_int32_t *tl;
-        int error = 0, attrflag;
-
-        struct mbuf *mreq, *mrep, *md, *mb, *mb2;
-        caddr_t bpos, dpos, cp2;
-        register int32_t t1, t2;
-        register caddr_t cp;
-        u_int32_t rmode;
-        struct nfsnode *np = VTONFS(vp);
-
-        nfsstats.rpccnt[NFSPROC_ACCESS]++;   
-        nfsm_reqhead(vp, NFSPROC_ACCESS, NFSX_FH(v3) + NFSX_UNSIGNED);
-        nfsm_fhtom(vp, v3);
-        nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
-        *tl = txdr_unsigned(wmode);
-        nfsm_request(vp, NFSPROC_ACCESS, p, cred);
-        nfsm_postop_attr(vp, attrflag);
-        if (!error) {
-                nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
-                rmode = fxdr_unsigned(u_int32_t, *tl);
-                np->n_mode = rmode;
-                np->n_modeuid = cred->cr_uid;
-                np->n_modestamp = time_second;
-        }
-        nfsm_reqdone;
-        return error;
+	const int v3 = 1;
+	u_long *tl;
+	int error = 0, attrflag;
+
+	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+	caddr_t bpos, dpos, cp2;
+	register long t1, t2;
+	register caddr_t cp;
+	u_int32_t rmode;
+	struct nfsnode *np = VTONFS(vp);
+	u_int64_t xid;
+	struct timeval now;
+
+	nfsstats.rpccnt[NFSPROC_ACCESS]++;   
+	nfsm_reqhead(vp, NFSPROC_ACCESS, NFSX_FH(v3) + NFSX_UNSIGNED);
+	nfsm_fhtom(vp, v3);
+	nfsm_build(tl, u_long *, NFSX_UNSIGNED);
+	*tl = txdr_unsigned(wmode);
+	nfsm_request(vp, NFSPROC_ACCESS, p, cred, &xid);
+	nfsm_postop_attr(vp, attrflag, &xid);
+	if (!error) {
+		nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+		rmode = fxdr_unsigned(u_int32_t, *tl);
+		np->n_mode = rmode;
+		np->n_modeuid = cred->cr_uid;
+		microuptime(&now);
+		np->n_modestamp = now.tv_sec;
+	}
+	nfsm_reqdone;
+	return error;
 }
 
 /*
@@ -436,10 +580,11 @@ nfs_access(ap)
 	} */ *ap;
 {
 	register struct vnode *vp = ap->a_vp;
-        int error = 0;
-        u_long mode, wmode;
+	int error = 0;
+	u_long mode, wmode;
 	int v3 = NFS_ISV3(vp);
-        struct nfsnode *np = VTONFS(vp);
+	struct nfsnode *np = VTONFS(vp);
+	struct timeval now;
 
 	/*
 	 * For nfs v3, do an access rpc, otherwise you are stuck emulating
@@ -456,63 +601,63 @@ nfs_access(ap)
 			mode = 0;
 		if (vp->v_type == VDIR) {
 			if (ap->a_mode & VWRITE)
-				mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND |
-					 NFSV3ACCESS_DELETE);
+				mode |= NFSV3ACCESS_MODIFY |
+					NFSV3ACCESS_EXTEND | NFSV3ACCESS_DELETE;
 			if (ap->a_mode & VEXEC)
 				mode |= NFSV3ACCESS_LOOKUP;
 		} else {
 			if (ap->a_mode & VWRITE)
-				mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND);
+				mode |= NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND;
 			if (ap->a_mode & VEXEC)
 				mode |= NFSV3ACCESS_EXECUTE;
 		}
-                /* XXX safety belt, only make blanket request if caching */
-                if (nfsaccess_cache_timeout > 0) {
-                        wmode = NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY |
-                                NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE |
-                                NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP;
-                } else {
-                        wmode = mode;
-                }
+		/* XXX safety belt, only make blanket request if caching */
+		if (nfsaccess_cache_timeout > 0) {
+			wmode = NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY |
+				NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE |
+				NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP;
+		} else
+			wmode = mode;
                 
-                /*
-                 * Does our cached result allow us to give a definite yes to
-                 * this request?
-                 */     
-                if ((time_second < (np->n_modestamp + nfsaccess_cache_timeout)) &&
-                    (ap->a_cred->cr_uid == np->n_modeuid) &&
-                    ((np->n_mode & mode) == mode)) {
-                        /* nfsstats.accesscache_hits++; */
-                } else {
-                        /*
-                         * Either a no, or a don't know.  Go to the wire.
-                         */
-                       /* nfsstats.accesscache_misses++; */
-                       error = nfs3_access_otw(vp, wmode, ap->a_p,ap->a_cred);
-                       if (!error) {
-                            if ((np->n_mode & mode) != mode)
-				error = EACCES;
-                        }
-                }
+		/*
+		 * Does our cached result allow us to give a definite yes to
+		 * this request?
+		 */     
+		microuptime(&now);
+		if (now.tv_sec < np->n_modestamp + nfsaccess_cache_timeout &&
+		    ap->a_cred->cr_uid == np->n_modeuid &&
+		    (np->n_mode & mode) == mode) {
+			/* nfsstats.accesscache_hits++; */
+		} else {
+			/*
+			 * Either a no, or a don't know.  Go to the wire.
+			 */
+			/* nfsstats.accesscache_misses++; */
+			error = nfs3_access_otw(vp, wmode, ap->a_p,ap->a_cred);
+			if (!error) {
+				if ((np->n_mode & mode) != mode)
+					error = EACCES;
+			}
+		}
 	} else
-		return (nfsspec_access(ap)); /* NFSv2 case checks for EROFS here*/
-	/* CSM - moved EROFS check down per NetBSD rev 1.71.  So you
-	 * get the correct error value with layered filesystems. 
-         * EKN - moved the return(error) below this so it does get called.*/
+		return (nfsspec_access(ap)); /* NFSv2 case checks for EROFS here */
 	/*
 	 * Disallow write attempts on filesystems mounted read-only;
 	 * unless the file is a socket, fifo, or a block or character
 	 * device resident on the filesystem.
+	 * CSM - moved EROFS check down per NetBSD rev 1.71.  So you
+	 * get the correct error value with layered filesystems. 
+	 * EKN - moved the return(error) below this so it does get called.	 
 	 */
 	if (!error && (ap->a_mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
 		switch (vp->v_type) {
-		case VREG: case VDIR: case VLNK:
-			error = EROFS;
-                default: 
-                        break;
+			case VREG: case VDIR: case VLNK:
+				error = EROFS;
+			default: 
+				break;
 		}
 	}
-        return (error);
+	return (error);
 }
 
 /*
@@ -523,6 +668,7 @@ nfs_access(ap)
  * if consistency is lost.
  */
 /* ARGSUSED */
+
 static int
 nfs_open(ap)
 	struct vop_open_args /* {
@@ -538,10 +684,9 @@ nfs_open(ap)
 	struct vattr vattr;
 	int error;
 
-	if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK)
-{ printf("open eacces vtyp=%d\n",vp->v_type);
+	if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {
 		return (EACCES);
-}
+	}
 	/*
 	 * Get a valid lease. If cached data is stale, flush it.
 	 */
@@ -572,14 +717,22 @@ nfs_open(ap)
 			error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_p);
 			if (error)
 				return (error);
+			/* if directory changed, purge any name cache entries */
+			if ((vp->v_type == VDIR) &&
+			    (np->n_mtime != vattr.va_mtime.tv_sec))
+				cache_purge(vp);
 			np->n_mtime = vattr.va_mtime.tv_sec;
 		} else {
 			error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_p);
 			if (error)
 				return (error);
 			if (np->n_mtime != vattr.va_mtime.tv_sec) {
-				if (vp->v_type == VDIR)
+				if (vp->v_type == VDIR) {
 					np->n_direofoffset = 0;
+					nfs_invaldir(vp);
+					/* purge name cache entries */
+					cache_purge(vp);
+				}
 				if ((error = nfs_vinvalbuf(vp, V_SAVE,
 					ap->a_cred, ap->a_p, 1)) == EINTR)
 					return (error);
@@ -635,6 +788,7 @@ nfs_close(ap)
 {
 	register struct vnode *vp = ap->a_vp;
 	register struct nfsnode *np = VTONFS(vp);
+	struct nfsmount *nmp;
 	int error = 0;
 
 	if (vp->v_type == VREG) {
@@ -645,14 +799,35 @@ nfs_close(ap)
                 	&sp->s_name[0], (unsigned)(sp->s_dvp), (unsigned)vp,
                 	(unsigned)ap, (unsigned)np, (unsigned)sp);
 #endif
-	    if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) == 0 &&
+	    nmp = VFSTONFS(vp->v_mount);
+	    if (!nmp)
+	   	return (ENXIO);
+	    if ((nmp->nm_flag & NFSMNT_NQNFS) == 0 &&
 		(np->n_flag & NMODIFIED)) {
+		int getlock = !VOP_ISLOCKED(vp);
+		if (getlock) {
+			error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_p);
+			if (!error && !VFSTONFS(vp->v_mount)) {
+				VOP_UNLOCK(vp, 0, ap->a_p);
+				error = ENXIO;
+			}
+			if (error)
+				return (error);
+		}
 		if (NFS_ISV3(vp)) {
-		    error = nfs_flush(vp, ap->a_cred, MNT_WAIT, ap->a_p, 0);
-		    np->n_flag &= ~NMODIFIED;
-		} else
+		    error = nfs_flush(vp, ap->a_cred, MNT_WAIT, ap->a_p, 1);
+                    /*
+                     * We cannot clear the NMODIFIED bit in np->n_flag due to
+                     * potential races with other processes.
+		     * NMODIFIED is a hint.
+                     */
+		    /* np->n_flag &= ~NMODIFIED; */
+		} else {
 		    error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1);
+		}
 		np->n_attrstamp = 0;
+		if (getlock)
+			VOP_UNLOCK(vp, 0, ap->a_p);
 	    }
 	    if (np->n_flag & NWRITEERR) {
 		np->n_flag &= ~NWRITEERR;
@@ -682,66 +857,83 @@ nfs_getattr(ap)
 	caddr_t bpos, dpos;
 	int error = 0;
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
-	int v3 = NFS_ISV3(vp);
+	int v3;
+	u_int64_t xid;
+	int avoidfloods;
 	
+	FSDBG_TOP(513, np->n_size, np, np->n_vattr.va_size, np->n_flag);
 	/*
 	 * Update local times for special files.
 	 */
 	if (np->n_flag & (NACC | NUPD))
 		np->n_flag |= NCHG;
-
-	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 513)) | DBG_FUNC_START,
-		     (int)np->n_size, 0, (int)np->n_vattr.va_size, np->n_flag, 0);
-
 	/*
 	 * First look in the cache.
 	 */
 	if ((error = nfs_getattrcache(vp, ap->a_vap)) == 0) {
-	        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 513)) | DBG_FUNC_END,
-			     (int)np->n_size, 0, (int)np->n_vattr.va_size, np->n_flag, 0);
-
+		FSDBG_BOT(513, np->n_size, 0, np->n_vattr.va_size, np->n_flag);
 		return (0);
 	}
-	if (error != ENOENT)
+	if (error != ENOENT) {
+		FSDBG_BOT(513, np->n_size, error, np->n_vattr.va_size,
+			  np->n_flag);
 		return (error);
+	}
+
+	if (!VFSTONFS(vp->v_mount)) {
+		FSDBG_BOT(513, np->n_size, ENXIO, np->n_vattr.va_size, np->n_flag);
+		return (ENXIO);
+	}
+	v3 = NFS_ISV3(vp);
 	error = 0;
-        
+
 	if (v3 && nfsaccess_cache_timeout > 0) {
 		/*  nfsstats.accesscache_misses++; */
-		if (error = nfs3_access_otw(vp, NFSV3ACCESS_ALL, ap->a_p, ap->a_cred))
-                    return (error);
+		if (error = nfs3_access_otw(vp, NFSV3ACCESS_ALL, ap->a_p,
+					    ap->a_cred))
+			return (error);
 		if ((error = nfs_getattrcache(vp, ap->a_vap)) == 0)
 			return (0);
 		if (error != ENOENT)
 			return (error);
 		error = 0;
 	}
-
+	avoidfloods = 0;
+tryagain:
 	nfsstats.rpccnt[NFSPROC_GETATTR]++;
 	nfsm_reqhead(vp, NFSPROC_GETATTR, NFSX_FH(v3));
 	nfsm_fhtom(vp, v3);
-	nfsm_request(vp, NFSPROC_GETATTR, ap->a_p, ap->a_cred);
+	nfsm_request(vp, NFSPROC_GETATTR, ap->a_p, ap->a_cred, &xid);
 	if (!error) {
-		nfsm_loadattr(vp, ap->a_vap);
+		nfsm_loadattr(vp, ap->a_vap, &xid);
+		if (!xid) { /* out-of-order rpc - attributes were dropped */
+			m_freem(mrep);
+			FSDBG(513, -1, np, np->n_xid << 32, np->n_xid);
+			if (avoidfloods++ < 100)
+				goto tryagain;
+			/*
+			 * avoidfloods>1 is bizarre.  at 100 pull the plug
+			 */
+			panic("nfs_getattr: getattr flood\n");
+		}
 		if (np->n_mtime != ap->a_vap->va_mtime.tv_sec) {
-			NFSTRACE(NFSTRC_GA_INV, vp);
-			if (vp->v_type == VDIR)
+			FSDBG(513, -1, np, -1, vp);
+			if (vp->v_type == VDIR) {
 				nfs_invaldir(vp);
+				/* purge name cache entries */
+				cache_purge(vp);
+			}
 			error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred,
 					      ap->a_p, 1);
+			FSDBG(513, -1, np, -2, error);
 			if (!error) {
-				NFSTRACE(NFSTRC_GA_INV1, vp);
 				np->n_mtime = ap->a_vap->va_mtime.tv_sec;
-			} else {
-				NFSTRACE(NFSTRC_GA_INV2, error);
 			}
 		}
 	}
 	nfsm_reqdone;
 
-	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 513)) | DBG_FUNC_END,
-		     (int)np->n_size, -1, (int)np->n_vattr.va_size, error, 0);
-
+	FSDBG_BOT(513, np->n_size, -1, np->n_vattr.va_size, error);
 	return (error);
 }
 
@@ -767,6 +959,15 @@ nfs_setattr(ap)
 #ifndef nolint
 	tsize = (u_quad_t)0;
 #endif
+
+#ifdef XXX /* enable this code soon! (but test it first) */
+	/*		  
+	 * Setting of flags is not supported.
+	 */
+	if (vap->va_flags != VNOVAL)
+		return (EOPNOTSUPP);
+#endif
+
 	/*
 	 * Disallow write attempts if the filesystem is mounted read-only.
 	 */
@@ -798,72 +999,101 @@ nfs_setattr(ap)
 			 */
 			if (vp->v_mount->mnt_flag & MNT_RDONLY)
 				return (EROFS);
-			np->n_flag |= NMODIFIED;
- 			tsize = np->n_size;
-			
-			KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 512)) | DBG_FUNC_START,
-				     (int)np->n_size, (int)vap->va_size, (int)np->n_vattr.va_size, np->n_flag, 0);
-
- 			if (vap->va_size == 0)
- 				error = nfs_vinvalbuf(vp, 0,
- 					ap->a_cred, ap->a_p, 1);
- 			else
- 				error = nfs_vinvalbuf(vp, V_SAVE,
- 					ap->a_cred, ap->a_p, 1);
-
-			if (UBCISVALID(vp))
-				ubc_setsize(vp, (off_t)vap->va_size); /* XXX check error */
-
- 			if (error) {
-				printf("nfs_setattr: nfs_vinvalbuf %d\n", error);
-
-#if DIAGNOSTIC
-				kprintf("nfs_setattr: nfs_vinvalbuf %d\n",
-					error);
-#endif /* DIAGNOSTIC */
-				if (UBCISVALID(vp))
-				        ubc_setsize(vp, (off_t)tsize); /* XXX check error */
-
-				KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 512)) | DBG_FUNC_END,
-					     (int)np->n_size, (int)vap->va_size, (int)np->n_vattr.va_size, -1, 0);
-
- 				return (error);
+			FSDBG_TOP(512, np->n_size, vap->va_size,
+				  np->n_vattr.va_size, np->n_flag);
+			if (np->n_flag & NMODIFIED) {
+ 				if (vap->va_size == 0)
+ 					error = nfs_vinvalbuf(vp, 0,
+ 						ap->a_cred, ap->a_p, 1);
+ 				else
+ 					error = nfs_vinvalbuf(vp, V_SAVE,
+ 						ap->a_cred, ap->a_p, 1);
+	 			if (error) {
+					printf("nfs_setattr: nfs_vinvalbuf %d\n", error);
+					FSDBG_BOT(512, np->n_size, vap->va_size,
+						  np->n_vattr.va_size, -1);
+ 					return (error);
+				}
+			} else if (np->n_size > vap->va_size) { /* shrinking? */
+				daddr_t obn, bn;
+				int biosize;
+				struct nfsbuf *bp;
+
+				biosize = vp->v_mount->mnt_stat.f_iosize;
+				obn = (np->n_size - 1) / biosize;
+				bn = vap->va_size / biosize; 
+				for ( ; obn >= bn; obn--)
+				    if (nfs_buf_incore(vp, obn)) {
+					bp = nfs_buf_get(vp, obn, biosize, 0, BLK_READ);
+					if (!bp)
+						continue;
+					if (obn == bn) {
+					    int neweofoff, mustwrite;
+					    mustwrite = 0;
+					    neweofoff = vap->va_size - NBOFF(bp);
+					    /* check for any dirty data before the new EOF */
+					    if (bp->nb_dirtyend && bp->nb_dirtyoff < neweofoff) {
+						/* clip dirty range to EOF */
+						if (bp->nb_dirtyend > neweofoff)
+						    bp->nb_dirtyend = neweofoff;
+						mustwrite++;
+					    }
+					    bp->nb_dirty &= (1 << round_page_32(neweofoff)/PAGE_SIZE) - 1;
+					    if (bp->nb_dirty)
+						mustwrite++;
+					    if (mustwrite) {
+						/* gotta write out dirty data before invalidating */
+						/* (NB_STABLE indicates that data writes should be FILESYNC) */
+						/* (NB_NOCACHE indicates buffer should be discarded) */
+						CLR(bp->nb_flags, (NB_DONE | NB_ERROR | NB_INVAL | NB_ASYNC | NB_READ));
+						SET(bp->nb_flags, NB_STABLE | NB_NOCACHE);
+						/*
+						 * NFS has embedded ucred so crhold() risks zone corruption
+						 */
+						if (bp->nb_wcred == NOCRED)
+						    bp->nb_wcred = crdup(ap->a_cred);
+						error = nfs_buf_write(bp);
+						// Note: bp has been released
+						if (error) {
+						    FSDBG(512, bp, 0xd00dee, 0xbad, error);
+						    np->n_error = error;
+						    np->n_flag |= NWRITEERR;
+						    error = 0;
+						}
+						bp = NULL;
+					    }
+					}
+					if (bp) {
+					    FSDBG(512, bp, bp->nb_flags, 0, obn);
+					    SET(bp->nb_flags, NB_INVAL);
+					    nfs_buf_release(bp);
+					}
+				    }
 			}
+ 			tsize = np->n_size;
 			np->n_size = np->n_vattr.va_size = vap->va_size;
-
+			ubc_setsize(vp, (off_t)vap->va_size); /* XXX error? */
   		};
   	} else if ((vap->va_mtime.tv_sec != VNOVAL ||
-		    vap->va_atime.tv_sec != VNOVAL) && (np->n_flag & NMODIFIED) &&
-		   vp->v_type == VREG &&
-		   (error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred,
-					  ap->a_p, 1)) == EINTR)
-	        return (error);
-
+		    vap->va_atime.tv_sec != VNOVAL) &&
+		   (np->n_flag & NMODIFIED) && vp->v_type == VREG) {
+		error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1);
+		if (error == EINTR)
+			return (error);
+	}
 	error = nfs_setattrrpc(vp, vap, ap->a_cred, ap->a_p);
-
-	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 512)) | DBG_FUNC_END,
-		     (int)np->n_size, (int)vap->va_size, (int)np->n_vattr.va_size, error, 0);
-
+	FSDBG_BOT(512, np->n_size, vap->va_size, np->n_vattr.va_size, error);
 	if (error && vap->va_size != VNOVAL) {
 		/* make every effort to resync file size w/ server... */
 		int err = 0; /* preserve "error" for return */
 
 		printf("nfs_setattr: nfs_setattrrpc %d\n", error);
-#if DIAGNOSTIC
-		kprintf("nfs_setattr: nfs_setattrrpc %d\n", error);
-#endif /* DIAGNOSTIC */
 		np->n_size = np->n_vattr.va_size = tsize;
-		if (UBCISVALID(vp))
-			ubc_setsize(vp, (off_t)np->n_size); /* XXX check error */
+		ubc_setsize(vp, (off_t)np->n_size); /* XXX check error */
 		vap->va_size = tsize;
 		err = nfs_setattrrpc(vp, vap, ap->a_cred, ap->a_p);
-
 		if (err)
 			printf("nfs_setattr1: nfs_setattrrpc %d\n", err);
-#if DIAGNOSTIC
-		if (err)
-			kprintf("nfs_setattr nfs_setattrrpc %d\n", err);
-#endif /* DIAGNOSTIC */
 	}
 	return (error);
 }
@@ -885,7 +1115,13 @@ nfs_setattrrpc(vp, vap, cred, procp)
 	u_long *tl;
 	int error = 0, wccflag = NFSV3_WCCRATTR;
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
-	int v3 = NFS_ISV3(vp);
+	int v3;
+	u_int64_t xid;
+	struct timeval now;
+
+	if (!VFSTONFS(vp->v_mount))
+		return (ENXIO);
+	v3 = NFS_ISV3(vp);
 
 	nfsstats.rpccnt[NFSPROC_SETATTR]++;
 	nfsm_reqhead(vp, NFSPROC_SETATTR, NFSX_FH(v3) + NFSX_SATTR(v3));
@@ -923,8 +1159,9 @@ nfs_setattrrpc(vp, vap, cred, procp)
 			nfsm_build(tl, u_long *, NFSX_UNSIGNED);
 			*tl = nfs_false;
 		}
+		microtime(&now);
 		if (vap->va_atime.tv_sec != VNOVAL) {
-			if (vap->va_atime.tv_sec != time.tv_sec) {
+			if (vap->va_atime.tv_sec != now.tv_sec) {
 				nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED);
 				*tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT);
 				txdr_nfsv3time(&vap->va_atime, tl);
@@ -937,7 +1174,7 @@ nfs_setattrrpc(vp, vap, cred, procp)
 			*tl = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE);
 		}
 		if (vap->va_mtime.tv_sec != VNOVAL) {
-			if (vap->va_mtime.tv_sec != time.tv_sec) {
+			if (vap->va_mtime.tv_sec != now.tv_sec) {
 				nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED);
 				*tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT);
 				txdr_nfsv3time(&vap->va_mtime, tl);
@@ -969,13 +1206,13 @@ nfs_setattrrpc(vp, vap, cred, procp)
 		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
 		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
 	}
-	nfsm_request(vp, NFSPROC_SETATTR, procp, cred);
+	nfsm_request(vp, NFSPROC_SETATTR, procp, cred, &xid);
 	if (v3) {
-		nfsm_wcc_data(vp, wccflag);
-        	if ((!wccflag) && (vp->v_type != VBAD)) /* EINVAL set on VBAD vnode */
-                	VTONFS(vp)->n_attrstamp = 0;
+		nfsm_wcc_data(vp, wccflag, &xid);
+		if (!wccflag)
+    			VTONFS(vp)->n_attrstamp = 0;
 	} else
-		nfsm_loadattr(vp, (struct vattr *)0);
+		nfsm_loadattr(vp, (struct vattr *)0, &xid);
 	nfsm_reqdone;
 	return (error);
 }
@@ -1002,7 +1239,6 @@ nfs_lookup(ap)
 	register u_long *tl;
 	register caddr_t cp;
 	register long t1, t2;
-	struct nfsmount *nmp;
 	caddr_t bpos, dpos, cp2;
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
 	long len;
@@ -1011,7 +1247,9 @@ nfs_lookup(ap)
 	int lockparent, wantparent, error = 0, attrflag, fhsize;
 	int v3 = NFS_ISV3(dvp);
 	struct proc *p = cnp->cn_proc;
-        int worldbuildworkaround = 1;
+	int unlockdvp = 0;
+	u_int64_t xid;
+	struct vattr vattr;
 
 	if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
 	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
@@ -1019,110 +1257,80 @@ nfs_lookup(ap)
 	*vpp = NULLVP;
 	if (dvp->v_type != VDIR)
 		return (ENOTDIR);
+
 	lockparent = flags & LOCKPARENT;
 	wantparent = flags & (LOCKPARENT|WANTPARENT);
-	nmp = VFSTONFS(dvp->v_mount);
 	np = VTONFS(dvp);
-        
-        if (worldbuildworkaround) {
-        /* temporary workaround for world builds to not have dvp go
-            VBAD on during server calls in this routine. When
-            the real ref counting problem is found take this out.
-            Note if this was later and before the nfsm_request
-            set up, the workaround did not work (NOTE other difference
-            was I only put one VREF in that time. Thus it needs
-            to be above the cache_lookup branch or with 2 VREFS. Not
-            sure which. Can't play with world builds right now to see
-            which.  VOP_ACCESS could also make it go to server.  - EKN */
-            VREF(dvp);   /* hang on to this dvp - EKN */
-            VREF(dvp);   /* hang on tight - EKN  */
-        }
+
+	/* if directory has changed, purge any name cache entries */
+	if (!VOP_GETATTR(dvp, &vattr, cnp->cn_cred, p) &&
+	    (np->n_mtime != vattr.va_mtime.tv_sec))
+		cache_purge(dvp);
 
 	if ((error = cache_lookup(dvp, vpp, cnp)) && error != ENOENT) {
-		struct vattr vattr;
 		int vpid;
 
-		if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, p))) {
-			*vpp = NULLVP;
-                        if (worldbuildworkaround) {
-                            vrele(dvp);  /* end of hanging on tight to dvp - EKN */
-                            vrele(dvp);  /* end of hanging on tight to dvp - EKN */
-                        }
-			return (error);
-		}
-                
-                /* got to check to make sure the vnode didn't go away if access went to server */
-                if ((*vpp)->v_type == VBAD) {
-                       if (worldbuildworkaround) {
-                            vrele(dvp);  /* end of hanging on tight to dvp - EKN */
-                            vrele(dvp);  /* end of hanging on tight to dvp - EKN */
-                        }
-                        return(EINVAL);
-                }
-
 		newvp = *vpp;
 		vpid = newvp->v_id;
+
 		/*
 		 * See the comment starting `Step through' in ufs/ufs_lookup.c
 		 * for an explanation of the locking protocol
 		 */
+
+		/*
+		 * Note: we need to make sure to get a lock/ref on newvp
+		 * before we possibly go off to the server in VOP_ACCESS.
+		 */
 		if (dvp == newvp) {
 			VREF(newvp);
 			error = 0;
 		} else if (flags & ISDOTDOT) {
 			VOP_UNLOCK(dvp, 0, p);
 			error = vget(newvp, LK_EXCLUSIVE, p);
-			if (!error && lockparent && (flags & ISLASTCN))
+			if (!error)
 				error = vn_lock(dvp, LK_EXCLUSIVE, p);
 		} else {
 			error = vget(newvp, LK_EXCLUSIVE, p);
-			if (!lockparent || error || !(flags & ISLASTCN))
+			if (error)
 				VOP_UNLOCK(dvp, 0, p);
 		}
-		if (!error) {
-			if (vpid == newvp->v_id) {
-			   if (!VOP_GETATTR(newvp, &vattr, cnp->cn_cred, p)
-			    && vattr.va_ctime.tv_sec == VTONFS(newvp)->n_ctime) {
+
+		if (error)
+			goto cache_lookup_out;
+
+		if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, p))) {
+			if (dvp == newvp)
+				vrele(newvp);
+			else
+				vput(newvp);
+			*vpp = NULLVP;
+			goto error_return;
+			}
+
+		if ((dvp != newvp) && (!lockparent || !(flags & ISLASTCN)))
+			VOP_UNLOCK(dvp, 0, p);
+
+		if (vpid == newvp->v_id) {
+			if (!VOP_GETATTR(newvp, &vattr, cnp->cn_cred, p)
+				&& vattr.va_ctime.tv_sec == VTONFS(newvp)->n_ctime) {
 				nfsstats.lookupcache_hits++;
-				if (cnp->cn_nameiop != LOOKUP &&
-				    (flags & ISLASTCN))
+				if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
 					cnp->cn_flags |= SAVENAME;
-                                        
-                                if (worldbuildworkaround) {
-                                    vrele(dvp);  /* end of hanging on tight to dvp - EKN */
-                                    vrele(dvp);  /* end of hanging on tight to dvp - EKN */
-                                }
-                                
-				return (0);
-			   }
-			   cache_purge(newvp);
-                        }
-			vput(newvp);
-			if (lockparent && dvp != newvp && (flags & ISLASTCN))
-				VOP_UNLOCK(dvp, 0, p);
+				error = 0; /* ignore any from VOP_GETATTR  */
+				goto error_return;
+			}
+			cache_purge(newvp);
 		}
+		vput(newvp);
+		if ((dvp != newvp) && lockparent && (flags & ISLASTCN))
+			VOP_UNLOCK(dvp, 0, p);
+cache_lookup_out:
 		error = vn_lock(dvp, LK_EXCLUSIVE, p);
 		*vpp = NULLVP;
-		if (error) {
-                        if (worldbuildworkaround) {
-                            vrele(dvp);  /* end of hanging on tight to dvp - EKN */
-                            vrele(dvp);  /* end of hanging on tight to dvp - EKN */
-                        }
-			return (error);
-                }
+		if (error) 
+			goto error_return;
 	}
-        
-	/* 
-         * Got to check to make sure the vnode didn't go away if VOP_GETATTR went to server
-	 * or callers prior to this blocked and had it go VBAD.
-         */
-	if (dvp->v_type == VBAD) {                   
-            if (worldbuildworkaround) {
-                vrele(dvp);  /* end of hanging on tight to dvp - EKN */
-                vrele(dvp);  /* end of hanging on tight to dvp - EKN */
-                }
-            return(EINVAL);
-        }
 
 	error = 0;
 	newvp = NULLVP;
@@ -1133,19 +1341,11 @@ nfs_lookup(ap)
 		NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len));
 	nfsm_fhtom(dvp, v3);
 	nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN);
-	nfsm_request(dvp, NFSPROC_LOOKUP, cnp->cn_proc, cnp->cn_cred);
-        
-        /* this two lines set dvp refcounts back to where they were
-        * before we took extra 2 VREFS to avoid VBAD vnode on dvp
-        * during server calls for world builds. Remove when real
-        * fix is found. - EKN */
-        if (worldbuildworkaround) {
-            vrele(dvp);  /* end of hanging on tight to dvp - EKN */
-            vrele(dvp);  /* end of hanging on tight to dvp - EKN */
-            }
+	/* nfsm_request for NFSv2 causes a jump to nfsmout upon errors */
+	nfsm_request(dvp, NFSPROC_LOOKUP, cnp->cn_proc, cnp->cn_cred, &xid); 
 
 	if (error) {
-		nfsm_postop_attr(dvp, attrflag);
+		nfsm_postop_attr(dvp, attrflag, &xid);
 		m_freem(mrep);
 		goto nfsmout;
 	}
@@ -1157,56 +1357,65 @@ nfs_lookup(ap)
 	if (cnp->cn_nameiop == RENAME && wantparent && (flags & ISLASTCN)) {
 		if (NFS_CMPFH(np, fhp, fhsize)) {
 			m_freem(mrep);
-			return (EISDIR);
+			error = EISDIR;
+			goto error_return;
 		}
 		if ((error = nfs_nget(dvp->v_mount, fhp, fhsize, &np))) {
 			m_freem(mrep);
-			return (error);
+			goto error_return;
 		}
 		newvp = NFSTOV(np);
 		if (v3) {
-			nfsm_postop_attr(newvp, attrflag);
-			nfsm_postop_attr(dvp, attrflag);
+			u_int64_t dxid = xid;
+
+			nfsm_postop_attr(newvp, attrflag, &xid);
+			nfsm_postop_attr(dvp, attrflag, &dxid);
 		} else
-			nfsm_loadattr(newvp, (struct vattr *)0);
+			nfsm_loadattr(newvp, (struct vattr *)0, &xid);
 		*vpp = newvp;
 		m_freem(mrep);
 		cnp->cn_flags |= SAVENAME;
 		if (!lockparent)
 			VOP_UNLOCK(dvp, 0, p);
-		return (0);
+		error = 0;
+		goto error_return;
 	}
 
-	if (flags & ISDOTDOT) {
+	if (NFS_CMPFH(np, fhp, fhsize)) {
+		VREF(dvp);
+		newvp = dvp;
+	} else if (flags & ISDOTDOT) {
 		VOP_UNLOCK(dvp, 0, p);
 		error = nfs_nget(dvp->v_mount, fhp, fhsize, &np);
 		if (error) {
+			m_freem(mrep);
 			vn_lock(dvp, LK_EXCLUSIVE + LK_RETRY, p);
-			return (error);
+			goto error_return;
 		}
 		newvp = NFSTOV(np);
-		if (lockparent && (flags & ISLASTCN) &&
-		    (error = vn_lock(dvp, LK_EXCLUSIVE, p))) {
+		if (!lockparent || !(flags & ISLASTCN))
+			unlockdvp = 1; /* keep dvp locked until after postops */
+		if (error = vn_lock(dvp, LK_EXCLUSIVE, p)) {
+			m_freem(mrep);
 		    	vput(newvp);
-			return (error);
+			goto error_return;
 		}
-	} else if (NFS_CMPFH(np, fhp, fhsize)) {
-		VREF(dvp);
-		newvp = dvp;
 	} else {
 		if ((error = nfs_nget(dvp->v_mount, fhp, fhsize, &np))) {
 			m_freem(mrep);
-			return (error);
+			goto error_return;
 		}
 		if (!lockparent || !(flags & ISLASTCN))
-			VOP_UNLOCK(dvp, 0, p);
+			unlockdvp = 1; /* keep dvp locked until after postops */
 		newvp = NFSTOV(np);
 	}
 	if (v3) {
-		nfsm_postop_attr(newvp, attrflag);
-		nfsm_postop_attr(dvp, attrflag);
+		u_int64_t dxid = xid;
+
+		nfsm_postop_attr(newvp, attrflag, &xid);
+		nfsm_postop_attr(dvp, attrflag, &dxid);
 	} else
-		nfsm_loadattr(newvp, (struct vattr *)0);
+		nfsm_loadattr(newvp, (struct vattr *)0, &xid);
 	if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
 		cnp->cn_flags |= SAVENAME;
 	if ((cnp->cn_flags & MAKEENTRY) &&
@@ -1216,23 +1425,29 @@ nfs_lookup(ap)
 	}
 	*vpp = newvp;
 	nfsm_reqdone;
+	if (unlockdvp)
+		VOP_UNLOCK(dvp, 0, p);
 	if (error) {
 		if (newvp != NULLVP) {
-			vrele(newvp);
+			if (newvp == dvp)
+				vrele(newvp);
+			else
+				vput(newvp);
 			*vpp = NULLVP;
 		}
 		if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) &&
 		    (flags & ISLASTCN) && error == ENOENT) {
-			if (!lockparent)
-				VOP_UNLOCK(dvp, 0, p);
-			if (dvp->v_mount->mnt_flag & MNT_RDONLY)
+			if (dvp->v_mount && (dvp->v_mount->mnt_flag & MNT_RDONLY))
 				error = EROFS;
 			else
 				error = EJUSTRETURN;
+			if (!lockparent)
+				VOP_UNLOCK(dvp, 0, p);
 		}
 		if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
 			cnp->cn_flags |= SAVENAME;
 	}
+error_return:
 	return (error);
 }
 
@@ -1256,6 +1471,7 @@ nfs_read(ap)
 	return (nfs_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred, 0));
 }
 
+
 /*
  * nfs readlink call
  */
@@ -1290,18 +1506,23 @@ nfs_readlinkrpc(vp, uiop, cred)
 	caddr_t bpos, dpos, cp2;
 	int error = 0, len, attrflag;
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
-	int v3 = NFS_ISV3(vp);
+	int v3;
+	u_int64_t xid;
+
+	if (!VFSTONFS(vp->v_mount))
+		return (ENXIO);
+	v3 = NFS_ISV3(vp);
 
 	nfsstats.rpccnt[NFSPROC_READLINK]++;
 	nfsm_reqhead(vp, NFSPROC_READLINK, NFSX_FH(v3));
 	nfsm_fhtom(vp, v3);
-	nfsm_request(vp, NFSPROC_READLINK, uiop->uio_procp, cred);
+	nfsm_request(vp, NFSPROC_READLINK, uiop->uio_procp, cred, &xid);
 	if (v3)
-		nfsm_postop_attr(vp, attrflag);
+		nfsm_postop_attr(vp, attrflag, &xid);
 	if (!error) {
 		nfsm_strsiz(len, NFS_MAXPATHLEN);
-               if (len == NFS_MAXPATHLEN) {
-                        struct nfsnode *np = VTONFS(vp);
+		if (len == NFS_MAXPATHLEN) {
+			struct nfsnode *np = VTONFS(vp);
 #if DIAGNOSTIC
                         if (!np)
                                 panic("nfs_readlinkrpc: null np");
@@ -1331,19 +1552,25 @@ nfs_readrpc(vp, uiop, cred)
 	caddr_t bpos, dpos, cp2;
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
 	struct nfsmount *nmp;
-	int error = 0, len, retlen, tsiz, eof, attrflag;
-	int v3 = NFS_ISV3(vp);
+	int error = 0, len, retlen, tsiz, eof = 0, attrflag;
+	int v3, nmrsize;
+	u_int64_t xid;
 
-#ifndef nolint
-	eof = 0;
-#endif
+	FSDBG_TOP(536, vp, uiop->uio_offset, uiop->uio_resid, 0);
 	nmp = VFSTONFS(vp->v_mount);
+	if (!nmp)
+		return (ENXIO);
+	v3 = NFS_ISV3(vp);
+	nmrsize = nmp->nm_rsize;
+
 	tsiz = uiop->uio_resid;
-        if (((u_int64_t)uiop->uio_offset + (unsigned int)tsiz > 0xffffffff) && !v3)
+        if (((u_int64_t)uiop->uio_offset + (unsigned int)tsiz > 0xffffffff) && !v3) {
+		FSDBG_BOT(536, vp, uiop->uio_offset, uiop->uio_resid, EFBIG);
 		return (EFBIG);
+	}
 	while (tsiz > 0) {
 		nfsstats.rpccnt[NFSPROC_READ]++;
-		len = (tsiz > nmp->nm_rsize) ? nmp->nm_rsize : tsiz;
+		len = (tsiz > nmrsize) ? nmrsize : tsiz;
 		nfsm_reqhead(vp, NFSPROC_READ, NFSX_FH(v3) + NFSX_UNSIGNED * 3);
 		nfsm_fhtom(vp, v3);
 		nfsm_build(tl, u_long *, NFSX_UNSIGNED * 3);
@@ -1355,9 +1582,10 @@ nfs_readrpc(vp, uiop, cred)
 			*tl++ = txdr_unsigned(len);
 			*tl = 0;
 		}
-		nfsm_request(vp, NFSPROC_READ, uiop->uio_procp, cred);
+		FSDBG(536, vp, uiop->uio_offset, len, 0);
+		nfsm_request(vp, NFSPROC_READ, uiop->uio_procp, cred, &xid);
 		if (v3) {
-			nfsm_postop_attr(vp, attrflag);
+			nfsm_postop_attr(vp, attrflag, &xid);
 			if (error) {
 				m_freem(mrep);
 				goto nfsmout;
@@ -1365,8 +1593,8 @@ nfs_readrpc(vp, uiop, cred)
 			nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
 			eof = fxdr_unsigned(int, *(tl + 1));
 		} else
-			nfsm_loadattr(vp, (struct vattr *)0);
-		nfsm_strsiz(retlen, nmp->nm_rsize);
+			nfsm_loadattr(vp, (struct vattr *)0, &xid);
+		nfsm_strsiz(retlen, nmrsize);
 		nfsm_mtouio(uiop, retlen);
 		m_freem(mrep);
 		tsiz -= retlen;
@@ -1377,6 +1605,7 @@ nfs_readrpc(vp, uiop, cred)
 			tsiz = 0;
 	}
 nfsmout:
+	FSDBG_BOT(536, vp, eof, uiop->uio_resid, error);
 	return (error);
 }
 
@@ -1395,19 +1624,32 @@ nfs_writerpc(vp, uiop, cred, iomode, must_commit)
 	register int t1, t2, backup;
 	caddr_t bpos, dpos, cp2;
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
-	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
+	struct nfsmount *nmp;
 	int error = 0, len, tsiz, wccflag = NFSV3_WCCRATTR, rlen, commit;
-	int v3 = NFS_ISV3(vp), committed = NFSV3WRITE_FILESYNC;
+	int v3, committed = NFSV3WRITE_FILESYNC;
+	u_int64_t xid;
 
 #if DIAGNOSTIC
 	if (uiop->uio_iovcnt != 1)
 		panic("nfs_writerpc: iovcnt > 1");
 #endif
+	FSDBG_TOP(537, vp, uiop->uio_offset, uiop->uio_resid, *iomode);
+	nmp = VFSTONFS(vp->v_mount);
+	if (!nmp)
+		return (ENXIO);
+	v3 = NFS_ISV3(vp);
 	*must_commit = 0;
 	tsiz = uiop->uio_resid;
-        if (((u_int64_t)uiop->uio_offset + (unsigned int)tsiz > 0xffffffff) && !v3)
+        if (((u_int64_t)uiop->uio_offset + (unsigned int)tsiz > 0xffffffff) && !v3) {
+		FSDBG_BOT(537, vp, uiop->uio_offset, uiop->uio_resid, EFBIG);
 		return (EFBIG);
+	}
 	while (tsiz > 0) {
+		nmp = VFSTONFS(vp->v_mount);
+		if (!nmp) {
+			error = ENXIO;
+			break;
+		}
 		nfsstats.rpccnt[NFSPROC_WRITE]++;
 		len = (tsiz > nmp->nm_wsize) ? nmp->nm_wsize : tsiz;
 		nfsm_reqhead(vp, NFSPROC_WRITE,
@@ -1425,11 +1667,15 @@ nfs_writerpc(vp, uiop, cred, iomode, must_commit)
 			tl += 2;
 		}
 		*tl = txdr_unsigned(len);
+		FSDBG(537, vp, uiop->uio_offset, len, 0);
 		nfsm_uiotom(uiop, len);
-		nfsm_request(vp, NFSPROC_WRITE, uiop->uio_procp, cred);
+		nfsm_request(vp, NFSPROC_WRITE, uiop->uio_procp, cred, &xid);
+		nmp = VFSTONFS(vp->v_mount);
+		if (!nmp)
+			error = ENXIO;
 		if (v3) {
 			wccflag = NFSV3_WCCCHK;
-			nfsm_wcc_data(vp, wccflag);
+			nfsm_wcc_data(vp, wccflag, &xid);
 			if (!error) {
 				nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED +
 					NFSX_V3WRITEVERF);
@@ -1456,10 +1702,10 @@ nfs_writerpc(vp, uiop, cred, iomode, must_commit)
 				else if (committed == NFSV3WRITE_DATASYNC &&
 					commit == NFSV3WRITE_UNSTABLE)
 					committed = commit;
-				if ((nmp->nm_flag & NFSMNT_HASWRITEVERF) == 0) {
+				if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0) {
 				    bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
 					NFSX_V3WRITEVERF);
-				    nmp->nm_flag |= NFSMNT_HASWRITEVERF;
+				    nmp->nm_state |= NFSSTA_HASWRITEVERF;
 				} else if (bcmp((caddr_t)tl,
 				    (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF)) {
 				    *must_commit = 1;
@@ -1468,27 +1714,29 @@ nfs_writerpc(vp, uiop, cred, iomode, must_commit)
 				}
 			}
 		} else
-		    nfsm_loadattr(vp, (struct vattr *)0);
-		if ((wccflag) && (vp->v_type != VBAD))  /* EINVAL set on VBAD vnode */
+		    nfsm_loadattr(vp, (struct vattr *)0, &xid);
+
+		if (wccflag)
 		    VTONFS(vp)->n_mtime = VTONFS(vp)->n_vattr.va_mtime.tv_sec;
 		m_freem(mrep);
-                /*
-                 * we seem to have a case where we end up looping on shutdown and taking down nfs servers.
-                 * For V3, error cases, there is no way to terminate loop, if the len was 0, meaning,
-                 * nmp->nm_wsize was trashed. FreeBSD has this fix in it. Let's try it.
-                 */
-                if (error)
-                    break;
-                tsiz -= len;
+		/*
+		 * we seem to have a case where we end up looping on shutdown
+		 * and taking down nfs servers.  For V3, error cases, there is
+		 * no way to terminate loop, if the len was 0, meaning,
+		 * nmp->nm_wsize was trashed. FreeBSD has this fix in it.
+		 * Let's try it.
+		 */
+		if (error)
+			break;
+		tsiz -= len;
 	}
 nfsmout:
-        /* does it make sense to even say it was committed if we had an error? EKN */
-        /* okay well just don't on bad vnodes then. EINVAL will be returned on bad vnodes */
-        if ((vp->v_type != VBAD) && (vp->v_mount->mnt_flag & MNT_ASYNC))
+        if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_ASYNC))
 		committed = NFSV3WRITE_FILESYNC;
         *iomode = committed;
 	if (error)
 		uiop->uio_resid = tsiz;
+	FSDBG_BOT(537, vp, committed, uiop->uio_resid, error);
 	return (error);
 }
 
@@ -1517,6 +1765,7 @@ nfs_mknodrpc(dvp, vpp, cnp, vap)
 	int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0;
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
 	u_long rdev;
+	u_int64_t xid;
 	int v3 = NFS_ISV3(dvp);
 
 	if (vap->va_type == VCHR || vap->va_type == VBLK)
@@ -1557,9 +1806,9 @@ nfs_mknodrpc(dvp, vpp, cnp, vap)
 		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
 		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
 	}
-	nfsm_request(dvp, NFSPROC_MKNOD, cnp->cn_proc, cnp->cn_cred);
+	nfsm_request(dvp, NFSPROC_MKNOD, cnp->cn_proc, cnp->cn_cred, &xid);
 	if (!error) {
-		nfsm_mtofh(dvp, newvp, v3, gotvp);
+		nfsm_mtofh(dvp, newvp, v3, gotvp, &xid);
 		if (!gotvp) {
 			if (newvp) {
 				vput(newvp);
@@ -1572,7 +1821,7 @@ nfs_mknodrpc(dvp, vpp, cnp, vap)
 		}
 	}
 	if (v3)
-		nfsm_wcc_data(dvp, wccflag);
+		nfsm_wcc_data(dvp, wccflag, &xid);
 	nfsm_reqdone;
 	if (error) {
 		if (newvp)
@@ -1582,13 +1831,11 @@ nfs_mknodrpc(dvp, vpp, cnp, vap)
 			cache_enter(dvp, newvp, cnp);
 		*vpp = newvp;
 	}
-	FREE_ZONE(cnp->cn_pnbuf, cnp->cn_pnlen, M_NAMEI);
-        if (dvp->v_type != VBAD) { /* EINVAL set on VBAD vnode */
-            VTONFS(dvp)->n_flag |= NMODIFIED;
-            if (!wccflag)
+	VTONFS(dvp)->n_flag |= NMODIFIED;
+	if (!wccflag)
 		VTONFS(dvp)->n_attrstamp = 0;
-            }
 	vput(dvp);
+	NFS_FREE_PNBUF(cnp);
 	return (error);
 }
 
@@ -1610,8 +1857,9 @@ nfs_mknod(ap)
 	int error;
 
 	error = nfs_mknodrpc(ap->a_dvp, &newvp, ap->a_cnp, ap->a_vap);
-	if (!error)
+	if (!error && newvp)
 		vput(newvp);
+	*ap->a_vpp = 0;
 	return (error);
 }
 
@@ -1643,6 +1891,7 @@ nfs_create(ap)
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
 	struct vattr vattr;
 	int v3 = NFS_ISV3(dvp);
+	u_int64_t xid;
 
 	/*
 	 * Oops, not for me..
@@ -1688,9 +1937,9 @@ again:
 		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
 		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
 	}
-	nfsm_request(dvp, NFSPROC_CREATE, cnp->cn_proc, cnp->cn_cred);
+	nfsm_request(dvp, NFSPROC_CREATE, cnp->cn_proc, cnp->cn_cred, &xid);
 	if (!error) {
-		nfsm_mtofh(dvp, newvp, v3, gotvp);
+		nfsm_mtofh(dvp, newvp, v3, gotvp, &xid);
 		if (!gotvp) {
 			if (newvp) {
 				vput(newvp);
@@ -1703,7 +1952,7 @@ again:
 		}
 	}
 	if (v3)
-		nfsm_wcc_data(dvp, wccflag);
+		nfsm_wcc_data(dvp, wccflag, &xid);
 	nfsm_reqdone;
 	if (error) {
 		if (v3 && (fmode & O_EXCL) && error == NFSERR_NOTSUPP) {
@@ -1719,13 +1968,11 @@ again:
 			cache_enter(dvp, newvp, cnp);
 		*ap->a_vpp = newvp;
 	}
-	FREE_ZONE(cnp->cn_pnbuf, cnp->cn_pnlen, M_NAMEI);
-        if (dvp->v_type != VBAD) { /* EINVAL set on VBAD vnode */
-            VTONFS(dvp)->n_flag |= NMODIFIED;
-            if (!wccflag)
-                    VTONFS(dvp)->n_attrstamp = 0;
-        }
+	VTONFS(dvp)->n_flag |= NMODIFIED;
+	if (!wccflag)
+		VTONFS(dvp)->n_attrstamp = 0;
 	vput(dvp);
+	NFS_FREE_PNBUF(cnp);
 	return (error);
 }
 
@@ -1753,9 +2000,8 @@ nfs_remove(ap)
 	register struct vnode *dvp = ap->a_dvp;
 	register struct componentname *cnp = ap->a_cnp;
 	register struct nfsnode *np = VTONFS(vp);
-	int error = 0;
+	int error = 0, gofree = 0;
 	struct vattr vattr;
-	int file_deleted = 0;
 
 #if DIAGNOSTIC
 	if ((cnp->cn_flags & HASBUF) == 0)
@@ -1763,11 +2009,33 @@ nfs_remove(ap)
 	if (vp->v_usecount < 1)
 		panic("nfs_remove: bad v_usecount");
 #endif
-	if (vp->v_usecount == 1 || 
-		(UBCISVALID(vp)&&(vp->v_usecount==2)) || 
-		(np->n_sillyrename &&
-	    VOP_GETATTR(vp, &vattr, cnp->cn_cred, cnp->cn_proc) == 0 &&
-	    vattr.va_nlink > 1)) {
+
+	if (UBCISVALID(vp)) {
+		/* regular files */
+		if (UBCINFOEXISTS(vp))
+			gofree = (ubc_isinuse(vp, 1)) ? 0 : 1;
+		else {
+			/* dead or dying vnode. With vnode locking, panic instead of error */
+			vput(dvp);
+			vput(vp);
+			NFS_FREE_PNBUF(cnp);
+			return (EIO);
+		}
+	} else {
+		/* UBC not in play */
+		if (vp->v_usecount == 1)
+			gofree = 1;
+	}
+	if ((ap->a_cnp->cn_flags & NODELETEBUSY) && !gofree) {
+		/* Caller requested Carbon delete semantics, but file is busy */
+		vput(dvp);
+		vput(vp);
+		NFS_FREE_PNBUF(cnp);
+		return (EBUSY);
+	}
+	if (gofree || (np->n_sillyrename &&
+		VOP_GETATTR(vp, &vattr, cnp->cn_cred, cnp->cn_proc) == 0 &&
+		vattr.va_nlink > 1)) {
 		/*
 		 * Purge the name cache so that the chance of a lookup for
 		 * the name succeeding while the remove is in progress is
@@ -1781,7 +2049,8 @@ nfs_remove(ap)
 		 * unnecessary delayed writes later.
 		 */
 		error = nfs_vinvalbuf(vp, 0, cnp->cn_cred, cnp->cn_proc, 1);
-		ubc_setsize(vp, (off_t)0);
+		np->n_size = 0;
+		ubc_setsize(vp, (off_t)0); /* XXX check error */
 		/* Do the rpc */
 		if (error != EINTR)
 			error = nfs_removerpc(dvp, cnp->cn_nameptr,
@@ -1794,26 +2063,25 @@ nfs_remove(ap)
 		 */
 		if (error == ENOENT)
 			error = 0;
-		file_deleted = 1;
+		if (!error) {
+			/*
+			 * remove nfsnode from hash now so we can't accidentally find it
+			 * again if another object gets created with the same filehandle
+			 * before this vnode gets reclaimed
+			 */
+			LIST_REMOVE(np, n_hash);
+			np->n_flag &= ~NHASHED;
+		}
 	} else if (!np->n_sillyrename) {
 		error = nfs_sillyrename(dvp, vp, cnp);
 	}
-
-	FREE_ZONE(cnp->cn_pnbuf, cnp->cn_pnlen, M_NAMEI);
 	np->n_attrstamp = 0;
 	vput(dvp);
 
-
-	if (vp == dvp)
-		vrele(vp);
-	else
-		vput(vp);
-
-	if (file_deleted && UBCINFOEXISTS(vp)) {
-		(void) ubc_uncache(vp); 
-		ubc_release(vp);
-		/* WARNING vp may not be valid after this */
-	}
+	VOP_UNLOCK(vp, 0, cnp->cn_proc);
+	NFS_FREE_PNBUF(cnp);
+	ubc_uncache(vp);
+	vrele(vp);
 
 	return (error);
 }
@@ -1847,22 +2115,25 @@ nfs_removerpc(dvp, name, namelen, cred, proc)
 	caddr_t bpos, dpos, cp2;
 	int error = 0, wccflag = NFSV3_WCCRATTR;
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
-	int v3 = NFS_ISV3(dvp);
+	int v3;
+	u_int64_t xid;
+
+	if (!VFSTONFS(dvp->v_mount))
+		return (ENXIO);
+	v3 = NFS_ISV3(dvp);
 
 	nfsstats.rpccnt[NFSPROC_REMOVE]++;
 	nfsm_reqhead(dvp, NFSPROC_REMOVE,
 		NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(namelen));
 	nfsm_fhtom(dvp, v3);
 	nfsm_strtom(name, namelen, NFS_MAXNAMLEN);
-	nfsm_request(dvp, NFSPROC_REMOVE, proc, cred);
+	nfsm_request(dvp, NFSPROC_REMOVE, proc, cred, &xid);
 	if (v3)
-		nfsm_wcc_data(dvp, wccflag);
+		nfsm_wcc_data(dvp, wccflag, &xid);
 	nfsm_reqdone;
-        if (dvp->v_type != VBAD) { /* EINVAL set on VBAD vnode */
-            VTONFS(dvp)->n_flag |= NMODIFIED;
-            if (!wccflag)
+	VTONFS(dvp)->n_flag |= NMODIFIED;
+	if (!wccflag)
 		VTONFS(dvp)->n_attrstamp = 0;
-        }
 	return (error);
 }
 
@@ -1886,7 +2157,7 @@ nfs_rename(ap)
 	register struct vnode *tdvp = ap->a_tdvp;
 	register struct componentname *tcnp = ap->a_tcnp;
 	register struct componentname *fcnp = ap->a_fcnp;
-	int error;
+	int error, purged=0, inuse=0;
 
 #if DIAGNOSTIC
 	if ((tcnp->cn_flags & HASBUF) == 0 ||
@@ -1897,6 +2168,8 @@ nfs_rename(ap)
 	if ((fvp->v_mount != tdvp->v_mount) ||
 	    (tvp && (fvp->v_mount != tvp->v_mount))) {
 		error = EXDEV;
+		if (tvp)
+			VOP_UNLOCK(tvp, 0, tcnp->cn_proc);
 		goto out;
 	}
 
@@ -1904,30 +2177,78 @@ nfs_rename(ap)
 	 * If the tvp exists and is in use, sillyrename it before doing the
 	 * rename of the new file over it.
 	 * XXX Can't sillyrename a directory.
+	 * Don't sillyrename if source and target are same vnode (hard
+	 * links or case-variants)
 	 */
-	if (tvp && (tvp->v_usecount>(UBCISVALID(tvp) ? 2 : 1)) &&
-                !VTONFS(tvp)->n_sillyrename &&
-		tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) {
-		vput(tvp);
-		tvp = NULL;
+	if (tvp && tvp != fvp) {
+		if (UBCISVALID(tvp)) {
+			/* regular files */
+			if (UBCINFOEXISTS(tvp))
+				inuse = (ubc_isinuse(tvp, 1)) ? 1 : 0;
+			else {
+				/* dead or dying vnode. With vnode locking, panic instead of error */
+				error = EIO;
+				VOP_UNLOCK(tvp, 0, tcnp->cn_proc);
+				goto out;  
+			}
+		} else {
+			/* UBC not in play */
+			if (tvp->v_usecount > 1)
+				inuse = 1;
+		}
+	}
+	if (inuse && !VTONFS(tvp)->n_sillyrename && tvp->v_type != VDIR) {
+		if  (error = nfs_sillyrename(tdvp, tvp, tcnp)) {
+			/* sillyrename failed. Instead of pressing on, return error */
+			VOP_UNLOCK(tvp, 0, tcnp->cn_proc);
+			goto out; /* should not be ENOENT. */
+		} else {
+			/* sillyrename succeeded.*/
+			VOP_UNLOCK(tvp, 0, tcnp->cn_proc);
+			ubc_uncache(tvp); /* get the nfs turd file to disappear */
+			vrele(tvp);
+			tvp = NULL;
+		}
 	}
 
 	error = nfs_renamerpc(fdvp, fcnp->cn_nameptr, fcnp->cn_namelen,
 		tdvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred,
 		tcnp->cn_proc);
 
+	if (!error && tvp && tvp != fvp && !VTONFS(tvp)->n_sillyrename) {
+		/*
+		 * remove nfsnode from hash now so we can't accidentally find it
+		 * again if another object gets created with the same filehandle
+		 * before this vnode gets reclaimed
+		 */
+		LIST_REMOVE(VTONFS(tvp), n_hash);
+		VTONFS(tvp)->n_flag &= ~NHASHED;
+	}
+
 	if (fvp->v_type == VDIR) {
-		if (tvp != NULL && tvp->v_type == VDIR)
+		if (tvp != NULL && tvp->v_type == VDIR) {
 			cache_purge(tdvp);
+			if (tvp == tdvp) 
+				purged = 1;
+		}
 		cache_purge(fdvp);
 	}
+	
+	cache_purge(fvp);
+	if (tvp) {
+		if (!purged)
+			cache_purge(tvp);
+		VOP_UNLOCK(tvp, 0, tcnp->cn_proc);
+		ubc_uncache(tvp); /* get the nfs turd file to disappear */
+	}
+	
 out:
 	if (tdvp == tvp)
 		vrele(tdvp);
 	else
 		vput(tdvp);
 	if (tvp)
-		vput(tvp);
+		vrele(tvp); /* already unlocked */
 	vrele(fdvp);
 	vrele(fvp);
 	/*
@@ -1971,32 +2292,35 @@ nfs_renamerpc(fdvp, fnameptr, fnamelen, tdvp, tnameptr, tnamelen, cred, proc)
 	caddr_t bpos, dpos, cp2;
 	int error = 0, fwccflag = NFSV3_WCCRATTR, twccflag = NFSV3_WCCRATTR;
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
-	int v3 = NFS_ISV3(fdvp);
+	int v3;
+	u_int64_t xid;
+
+	if (!VFSTONFS(fdvp->v_mount))
+		return (ENXIO);
+	v3 = NFS_ISV3(fdvp);
 
 	nfsstats.rpccnt[NFSPROC_RENAME]++;
 	nfsm_reqhead(fdvp, NFSPROC_RENAME,
-		(NFSX_FH(v3) + NFSX_UNSIGNED)*2 + nfsm_rndup(fnamelen) +
-		nfsm_rndup(tnamelen));
+		     (NFSX_FH(v3) + NFSX_UNSIGNED)*2 + nfsm_rndup(fnamelen) +
+		      nfsm_rndup(tnamelen));
 	nfsm_fhtom(fdvp, v3);
 	nfsm_strtom(fnameptr, fnamelen, NFS_MAXNAMLEN);
 	nfsm_fhtom(tdvp, v3);
 	nfsm_strtom(tnameptr, tnamelen, NFS_MAXNAMLEN);
-	nfsm_request(fdvp, NFSPROC_RENAME, proc, cred);
+	nfsm_request(fdvp, NFSPROC_RENAME, proc, cred, &xid);
 	if (v3) {
-		nfsm_wcc_data(fdvp, fwccflag);
-		nfsm_wcc_data(tdvp, twccflag);
+		u_int64_t txid = xid;
+
+		nfsm_wcc_data(fdvp, fwccflag, &xid);
+		nfsm_wcc_data(tdvp, twccflag, &txid);
 	}
 	nfsm_reqdone;
-        if (fdvp->v_type != VBAD) { /* EINVAL set on VBAD vnode */
-            VTONFS(fdvp)->n_flag |= NMODIFIED;
-            if (!fwccflag)
+	VTONFS(fdvp)->n_flag |= NMODIFIED;
+	if (!fwccflag)
 		VTONFS(fdvp)->n_attrstamp = 0;
-        }
-        if (tdvp->v_type != VBAD) { /* EINVAL set on VBAD vnode */
-            VTONFS(tdvp)->n_flag |= NMODIFIED;
-            if (!twccflag)
-                    VTONFS(tdvp)->n_attrstamp = 0;
-        }
+	VTONFS(tdvp)->n_flag |= NMODIFIED;
+	if (!twccflag)
+		VTONFS(tdvp)->n_attrstamp = 0;
 	return (error);
 }
 
@@ -2020,23 +2344,38 @@ nfs_link(ap)
 	caddr_t bpos, dpos, cp2;
 	int error = 0, wccflag = NFSV3_WCCRATTR, attrflag = 0;
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
-	int v3 = NFS_ISV3(vp);
+	int v3, didhold;
+	u_int64_t xid;
 
 	if (vp->v_mount != tdvp->v_mount) {
 		VOP_ABORTOP(vp, cnp);
-		if (tdvp == vp)
-			vrele(tdvp);
-		else
-			vput(tdvp);
+		vput(tdvp);
 		return (EXDEV);
 	}
 
+	/* need to get vnode lock for vp before calling VOP_FSYNC() */
+	if (error = vn_lock(vp, LK_EXCLUSIVE, cnp->cn_proc)) {
+		VOP_ABORTOP(vp, cnp);
+		vput(tdvp);
+		return (error);
+	}
+
+	if (!VFSTONFS(vp->v_mount)) {
+		VOP_UNLOCK(vp, 0, cnp->cn_proc);
+		VOP_ABORTOP(vp, cnp);
+		vput(tdvp);
+		return (ENXIO);
+	}
+	v3 = NFS_ISV3(vp);
+
 	/*
 	 * Push all writes to the server, so that the attribute cache
 	 * doesn't get "out of sync" with the server.
 	 * XXX There should be a better way!
 	 */
+	didhold = ubc_hold(vp);
 	VOP_FSYNC(vp, cnp->cn_cred, MNT_WAIT, cnp->cn_proc);
+	VOP_UNLOCK(vp, 0, cnp->cn_proc);
 
 	nfsstats.rpccnt[NFSPROC_LINK]++;
 	nfsm_reqhead(vp, NFSPROC_LINK,
@@ -2044,20 +2383,24 @@ nfs_link(ap)
 	nfsm_fhtom(vp, v3);
 	nfsm_fhtom(tdvp, v3);
 	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
-	nfsm_request(vp, NFSPROC_LINK, cnp->cn_proc, cnp->cn_cred);
+	nfsm_request(vp, NFSPROC_LINK, cnp->cn_proc, cnp->cn_cred, &xid);
 	if (v3) {
-		nfsm_postop_attr(vp, attrflag);
-		nfsm_wcc_data(tdvp, wccflag);
+		u_int64_t txid = xid;
+
+		nfsm_postop_attr(vp, attrflag, &xid);
+		nfsm_wcc_data(tdvp, wccflag, &txid);
 	}
 	nfsm_reqdone;
-	FREE_ZONE(cnp->cn_pnbuf, cnp->cn_pnlen, M_NAMEI);
 
 	VTONFS(tdvp)->n_flag |= NMODIFIED;
-	if ((!attrflag) && (vp->v_type != VBAD))  /* EINVAL set on VBAD vnode */
+	if (!attrflag)
 		VTONFS(vp)->n_attrstamp = 0;
-	if ((!wccflag) && (tdvp->v_type != VBAD))  /* EINVAL set on VBAD vnode */
+	if (!wccflag)
 		VTONFS(tdvp)->n_attrstamp = 0;
+	if (didhold)
+		ubc_rele(vp);
 	vput(tdvp);
+	NFS_FREE_PNBUF(cnp);
 	/*
 	 * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
 	 */
@@ -2092,6 +2435,7 @@ nfs_symlink(ap)
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
 	struct vnode *newvp = (struct vnode *)0;
 	int v3 = NFS_ISV3(dvp);
+	u_int64_t xid;
 
 	nfsstats.rpccnt[NFSPROC_SYMLINK]++;
 	slen = strlen(ap->a_target);
@@ -2114,22 +2458,23 @@ nfs_symlink(ap)
 		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
 		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
 	}
-	nfsm_request(dvp, NFSPROC_SYMLINK, cnp->cn_proc, cnp->cn_cred);
+	nfsm_request(dvp, NFSPROC_SYMLINK, cnp->cn_proc, cnp->cn_cred, &xid);
 	if (v3) {
+		u_int64_t dxid = xid;
+
 		if (!error)
-			nfsm_mtofh(dvp, newvp, v3, gotvp);
-		nfsm_wcc_data(dvp, wccflag);
+			nfsm_mtofh(dvp, newvp, v3, gotvp, &xid);
+		nfsm_wcc_data(dvp, wccflag, &dxid);
 	}
 	nfsm_reqdone;
 	if (newvp)
 		vput(newvp);
-	FREE_ZONE(cnp->cn_pnbuf, cnp->cn_pnlen, M_NAMEI);
-        if (dvp->v_type != VBAD) { /* EINVAL set on VBAD vnode */
-            VTONFS(dvp)->n_flag |= NMODIFIED;
-            if (!wccflag)
+
+	VTONFS(dvp)->n_flag |= NMODIFIED;
+	if (!wccflag)
 		VTONFS(dvp)->n_attrstamp = 0;
-        }
 	vput(dvp);
+	NFS_FREE_PNBUF(cnp);
 	/*
 	 * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
 	 */
@@ -2167,6 +2512,7 @@ nfs_mkdir(ap)
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
 	struct vattr vattr;
 	int v3 = NFS_ISV3(dvp);
+	u_int64_t xid, dxid;
 
 	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc))) {
 		VOP_ABORTOP(dvp, cnp);
@@ -2191,28 +2537,27 @@ nfs_mkdir(ap)
 		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
 		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
 	}
-	nfsm_request(dvp, NFSPROC_MKDIR, cnp->cn_proc, cnp->cn_cred);
+	nfsm_request(dvp, NFSPROC_MKDIR, cnp->cn_proc, cnp->cn_cred, &xid);
+	dxid = xid;
 	if (!error)
-		nfsm_mtofh(dvp, newvp, v3, gotvp);
+		nfsm_mtofh(dvp, newvp, v3, gotvp, &xid);
 	if (v3)
-		nfsm_wcc_data(dvp, wccflag);
+		nfsm_wcc_data(dvp, wccflag, &dxid);
 	nfsm_reqdone;
-        if (dvp->v_type != VBAD) { /* EINVAL set on this case */
-            VTONFS(dvp)->n_flag |= NMODIFIED;
-            if (!wccflag)
+	VTONFS(dvp)->n_flag |= NMODIFIED;
+	if (!wccflag)
 		VTONFS(dvp)->n_attrstamp = 0;
-        }
 	/*
 	 * Kludge: Map EEXIST => 0 assuming that you have a reply to a retry
 	 * if we can succeed in looking up the directory.
 	 */
 	if (error == EEXIST || (!error && !gotvp)) {
 		if (newvp) {
-			vrele(newvp);
+			vput(newvp);
 			newvp = (struct vnode *)0;
 		}
 		error = nfs_lookitup(dvp, cnp->cn_nameptr, len, cnp->cn_cred,
-			cnp->cn_proc, &np);
+				     cnp->cn_proc, &np);
 		if (!error) {
 			newvp = NFSTOV(np);
 			if (newvp->v_type != VDIR)
@@ -2221,11 +2566,11 @@ nfs_mkdir(ap)
 	}
 	if (error) {
 		if (newvp)
-			vrele(newvp);
+			vput(newvp);
 	} else
 		*ap->a_vpp = newvp;
-	FREE_ZONE(cnp->cn_pnbuf, cnp->cn_pnlen, M_NAMEI);
 	vput(dvp);
+	NFS_FREE_PNBUF(cnp);
 	return (error);
 }
 
@@ -2250,26 +2595,25 @@ nfs_rmdir(ap)
 	int error = 0, wccflag = NFSV3_WCCRATTR;
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
 	int v3 = NFS_ISV3(dvp);
+	u_int64_t xid;
 
 	nfsstats.rpccnt[NFSPROC_RMDIR]++;
 	nfsm_reqhead(dvp, NFSPROC_RMDIR,
 		NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen));
 	nfsm_fhtom(dvp, v3);
 	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
-	nfsm_request(dvp, NFSPROC_RMDIR, cnp->cn_proc, cnp->cn_cred);
+	nfsm_request(dvp, NFSPROC_RMDIR, cnp->cn_proc, cnp->cn_cred, &xid);
 	if (v3)
-		nfsm_wcc_data(dvp, wccflag);
+		nfsm_wcc_data(dvp, wccflag, &xid);
 	nfsm_reqdone;
-	FREE_ZONE(cnp->cn_pnbuf, cnp->cn_pnlen, M_NAMEI);
-        if (dvp->v_type != VBAD) { /* EINVAL set on this case */
-            VTONFS(dvp)->n_flag |= NMODIFIED;
-            if (!wccflag)
+	VTONFS(dvp)->n_flag |= NMODIFIED;
+	if (!wccflag)
 		VTONFS(dvp)->n_attrstamp = 0;
-        }
 	cache_purge(dvp);
 	cache_purge(vp);
 	vput(vp);
 	vput(dvp);
+	NFS_FREE_PNBUF(cnp);
 	/*
 	 * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
 	 */
@@ -2307,10 +2651,13 @@ nfs_readdir(ap)
 				nfsstats.direofcache_hits++;
 				return (0);
 			}
-		} else if (VOP_GETATTR(vp, &vattr, ap->a_cred, uio->uio_procp) == 0 &&
-			np->n_mtime == vattr.va_mtime.tv_sec) {
-			nfsstats.direofcache_hits++;
-			return (0);
+		} else if (!VOP_GETATTR(vp, &vattr, ap->a_cred, uio->uio_procp)) {
+			if (np->n_mtime == vattr.va_mtime.tv_sec) {
+				nfsstats.direofcache_hits++;
+				return (0);
+			}
+			/* directory changed, purge any name cache entries */
+			cache_purge(vp);
 		}
 	}
 
@@ -2345,12 +2692,13 @@ nfs_readdirrpc(vp, uiop, cred)
 	caddr_t bpos, dpos, cp2;
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
 	nfsuint64 cookie;
-	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
+	struct nfsmount *nmp;
 	struct nfsnode *dnp = VTONFS(vp);
 	u_quad_t fileno;
 	int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
 	int attrflag;
-	int v3 = NFS_ISV3(vp);
+	int v3, nmreaddirsize;
+	u_int64_t xid;
 
 #ifndef nolint
 	dp = (struct dirent *)0;
@@ -2360,6 +2708,11 @@ nfs_readdirrpc(vp, uiop, cred)
 		(uiop->uio_resid & (NFS_DIRBLKSIZ - 1)))
 		panic("nfs_readdirrpc: bad uio");
 #endif
+	nmp = VFSTONFS(vp->v_mount);
+	if (!nmp)
+		return (ENXIO);
+	v3 = NFS_ISV3(vp);
+	nmreaddirsize = nmp->nm_readdirsize;
 
 	/*
 	 * If there is no cookie, assume directory was stale.
@@ -2389,10 +2742,10 @@ nfs_readdirrpc(vp, uiop, cred)
 			nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED);
 			*tl++ = cookie.nfsuquad[0];
 		}
-		*tl = txdr_unsigned(nmp->nm_readdirsize);
-		nfsm_request(vp, NFSPROC_READDIR, uiop->uio_procp, cred);
+		*tl = txdr_unsigned(nmreaddirsize);
+		nfsm_request(vp, NFSPROC_READDIR, uiop->uio_procp, cred, &xid);
 		if (v3) {
-			nfsm_postop_attr(vp, attrflag);
+			nfsm_postop_attr(vp, attrflag, &xid);
 			if (!error) {
 				nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
 				dnp->n_cookieverf.nfsuquad[0] = *tl++;
@@ -2532,12 +2885,13 @@ nfs_readdirplusrpc(vp, uiop, cred)
 	struct nameidata nami, *ndp = &nami;
 	struct componentname *cnp = &ndp->ni_cnd;
 	nfsuint64 cookie;
-	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
+	struct nfsmount *nmp;
 	struct nfsnode *dnp = VTONFS(vp), *np;
 	nfsfh_t *fhp;
 	u_quad_t fileno;
 	int error = 0, tlen, more_dirs = 1, blksiz = 0, doit, bigenough = 1, i;
-	int attrflag, fhsize;
+	int attrflag, fhsize, nmreaddirsize, nmrsize;
+	u_int64_t xid, savexid;
 
 #ifndef nolint
 	dp = (struct dirent *)0;
@@ -2547,6 +2901,12 @@ nfs_readdirplusrpc(vp, uiop, cred)
 		(uiop->uio_resid & (DIRBLKSIZ - 1)))
 		panic("nfs_readdirplusrpc: bad uio");
 #endif
+	nmp = VFSTONFS(vp->v_mount);
+	if (!nmp)
+		return (ENXIO);
+	nmreaddirsize = nmp->nm_readdirsize;
+	nmrsize = nmp->nm_rsize;
+
 	ndp->ni_dvp = vp;
 	newvp = NULLVP;
 
@@ -2573,10 +2933,12 @@ nfs_readdirplusrpc(vp, uiop, cred)
 		*tl++ = cookie.nfsuquad[1];
 		*tl++ = dnp->n_cookieverf.nfsuquad[0];
 		*tl++ = dnp->n_cookieverf.nfsuquad[1];
-		*tl++ = txdr_unsigned(nmp->nm_readdirsize);
-		*tl = txdr_unsigned(nmp->nm_rsize);
-		nfsm_request(vp, NFSPROC_READDIRPLUS, uiop->uio_procp, cred);
-		nfsm_postop_attr(vp, attrflag);
+		*tl++ = txdr_unsigned(nmreaddirsize);
+		*tl = txdr_unsigned(nmrsize);
+		nfsm_request(vp, NFSPROC_READDIRPLUS, uiop->uio_procp, cred,
+			     &xid);
+		savexid = xid;
+		nfsm_postop_attr(vp, attrflag, &xid);
 		if (error) {
 			m_freem(mrep);
 			goto nfsmout;
@@ -2660,6 +3022,20 @@ nfs_readdirplusrpc(vp, uiop, cred)
 				    VREF(vp);
 				    newvp = vp;
 				    np = dnp;
+				} else if (!bigenough ||
+				        (cnp->cn_namelen == 2 &&
+					 cnp->cn_nameptr[1] == '.' &&
+					 cnp->cn_nameptr[0] == '.')) {
+				    /*
+				     * don't doit if we can't guarantee
+				     * that this entry is NOT ".." because
+				     * we would have to drop the lock on
+				     * the directory before getting the
+				     * (lock on) the ".." vnode... and we
+				     * don't want to drop the dvp lock in
+				     * the middle of a readdirplus.
+				     */
+				    doit = 0;
 				} else {
 				    if ((error = nfs_nget(vp->v_mount, fhp,
 					fhsize, &np)))
@@ -2668,12 +3044,13 @@ nfs_readdirplusrpc(vp, uiop, cred)
 					newvp = NFSTOV(np);
 				}
 			    }
-			    if (doit) {
+			    if (doit && bigenough) {
 				dpossav2 = dpos;
 				dpos = dpossav1;
 				mdsav2 = md;
 				md = mdsav1;
-				nfsm_loadattr(newvp, (struct vattr *)0);
+				xid = savexid;
+				nfsm_loadattr(newvp, (struct vattr *)0, &xid);
 				dpos = dpossav2;
 				md = mdsav2;
 				dp->d_type =
@@ -2693,7 +3070,10 @@ nfs_readdirplusrpc(vp, uiop, cred)
 			    nfsm_adv(nfsm_rndup(i));
 			}
 			if (newvp != NULLVP) {
-			    vrele(newvp);
+			    if (newvp == vp)
+				vrele(newvp);
+			    else
+				vput(newvp);
 			    newvp = NULLVP;
 			}
 			nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
@@ -2752,6 +3132,11 @@ nfsmout:
  * to create the same funny name between the nfs_lookitup() fails and the
  * nfs_rename() completes, but...
  */
+
+/* format of "random" names and next name to try */
+/* (note: shouldn't exceed size of sillyrename.s_name) */
+static char sillyrename_name[] = ".nfsAAA%04x4.4";
+
 static int
 nfs_sillyrename(dvp, vp, cnp)
 	struct vnode *dvp, *vp;
@@ -2762,6 +3147,7 @@ nfs_sillyrename(dvp, vp, cnp)
 	int error;
 	short pid;
 	struct ucred *cred;
+	int i, j, k;
 
 	cache_purge(dvp);
 	np = VTONFS(vp);
@@ -2777,17 +3163,39 @@ nfs_sillyrename(dvp, vp, cnp)
 
 	/* Fudge together a funny name */
 	pid = cnp->cn_proc->p_pid;
-	sp->s_namlen = sprintf(sp->s_name, ".nfsA%04x4.4", pid);
+	sp->s_namlen = sprintf(sp->s_name, sillyrename_name, pid);
 
 	/* Try lookitups until we get one that isn't there */
+	i = j = k = 0;
 	while (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
 		cnp->cn_proc, (struct nfsnode **)0) == 0) {
-		sp->s_name[4]++;
-		if (sp->s_name[4] > 'z') {
-			error = EINVAL;
-			goto bad;
+		if (sp->s_name[4]++ >= 'z')
+			sp->s_name[4] = 'A';
+		if (++i > ('z' - 'A' + 1)) {
+			i = 0;
+			if (sp->s_name[5]++ >= 'z')
+				sp->s_name[5] = 'A';
+			if (++j > ('z' - 'A' + 1)) {
+				j = 0;
+				if (sp->s_name[6]++ >= 'z')
+					sp->s_name[6] = 'A';
+				if (++k > ('z' - 'A' + 1)) {
+					error = EINVAL;
+					goto bad;
+				}
+			}
+		}
+	}
+	/* make note of next "random" name to try */
+	if ((sillyrename_name[4] = (sp->s_name[4] + 1)) > 'z') {
+		sillyrename_name[4] = 'A';
+		if ((sillyrename_name[5] = (sp->s_name[5] + 1)) > 'z') {
+			sillyrename_name[5] = 'A';
+			if ((sillyrename_name[6] = (sp->s_name[6] + 1)) > 'z')
+				sillyrename_name[6] = 'A';
 		}
 	}
+	/* now, do the rename */
 	if ((error = nfs_renameit(dvp, cnp, sp)))
 		goto bad;
 	error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
@@ -2803,7 +3211,7 @@ bad:
 	cred = sp->s_cred;
 	sp->s_cred = NOCRED;
 	crfree(cred);
-	_FREE_ZONE((caddr_t)sp, sizeof (struct sillyrename), M_NFSREQ);
+	FREE_ZONE((caddr_t)sp, sizeof (struct sillyrename), M_NFSREQ);
 	return (error);
 }
 
@@ -2833,20 +3241,25 @@ nfs_lookitup(dvp, name, len, cred, procp, npp)
 	int error = 0, fhlen, attrflag;
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
 	nfsfh_t *nfhp;
-	int v3 = NFS_ISV3(dvp);
+	int v3;
+	u_int64_t xid;
+
+	if (!VFSTONFS(dvp->v_mount))
+		return (ENXIO);
+	v3 = NFS_ISV3(dvp);
 
 	nfsstats.rpccnt[NFSPROC_LOOKUP]++;
 	nfsm_reqhead(dvp, NFSPROC_LOOKUP,
 		NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len));
 	nfsm_fhtom(dvp, v3);
 	nfsm_strtom(name, len, NFS_MAXNAMLEN);
-	nfsm_request(dvp, NFSPROC_LOOKUP, procp, cred);
+	nfsm_request(dvp, NFSPROC_LOOKUP, procp, cred, &xid);
 	if (npp && !error) {
 		nfsm_getfh(nfhp, fhlen, v3);
 		if (*npp) {
 		    np = *npp;
 		    if (np->n_fhsize > NFS_SMALLFH && fhlen <= NFS_SMALLFH) {
-			_FREE_ZONE((caddr_t)np->n_fhp,
+			FREE_ZONE((caddr_t)np->n_fhp,
 					np->n_fhsize, M_NFSBIGFH);
 			np->n_fhp = &np->n_fh;
 		    } else if (np->n_fhsize <= NFS_SMALLFH && fhlen>NFS_SMALLFH)
@@ -2867,7 +3280,7 @@ nfs_lookitup(dvp, name, len, cred, procp, npp)
 		    newvp = NFSTOV(np);
 		}
 		if (v3) {
-			nfsm_postop_attr(newvp, attrflag);
+			nfsm_postop_attr(newvp, attrflag, &xid);
 			if (!attrflag && *npp == NULL) {
 				m_freem(mrep);
 				if (newvp == dvp)
@@ -2877,7 +3290,7 @@ nfs_lookitup(dvp, name, len, cred, procp, npp)
 				return (ENOENT);
 			}
 		} else
-			nfsm_loadattr(newvp, (struct vattr *)0);
+			nfsm_loadattr(newvp, (struct vattr *)0, &xid);
 	}
 	nfsm_reqdone;
 	if (npp && *npp == NULL) {
@@ -2896,7 +3309,7 @@ nfs_lookitup(dvp, name, len, cred, procp, npp)
 /*
  * Nfs Version 3 commit rpc
  */
-static int
+int
 nfs_commit(vp, offset, cnt, cred, procp)
 	register struct vnode *vp;
 	u_quad_t offset;
@@ -2911,8 +3324,12 @@ nfs_commit(vp, offset, cnt, cred, procp)
 	caddr_t bpos, dpos, cp2;
 	int error = 0, wccflag = NFSV3_WCCRATTR;
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+	u_int64_t xid;
 	
-	if ((nmp->nm_flag & NFSMNT_HASWRITEVERF) == 0)
+	FSDBG(521, vp, offset, cnt, nmp ? nmp->nm_state : 0);
+	if (!nmp)
+		return (ENXIO);
+	if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0)
 		return (0);
 	nfsstats.rpccnt[NFSPROC_COMMIT]++;
 	nfsm_reqhead(vp, NFSPROC_COMMIT, NFSX_FH(1));
@@ -2921,12 +3338,12 @@ nfs_commit(vp, offset, cnt, cred, procp)
 	txdr_hyper(&offset, tl);
 	tl += 2;
 	*tl = txdr_unsigned(cnt);
-	nfsm_request(vp, NFSPROC_COMMIT, procp, cred);
-	nfsm_wcc_data(vp, wccflag);
+	nfsm_request(vp, NFSPROC_COMMIT, procp, cred, &xid);
+	nfsm_wcc_data(vp, wccflag, &xid);
 	if (!error) {
 		nfsm_dissect(tl, u_long *, NFSX_V3WRITEVERF);
 		if (bcmp((caddr_t)nmp->nm_verf, (caddr_t)tl,
-			NFSX_V3WRITEVERF)) {
+			 NFSX_V3WRITEVERF)) {
 			bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
 				NFSX_V3WRITEVERF);
 			error = NFSERR_STALEWRITEVERF;
@@ -2936,15 +3353,6 @@ nfs_commit(vp, offset, cnt, cred, procp)
 	return (error);
 }
 
-/*
- * Kludge City..
- * - make nfs_bmap() essentially a no-op that does no translation
- * - do nfs_strategy() by doing I/O with nfs_readrpc/nfs_writerpc
- *   (Maybe I could use the process's page mapping, but I was concerned that
- *    Kernel Write might not be enabled and also figured copyout() would do
- *    a lot more work than bcopy() and also it currently happens in the
- *    context of the swapper process (2).
- */
 static int
 nfs_bmap(ap)
 	struct vop_bmap_args /* {
@@ -2961,9 +3369,12 @@ nfs_bmap(ap)
 
 	if (ap->a_vpp != NULL)
 		*ap->a_vpp = vp;
-	if (ap->a_bnp != NULL)
+	if (ap->a_bnp != NULL) {
+		if (!vp->v_mount)
+			return (ENXIO);
 		*ap->a_bnp = ap->a_bn * btodb(vp->v_mount->mnt_stat.f_iosize,
 					      devBlockSize);
+	}
 	if (ap->a_runp != NULL)
 		*ap->a_runp = 0;
 #ifdef notyet
@@ -2973,41 +3384,6 @@ nfs_bmap(ap)
 	return (0);
 }
 
-/*
- * Strategy routine.
- * For async requests when nfsiod(s) are running, queue the request by
- * calling nfs_asyncio(), otherwise just all nfs_doio() to do the
- * request.
- */
-static int
-nfs_strategy(ap)
-	struct vop_strategy_args *ap;
-{
-	register struct buf *bp = ap->a_bp;
-	struct ucred *cr;
-	struct proc *p;
-	int error = 0;
-
-	if (ISSET(bp->b_flags, B_PHYS))
-		panic("nfs_strategy: physio");
-	if (ISSET(bp->b_flags, B_ASYNC))
-		p = (struct proc *)0;
-	else
-		p = current_proc();	/* XXX */
-	if (ISSET(bp->b_flags, B_READ))
-		cr = bp->b_rcred;
-	else
-		cr = bp->b_wcred;
-	/*
-	 * If the op is asynchronous and an i/o daemon is waiting
-	 * queue the request, wake it up and wait for completion
-	 * otherwise just do it ourselves.
-	 */
-	if (!ISSET(bp->b_flags, B_ASYNC) || nfs_asyncio(bp, NOCRED))
-		error = nfs_doio(bp, cr, p);
-	return (error);
-}
-
 /*
  * Mmap a file
  *
@@ -3041,299 +3417,334 @@ nfs_fsync(ap)
 		struct proc * a_p;
 	} */ *ap;
 {
-
 	return (nfs_flush(ap->a_vp, ap->a_cred, ap->a_waitfor, ap->a_p, 1));
 }
-
-/*
- * Flush all the blocks associated with a vnode.
- * 	Walk through the buffer pool and push any dirty pages
- *	associated with the vnode.
- */
-static int
-nfs_flush(vp, cred, waitfor, p, commit)
-	register struct vnode *vp;
-	struct ucred *cred;
-	int waitfor;
-	struct proc *p;
-	int commit;
+ 
+int
+nfs_flushcommits(struct vnode *vp, struct proc *p)
 {
-	register struct nfsnode *np = VTONFS(vp);
-	register struct buf *bp;
-	register int i;
-	struct buf *nbp;
-	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
-	int s, error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos, err;
-	int passone = 1;
+	struct nfsnode *np = VTONFS(vp);
+	struct nfsbuf *bp, *nbp;
+	int i, s, error = 0, retv, bvecpos, wcred_set;
 	u_quad_t off, endoff, toff;
-	struct ucred* wcred = NULL;
-	struct buf **bvec = NULL;
-        void * object;
-        kern_return_t kret;
-        upl_t *upls = NULL;
-
-
-#ifndef NFS_COMMITBVECSIZ
+	struct ucred* wcred;
+	struct nfsbuf **bvec = NULL;
 #define NFS_COMMITBVECSIZ	20
-#endif
-	struct buf *bvec_on_stack[NFS_COMMITBVECSIZ];
-        struct upl_t *upls_on_stack[NFS_COMMITBVECSIZ]; 
-        int bvecsize = 0, bveccount, buplpos;
+#define NFS_MAXCOMMITBVECSIZ	1024
+	struct nfsbuf *bvec_on_stack[NFS_COMMITBVECSIZ];
+	int bvecsize = NFS_MAXCOMMITBVECSIZ;
 
-	if (nmp->nm_flag & NFSMNT_INT)
-		slpflag = PCATCH;
-	if (!commit)
-		passone = 0;
+	FSDBG_TOP(557, vp, np, 0, 0);
 
 	/*
-	 * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the
+	 * A nb_flags == (NB_DELWRI | NB_NEEDCOMMIT) block has been written to the
 	 * server, but nas not been committed to stable storage on the server
-	 * yet. On the first pass, the byte range is worked out and the commit
-	 * rpc is done. On the second pass, nfs_writebp() is called to do the
-	 * job.
+	 * yet. The byte range is worked out for as many nfsbufs as we can handle
+	 * and the commit rpc is done.
 	 */
-again:
-	if (vp->v_dirtyblkhd.lh_first)
+	if (np->n_dirtyblkhd.lh_first)
 		np->n_flag |= NMODIFIED;
+
 	off = (u_quad_t)-1;
 	endoff = 0;
 	bvecpos = 0;
-        buplpos = 0;
-	if (NFS_ISV3(vp) && commit) {
-		s = splbio();
+	wcred_set = 0;
+
+	if (!VFSTONFS(vp->v_mount)) {
+		error = ENXIO;
+		goto done;
+	}
+	if (!NFS_ISV3(vp)) {
+		error = EINVAL;
+		goto done;
+	}
+	s = splbio();
+
+	/*
+	 * Allocate space to remember the list of bufs to commit.  It is
+	 * important to use M_NOWAIT here to avoid a race with nfs_write
+	 */
+	MALLOC(bvec, struct nfsbuf **,
+		       bvecsize * sizeof(struct nfsbuf *), M_TEMP,
+		       M_NOWAIT);
+	if (bvec == NULL) {
+		bvec = bvec_on_stack;
+		bvecsize = NFS_COMMITBVECSIZ;
+	}
+	for (bp = np->n_dirtyblkhd.lh_first; bp && bvecpos < bvecsize; bp = nbp) {
+		nbp = bp->nb_vnbufs.le_next;
+
+		if (((bp->nb_flags & (NB_BUSY | NB_DELWRI | NB_NEEDCOMMIT))
+			!= (NB_DELWRI | NB_NEEDCOMMIT)))
+			continue;
+
+		nfs_buf_remfree(bp);
+		SET(bp->nb_flags, NB_BUSY);
 		/*
-		 * Count up how many buffers waiting for a commit.
+		 * we need a upl to see if the page has been
+		 * dirtied (think mmap) since the unstable write, and
+		 * also to prevent vm from paging it during our commit rpc
 		 */
-		bveccount = 0;
-		for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
-			nbp = bp->b_vnbufs.le_next;
-			if ((bp->b_flags & (B_BUSY | B_DELWRI | B_NEEDCOMMIT))
-			    == (B_DELWRI | B_NEEDCOMMIT))
-				bveccount++;
+		if (!ISSET(bp->nb_flags, NB_PAGELIST)) {
+			retv = nfs_buf_upl_setup(bp);
+			if (retv) {
+				/* unable to create upl */
+				/* vm object must no longer exist */
+				/* this could be fatal if we need */
+				/* to write the data again, we'll see...  */
+				printf("nfs_flushcommits: upl create failed %d\n", retv);
+				bp->nb_valid = bp->nb_dirty = 0;
+			}
 		}
+		nfs_buf_upl_check(bp);
+
+		FSDBG(557, bp, bp->nb_flags, bp->nb_valid, bp->nb_dirty);
+		FSDBG(557, bp->nb_validoff, bp->nb_validend,
+		      bp->nb_dirtyoff, bp->nb_dirtyend);
+
 		/*
-		 * Allocate space to remember the list of bufs to commit.  It is
-		 * important to use M_NOWAIT here to avoid a race with nfs_write.
-		 * If we can't get memory (for whatever reason), we will end up
-		 * committing the buffers one-by-one in the loop below.
+		 * We used to check for dirty pages here; if there were any
+		 * we'd abort the commit and force the entire buffer to be
+		 * written again.
+		 *
+		 * Instead of doing that, we now go ahead and commit the dirty
+		 * range, and then leave the buffer around with dirty pages
+		 * that will be written out later.
 		 */
-		if (bveccount > NFS_COMMITBVECSIZ) {
-			if (bvec != NULL && bvec != bvec_on_stack)
-				_FREE(bvec, M_TEMP);
-			MALLOC(bvec, struct buf **,
-			       bveccount * sizeof(struct buf *), M_TEMP, M_NOWAIT);
-			if (bvec == NULL) {
-				bvec = bvec_on_stack;
-				bvecsize = NFS_COMMITBVECSIZ;
-			} else
-				bvecsize = bveccount;
-                        /* allocate the upl structure before the loop based on buffers to commit */
-			if (upls != NULL && upls != upls_on_stack)
-               			_FREE(upls, M_TEMP);
-			MALLOC(upls, struct upl_t *,
-                            bveccount * sizeof(upl_t), M_TEMP, M_NOWAIT);
-                        if (upls == NULL)
-                            upls = upls_on_stack;
-		} else {
-			if (bvec && bvec != bvec_on_stack)
-				_FREE(bvec, M_TEMP);
-			bvec = bvec_on_stack;
-			bvecsize = NFS_COMMITBVECSIZ;
-			if (upls && upls != upls_on_stack)
-               			_FREE(upls, M_TEMP);
-                        upls = upls_on_stack;
-		}
-
-		for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
-			nbp = bp->b_vnbufs.le_next;
-			if (bvecpos >= bvecsize)
-				break;
-			if ((bp->b_flags & (B_BUSY | B_DELWRI | B_NEEDCOMMIT))
-				!= (B_DELWRI | B_NEEDCOMMIT))
-				continue;
-			bremfree(bp);
-			/*
-			 * Work out if all buffers are using the same cred
-			 * so we can deal with them all with one commit.
-			 */
-			if (wcred == NULL)
-				wcred = bp->b_wcred;
-			else if (wcred != bp->b_wcred)
-				wcred = NOCRED;
-			SET(bp->b_flags, (B_BUSY | B_WRITEINPROG));
 
-			/*
-			 * we need vm_fault_list_request so if vm decides to
-			 * do paging while we are waiting on commit rpc,
-			 * that it doesn't pick these pages.
-			 */
-			if (!ISSET(bp->b_flags, B_PAGELIST)) {
-				/* if pagelist exists, assume vm pages are locked/busy already */				off_t file_offset = ubc_blktooff(vp, bp->b_lblkno);
-				object = ubc_getobject(vp, (UBC_NOREACTIVATE|UBC_HOLDOBJECT));
-				if (object == (void*)NULL)
-					panic("nfs_getcacheblk: NULL vmobject");
-				if(bp->b_bufsize & 0xfff)
-					panic("nfs_getcacheblk: list request is less than 4k");
-				kret = vm_fault_list_request(
-						object, (vm_object_offset_t)file_offset,
-						bp->b_bufsize, &(upls[buplpos]), NULL, 0,
-						(int)(UPL_NO_SYNC | UPL_CLEAN_IN_PLACE |UPL_PRECIOUS |
-						UPL_SET_INTERNAL));
-				if (kret != KERN_SUCCESS) 
-					panic("nfs_getcacheblk: get pagelists failed with (%d)", kret);
-                                    
-#ifdef UBC_DEBUG
-				upl_ubc_alias_set(pl, ioaddr, 1);
-#endif /* UBC_DEBUG */
-				buplpos++; /* not same as bvecpos if upl existed already */
-			}
+		/* in case blocking calls were made, re-evaluate nbp */
+		nbp = bp->nb_vnbufs.le_next;
 
-			/*
-			 * A list of these buffers is kept so that the
-			 * second loop knows which buffers have actually
-			 * been committed. This is necessary, since there
-			 * may be a race between the commit rpc and new
-			 * uncommitted writes on the file.
-			 */
-			bvec[bvecpos++] = bp;
-			toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
-				bp->b_dirtyoff;
-			if (toff < off)
-				off = toff;
-			toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff);
-			if (toff > endoff)
-				endoff = toff;
-		}
-		splx(s);
-	}
-	if (bvecpos > 0) {
 		/*
-		 * Commit data on the server, as required.
-		 * If all bufs are using the same wcred, then use that with
-		 * one call for all of them, otherwise commit each one
-		 * separately.
+		 * Work out if all buffers are using the same cred
+		 * so we can deal with them all with one commit.
 		 */
-		if (wcred != NOCRED)
-			retv = nfs_commit(vp, off, (int)(endoff - off),
-					  wcred, p);
-		else {
-			retv = 0;
-			for (i = 0; i < bvecpos; i++) {
-				off_t off, size;
-				bp = bvec[i];
-				off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
-					bp->b_dirtyoff;
-				size = (u_quad_t)(bp->b_dirtyend
-						  - bp->b_dirtyoff);
-				retv = nfs_commit(vp, off, (int)size,
-						  bp->b_wcred, p);
-				if (retv) break;
-			}
+		if (wcred_set == 0) {
+			wcred = bp->nb_wcred;
+			if (wcred == NOCRED)
+				panic("nfs: needcommit w/out wcred");
+			wcred_set = 1;
+		} else if ((wcred_set == 1) && crcmp(wcred, bp->nb_wcred)) {
+			wcred_set = -1;
 		}
-
-		if (retv == NFSERR_STALEWRITEVERF)
-			nfs_clearcommit(vp->v_mount);
-                        
-                for (i = 0; i < buplpos; i++) {
-                    /*
-                    * before the VOP_BWRITE and biodone(ASYNC)/brelse, we have to undo
-                    * holding the vm page or we we will deadlock on another vm_fault_list_request.
-                    * Here's a convenient place to put it. 
-                    * Better if we could hold it by setting the PAGELIST flag and kernel_upl_map
-                    * as does nfs_writebp. Then normal biodones and brelse will clean it up and 
-                    * we can avoid this abort. For now make minimal changse and test this out.
-                    */
-                    err = kernel_upl_abort(upls[i], NULL); 
-                    if (err)
-                        printf("nfs_flush: kernel_upl_abort %d\n", err);
-                    }
+		SET(bp->nb_flags, NB_WRITEINPROG);
 
 		/*
-		 * Now, either mark the blocks I/O done or mark the
-		 * blocks dirty, depending on whether the commit
-		 * succeeded.
+		 * A list of these buffers is kept so that the
+		 * second loop knows which buffers have actually
+		 * been committed. This is necessary, since there
+		 * may be a race between the commit rpc and new
+		 * uncommitted writes on the file.
 		 */
+		bvec[bvecpos++] = bp;
+		toff = NBOFF(bp) + bp->nb_dirtyoff;
+		if (toff < off)
+			off = toff;
+		toff += (u_quad_t)(bp->nb_dirtyend - bp->nb_dirtyoff);
+		if (toff > endoff)
+			endoff = toff;
+	}
+	splx(s);
+
+	if (bvecpos == 0) {
+		error = ENOBUFS;
+		goto done;
+	}
+
+	/*
+	 * Commit data on the server, as required.
+	 * If all bufs are using the same wcred, then use that with
+	 * one call for all of them, otherwise commit each one
+	 * separately.
+	 */
+	if (wcred_set == 1)
+		retv = nfs_commit(vp, off, (int)(endoff - off), wcred, p);
+	else {
+		retv = 0;
+
 		for (i = 0; i < bvecpos; i++) {
-                        
+			off_t off, size;
 			bp = bvec[i];
-			CLR(bp->b_flags, (B_NEEDCOMMIT | B_WRITEINPROG));
-			if (retv) {
-			    brelse(bp);
-			} else {
-			    vp->v_numoutput++;
-			    SET(bp->b_flags, B_ASYNC);
-			    s = splbio();
-			    CLR(bp->b_flags, (B_READ|B_DONE|B_ERROR|B_DELWRI));
-			    bp->b_dirtyoff = bp->b_dirtyend = 0;
-			    reassignbuf(bp, vp);
-			    splx(s);
-			    biodone(bp);
+			off = NBOFF(bp) + bp->nb_dirtyoff;
+			size = (u_quad_t)(bp->nb_dirtyend - bp->nb_dirtyoff);
+			retv = nfs_commit(vp, off, (int)size, bp->nb_wcred, p);
+			if (retv) break;
+		}
+	}
+	if (retv == NFSERR_STALEWRITEVERF)
+		nfs_clearcommit(vp->v_mount);
+
+	/*
+	 * Now, either mark the blocks I/O done or mark the
+	 * blocks dirty, depending on whether the commit
+	 * succeeded.
+	 */
+	for (i = 0; i < bvecpos; i++) {
+		bp = bvec[i];
+		FSDBG(557, bp, retv, bp->nb_flags, bp->nb_dirty);
+
+		CLR(bp->nb_flags, (NB_NEEDCOMMIT | NB_WRITEINPROG));
+
+		np->n_needcommitcnt--;
+		CHECK_NEEDCOMMITCNT(np);
+
+		if (retv) {
+			nfs_buf_release(bp);
+		} else {
+			s = splbio();
+			vp->v_numoutput++;
+
+			if (ISSET(bp->nb_flags, NB_DELWRI)) {
+				nfs_nbdwrite--;
+				NFSBUFCNTCHK();
+				wakeup((caddr_t)&nfs_nbdwrite);
+			}
+			CLR(bp->nb_flags, (NB_READ|NB_DONE|NB_ERROR|NB_DELWRI));
+			/* if block still has dirty pages, we don't want it to */
+			/* be released in nfs_buf_iodone().  So, don't set NB_ASYNC. */
+			if (!bp->nb_dirty)
+				SET(bp->nb_flags, NB_ASYNC);
+
+			/* move to clean list */
+			if (bp->nb_vnbufs.le_next != NFSNOLIST)
+				LIST_REMOVE(bp, nb_vnbufs);
+			LIST_INSERT_HEAD(&VTONFS(vp)->n_cleanblkhd, bp, nb_vnbufs);
+
+			bp->nb_dirtyoff = bp->nb_dirtyend = 0;
+			splx(s);
+
+			nfs_buf_iodone(bp);
+			if (bp->nb_dirty) {
+				/* throw it back in as a delayed write buffer */
+				CLR(bp->nb_flags, NB_DONE);
+				nfs_buf_write_delayed(bp);
 			}
 		}
+	}
+
+done:
+	if (bvec != NULL && bvec != bvec_on_stack)
+		_FREE(bvec, M_TEMP);
+	FSDBG_BOT(557, vp, np, 0, error);
+	return (error);
+}
+
+/*
+ * Flush all the blocks associated with a vnode.
+ * 	Walk through the buffer pool and push any dirty pages
+ *	associated with the vnode.
+ */
+static int
+nfs_flush(vp, cred, waitfor, p, commit)
+	register struct vnode *vp;
+	struct ucred *cred;
+	int waitfor;
+	struct proc *p;
+	int commit;
+{
+	struct nfsnode *np = VTONFS(vp);
+	struct nfsbuf *bp, *nbp;
+	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
+	int i, s, error = 0, error2, slptimeo = 0, slpflag = 0;
+	int passone = 1;
 
+	FSDBG_TOP(517, vp, np, waitfor, commit);
+
+	if (!nmp) {
+		error = ENXIO;
+		goto done;
 	}
+	if (nmp->nm_flag & NFSMNT_INT)
+		slpflag = PCATCH;
+	if (!commit)
+		passone = 0;
 
 	/*
-	 * Start/do any write(s) that are required.
-         * There is a window here where B_BUSY protects the buffer. The vm pages have been
-         * freed up, yet B_BUSY is set. Don't think you will hit any busy/incore problems while
-         * we sleep, but not absolutely sure. Keep an eye on it. Otherwise we will have to hold
-         * vm page across this locked. - EKN
+	 * On the first pass, commit all the bufs that can be.
+	 * On the second pass, nfs_buf_write() is called to do the job.
 	 */
-loop:
-	if (current_thread_aborted()) {
-		error = EINTR;
+again:
+	FSDBG(518, np->n_dirtyblkhd.lh_first, np->n_flag, 0, 0);
+	if (np->n_dirtyblkhd.lh_first)
+		np->n_flag |= NMODIFIED;
+	if (!VFSTONFS(vp->v_mount)) {
+		error = ENXIO;
 		goto done;
 	}
+	if (NFS_ISV3(vp) && commit) {
+		/* loop while it looks like there are still buffers to be */
+		/* committed and nfs_flushcommits() seems to be handling them. */
+		while (np->n_needcommitcnt)
+			if (nfs_flushcommits(vp, p))
+				break;
+	}
+
+	/* Start/do any write(s) that are required. */
+loop:
 	s = splbio();
-	for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
-		nbp = bp->b_vnbufs.le_next;
-		if (ISSET(bp->b_flags, B_BUSY)) {
+	for (bp = np->n_dirtyblkhd.lh_first; bp; bp = nbp) {
+		nbp = bp->nb_vnbufs.le_next;
+		if (ISSET(bp->nb_flags, NB_BUSY)) {
+			FSDBG(524, bp, waitfor, passone, bp->nb_flags);
 			if (waitfor != MNT_WAIT || passone)
 				continue;
-			SET(bp->b_flags, B_WANTED);
+			SET(bp->nb_flags, NB_WANTED);
 			error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1),
-				"nfsfsync", slptimeo);
+				       "nfsfsync", slptimeo);
 			splx(s);
 			if (error) {
-			    if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
-				error = EINTR;
-				goto done;
-			    }
-			    if (slpflag == PCATCH) {
-				slpflag = 0;
-				slptimeo = 2 * hz;
-			    }
+				error2 = nfs_sigintr(VFSTONFS(vp->v_mount),
+				    (struct nfsreq *)0, p);
+				if (error2) {
+					error = error2;
+					goto done;
+				}
+				if (slpflag == PCATCH) {
+					slpflag = 0;
+					slptimeo = 2 * hz;
+				}
 			}
 			goto loop;
 		}
-		if (!ISSET(bp->b_flags, B_DELWRI))
+		if (!ISSET(bp->nb_flags, NB_DELWRI))
 			panic("nfs_fsync: not dirty");
-		if ((passone || !commit) && ISSET(bp->b_flags, B_NEEDCOMMIT))
+		FSDBG(525, bp, passone, commit, bp->nb_flags);
+		if ((passone || !commit) && ISSET(bp->nb_flags, NB_NEEDCOMMIT))
+			continue;
+		nfs_buf_remfree(bp);
+		if (ISSET(bp->nb_flags, NB_ERROR)) {
+			np->n_error = bp->nb_error ? bp->nb_error : EIO;
+			np->n_flag |= NWRITEERR;
+			nfs_buf_release(bp);
 			continue;
-		bremfree(bp);
+		}
 		if (passone || !commit)
-		    SET(bp->b_flags, (B_BUSY|B_ASYNC));
-		else
-		    SET(bp->b_flags, (B_BUSY|B_ASYNC|B_WRITEINPROG|B_NEEDCOMMIT));
-
+			SET(bp->nb_flags, NB_BUSY|NB_ASYNC);
+		else {
+			/* the NB_STABLE forces this to be written FILESYNC */
+			SET(bp->nb_flags, NB_BUSY|NB_ASYNC|NB_STABLE);
+		}
 		splx(s);
-		VOP_BWRITE(bp);
+		nfs_buf_write(bp);
 		goto loop;
 	}
 	splx(s);
+
 	if (passone) {
 		passone = 0;
 		goto again;
 	}
+
 	if (waitfor == MNT_WAIT) {
 		while (vp->v_numoutput) {
 			vp->v_flag |= VBWAIT;
 			error = tsleep((caddr_t)&vp->v_numoutput,
 				slpflag | (PRIBIO + 1), "nfsfsync", slptimeo);
 			if (error) {
-			    if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
-				error = EINTR;
+				error2 = nfs_sigintr(VFSTONFS(vp->v_mount),
+				    (struct nfsreq *)0, p);
+			    if (error2) {
+				error = error2;
 				goto done;
 			    }
 			    if (slpflag == PCATCH) {
@@ -3342,19 +3753,17 @@ loop:
 			    }
 			}
 		}
-		if (vp->v_dirtyblkhd.lh_first && commit) {
+		if (np->n_dirtyblkhd.lh_first && commit) {
 			goto loop;
 		}
 	}
+	FSDBG(526, np->n_flag, np->n_error, 0, 0);
 	if (np->n_flag & NWRITEERR) {
 		error = np->n_error;
 		np->n_flag &= ~NWRITEERR;
 	}
 done:
-	if (bvec != NULL && bvec != bvec_on_stack)
-		_FREE(bvec, M_TEMP);
-        if (upls != NULL && upls != upls_on_stack)
-                _FREE(upls, M_TEMP);
+	FSDBG_BOT(517, vp, np, error, 0);
 	return (error);
 }
 
@@ -3378,8 +3787,7 @@ nfs_pathconf(ap)
 }
 
 /*
- * NFS advisory byte-level locks.
- * Currently unsupported.
+ * NFS advisory byte-level locks (client)
  */
 static int
 nfs_advlock(ap)
@@ -3391,21 +3799,7 @@ nfs_advlock(ap)
 		int  a_flags;
 	} */ *ap;
 {
-#ifdef __FreeBSD__
-	register struct nfsnode *np = VTONFS(ap->a_vp);
-
-	/*
-	 * The following kludge is to allow diskless support to work
-	 * until a real NFS lockd is implemented. Basically, just pretend
-	 * that this is a local lock.
-	 */
-	return (lf_advlock(ap, &(np->n_lockf), np->n_size));
-#else
-#if DIAGNOSTIC
-	printf("nfs_advlock: pid %d comm %s\n", current_proc()->p_pid, current_proc()->p_comm);
-#endif
-	return (EOPNOTSUPP);
-#endif
+	return (nfs_dolock(ap));
 }
 
 /*
@@ -3525,213 +3919,74 @@ nfs_update(ap)
 	return (EOPNOTSUPP);
 }
 
-int				nfs_aio_threads = 0; /* 1 per nfd (arbitrary) */
-struct slock			nfs_aio_slock;
-TAILQ_HEAD(bqueues, buf)	nfs_aio_bufq;
-int				nfs_aio_bufq_len = 0; /* diagnostic only */
-
-void
-nfs_aio_thread()
-{	/* see comment below in nfs_bwrite() for some rationale */
-	struct buf	*bp;
-	boolean_t funnel_state;
-
-	funnel_state = thread_funnel_set(kernel_flock, TRUE);
-	for(;;) {
-		simple_lock(&nfs_aio_slock);
-		if ((bp = nfs_aio_bufq.tqh_first)) {
-			TAILQ_REMOVE(&nfs_aio_bufq, bp, b_freelist);
-			nfs_aio_bufq_len--;
-			simple_unlock(&nfs_aio_slock);
-			nfs_writebp(bp, 1);
-		} else { /* nothing to do - goodnight */
-			assert_wait(&nfs_aio_bufq, THREAD_UNINT);
-			simple_unlock(&nfs_aio_slock);
-			(void)tsleep((caddr_t)0, PRIBIO+1, "nfs_aio_bufq", 0);
-		}
-	}
-	(void) thread_funnel_set(kernel_flock, FALSE);
-}
-
-
-void
-nfs_aio_thread_init()
-{
-	if (nfs_aio_threads++ == 0) {
-		simple_lock_init(&nfs_aio_slock);
-		TAILQ_INIT(&nfs_aio_bufq);
-	}
-	kernel_thread(kernel_task, nfs_aio_thread);
-}
-
-
 /*
- * Just call nfs_writebp() with the force argument set to 1.
- */
-static int
-nfs_bwrite(ap)
-	struct vop_bwrite_args /* {
-		struct vnode *a_bp;
-	} */ *ap;
-{
-	extern void wakeup_one(caddr_t chan);
-
-	/*
-	 * nfs_writebp will issue a synchronous rpc to if B_ASYNC then
-	 * to avoid distributed deadlocks we handoff the write to the
-	 * nfs_aio threads.  Doing so allows us to complete the
-	 * current request, rather than blocking on a server which may
-	 * be ourself (or blocked on ourself).
-	 *
-	 * Note the loopback deadlocks happened when the thread
-	 * invoking us was nfsd, and also when it was the pagedaemon.
-	 *
-	 * This solution has one known problem.  If *ALL* buffers get
-	 * on the nfs_aio queue then no forward progress can be made
-	 * until one of those writes complete.  And if the current
-	 * nfs_aio writes-in-progress block due to a non-responsive server we
-	 * are in a deadlock circle.  Probably the cure is to limit the
-	 * async write concurrency in getnewbuf as in FreeBSD 3.2.
-	 */
-	if (nfs_aio_threads && ISSET(ap->a_bp->b_flags, B_ASYNC)) {
-		simple_lock(&nfs_aio_slock);
-		nfs_aio_bufq_len++;
-		TAILQ_INSERT_TAIL(&nfs_aio_bufq, ap->a_bp, b_freelist);
-		simple_unlock(&nfs_aio_slock);
-		wakeup_one((caddr_t)&nfs_aio_bufq);
-		return (0);
-	}
-	return (nfs_writebp(ap->a_bp, 1));
-}
-
-/*
- * This is a clone of vn_bwrite(), except that B_WRITEINPROG isn't set unless
- * the force flag is one and it also handles the B_NEEDCOMMIT flag.
+ * write (or commit) the given NFS buffer
  */
 int
-nfs_writebp(bp, force)
-	register struct buf *bp;
-	int force;
+nfs_buf_write(struct nfsbuf *bp)
 {
 	int s;
-	register int oldflags = bp->b_flags, retv = 1;
+	int oldflags = bp->nb_flags, rv = 0;
 	off_t off;
-	upl_t upl;
-	void * object;
-	kern_return_t kret;
-	struct vnode *vp = bp->b_vp;
-	upl_page_info_t *pl;
+	struct vnode *vp = bp->nb_vp;
+	struct ucred *cr;
+	struct proc *p = current_proc();
+
+	FSDBG_TOP(553, bp, NBOFF(bp), bp->nb_flags, 0);
 
-	if(!ISSET(bp->b_flags, B_BUSY))
-		panic("nfs_writebp: buffer is not busy???");
+	if (!ISSET(bp->nb_flags, NB_BUSY))
+		panic("nfs_buf_write: buffer is not busy???");
 
 	s = splbio();
-	CLR(bp->b_flags, (B_READ|B_DONE|B_ERROR|B_DELWRI));
+	CLR(bp->nb_flags, (NB_READ|NB_DONE|NB_ERROR|NB_DELWRI));
+	if (ISSET(oldflags, NB_DELWRI)) {
+		nfs_nbdwrite--;
+		NFSBUFCNTCHK();
+		wakeup((caddr_t)&nfs_nbdwrite);
+	}
 
-	if (ISSET(oldflags, (B_ASYNC|B_DELWRI))) {
-		reassignbuf(bp, vp);
+	/* move to clean list */
+	if (ISSET(oldflags, (NB_ASYNC|NB_DELWRI))) {
+		if (bp->nb_vnbufs.le_next != NFSNOLIST)
+			LIST_REMOVE(bp, nb_vnbufs);
+		LIST_INSERT_HEAD(&VTONFS(vp)->n_cleanblkhd, bp, nb_vnbufs);
 	}
 
 	vp->v_numoutput++;
-	current_proc()->p_stats->p_ru.ru_oublock++;
+	if (p && p->p_stats)
+		p->p_stats->p_ru.ru_oublock++;
 	splx(s);
-        
-        /* 
-         * Since the B_BUSY flag is set, we need to lock the page before doing nfs_commit.
-         * Otherwise we may block and get a busy incore pages during a vm pageout.
-         * Move the existing code up before the commit.
-         */
-
-        if (!ISSET(bp->b_flags, B_META) && UBCISVALID(vp)) {
-    
-            if (!ISSET(bp->b_flags, B_PAGELIST)) {
-
-				off_t file_offset = ubc_blktooff(vp, bp->b_lblkno);
-
-				object = ubc_getobject(vp, (UBC_NOREACTIVATE|UBC_HOLDOBJECT));
-				if (object == (void*)NULL)
-					panic("nfs_writebp: NULL vmobject");
-
-				if(bp->b_bufsize & 0xfff)
-					panic("nfs_writebp: list request is with less than 4k");
-	
-				kret = vm_fault_list_request(object, (vm_object_offset_t)file_offset, 
-							bp->b_bufsize, &upl, NULL, 0, 
-							(int)(UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_PRECIOUS | UPL_SET_INTERNAL));
-				if (kret != KERN_SUCCESS) {
-					panic("nfs_writebp: get pagelists failed with (%d)", kret);
-				}
-                    
-#ifdef UBC_DEBUG
-                    upl_ubc_alias_set(pl, ioaddr, 2);
-#endif /* UBC_DEBUG */
-
-                    s = splbio();
-
-                    pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
-                    bp->b_pagelist = upl;
-                    SET(bp->b_flags, B_PAGELIST);
-                    splx(s);
-                    
-                    kret = kernel_upl_map(kernel_map, upl, 
-                            (vm_address_t *)&(bp->b_data));
-                    if (kret != KERN_SUCCESS) {
-                            panic("nfs_writebp: kernel_upl_map() failed with (%d)", kret);
-                    }
-                    if(bp->b_data == 0) 
-                            panic("nfs_writebp: upl_map mapped 0");
-                    if (!upl_page_present(pl, 0)) {
-                            /* 
-                                * may be the page got paged out.
-                                * let's just read it in. It is marked
-                                * busy so we should not have any one
-                                * yanking this page underneath the fileIO
-                                */
-                            panic("nfs_writebp: nopage");
-                    }
-            }
-        }
 
 	/*
-	 * If B_NEEDCOMMIT is set, a commit rpc may do the trick. If not
-	 * an actual write will have to be scheduled via. VOP_STRATEGY().
-	 * If B_WRITEINPROG is already set, then push it with a write anyhow.
+	 * For async requests when nfsiod(s) are running, queue the request by
+	 * calling nfs_asyncio(), otherwise just call nfs_doio() to do the request.
 	 */
-	if ((oldflags & (B_NEEDCOMMIT | B_WRITEINPROG)) == B_NEEDCOMMIT) {
-		off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff;
-		SET(bp->b_flags, B_WRITEINPROG);
-		retv = nfs_commit(vp, off, bp->b_dirtyend-bp->b_dirtyoff,
-			bp->b_wcred, bp->b_proc);
-		CLR(bp->b_flags, B_WRITEINPROG);
-		if (!retv) {
-			bp->b_dirtyoff = bp->b_dirtyend = 0;
-			CLR(bp->b_flags, B_NEEDCOMMIT);
-			biodone(bp);  /* on B_ASYNC will brelse the buffer */
-                        
-		} else if (retv == NFSERR_STALEWRITEVERF)
-			nfs_clearcommit(vp->v_mount);
-	}
-	if (retv) {
-		if (force)
-			SET(bp->b_flags, B_WRITEINPROG);
-                
-                VOP_STRATEGY(bp);
-                
-	} 
-        
-	if( (oldflags & B_ASYNC) == 0) {
-		int rtval = biowait(bp);
-
-		if (oldflags & B_DELWRI) {
+	if (ISSET(bp->nb_flags, NB_ASYNC))
+		p = (struct proc *)0;
+	if (ISSET(bp->nb_flags, NB_READ))
+		cr = bp->nb_rcred;
+	else
+		cr = bp->nb_wcred;
+	if (!ISSET(bp->nb_flags, NB_ASYNC) || nfs_asyncio(bp, NOCRED))
+		rv = nfs_doio(bp, cr, p);
+
+	if ((oldflags & NB_ASYNC) == 0) {
+		rv = nfs_buf_iowait(bp);
+		/* move to clean list */
+		if (oldflags & NB_DELWRI) {
 			s = splbio();
-			reassignbuf(bp, vp);
+			if (bp->nb_vnbufs.le_next != NFSNOLIST)
+				LIST_REMOVE(bp, nb_vnbufs);
+			LIST_INSERT_HEAD(&VTONFS(vp)->n_cleanblkhd, bp, nb_vnbufs);
 			splx(s);
 		}
-		brelse(bp);
-		return (rtval);
+		FSDBG_BOT(553, bp, NBOFF(bp), bp->nb_flags, rv);
+		nfs_buf_release(bp);
+		return (rv);
 	} 
 
-	return (0);
+	FSDBG_BOT(553, bp, NBOFF(bp), bp->nb_flags, rv);
+	return (rv);
 }
 
 /*
@@ -3762,7 +4017,7 @@ nfsspec_access(ap)
 	 * unless the file is a socket, fifo, or a block or character
 	 * device resident on the filesystem.
 	 */
-	if ((mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
+	if ((mode & VWRITE) && vp->v_mount && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
 		switch (vp->v_type) {
 		case VREG: case VDIR: case VLNK:
 			return (EROFS);
@@ -3810,13 +4065,15 @@ nfsspec_read(ap)
 	} */ *ap;
 {
 	register struct nfsnode *np = VTONFS(ap->a_vp);
+	struct timeval now;
 
 	/*
 	 * Set access flag.
 	 */
 	np->n_flag |= NACC;
-	np->n_atim.tv_sec = time.tv_sec;
-	np->n_atim.tv_nsec = time.tv_usec * 1000;
+	microtime(&now);
+	np->n_atim.tv_sec = now.tv_sec;
+	np->n_atim.tv_nsec = now.tv_usec * 1000;
 	return (VOCALL(spec_vnodeop_p, VOFFSET(vop_read), ap));
 }
 
@@ -3833,13 +4090,15 @@ nfsspec_write(ap)
 	} */ *ap;
 {
 	register struct nfsnode *np = VTONFS(ap->a_vp);
+	struct timeval now;
 
 	/*
 	 * Set update flag.
 	 */
 	np->n_flag |= NUPD;
-	np->n_mtim.tv_sec = time.tv_sec;
-	np->n_mtim.tv_nsec = time.tv_usec * 1000;
+	microtime(&now);
+	np->n_mtim.tv_sec = now.tv_sec;
+	np->n_mtim.tv_nsec = now.tv_usec * 1000;
 	return (VOCALL(spec_vnodeop_p, VOFFSET(vop_write), ap));
 }
 
@@ -3863,7 +4122,7 @@ nfsspec_close(ap)
 
 	if (np->n_flag & (NACC | NUPD)) {
 		np->n_flag |= NCHG;
-		if (vp->v_usecount == 1 &&
+		if (vp->v_usecount == 1 && vp->v_mount &&
 		    (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
 			VATTR_NULL(&vattr);
 			if (np->n_flag & NACC)
@@ -3890,13 +4149,15 @@ nfsfifo_read(ap)
 {
         extern vop_t **fifo_vnodeop_p;
 	register struct nfsnode *np = VTONFS(ap->a_vp);
+	struct timeval now;
 
 	/*
 	 * Set access flag.
 	 */
 	np->n_flag |= NACC;
-	np->n_atim.tv_sec = time.tv_sec;
-	np->n_atim.tv_nsec = time.tv_usec * 1000;
+	microtime(&now);
+	np->n_atim.tv_sec = now.tv_sec;
+	np->n_atim.tv_nsec = now.tv_usec * 1000;
 	return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_read), ap));
 }
 
@@ -3914,13 +4175,15 @@ nfsfifo_write(ap)
 {
         extern vop_t **fifo_vnodeop_p;
 	register struct nfsnode *np = VTONFS(ap->a_vp);
+	struct timeval now;
 
 	/*
 	 * Set update flag.
 	 */
 	np->n_flag |= NUPD;
-	np->n_mtim.tv_sec = time.tv_sec;
-	np->n_mtim.tv_nsec = time.tv_usec * 1000;
+	microtime(&now);
+	np->n_mtim.tv_sec = now.tv_sec;
+	np->n_mtim.tv_nsec = now.tv_usec * 1000;
 	return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_write), ap));
 }
 
@@ -3941,19 +4204,21 @@ nfsfifo_close(ap)
 	register struct vnode *vp = ap->a_vp;
 	register struct nfsnode *np = VTONFS(vp);
 	struct vattr vattr;
+	struct timeval now;
         extern vop_t **fifo_vnodeop_p;
 
 	if (np->n_flag & (NACC | NUPD)) {
+		microtime(&now);
 		if (np->n_flag & NACC) {
-			np->n_atim.tv_sec = time.tv_sec;
-			np->n_atim.tv_nsec = time.tv_usec * 1000;
+			np->n_atim.tv_sec = now.tv_sec;
+			np->n_atim.tv_nsec = now.tv_usec * 1000;
 		}
 		if (np->n_flag & NUPD) {
-			np->n_mtim.tv_sec = time.tv_sec;
-			np->n_mtim.tv_nsec = time.tv_usec * 1000;
+			np->n_mtim.tv_sec = now.tv_sec;
+			np->n_mtim.tv_nsec = now.tv_usec * 1000;
 		}
 		np->n_flag |= NCHG;
-		if (vp->v_usecount == 1 &&
+		if (vp->v_usecount == 1 && vp->v_mount &&
 		    (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
 			VATTR_NULL(&vattr);
 			if (np->n_flag & NACC)
@@ -3989,7 +4254,6 @@ nfs_select(ap)
 	return (1);
 }
 
-/* XXX Eliminate use of struct bp here */
 /*
  * Vnode op for pagein using getblk_pages
  * derived from nfs_bioread()
@@ -4014,232 +4278,122 @@ nfs_pagein(ap)
 	vm_offset_t pl_offset = ap->a_pl_offset;
 	int flags  = ap->a_flags;
 	struct ucred *cred;
-	register struct nfsnode *np = VTONFS(vp);
-	register int biosize;
-	register int xsize;
+	struct nfsnode *np = VTONFS(vp);
+	int biosize, xsize, iosize;
 	struct vattr vattr;
 	struct proc *p = current_proc();
-	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
+	struct nfsmount *nmp;
 	int error = 0;
 	vm_offset_t ioaddr;
 	struct uio	auio;
 	struct iovec	aiov;
 	struct uio * uio = &auio;
-	int nocommit = flags & UPL_NOCOMMIT;
+	int nofreeupl = flags & UPL_NOCOMMIT;
+	upl_page_info_t *plinfo;
 
-	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 322)) | DBG_FUNC_NONE,
-		     (int)f_offset, size, pl, pl_offset, 0);
+	FSDBG(322, vp, f_offset, size, flags);
+	if (pl == (upl_t)NULL)
+		panic("nfs_pagein: no upl");
 
 	if (UBCINVALID(vp)) {
-#if DIAGNOSTIC
-		panic("nfs_pagein: invalid vp");
-#endif /* DIAGNOSTIC */
+		printf("nfs_pagein: invalid vnode 0x%x", (int)vp);
+		if (!nofreeupl)
+			(void) ubc_upl_abort(pl, NULL); 
 		return (EPERM);
 	}
-
 	UBCINFOCHECK("nfs_pagein", vp);
-	if(pl == (upl_t)NULL) {
-		panic("nfs_pagein: no upl");
-	}
 
-	cred = ubc_getcred(vp);
-	if (cred == NOCRED)
-		cred = ap->a_cred;
-
-	if (size <= 0)
+	if (size <= 0) {
+		printf("nfs_pagein: invalid size %d", size);
+		if (!nofreeupl)
+			(void) ubc_upl_abort(pl, NULL); 
 		return (EINVAL);
-
-	if (f_offset < 0 || f_offset >= np->n_size 
-					|| (f_offset & PAGE_MASK_64)) {
-		if (!nocommit)
-			kernel_upl_abort_range(pl, pl_offset, size, 
+	}
+	if (f_offset < 0 || f_offset >= np->n_size || (f_offset & PAGE_MASK_64)) {
+		if (!nofreeupl)
+			ubc_upl_abort_range(pl, pl_offset, size, 
 				UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY);
 		return (EINVAL);
 	}
+	cred = ubc_getcred(vp);
+	if (cred == NOCRED)
+		cred = ap->a_cred;
 
-	auio.uio_iov = &aiov;
-	auio.uio_iovcnt = 1;
 	auio.uio_offset = f_offset;
 	auio.uio_segflg = UIO_SYSSPACE;
 	auio.uio_rw = UIO_READ;
 	auio.uio_procp = NULL;
 
-
-	if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3)
-		(void)nfs_fsinfo(nmp, vp, cred, p);
-	biosize = min(vp->v_mount->mnt_stat.f_iosize, size);
-
-	if (biosize & PAGE_MASK)
-	        panic("nfs_pagein(%x): biosize not page aligned", biosize);
-
-#if 0 /* Why bother? */
-/* DO NOT BOTHER WITH "approximately maintained cache consistency" */
-/* Does not make sense in paging paths -- Umesh*/
-	/*
-	 * For nfs, cache consistency can only be maintained approximately.
-	 * Although RFC1094 does not specify the criteria, the following is
-	 * believed to be compatible with the reference port.
-	 * For nqnfs, full cache consistency is maintained within the loop.
-	 * For nfs:
-	 * If the file's modify time on the server has changed since the
-	 * last read rpc or you have written to the file,
-	 * you may have lost data cache consistency with the
-	 * server, so flush all of the file's data out of the cache.
-	 * Then force a getattr rpc to ensure that you have up to date
-	 * attributes.
-	 * NB: This implies that cache data can be read when up to
-	 * NFS_ATTRTIMEO seconds out of date. If you find that you need current
-	 * attributes this could be forced by setting n_attrstamp to 0 before
-	 * the VOP_GETATTR() call.
-	 */
-	if ((nmp->nm_flag & NFSMNT_NQNFS) == 0) {
-		if (np->n_flag & NMODIFIED) {
-			np->n_attrstamp = 0;
-			error = VOP_GETATTR(vp, &vattr, cred, p);
-			if (error) {
-				if (!nocommit)
-					kernel_upl_abort_range(pl, pl_offset, 
-						size, 
-						UPL_ABORT_ERROR |
-						UPL_ABORT_FREE_ON_EMPTY);
-				return (error);
-			}
-			np->n_mtime = vattr.va_mtime.tv_sec;
-		} else {
-			error = VOP_GETATTR(vp, &vattr, cred, p);
-			if (error){
-				if (!nocommit)
-					kernel_upl_abort_range(pl, pl_offset, size,  
-						UPL_ABORT_ERROR |
-						UPL_ABORT_FREE_ON_EMPTY);
-				return (error);
-			}
-			if (np->n_mtime != vattr.va_mtime.tv_sec) {
-				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
-				if (error){
-				        if (!nocommit)
-					        kernel_upl_abort_range(pl, pl_offset, size, 
-								UPL_ABORT_ERROR |
-								UPL_ABORT_FREE_ON_EMPTY);
-					return (error);
-				}
-				np->n_mtime = vattr.va_mtime.tv_sec;
-			}
-		}
+	nmp = VFSTONFS(vp->v_mount);
+	if (!nmp) {
+		if (!nofreeupl)
+			ubc_upl_abort_range(pl, pl_offset, size, 
+				UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY);
+		return (ENXIO);
 	}
-#endif 0 /* Why bother? */
+	if ((nmp->nm_flag & NFSMNT_NFSV3) && !(nmp->nm_state & NFSSTA_GOTFSINFO))
+		(void)nfs_fsinfo(nmp, vp, cred, p);
+	biosize = vp->v_mount->mnt_stat.f_iosize;
 
-	kernel_upl_map(kernel_map, pl, &ioaddr);
+	plinfo = ubc_upl_pageinfo(pl);
+	ubc_upl_map(pl, &ioaddr);
 	ioaddr += pl_offset;
 	xsize = size;
 
 	do {
-		uio->uio_resid = min(biosize, xsize);
-		aiov.iov_len  = uio->uio_resid;
-		aiov.iov_base = (caddr_t)ioaddr;
-
-		KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 322)) | DBG_FUNC_NONE,
-			(int)uio->uio_offset, uio->uio_resid, ioaddr, xsize, 0);
-
-#warning nfs_pagein does not support NQNFS yet.
-#if 0 /* why bother? */
-/* NO RESOURCES TO FIX NQNFS CASE */
-/* We need to deal with this later -- Umesh */
 		/*
-		 * Get a valid lease. If cached data is stale, flush it.
+		 * It would be nice to be able to issue all these requests
+		 * in parallel instead of waiting for each one to complete
+		 * before sending the next one.
+		 * XXX Should we align these requests to block boundaries?
 		 */
-		if (nmp->nm_flag & NFSMNT_NQNFS) {
-			if (NQNFS_CKINVALID(vp, np, ND_READ)) {
-				do {
-					error = nqnfs_getlease(vp, ND_READ, cred, p);
-				} while (error == NQNFS_EXPIRED);
-				if (error){
-					kernel_upl_unmap(kernel_map, pl);
-					if (!nocommit)
-						kernel_upl_abort_range(pl, pl_offset,
-							size ,UPL_ABORT_ERROR |
-						UPL_ABORT_FREE_ON_EMPTY);
-
-					return (error);
-				}
-				if (np->n_lrev != np->n_brev ||
-					(np->n_flag & NQNFSNONCACHE)) {
-					error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
-					if (error) {
-						kernel_upl_unmap(kernel_map, pl);
-					if (!nocommit)
-						kernel_upl_abort_range(pl,
-								pl_offset,size ,
-								UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY);
-						return (error);
-					}
-					np->n_brev = np->n_lrev;
-				}
-			}
-		}
-#endif 0 /* why bother? */
-
-		if (np->n_flag & NQNFSNONCACHE) {
-			error = nfs_readrpc(vp, uio, cred);
-			kernel_upl_unmap(kernel_map, pl);
-
-			if (!nocommit) {
-				if(error) 
-					kernel_upl_abort_range(pl, pl_offset, size ,
-						UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY);
-				else
-					kernel_upl_commit_range(pl, 
-						pl_offset, size, 
-						UPL_COMMIT_CLEAR_DIRTY 
-						   | UPL_COMMIT_FREE_ON_EMPTY,
-						UPL_GET_INTERNAL_PAGE_LIST(pl),
-						MAX_UPL_TRANSFER);
-			}
-			return (error);
-		}
+	        iosize = min(biosize, xsize);
+		uio->uio_resid = iosize;
+		aiov.iov_len  = iosize;
+		aiov.iov_base = (caddr_t)ioaddr;
+		auio.uio_iov = &aiov;
+		auio.uio_iovcnt = 1;
 
+		FSDBG(322, uio->uio_offset, uio->uio_resid, ioaddr, xsize);
+// XXX #warning our nfs_pagein does not support NQNFS
 		/*
 		 * With UBC we get here only when the file data is not in the VM
 		 * page cache, so go ahead and read in.
 		 */
 #ifdef UBC_DEBUG
-		upl_ubc_alias_set(pl, ioaddr, 2);
+		upl_ubc_alias_set(pl, current_act(), 2);
 #endif /* UBC_DEBUG */
 		nfsstats.pageins++;
+
 		error = nfs_readrpc(vp, uio, cred);
 
 		if (!error) {
-			int zoff;
-			int zcnt;
-
 			if (uio->uio_resid) {
 				/*
-				 * If uio_resid > 0, there is a hole in the file and
-				 * no writes after the hole have been pushed to
-				 * the server yet... or we're at the EOF
+				 * If uio_resid > 0, there is a hole in the file
+				 * and no writes after the hole have been pushed
+				 * to the server yet... or we're at the EOF
 				 * Just zero fill the rest of the valid area.
 				 */
-				zcnt = uio->uio_resid;
-				zoff = biosize - zcnt;
+				int zcnt = uio->uio_resid;
+				int zoff = iosize - zcnt;
 				bzero((char *)ioaddr + zoff, zcnt);
 
-				KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 324)) | DBG_FUNC_NONE,
-					(int)uio->uio_offset, zoff, zcnt, ioaddr, 0);
-
+				FSDBG(324, uio->uio_offset, zoff, zcnt, ioaddr);
 				uio->uio_offset += zcnt;
 			}
-			ioaddr += biosize;	
-			xsize  -= biosize;
+			ioaddr += iosize;	
+			xsize  -= iosize;
 		} else
-			KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 322)) | DBG_FUNC_NONE,
-				(int)uio->uio_offset, uio->uio_resid, error, -1, 0);
-
-		if (p && (vp->v_flag & VTEXT) &&
-				(((nmp->nm_flag & NFSMNT_NQNFS) &&
-				NQNFS_CKINVALID(vp, np, ND_READ) &&
-				np->n_lrev != np->n_brev) ||
-				(!(nmp->nm_flag & NFSMNT_NQNFS) &&
-				np->n_mtime != np->n_vattr.va_mtime.tv_sec))) { 
+			FSDBG(322, uio->uio_offset, uio->uio_resid, error, -1);
+
+		nmp = VFSTONFS(vp->v_mount);
+		if (p && (vp->v_flag & VTEXT) && nmp &&
+		    ((nmp->nm_flag & NFSMNT_NQNFS &&
+		      NQNFS_CKINVALID(vp, np, ND_READ) &&
+		      np->n_lrev != np->n_brev) ||
+		     (!(nmp->nm_flag & NFSMNT_NQNFS) &&
+		      np->n_mtime != np->n_vattr.va_mtime.tv_sec))) {
 			uprintf("Process killed due to text file modification\n");
 			psignal(p, SIGKILL);
 			p->p_flag |= P_NOSWAP;
@@ -4247,23 +4401,22 @@ nfs_pagein(ap)
 
 	} while (error == 0 && xsize > 0);
 
-	kernel_upl_unmap(kernel_map, pl);
+	ubc_upl_unmap(pl);
 
-	if (!nocommit) {
+	if (!nofreeupl) {
 		if (error) 
-			kernel_upl_abort_range(pl, pl_offset, size, 
-				UPL_ABORT_ERROR |  UPL_ABORT_FREE_ON_EMPTY);
+			ubc_upl_abort_range(pl, pl_offset, size, 
+					    UPL_ABORT_ERROR |
+					    UPL_ABORT_FREE_ON_EMPTY);
 		else
-			kernel_upl_commit_range(pl, pl_offset, size,
-				UPL_COMMIT_CLEAR_DIRTY 
-						| UPL_COMMIT_FREE_ON_EMPTY,
-				UPL_GET_INTERNAL_PAGE_LIST(pl), 
-				MAX_UPL_TRANSFER);
+			ubc_upl_commit_range(pl, pl_offset, size,
+					     UPL_COMMIT_CLEAR_DIRTY |
+					     UPL_COMMIT_FREE_ON_EMPTY);
 	}
-
 	return (error);
 }
 
+
 /*
  * Vnode op for pageout using UPL
  * Derived from nfs_write()
@@ -4288,75 +4441,120 @@ nfs_pageout(ap)
 	vm_offset_t pl_offset = ap->a_pl_offset;
 	int flags  = ap->a_flags;
 	int ioflag = ap->a_flags;
-	register int biosize;
 	struct proc *p = current_proc();
 	struct nfsnode *np = VTONFS(vp);
 	register struct ucred *cred;
-	struct buf *bp;
+	struct nfsbuf *bp;
 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 	daddr_t lbn;
-	int bufsize;
 	int n = 0, on, error = 0, iomode, must_commit, s;
 	off_t off;
 	vm_offset_t ioaddr;
 	struct uio	auio;
 	struct iovec	aiov;
-	struct uio * uio = &auio;
-	int nocommit = flags & UPL_NOCOMMIT;
-	int iosize;
-	int pgsize;
+	int nofreeupl = flags & UPL_NOCOMMIT;
+	int biosize, iosize, pgsize, xsize;
+
+	FSDBG(323, f_offset, size, pl, pl_offset);
 
-	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 323)) | DBG_FUNC_NONE,
-		(int)f_offset, size, pl, pl_offset, 0);
+	if (pl == (upl_t)NULL)
+		panic("nfs_pageout: no upl");
 
 	if (UBCINVALID(vp)) {
-#if DIAGNOSTIC
-		panic("nfs_pageout: invalid vnode");
-#endif
+		printf("nfs_pageout: invalid vnode 0x%x", (int)vp);
+		if (!nofreeupl)
+			ubc_upl_abort(pl, 0); 
 		return (EIO);
 	}
 	UBCINFOCHECK("nfs_pageout", vp);
 
-	if (size <= 0)
+	if (size <= 0) {
+		printf("nfs_pageout: invalid size %d", size);
+		if (!nofreeupl)
+			ubc_upl_abort(pl, 0); 
 		return (EINVAL);
-
-	if (pl == (upl_t)NULL) {
-		panic("nfs_pageout: no upl");
 	}
 
-	/*
-	 * I use nm_rsize, not nm_wsize so that all buffer cache blocks
-	 * will be the same size within a filesystem. nfs_writerpc will
-	 * still use nm_wsize when sizing the rpc's.
-	 */
-        biosize = min(vp->v_mount->mnt_stat.f_iosize, size);
-
-        if (biosize & PAGE_MASK)
-                panic("nfs_pageout(%x): biosize not page aligned", biosize);
-
+	if (!nmp) {
+		if (!nofreeupl)
+			ubc_upl_abort(pl, UPL_ABORT_DUMP_PAGES|UPL_ABORT_FREE_ON_EMPTY);
+		return (ENXIO);
+	}
+	biosize = vp->v_mount->mnt_stat.f_iosize;
 
 	/*
-	 * Check to see whether the buffer is incore
-	 * If incore and not busy invalidate it from the cache
-	 * we should not find it BUSY, since we always do a 
-	 * vm_fault_list_request in 'getblk' before returning
-	 * which would block on the page busy status
+	 * Check to see whether the buffer is incore.
+	 * If incore and not busy, invalidate it from the cache.
 	 */
-        lbn = f_offset / PAGE_SIZE; /* to match the size getblk uses */
-        
-	for (iosize = size; iosize > 0; iosize -= PAGE_SIZE, lbn++) {
-
+	for (iosize = 0; iosize < size; iosize += xsize) {
+		off = f_offset + iosize;
+		/* need to make sure we do things on block boundaries */
+		xsize = biosize - (off % biosize);
+		if (off + xsize > f_offset + size)
+			xsize = f_offset + size - off;
+		lbn = ubc_offtoblk(vp, off);
 		s = splbio();
-		if (bp = incore(vp, lbn)) {
-			if (ISSET(bp->b_flags, B_BUSY)) {
-                                /* don't panic incore. just tell vm we are busy */
-				(void) kernel_upl_abort(pl, NULL); 
-                                return(EBUSY);
-                                };
-
-			bremfree(bp);
-			SET(bp->b_flags, (B_BUSY | B_INVAL));
-			brelse(bp);
+		if (bp = nfs_buf_incore(vp, lbn)) {
+			FSDBG(323, off, 1, bp, bp->nb_flags);
+			if (ISSET(bp->nb_flags, NB_BUSY)) {
+				/* no panic. just tell vm we are busy */
+				if (!nofreeupl)
+					ubc_upl_abort(pl, 0); 
+				return (EBUSY);
+			}
+			if (bp->nb_dirtyend > 0) {
+				/*
+				 * if there's a dirty range in the buffer, check to
+				 * see if it extends beyond the pageout region
+				 *
+				 * if the dirty region lies completely within the
+				 * pageout region, we just invalidate the buffer
+				 * because it's all being written out now anyway.
+				 *
+				 * if any of the dirty region lies outside the
+				 * pageout region, we'll try to clip the dirty
+				 * region to eliminate the portion that's being
+				 * paged out.  If that's not possible, because
+				 * the dirty region extends before and after the
+				 * pageout region, then we'll just return EBUSY.
+				 */
+				off_t boff, start, end;
+				boff = NBOFF(bp);
+				start = off;
+				end = off + xsize;
+				/* clip end to EOF */
+				if (end > np->n_size)
+					end = np->n_size;
+				start -= boff;
+				end -= boff;
+				if ((bp->nb_dirtyoff < start) &&
+				    (bp->nb_dirtyend > end)) {
+				    /* not gonna be able to clip the dirty region */
+				    FSDBG(323, vp, bp, 0xd00deebc, EBUSY);
+				    if (!nofreeupl)
+					ubc_upl_abort(pl, 0); 
+				    return (EBUSY);
+				}
+				if ((bp->nb_dirtyoff < start) ||
+				    (bp->nb_dirtyend > end)) {
+				    /* clip dirty region, if necessary */
+				    if (bp->nb_dirtyoff < start)
+					bp->nb_dirtyend = min(bp->nb_dirtyend, start);
+				    if (bp->nb_dirtyend > end)
+					bp->nb_dirtyoff = max(bp->nb_dirtyoff, end);
+				    FSDBG(323, bp, bp->nb_dirtyoff, bp->nb_dirtyend, 0xd00dee00);
+				    /* we're leaving this block dirty */
+				    continue;
+				}
+			}
+			nfs_buf_remfree(bp);
+			SET(bp->nb_flags, (NB_BUSY | NB_INVAL));
+			if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
+				CLR(bp->nb_flags, NB_NEEDCOMMIT);
+				np->n_needcommitcnt--;
+				CHECK_NEEDCOMMITCNT(np);
+			}
+			nfs_buf_release(bp);
 		}
 		splx(s);
 	}
@@ -4367,216 +4565,153 @@ nfs_pageout(ap)
 
 	if (np->n_flag & NWRITEERR) {
 		np->n_flag &= ~NWRITEERR;
-		if (!nocommit)
-			kernel_upl_abort_range(pl, pl_offset, size, 
-				UPL_ABORT_FREE_ON_EMPTY);
+		if (!nofreeupl)
+			ubc_upl_abort_range(pl, pl_offset, size,
+					    UPL_ABORT_FREE_ON_EMPTY);
 		return (np->n_error);
 	}
-	if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3)
+	if ((nmp->nm_flag & NFSMNT_NFSV3) &&
+		!(nmp->nm_state & NFSSTA_GOTFSINFO))
 		(void)nfs_fsinfo(nmp, vp, cred, p);
 
 	if (f_offset < 0 || f_offset >= np->n_size ||
-	   (f_offset & PAGE_MASK_64) || (size & PAGE_MASK)) {
-		if (!nocommit)
-			kernel_upl_abort_range(pl, pl_offset, size, 
-				UPL_ABORT_FREE_ON_EMPTY);
+	    f_offset & PAGE_MASK_64 || size & PAGE_MASK_64) {
+		if (!nofreeupl)
+			ubc_upl_abort_range(pl, pl_offset, size,
+					    UPL_ABORT_FREE_ON_EMPTY);
 		return (EINVAL);
 	}
 
-	kernel_upl_map(kernel_map, pl, &ioaddr);
+	ubc_upl_map(pl, &ioaddr);
+	ioaddr += pl_offset;
 
-	if ((f_offset + size) > np->n_size)
-		iosize = np->n_size - f_offset;
+	if (f_offset + size > np->n_size)
+		xsize = np->n_size - f_offset;
 	else
-		iosize = size;
-
-	pgsize = (iosize + (PAGE_SIZE - 1)) & ~PAGE_MASK;
+		xsize = size;
 
+	pgsize = round_page_64(xsize);
 	if (size > pgsize) {
-		if (!nocommit)
-			kernel_upl_abort_range(pl, pl_offset + pgsize, size - pgsize,
-				UPL_ABORT_FREE_ON_EMPTY);
+		if (!nofreeupl)
+			ubc_upl_abort_range(pl, pl_offset + pgsize,
+					    size - pgsize,
+					    UPL_ABORT_FREE_ON_EMPTY);
 	}
-	auio.uio_iov = &aiov;
-	auio.uio_iovcnt = 1;
-	auio.uio_offset = f_offset;
-	auio.uio_segflg = UIO_SYSSPACE;
-	auio.uio_rw = UIO_READ;
-	auio.uio_resid = iosize;
-	auio.uio_procp = NULL;
-
-	aiov.iov_len = iosize;
-	aiov.iov_base = (caddr_t)ioaddr + pl_offset;
 
 	/* 
 	 * check for partial page and clear the
 	 * contents past end of the file before
 	 * releasing it in the VM page cache
 	 */
-	if ((f_offset < np->n_size) && (f_offset + size) > np->n_size) {
+	if (f_offset < np->n_size && f_offset + size > np->n_size) {
 		size_t io = np->n_size - f_offset;
-
-		bzero((caddr_t)(ioaddr + pl_offset + io), size - io);
-
-		KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 321)) | DBG_FUNC_NONE,
-			(int)np->n_size, (int)f_offset, (int)f_offset + io, size - io, 0);
+		bzero((caddr_t)(ioaddr + io), size - io);
+		FSDBG(321, np->n_size, f_offset, f_offset + io, size - io);
 	}
 
-	do {
-
-#warning nfs_pageout does not support NQNFS yet.
-#if 0 /* why bother? */
-/* NO RESOURCES TO FIX NQNFS CASE */
-/* We need to deal with this later -- Umesh */
+	auio.uio_offset = f_offset;
+	auio.uio_segflg = UIO_SYSSPACE;
+	auio.uio_rw = UIO_READ;
+	auio.uio_procp = NULL;
 
+	do {
 		/*
-		 * Check for a valid write lease.
+		 * It would be nice to be able to issue all these requests
+		 * in parallel instead of waiting for each one to complete
+		 * before sending the next one.
+		 * XXX Should we align these requests to block boundaries?
 		 */
-		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
-		    NQNFS_CKINVALID(vp, np, ND_WRITE)) {
-			do {
-				error = nqnfs_getlease(vp, ND_WRITE, cred, p);
-			} while (error == NQNFS_EXPIRED);
-			if (error) {
-				kernel_upl_unmap(kernel_map, pl);
-				if (!nocommit)
-					kernel_upl_abort_range(pl, pl_offset, size, 
-						UPL_ABORT_FREE_ON_EMPTY);
-				return (error);
-			}
-			if (np->n_lrev != np->n_brev ||
-			    (np->n_flag & NQNFSNONCACHE)) {
-				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
-				if (error) {
-					kernel_upl_unmap(kernel_map, pl);
-					if (!nocommit)
-						kernel_upl_abort_range(pl, 
-						pl_offset, size, 
-					 	UPL_ABORT_FREE_ON_EMPTY);
-					return (error);
-				}
-				np->n_brev = np->n_lrev;
-			}
-		}
-#endif 0 /* why bother? */
-
-		if ((np->n_flag & NQNFSNONCACHE) && uio->uio_iovcnt == 1) {
-			iomode = NFSV3WRITE_FILESYNC;
-			error = nfs_writerpc(vp, uio, cred, &iomode, &must_commit);
-			if (must_commit)
-				nfs_clearcommit(vp->v_mount);
-			kernel_upl_unmap(kernel_map, pl);
-                        
-                        /* see comments below after other nfs_writerpc and ESTALE */
-                        if (error == ESTALE) {
-                            kernel_upl_abort_range(pl, pl_offset, size, 
-                                UPL_ABORT_DUMP_PAGES|UPL_ABORT_FREE_ON_EMPTY);
-                        } else {
-                            if (!nocommit) {
-                                if(error)
-                                    kernel_upl_abort_range(pl, pl_offset, size, 
-                                    UPL_ABORT_FREE_ON_EMPTY);
-                                else
-                                    kernel_upl_commit_range(pl, 
-                                        pl_offset, size, 
-                                        UPL_COMMIT_CLEAR_DIRTY | UPL_COMMIT_FREE_ON_EMPTY,
-                                        UPL_GET_INTERNAL_PAGE_LIST(pl), MAX_UPL_TRANSFER);
-                            }
-                        }
-			return (error);
-		}
-		nfsstats.pageouts++;
-		lbn = uio->uio_offset / biosize;
-		on = uio->uio_offset & (biosize-1);
-		n = min((unsigned)(biosize - on), uio->uio_resid);
-again:
-		bufsize = biosize;
-#if 0
-		if ((lbn + 1) * biosize > np->n_size) {
-			bufsize = np->n_size - lbn * biosize;
-			bufsize = (bufsize + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
-		}
-#endif
-		vp->v_numoutput++;
-
-		np->n_flag |= NMODIFIED;
+		iosize = min(biosize, xsize);
+		auio.uio_resid = iosize;
+		aiov.iov_len = iosize;
+		aiov.iov_base = (caddr_t)ioaddr;
+		auio.uio_iov = &aiov;
+		auio.uio_iovcnt = 1;
 
-#if 0 /* why bother? */
-/* NO RESOURCES TO FIX NQNFS CASE */
-/* We need to deal with this later -- Umesh */
-		/*
-		 * Check for valid write lease and get one as required.
-		 * In case getblk() and/or bwrite() delayed us.
-		 */
-		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
-		    NQNFS_CKINVALID(vp, np, ND_WRITE)) {
-			do {
-				error = nqnfs_getlease(vp, ND_WRITE, cred, p);
-			} while (error == NQNFS_EXPIRED);
-			if (error)
-				goto cleanup;
-
-			if (np->n_lrev != np->n_brev ||
-			    (np->n_flag & NQNFSNONCACHE)) {
-					error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
-					if (error) {
-						kernel_upl_unmap(kernel_map, pl);
-						if (!nocommit)
-							kernel_upl_abort_range(pl, 
-							pl_offset,
-					 		size,
-						  UPL_ABORT_FREE_ON_EMPTY);
-
-						return (error);
-					}
-					np->n_brev = np->n_lrev;
-					goto again;
-			}
-		}
-#endif 0 /* why bother? */
+		FSDBG(323, auio.uio_offset, auio.uio_resid, ioaddr, xsize);
+// XXX #warning our nfs_pageout does not support NQNFS
+		nfsstats.pageouts++;
 
+		vp->v_numoutput++;
+		/* NMODIFIED would be set here if doing unstable writes */
 		iomode = NFSV3WRITE_FILESYNC;
-		error = nfs_writerpc(vp, uio, cred, &iomode, &must_commit);
+		error = nfs_writerpc(vp, &auio, cred, &iomode, &must_commit);
 		if (must_commit)
 			nfs_clearcommit(vp->v_mount);
-		vp->v_numoutput--;
-
+		vpwakeup(vp);
 		if (error)
 			goto cleanup;
-
-		if (n > 0) {
-			uio->uio_resid -= n;
-			uio->uio_offset += n;
-			uio->uio_iov->iov_base += n;
-			uio->uio_iov->iov_len -= n;
-		}
-	} while (uio->uio_resid > 0 && n > 0);
+		/* Note: no need to check uio_resid, because */
+		/* it'll only be set if there was an error. */
+		ioaddr += iosize;
+		xsize -= iosize;
+	} while (xsize > 0);
 
 cleanup:
-	kernel_upl_unmap(kernel_map, pl);
-	/* 
-	* EStale is special. In this case, we want vm to dump out
-	* the pages. Better yet, sever the object so we don't come
-	* back here on each page of the object to page out. For now,
-	* just dump. 
-	* XXX What about !nocommit case? Should ESTALE only be checked
-	* in that portion? - EKN
-	*/
-	if (error == ESTALE) {
-            kernel_upl_abort_range(pl, pl_offset, size, 
-                UPL_ABORT_DUMP_PAGES|UPL_ABORT_FREE_ON_EMPTY);
-	} else {
-            if (!nocommit) {
-                if(error)
-                    kernel_upl_abort_range(pl, pl_offset, pgsize, 
-                        UPL_ABORT_FREE_ON_EMPTY);
-                else
-                    kernel_upl_commit_range(pl, pl_offset, pgsize,
-                        UPL_COMMIT_CLEAR_DIRTY | UPL_COMMIT_FREE_ON_EMPTY,
-                    UPL_GET_INTERNAL_PAGE_LIST(pl), MAX_UPL_TRANSFER);
-            }
+	ubc_upl_unmap(pl);
+	/*
+	 * We've had several different solutions on what to do when the pageout
+	 * gets an error. If we don't handle it, and return an error to the 
+	 * caller, vm, it will retry. This can end in endless looping
+	 * between vm and here doing retries of the same page. Doing a dump
+	 * back to vm, will get it out of vm's knowledge and we lose whatever
+	 * data existed. This is risky, but in some cases necessary. For
+	 * example, the initial fix here was to do that for ESTALE. In that case
+	 * the server is telling us that the file is no longer the same. We 
+	 * would not want to keep paging out to that. We also saw some 151 
+	 * errors from Auspex server and NFSv3 can return errors higher than
+	 * ELAST. Those along with NFS known server errors we will "dump" from
+	 * vm.  Errors we don't expect to occur, we dump and log for further
+	 * analysis. Errors that could be transient, networking ones,
+	 * we let vm "retry". Lastly, errors that we retry, but may have potential
+	 * to storm the network, we "retrywithsleep". "sever" will be used
+	 * in the future to dump all pages of object for cases like ESTALE.
+	 * All this is the basis for the states returned and first guesses on
+	 * error handling. Tweaking expected as more statistics are gathered.
+	 * Note, in the long run we may need another more robust solution to
+	 * have some kind of persistent store when the vm cannot dump nor keep
+	 * retrying as a solution, but this would be a file architectural change
+	 */
+	  
+	if (!nofreeupl) { /* otherwise stacked file system has to handle this */
+		if (error) {
+			int abortflags; 
+			short action = nfs_pageouterrorhandler(error);
+			
+			switch (action) {
+				case DUMP:
+					abortflags = UPL_ABORT_DUMP_PAGES|UPL_ABORT_FREE_ON_EMPTY;
+					break;
+				case DUMPANDLOG:
+					abortflags = UPL_ABORT_DUMP_PAGES|UPL_ABORT_FREE_ON_EMPTY;
+					if (error <= ELAST &&
+					    (errorcount[error] % 100 == 0)) 
+						printf("nfs_pageout: unexpected error %d. dumping vm page\n", error);
+					errorcount[error]++;
+					break;
+				case RETRY:
+					abortflags = UPL_ABORT_FREE_ON_EMPTY;
+					break;
+				case RETRYWITHSLEEP:
+					abortflags = UPL_ABORT_FREE_ON_EMPTY;
+					/* pri unused. PSOCK for placeholder. */
+					(void) tsleep(&lbolt, PSOCK,
+						      "nfspageout", 0);
+					break;
+				case SEVER: /* not implemented */
+				default:
+					printf("nfs_pageout: action %d not expected\n", action);
+					break;
+			}
+				
+			ubc_upl_abort_range(pl, pl_offset, size, abortflags);
+			/* return error in all cases above */
+			
+		} else 
+			ubc_upl_commit_range(pl, pl_offset, pgsize,
+					     UPL_COMMIT_CLEAR_DIRTY |
+					     UPL_COMMIT_FREE_ON_EMPTY);
 	}
-
 	return (error);
 }
 
@@ -4592,14 +4727,16 @@ nfs_blktooff(ap)
 	int biosize;
 	register struct vnode *vp = ap->a_vp;
 
-	biosize = min(vp->v_mount->mnt_stat.f_iosize, PAGE_SIZE); /* nfs_bio.c */
+	if (!vp->v_mount)
+		return (ENXIO);
 
-	*ap->a_offset = (off_t)(ap->a_lblkno *  biosize);
+	biosize = vp->v_mount->mnt_stat.f_iosize;
+
+	*ap->a_offset = (off_t)ap->a_lblkno * biosize;
 
 	return (0);
 }
 
-/* Blktooff derives file offset given a logical block number */
 static int
 nfs_offtoblk(ap)
 	struct vop_offtoblk_args /* {
@@ -4611,9 +4748,12 @@ nfs_offtoblk(ap)
 	int biosize;
 	register struct vnode *vp = ap->a_vp;
 
-	biosize = min(vp->v_mount->mnt_stat.f_iosize, PAGE_SIZE); /* nfs_bio.c */
+	if (!vp->v_mount)
+		return (ENXIO);
+
+	biosize = vp->v_mount->mnt_stat.f_iosize;
 
-	*ap->a_lblkno = (daddr_t)(ap->a_offset /  biosize);
+	*ap->a_lblkno = (daddr_t)(ap->a_offset / biosize);
 
 	return (0);
 }