X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/d7e50217d7adf6e52786a38bcaa4cd698cb9a79e..4a2492630c73add3c3aa8a805ba4ff343d4a58ea:/bsd/nfs/nfs_socket.c

diff --git a/bsd/nfs/nfs_socket.c b/bsd/nfs/nfs_socket.c
index f2ee753a3..3e9aac142 100644
--- a/bsd/nfs/nfs_socket.c
+++ b/bsd/nfs/nfs_socket.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_LICENSE_HEADER_START@
  * 
@@ -84,6 +84,8 @@
 
 #include <sys/time.h>
 #include <kern/clock.h>
+#include <kern/task.h>
+#include <kern/thread.h>
 #include <sys/user.h>
 
 #include <netinet/in.h>
@@ -178,12 +180,22 @@ static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };
 int nfsrtton = 0;
 struct nfsrtt nfsrtt;
 
-static int	nfs_msg __P((struct proc *,char *,char *));
+static int	nfs_msg __P((struct proc *, const char *, const char *, int));
+static void	nfs_up(struct nfsreq *, const char *, int);
+static void	nfs_down(struct nfsreq *, const char *, int);
 static int	nfs_rcvlock __P((struct nfsreq *));
-static void	nfs_rcvunlock __P((int *flagp));
+static void	nfs_rcvunlock __P((struct nfsreq *));
 static int	nfs_receive __P((struct nfsreq *rep, struct mbuf **aname,
 				 struct mbuf **mp));
 static int	nfs_reconnect __P((struct nfsreq *rep));
+static void	nfs_repbusy(struct nfsreq *rep);
+static struct nfsreq *	nfs_repnext(struct nfsreq *rep);
+static void	nfs_repdequeue(struct nfsreq *rep);
+
+/* XXX */
+boolean_t	current_thread_aborted(void);
+kern_return_t	thread_terminate(thread_act_t);
+
 #ifndef NFS_NOSERVER 
 static int	nfsrv_getstream __P((struct nfssvc_sock *,int));
 
@@ -344,20 +356,139 @@ nfsdup(struct nfsreq *rep)
 }
 #endif /* NFSDIAG */
 
+
+/*
+ * Try to bind the mount's socket (nmp->nm_so) to a reserved port.
+ */
+static int
+nfs_bind_resv(struct nfsmount *nmp)
+{
+	struct socket *sock = nmp->nm_so;
+	struct sockaddr_in addr;
+	u_short port = IPPORT_RESERVED - 1;
+	int error;
+
+	if (sock == NULL)
+		return (EINVAL);
+
+	addr.sin_len = sizeof (addr);
+	addr.sin_family = AF_INET;
+	addr.sin_addr.s_addr = INADDR_ANY;
+
+	/* step down from IPPORT_RESERVED - 1 for as long as the port is in use */
+	do {
+		addr.sin_port = htons(port);
+		error = sobind(sock, (struct sockaddr *) &addr);
+	} while (error == EADDRINUSE && --port > IPPORT_RESERVED / 2);
+	return (error);
+}
+
+/*
+ * variables for managing the nfs_bind_resv_thread
+ */
+int nfs_resv_mounts = 0;	/* nfs_bind_resv_thread loops while this is > 0 */
+static int nfs_bind_resv_thread_state = 0;	/* 0: lock/queue not set up yet */
+#define NFS_BIND_RESV_THREAD_STATE_INITTED	1	/* lock/queue set up, thread not running */
+#define NFS_BIND_RESV_THREAD_STATE_RUNNING	2	/* thread has been started */
+static struct slock nfs_bind_resv_slock;	/* taken around request queue updates */
+struct nfs_bind_resv_request {
+	TAILQ_ENTRY(nfs_bind_resv_request) brr_chain;	/* request queue linkage */
+	struct nfsmount *brr_nmp;	/* mount handed to nfs_bind_resv() */
+	int brr_error;			/* result stored by nfs_bind_resv_thread */
+};
+static TAILQ_HEAD(, nfs_bind_resv_request) nfs_bind_resv_request_queue;
+
+/*
+ * thread to handle any reserved port bind requests (see nfs_bind_resv_nopriv)
+ */
+static void
+nfs_bind_resv_thread(void)
+{
+	struct nfs_bind_resv_request *brreq;
+        boolean_t funnel_state;
+
+	funnel_state = thread_funnel_set(network_flock, TRUE);
+	nfs_bind_resv_thread_state = NFS_BIND_RESV_THREAD_STATE_RUNNING;
+
+	while (nfs_resv_mounts > 0) {	/* leave once the count reaches 0 */
+		simple_lock(&nfs_bind_resv_slock);
+		while ((brreq = TAILQ_FIRST(&nfs_bind_resv_request_queue))) {
+			TAILQ_REMOVE(&nfs_bind_resv_request_queue, brreq, brr_chain);
+			simple_unlock(&nfs_bind_resv_slock);	/* lock not held across the bind */
+			brreq->brr_error = nfs_bind_resv(brreq->brr_nmp);
+			wakeup(brreq);	/* requester sleeps on its own request */
+			simple_lock(&nfs_bind_resv_slock);
+		}
+		simple_unlock(&nfs_bind_resv_slock);
+		(void)tsleep((caddr_t)&nfs_bind_resv_request_queue, PSOCK,
+				"nfs_bind_resv_request_queue", 0);	/* until nfs_bind_resv_thread_wake() */
+	}
+
+	nfs_bind_resv_thread_state = NFS_BIND_RESV_THREAD_STATE_INITTED;	/* no longer running */
+	(void) thread_funnel_set(network_flock, funnel_state);
+	(void) thread_terminate(current_act());
+}
+
+int
+nfs_bind_resv_thread_wake(void)
+{
+	int running = (nfs_bind_resv_thread_state >= NFS_BIND_RESV_THREAD_STATE_RUNNING);
+	if (running)
+		wakeup(&nfs_bind_resv_request_queue);
+	return (running ? 0 : EIO);	/* EIO: no running bind thread to wake */
+}
+
+/*
+ * underprivileged procs call this to have nfs_bind_resv_thread do the
+ * reserved port bind; returns its result, or EIO if it cannot be woken.
+ */
+static int
+nfs_bind_resv_nopriv(struct nfsmount *nmp)
+{
+	struct nfs_bind_resv_request brreq;	/* on our stack while queued */
+	int error;
+
+	if (nfs_bind_resv_thread_state < NFS_BIND_RESV_THREAD_STATE_RUNNING) {
+		if (nfs_bind_resv_thread_state < NFS_BIND_RESV_THREAD_STATE_INITTED) {
+			simple_lock_init(&nfs_bind_resv_slock);	/* first use */
+			TAILQ_INIT(&nfs_bind_resv_request_queue);
+			nfs_bind_resv_thread_state = NFS_BIND_RESV_THREAD_STATE_INITTED;
+		}
+		kernel_thread(kernel_task, nfs_bind_resv_thread);
+		nfs_bind_resv_thread_state = NFS_BIND_RESV_THREAD_STATE_RUNNING;
+	}
+
+	brreq.brr_nmp = nmp;
+	brreq.brr_error = 0;
+	simple_lock(&nfs_bind_resv_slock);
+	TAILQ_INSERT_TAIL(&nfs_bind_resv_request_queue, &brreq, brr_chain);
+	simple_unlock(&nfs_bind_resv_slock);
+
+	error = nfs_bind_resv_thread_wake();
+	if (error) {
+		/* XXX could restart the thread; for now unqueue (locked) and fail */
+		simple_lock(&nfs_bind_resv_slock);
+		TAILQ_REMOVE(&nfs_bind_resv_request_queue, &brreq, brr_chain);
+		simple_unlock(&nfs_bind_resv_slock);
+		return (error);
+	}
+
+	(void) tsleep((caddr_t)&brreq, PSOCK, "nfsbindresv", 0);	/* thread does wakeup(brreq) */
+	return (brreq.brr_error);
+}
+
 /*
  * Initialize sockets and congestion for a new NFS connection.
  * We do not free the sockaddr if error.
  */
 int
 nfs_connect(nmp, rep)
-	register struct nfsmount *nmp;
+	struct nfsmount *nmp;
 	struct nfsreq *rep;
 {
-	register struct socket *so;
+	struct socket *so;
 	int s, error, rcvreserve, sndreserve;
 	struct sockaddr *saddr;
-	struct sockaddr_in sin;
-	u_short tport;
 
 	thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
 	nmp->nm_so = (struct socket *)0;
@@ -374,18 +505,22 @@ nfs_connect(nmp, rep)
 	 * Some servers require that the client port be a reserved port number.
 	 */
 	if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) {
-		sin.sin_len = sizeof (struct sockaddr_in);
-		sin.sin_family = AF_INET;
-		sin.sin_addr.s_addr = INADDR_ANY;
-		tport = IPPORT_RESERVED - 1;
-		sin.sin_port = htons(tport);
-
-		while ((error = sobind(so, (struct sockaddr *) &sin) == EADDRINUSE) &&
-		       (--tport > IPPORT_RESERVED / 2))
-			sin.sin_port = htons(tport);
-		if (error) {
-			goto bad;
+		struct proc *p;
+		/*
+		 * sobind() requires current_proc() to have superuser privs.
+		 * If this bind is part of a reconnect, and the current proc
+		 * doesn't have superuser privs, we hand the sobind() off to
+		 * a kernel thread to process.
+		 */
+		if ((nmp->nm_state & NFSSTA_MOUNTED) &&
+		    (p = current_proc()) && suser(p->p_ucred, &p->p_acflag)) {
+			/* request nfs_bind_resv_thread() to do bind */
+			error = nfs_bind_resv_nopriv(nmp);
+		} else {
+			error = nfs_bind_resv(nmp);
 		}
+		if (error)
+			goto bad;
 	}
 
 	/*
@@ -428,19 +563,24 @@ nfs_connect(nmp, rep)
 		}
 		splx(s);
 	}
+	/*
+	 * Always time out on receive, this allows us to reconnect the
+	 * socket to deal with network changes.
+	 */
+	so->so_rcv.sb_timeo = (2 * hz);
 	if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT)) {
-		so->so_rcv.sb_timeo = (5 * hz);
 		so->so_snd.sb_timeo = (5 * hz);
 	} else {
-		so->so_rcv.sb_timeo = 0;
 		so->so_snd.sb_timeo = 0;
 	}
 	if (nmp->nm_sotype == SOCK_DGRAM) {
-		sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 2;
-		rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR) * 2;
+		sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 3;
+		rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR) *
+			(nmp->nm_readahead > 0 ? nmp->nm_readahead + 1 : 2);
 	} else if (nmp->nm_sotype == SOCK_SEQPACKET) {
-		sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 2;
-		rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR) * 2;
+		sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 3;
+		rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR) *
+			(nmp->nm_readahead > 0 ? nmp->nm_readahead + 1 : 2);
 	} else {
 		if (nmp->nm_sotype != SOCK_STREAM)
 			panic("nfscon sotype");
@@ -450,6 +590,7 @@ nfs_connect(nmp, rep)
 			int val;
 
 			bzero(&sopt, sizeof sopt);
+			sopt.sopt_dir = SOPT_SET;
 			sopt.sopt_level = SOL_SOCKET;
 			sopt.sopt_name = SO_KEEPALIVE;
 			sopt.sopt_val = &val;
@@ -462,6 +603,7 @@ nfs_connect(nmp, rep)
 			int val;
 
 			bzero(&sopt, sizeof sopt);
+			sopt.sopt_dir = SOPT_SET;
 			sopt.sopt_level = IPPROTO_TCP;
 			sopt.sopt_name = TCP_NODELAY;
 			sopt.sopt_val = &val;
@@ -470,12 +612,15 @@ nfs_connect(nmp, rep)
 			sosetopt(so, &sopt);
 		}
 
-		sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR + sizeof (u_long))
-				* 2;
-		rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR + sizeof (u_long))
-				* 2;
+		sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR + sizeof (u_long)) * 3;
+		rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR + sizeof (u_long)) *
+				(nmp->nm_readahead > 0 ? nmp->nm_readahead + 1 : 2);
 	}
 
+	if (sndreserve > NFS_MAXSOCKBUF)
+		sndreserve = NFS_MAXSOCKBUF;
+	if (rcvreserve > NFS_MAXSOCKBUF)
+		rcvreserve = NFS_MAXSOCKBUF;
 	error = soreserve(so, sndreserve, rcvreserve);
 	if (error) {
 		goto bad;
@@ -492,7 +637,7 @@ nfs_connect(nmp, rep)
 		nmp->nm_sdrtt[3] = 0;
 	nmp->nm_cwnd = NFS_MAXCWND / 2;	    /* Initial send window */
 	nmp->nm_sent = 0;
-	FSDBG(529, nmp, nmp->nm_flag, nmp->nm_soflags, nmp->nm_cwnd);
+	FSDBG(529, nmp, nmp->nm_state, nmp->nm_soflags, nmp->nm_cwnd);
 	nmp->nm_timeouts = 0;
 	return (0);
 
@@ -523,6 +668,16 @@ nfs_reconnect(rep)
 	while ((error = nfs_connect(nmp, rep))) {
 		if (error == EINTR || error == ERESTART)
 			return (EINTR);
+		if (error == EIO)
+			return (EIO);
+		nfs_down(rep, "can not connect", error);
+		if (!(nmp->nm_state & NFSSTA_MOUNTED)) {
+			/* we're not yet completely mounted and */
+			/* we can't reconnect, so we fail */
+			return (error);
+		}
+		if ((error = nfs_sigintr(rep->r_nmp, rep, rep->r_procp)))
+			return (error);
 		(void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0);
 	}
 
@@ -531,7 +686,7 @@ nfs_reconnect(rep)
 	 * Loop through outstanding request list and fix up all requests
 	 * on old socket.
 	 */
-	for (rp = nfs_reqq.tqh_first; rp != 0; rp = rp->r_chain.tqe_next) {
+	TAILQ_FOREACH(rp, &nfs_reqq, r_chain) {
 		if (rp->r_nmp == nmp)
 			rp->r_flags |= R_MUSTRESEND;
 	}
@@ -578,15 +733,16 @@ nfs_send(so, nam, top, rep)
 	struct nfsreq *rep;
 {
 	struct sockaddr *sendnam;
-	int error, soflags, flags;
+	int error, error2, soflags, flags;
 	int xidqueued = 0;
 	struct nfsreq *rp;
 	char savenametolog[MNAMELEN];
 	
 	if (rep) {
-		if (rep->r_flags & R_SOFTTERM) {
+		error = nfs_sigintr(rep->r_nmp, rep, rep->r_procp);
+		if (error) {
 			m_freem(top);
-			return (EINTR);
+			return (error);
 		}
 		if ((so = rep->r_nmp->nm_so) == NULL) {
 			rep->r_flags |= R_MUSTRESEND;
@@ -595,7 +751,7 @@ nfs_send(so, nam, top, rep)
 		}
 		rep->r_flags &= ~R_MUSTRESEND;
 		soflags = rep->r_nmp->nm_soflags;
-		for (rp = nfs_reqq.tqh_first; rp; rp = rp->r_chain.tqe_next)
+		TAILQ_FOREACH(rp, &nfs_reqq, r_chain)
 			if (rp == rep)
 				break;
 		if (rp)
@@ -634,8 +790,7 @@ nfs_send(so, nam, top, rep)
 	if (error) {
 		if (rep) {
 			if (xidqueued) {
-				for (rp = nfs_reqq.tqh_first; rp;
-				     rp = rp->r_chain.tqe_next)
+				TAILQ_FOREACH(rp, &nfs_reqq, r_chain)
 					if (rp == rep && rp->r_xid == xidqueued)
 						break;
 				if (!rp)
@@ -647,9 +802,10 @@ nfs_send(so, nam, top, rep)
 			/*
 			 * Deal with errors for the client side.
 			 */
-			if (rep->r_flags & R_SOFTTERM)
-				error = EINTR;
-			else {
+			error2 = nfs_sigintr(rep->r_nmp, rep, rep->r_procp);
+			if (error2) {
+				error = error2;
+			} else {
 				rep->r_flags |= R_MUSTRESEND;
 				NFS_DPF(DUP,
 					("nfs_send RESEND error=%d\n", error));
@@ -660,9 +816,10 @@ nfs_send(so, nam, top, rep)
 		/*
 		 * Handle any recoverable (soft) socket errors here. (???)
 		 */
-		if (error != EINTR && error != ERESTART &&
-			error != EWOULDBLOCK && error != EPIPE)
+		if (error != EINTR && error != ERESTART && error != EIO &&
+			error != EWOULDBLOCK && error != EPIPE) {
 			error = 0;
+		}
 	}
 	return (error);
 }
@@ -692,7 +849,7 @@ nfs_receive(rep, aname, mp)
 	struct sockaddr *tmp_nam;
 	struct mbuf	*mhck;
 	struct sockaddr_in *sin;
-	int error, sotype, rcvflg;
+	int error, error2, sotype, rcvflg;
 	struct proc *p = current_proc();	/* XXX */
 
 	/*
@@ -711,7 +868,7 @@ nfs_receive(rep, aname, mp)
 	 * until we have an entire rpc request/reply.
 	 */
 	if (sotype != SOCK_DGRAM) {
-		error = nfs_sndlock(&rep->r_nmp->nm_flag, rep);
+		error = nfs_sndlock(rep);
 		if (error)
 			return (error);
 tryagain:
@@ -724,15 +881,17 @@ tryagain:
 		 * attempt that has essentially shut down this
 		 * mount point.
 		 */
-		if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) {
-			nfs_sndunlock(&rep->r_nmp->nm_flag);
+		if ((error = nfs_sigintr(rep->r_nmp, rep, p)) || rep->r_mrep) {
+			nfs_sndunlock(rep);
+			if (error)
+				return (error);
 			return (EINTR);
 		}
 		so = rep->r_nmp->nm_so;
 		if (!so) {
 			error = nfs_reconnect(rep);
 			if (error) {
-				nfs_sndunlock(&rep->r_nmp->nm_flag);
+				nfs_sndunlock(rep);
 				return (error);
 			}
 			goto tryagain;
@@ -751,13 +910,13 @@ tryagain:
 			if (error) {
 				if (error == EINTR || error == ERESTART ||
 				    (error = nfs_reconnect(rep))) {
-					nfs_sndunlock(&rep->r_nmp->nm_flag);
+					nfs_sndunlock(rep);
 					return (error);
 				}
 				goto tryagain;
 			}
 		}
-		nfs_sndunlock(&rep->r_nmp->nm_flag);
+		nfs_sndunlock(rep);
 		if (sotype == SOCK_STREAM) {
 			aio.iov_base = (caddr_t) &len;
 			aio.iov_len = sizeof(u_long);
@@ -773,12 +932,13 @@ tryagain:
 			   thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
 			   error = soreceive(so, (struct sockaddr **)0, &auio,
 				(struct mbuf **)0, (struct mbuf **)0, &rcvflg);
-				thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
+			   thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
 			   if (!rep->r_nmp) /* if unmounted then bailout */
 				goto shutout;
 			   if (error == EWOULDBLOCK && rep) {
-				if (rep->r_flags & R_SOFTTERM)
-					return (EINTR);
+				error2 = nfs_sigintr(rep->r_nmp, rep, p);
+				if (error2)
+					error = error2;
 			   }
 			} while (error == EWOULDBLOCK);
 			if (!error && auio.uio_resid > 0) {
@@ -844,16 +1004,18 @@ tryagain:
 			    rcvflg = 0;
 			    error =  soreceive(so, (struct sockaddr **)0,
 					       &auio, mp, &control, &rcvflg);
+			    if (control)
+				m_freem(control);
 			    if (!rep->r_nmp) /* if unmounted then bailout */ {
 				thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
 				goto shutout;
  			    }   
-			    if (control)
-				m_freem(control);
 			    if (error == EWOULDBLOCK && rep) {
-				if (rep->r_flags & R_SOFTTERM) {
-				    thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
-				    return (EINTR);
+				error2 = nfs_sigintr(rep->r_nmp, rep, p);
+				if (error2) {
+					thread_funnel_switch(NETWORK_FUNNEL,
+					    KERNEL_FUNNEL);
+					return (error2);
 				}
 			    }
 			} while (error == EWOULDBLOCK ||
@@ -876,15 +1038,29 @@ errout:
 				    "receive error %d from nfs server %s\n",
 				    error,
 				 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
-			error = nfs_sndlock(&rep->r_nmp->nm_flag, rep);
+			error = nfs_sndlock(rep);
 			if (!error)
 				error = nfs_reconnect(rep);
 			if (!error)
 				goto tryagain;
 		}
 	} else {
-		if ((so = rep->r_nmp->nm_so) == NULL)
-			return (EACCES);
+		/*
+		 * We could have failed while rebinding the datagram socket
+		 * so we need to attempt to rebind here.
+		 */
+		if ((so = rep->r_nmp->nm_so) == NULL) {
+			error = nfs_sndlock(rep);
+			if (!error) {
+				error = nfs_reconnect(rep);
+				nfs_sndunlock(rep);
+			}
+			if (error)
+				return (error);
+			if (!rep->r_nmp) /* if unmounted then bailout */
+				return (ENXIO);
+			so = rep->r_nmp->nm_so;
+		}
 		if (so->so_state & SS_ISCONNECTED)
 			getnam = (struct sockaddr **)0;
 		else
@@ -907,18 +1083,44 @@ errout:
 			    FREE(*getnam, M_SONAME);
 			    *aname = mhck;
 			}
-			if (!rep->r_nmp) /* if unmounted then bailout */ {
-				thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
-			    goto shutout;
-			}    
-
-			if (error == EWOULDBLOCK &&
-			    (rep->r_flags & R_SOFTTERM)) {
-				thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
-				return (EINTR);
+			if (!rep->r_nmp) /* if unmounted then bailout */
+				goto dgramout;
+			if (error) {
+				error2 = nfs_sigintr(rep->r_nmp, rep, p);
+				if (error2) {
+					error = error2;
+					goto dgramout;
+				}
+			}
+			/* Reconnect for all errors.  We may be receiving
+			 * soft/hard/blocking errors because of a network
+			 * change.
+			 * XXX: we should rate limit or delay this
+			 * to once every N attempts or something.
+			 * although TCP doesn't seem to.
+			 */
+			if (error) {
+				thread_funnel_switch(NETWORK_FUNNEL,
+				    KERNEL_FUNNEL);
+				error2 = nfs_sndlock(rep);
+				if (!error2) {
+					error2 = nfs_reconnect(rep);
+					if (error2)
+						error = error2;
+					else if (!rep->r_nmp) /* if unmounted then bailout */
+						error = ENXIO;
+					else
+						so = rep->r_nmp->nm_so;
+					nfs_sndunlock(rep);
+				} else {
+					error = error2;
+				}
+				thread_funnel_switch(KERNEL_FUNNEL,
+				    NETWORK_FUNNEL);
 			}
 		} while (error == EWOULDBLOCK);
 
+dgramout:
 		thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
 		len -= auio.uio_resid;
 	}
@@ -976,7 +1178,7 @@ nfs_reply(myrep)
 		 * would hang trying to nfs_receive an already received reply.
 		 */
 		if (myrep->r_mrep != NULL) {
-			nfs_rcvunlock(&nmp->nm_flag);
+			nfs_rcvunlock(myrep);
 			FSDBG(530, myrep->r_xid, myrep, myrep->r_nmp, -1);
 			return (0);
 		}
@@ -985,20 +1187,22 @@ nfs_reply(myrep)
 		 * is still intact by checks done in nfs_rcvlock.
 		 */
 		error = nfs_receive(myrep, &nam, &mrep);
+		if (nam)
+			m_freem(nam);
 		/*
 		 * Bailout asap if nfsmount struct gone (unmounted). 
 		 */
 		if (!myrep->r_nmp || !nmp->nm_so) {
 			FSDBG(530, myrep->r_xid, myrep, nmp, -2);
-			return (ECONNABORTED);
+			return (ENXIO);
 		}
 		if (error) {
 			FSDBG(530, myrep->r_xid, myrep, nmp, error);
-			nfs_rcvunlock(&nmp->nm_flag);
+			nfs_rcvunlock(myrep);
 
 			/* Bailout asap if nfsmount struct gone (unmounted). */
 			if (!myrep->r_nmp || !nmp->nm_so)
-				return (ECONNABORTED);
+				return (ENXIO);
 
 			/*
 			 * Ignore routing errors on connectionless protocols??
@@ -1011,8 +1215,6 @@ nfs_reply(myrep)
 			}
 			return (error);
 		}
-		if (nam)
-			m_freem(nam);
 
 		/*
 		 * We assume all is fine, but if we did not have an error
@@ -1029,7 +1231,7 @@ nfs_reply(myrep)
 		 */
 		if (!mrep) {
                         FSDBG(530, myrep->r_xid, myrep, nmp, -3);
-                        return (ECONNABORTED); /* sounds good */
+                        return (ENXIO); /* sounds good */
                 }
                         
 		/*
@@ -1053,8 +1255,8 @@ nfs_reply(myrep)
 			m_freem(mrep);
 #endif
 nfsmout:
-			if (nmp->nm_flag & NFSMNT_RCVLOCK)
-				nfs_rcvunlock(&nmp->nm_flag);
+			if (nmp->nm_state & NFSSTA_RCVLOCK)
+				nfs_rcvunlock(myrep);
 			if (myrep->r_flags & R_GETONEREP)
 				return (0); /* this path used by NQNFS */
 			continue;
@@ -1064,13 +1266,17 @@ nfsmout:
 		 * Loop through the request list to match up the reply
 		 * Iff no match, just drop the datagram
 		 */
-		for (rep = nfs_reqq.tqh_first; rep != 0;
-		    rep = rep->r_chain.tqe_next) {
+		TAILQ_FOREACH(rep, &nfs_reqq, r_chain) {
 			if (rep->r_mrep == NULL && rxid == rep->r_xid) {
 				/* Found it.. */
 				rep->r_mrep = mrep;
 				rep->r_md = md;
 				rep->r_dpos = dpos;
+				/*
+				 * If we're tracking the round trip time
+				 * then we update the circular log here
+				 * with the stats from our current request.
+				 */
 				if (nfsrtton) {
 					struct rttl *rt;
 
@@ -1084,7 +1290,7 @@ nfsmout:
 					rt->srtt = nmp->nm_srtt[proct[rep->r_procnum] - 1];
 					rt->sdrtt = nmp->nm_sdrtt[proct[rep->r_procnum] - 1];
 					rt->fsid = nmp->nm_mountp->mnt_stat.f_fsid;
-					rt->tstamp = time;
+					microtime(&rt->tstamp); // XXX unused
 					if (rep->r_flags & R_TIMING)
 						rt->rtt = rep->r_rtt;
 					else
@@ -1105,11 +1311,10 @@ nfsmout:
 					if (nmp->nm_cwnd > NFS_MAXCWND)
 						nmp->nm_cwnd = NFS_MAXCWND;
 				}
-				if (!(rep->r_flags & R_SENT))
-					printf("nfs_reply: unsent xid=%x",
-					      rep->r_xid);
-				rep->r_flags &= ~R_SENT;
-				nmp->nm_sent -= NFS_CWNDSCALE;
+                                if (rep->r_flags & R_SENT) {
+                                    rep->r_flags &= ~R_SENT;
+                                    nmp->nm_sent -= NFS_CWNDSCALE;
+                               }
 				/*
 				 * Update rtt using a gain of 0.125 on the mean
 				 * and a gain of 0.25 on the deviation.
@@ -1137,7 +1342,7 @@ nfsmout:
 				break;
 			}
 		}
-		nfs_rcvunlock(&nmp->nm_flag);
+		nfs_rcvunlock(myrep);
 		/*
 		 * If not matched to a request, drop it.
 		 * If it's mine, get out.
@@ -1179,7 +1384,7 @@ nfs_request(vp, mrest, procnum, procp, cred, mrp, mdp, dposp, xidp)
 	caddr_t *dposp;
 	u_int64_t *xidp;
 {
-	register struct mbuf *m, *mrep;
+	register struct mbuf *m, *mrep, *m2;
 	register struct nfsreq *rep, *rp;
 	register u_long *tl;
 	register int i;
@@ -1196,33 +1401,33 @@ nfs_request(vp, mrest, procnum, procp, cred, mrp, mdp, dposp, xidp)
 	u_quad_t frev;
 	char *auth_str, *verf_str;
 	NFSKERBKEY_T key;		/* save session key */
+	int nmsotype;
+	struct timeval now;
 
 	if (xidp)
 		*xidp = 0;
-	nmp = VFSTONFS(vp->v_mount);
+
 	MALLOC_ZONE(rep, struct nfsreq *,
 		    sizeof(struct nfsreq), M_NFSREQ, M_WAITOK);
-	FSDBG_TOP(531, vp, procnum, nmp, rep);
 
-	/*
-	 * make sure if we blocked above, that the file system didn't get
-	 * unmounted leaving nmp bogus value to trip on later and crash.
-	 * Note nfs_unmount will set rep->r_nmp if unmounted volume, but we
-	 * aren't that far yet. SO this is best we can do.  I wanted to check
-	 * for vp->v_mount = 0 also below, but that caused reboot crash.
-	 * Something must think it's okay for vp-v_mount=0 during booting.
-	 * Thus the best I can do here is see if we still have a vnode.
-	 */
-
-	if (vp->v_type == VBAD) {
-		FSDBG_BOT(531, 1, vp, nmp, rep);
-		_FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ);
-		return (EINVAL);
+	nmp = VFSTONFS(vp->v_mount);
+	if (nmp == NULL ||
+	    (nmp->nm_state & (NFSSTA_FORCE|NFSSTA_TIMEO)) ==
+	    (NFSSTA_FORCE|NFSSTA_TIMEO)) {
+		FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ);
+		return (ENXIO);
 	}
+	nmsotype = nmp->nm_sotype;
+
+	FSDBG_TOP(531, vp, procnum, nmp, rep);
+
 	rep->r_nmp = nmp;
 	rep->r_vp = vp;
 	rep->r_procp = procp;
 	rep->r_procnum = procnum;
+	microuptime(&now);
+	rep->r_lastmsg = now.tv_sec -
+	    ((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay));
 	i = 0;
 	m = mrest;
 	while (m) {
@@ -1235,6 +1440,12 @@ nfs_request(vp, mrest, procnum, procp, cred, mrp, mdp, dposp, xidp)
 	 * Get the RPC header with authorization.
 	 */
 kerbauth:
+	nmp = VFSTONFS(vp->v_mount);
+	if (!nmp) {
+		FSDBG_BOT(531, error, rep->r_xid, nmp, rep);
+		FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ);
+		return (ENXIO);
+	}
 	verf_str = auth_str = (char *)0;
 	if (nmp->nm_flag & NFSMNT_KERB) {
 		verf_str = nickv;
@@ -1243,11 +1454,22 @@ kerbauth:
 		bzero((caddr_t)key, sizeof (key));
 		if (failed_auth || nfs_getnickauth(nmp, cred, &auth_str,
 			&auth_len, verf_str, verf_len)) {
+			nmp = VFSTONFS(vp->v_mount);
+			if (!nmp) {
+				FSDBG_BOT(531, 2, vp, error, rep);
+				FREE_ZONE((caddr_t)rep,
+					sizeof (struct nfsreq), M_NFSREQ);
+				m_freem(mrest);
+				return (ENXIO);
+			}
 			error = nfs_getauth(nmp, rep, cred, &auth_str,
 				&auth_len, verf_str, &verf_len, key);
+			nmp = VFSTONFS(vp->v_mount);
+			if (!error && !nmp)
+				error = ENXIO;
 			if (error) {
 				FSDBG_BOT(531, 2, vp, error, rep);
-				_FREE_ZONE((caddr_t)rep,
+				FREE_ZONE((caddr_t)rep,
 					sizeof (struct nfsreq), M_NFSREQ);
 				m_freem(mrest);
 				return (error);
@@ -1271,7 +1493,7 @@ kerbauth:
 	/*
 	 * For stream protocols, insert a Sun RPC Record Mark.
 	 */
-	if (nmp->nm_sotype == SOCK_STREAM) {
+	if (nmsotype == SOCK_STREAM) {
 		M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
 		*mtod(m, u_long *) = htonl(0x80000000 |
 					   (m->m_pkthdr.len - NFSX_UNSIGNED));
@@ -1279,7 +1501,8 @@ kerbauth:
 	rep->r_mreq = m;
 	rep->r_xid = xid;
 tryagain:
-	if (nmp->nm_flag & NFSMNT_SOFT)
+	nmp = VFSTONFS(vp->v_mount);
+	if (nmp && (nmp->nm_flag & NFSMNT_SOFT))
 		rep->r_retry = nmp->nm_retry;
 	else
 		rep->r_retry = NFS_MAXREXMIT + 1;	/* past clip limit */
@@ -1302,19 +1525,22 @@ tryagain:
 	TAILQ_INSERT_TAIL(&nfs_reqq, rep, r_chain);
 
 	/* Get send time for nqnfs */
-	reqtime = time.tv_sec;
+	microtime(&now);
+	reqtime = now.tv_sec;
 
 	/*
 	 * If backing off another request or avoiding congestion, don't
 	 * send this one now but let timer do it. If not timing a request,
 	 * do it now.
 	 */
-	if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM ||
+	if (nmp && nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM ||
 			   (nmp->nm_flag & NFSMNT_DUMBTIMR) ||
 			   nmp->nm_sent < nmp->nm_cwnd)) {
+		int connrequired = (nmp->nm_soflags & PR_CONNREQUIRED);
+
 		splx(s);
-		if (nmp->nm_soflags & PR_CONNREQUIRED)
-			error = nfs_sndlock(&nmp->nm_flag, rep);
+		if (connrequired)
+			error = nfs_sndlock(rep);
 
 		/*
 		 * Set the R_SENT before doing the send in case another thread
@@ -1328,13 +1554,15 @@ tryagain:
 				rep->r_flags |= R_SENT;
 			}
 
-			m = m_copym(m, 0, M_COPYALL, M_WAIT);
-			error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep);
-			if (nmp->nm_soflags & PR_CONNREQUIRED)
-				nfs_sndunlock(&nmp->nm_flag);
+			m2 = m_copym(m, 0, M_COPYALL, M_WAIT);
+			error = nfs_send(nmp->nm_so, nmp->nm_nam, m2, rep);
+			if (connrequired)
+				nfs_sndunlock(rep);
 		}
+		nmp = VFSTONFS(vp->v_mount);
 		if (error) {
-			nmp->nm_sent -= NFS_CWNDSCALE;
+			if (nmp)
+				nmp->nm_sent -= NFS_CWNDSCALE;
 			rep->r_flags &= ~R_SENT;
 		}
 	} else {
@@ -1351,39 +1579,35 @@ tryagain:
 	/*
 	 * RPC done, unlink the request.
 	 */
-	s = splsoftclock();
-	for (rp = nfs_reqq.tqh_first; rp;
-	     rp = rp->r_chain.tqe_next)
-		if (rp == rep && rp->r_xid == xid)
-			break;
-	if (!rp)
-		panic("nfs_request race, rep %x xid %x", rep, xid);
-	TAILQ_REMOVE(&nfs_reqq, rep, r_chain);
-	splx(s);
+	nfs_repdequeue(rep);
+
+	nmp = VFSTONFS(vp->v_mount);
 
 	/*
 	 * Decrement the outstanding request count.
 	 */
 	if (rep->r_flags & R_SENT) {
-		FSDBG(531, rep->r_xid, rep, nmp->nm_sent, nmp->nm_cwnd);
 		rep->r_flags &= ~R_SENT;	/* paranoia */
-		nmp->nm_sent -= NFS_CWNDSCALE;
+		if (nmp) {
+			FSDBG(531, rep->r_xid, rep, nmp->nm_sent, nmp->nm_cwnd);
+			nmp->nm_sent -= NFS_CWNDSCALE;
+		}
 	}
 
 	/*
 	 * If there was a successful reply and a tprintf msg.
 	 * tprintf a response.
 	 */
-	if (!error && (rep->r_flags & R_TPRINTFMSG))
-		nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname,
-		    "is alive again");
+	nfs_up(rep, "is alive again", error);
 	mrep = rep->r_mrep;
 	md = rep->r_md;
 	dpos = rep->r_dpos;
+	if (!error && !nmp)
+		error = ENXIO;
 	if (error) {
 		m_freem(rep->r_mreq);
 		FSDBG_BOT(531, error, rep->r_xid, nmp, rep);
-		_FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ);
+		FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ);
 		return (error);
 	}
 
@@ -1408,7 +1632,7 @@ tryagain:
 		m_freem(mrep);
 		m_freem(rep->r_mreq);
 		FSDBG_BOT(531, error, rep->r_xid, nmp, rep);
-		_FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ);
+		FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ);
 		return (error);
 	}
 
@@ -1433,16 +1657,21 @@ tryagain:
 				error == NFSERR_TRYLATER) {
 				m_freem(mrep);
 				error = 0;
-				waituntil = time.tv_sec + trylater_delay;
+				microuptime(&now);
+				waituntil = now.tv_sec + trylater_delay;
 				NFS_DPF(DUP,
 					("nfs_request %s flag=%x trylater_cnt=%x waituntil=%lx trylater_delay=%x\n",
 					 nmp->nm_mountp->mnt_stat.f_mntfromname,
 					 nmp->nm_flag, trylater_cnt, waituntil,
 					 trylater_delay));
-				while (time.tv_sec < waituntil)
+				while (now.tv_sec < waituntil) {
 					(void)tsleep((caddr_t)&lbolt,
 						     PSOCK, "nqnfstry", 0);
-				trylater_delay *= nfs_backoff[trylater_cnt];
+					microuptime(&now);
+				}
+				trylater_delay *= 2;
+				if (trylater_delay > 60)
+					trylater_delay = 60;
 				if (trylater_cnt < 7)
 					trylater_cnt++;
 				goto tryagain;
@@ -1463,7 +1692,7 @@ tryagain:
 				m_freem(mrep);
 			m_freem(rep->r_mreq);
 			FSDBG_BOT(531, error, rep->r_xid, nmp, rep);
-			_FREE_ZONE((caddr_t)rep,
+			FREE_ZONE((caddr_t)rep,
 				   sizeof (struct nfsreq), M_NFSREQ);
 			return (error);
 		}
@@ -1479,7 +1708,8 @@ tryagain:
 				nfsm_dissect(tl, u_long *, 4*NFSX_UNSIGNED);
 				cachable = fxdr_unsigned(int, *tl++);
 				reqtime += fxdr_unsigned(int, *tl++);
-				if (reqtime > time.tv_sec) {
+				microtime(&now);
+				if (reqtime > now.tv_sec) {
 				    fxdr_hyper(tl, &frev);
 				    nqnfs_clientlease(nmp, np, nqlflag,
 						      cachable, reqtime, frev);
@@ -1499,7 +1729,7 @@ tryagain:
 nfsmout:
 	m_freem(rep->r_mreq);
 	FSDBG_BOT(531, error, rep->r_xid, nmp, rep);
-	_FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ);
+	FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ);
 	return (error);
 }
 
@@ -1670,6 +1900,7 @@ nfs_rephead(siz, nd, slp, err, cache, frev, mrq, mbp, bposp)
 static void
 nfs_softterm(struct nfsreq *rep)
 {
+
 	rep->r_flags |= R_SOFTTERM;
 	if (rep->r_flags & R_SENT) {
 		FSDBG(532, rep->r_xid, rep, rep->r_nmp->nm_sent,
@@ -1689,6 +1920,63 @@ nfs_timer_funnel(arg)
 
 }
 
+/*
+ * Wait until rep is no longer marked R_BUSY, then unlink it from nfs_reqq.
+ */
+void
+nfs_repdequeue(struct nfsreq *rep)
+{
+	int spl;
+
+	while (rep->r_flags & R_BUSY) {
+		rep->r_flags |= R_WAITING;	/* nfs_repnext() wakes R_WAITING sleepers */
+		tsleep(rep, PSOCK, "repdeq", 0);
+	}
+	spl = splsoftclock();	/* unlink is done at splsoftclock */
+	TAILQ_REMOVE(&nfs_reqq, rep, r_chain);
+	splx(spl);
+}
+
+/*
+ * Mark a nfsreq busy on behalf of the nfs timer so that nfs_repdequeue()
+ * holds off unlinking it while the timer is still using it.
+ */
+void
+nfs_repbusy(struct nfsreq *rep)
+{
+	/* busying does not nest: finding R_BUSY already set is fatal */
+	if (rep->r_flags & R_BUSY)
+		panic("rep locked");
+	rep->r_flags |= R_BUSY;
+}
+
+/*
+ * Release rep (clear R_BUSY, wake any waiter) and return the request after
+ * it on the chain, already busied; NULL at the end or when rep is NULL.
+ */
+struct nfsreq *
+nfs_repnext(struct nfsreq *rep)
+{
+	struct nfsreq *succ = NULL;
+
+	if (rep != NULL) {
+		/*
+		 * Busy the successor before signalling rep; wakeup() may
+		 * block us and we would then race to grab the next req.
+		 */
+		succ = TAILQ_NEXT(rep, r_chain);
+		if (succ != NULL)
+			nfs_repbusy(succ);
+		/* now unbusy rep and signal whoever is waiting on it */
+		rep->r_flags &= ~R_BUSY;
+		if (rep->r_flags & R_WAITING) {
+			rep->r_flags &= ~R_WAITING;
+			wakeup(rep);
+		}
+	}
+	return (succ);
+}
+
 /*
  * Nfs timer routine
  * Scan the nfsreq list and retranmit any requests that have timed out
@@ -1699,7 +1987,7 @@ void
 nfs_timer(arg)
 	void *arg;	/* never used */
 {
-	register struct nfsreq *rep, *rp;
+	register struct nfsreq *rep;
 	register struct mbuf *m;
 	register struct socket *so;
 	register struct nfsmount *nmp;
@@ -1715,17 +2003,16 @@ nfs_timer(arg)
 #endif
 	int flags, rexmit, cwnd, sent;
 	u_long xid;
+	struct timeval now;
 
 	s = splnet();
 	/*
 	 * XXX If preemptable threads are implemented the spls used for the
 	 * outstanding request queue must be replaced with mutexes.
 	 */
-rescan:
 #ifdef NFSTRACESUSPENDERS
 	if (NFSTRACE_SUSPENDING) {
-		for (rep = nfs_reqq.tqh_first; rep != 0;
-		     rep = rep->r_chain.tqe_next)
+		TAILQ_FOREACH(rep, &nfs_reqq, r_chain)
 			if (rep->r_xid == nfstracexid)
 				break;
 		if (!rep) {
@@ -1735,7 +2022,11 @@ rescan:
 		}
 	}
 #endif
-	for (rep = nfs_reqq.tqh_first; rep != 0; rep = rep->r_chain.tqe_next) {
+	rep = TAILQ_FIRST(&nfs_reqq);
+	if (rep != NULL)
+		nfs_repbusy(rep);
+	microuptime(&now);
+	for ( ; rep != NULL ; rep = nfs_repnext(rep)) {
 #ifdef NFSTRACESUSPENDERS
 		if (rep->r_mrep && !NFSTRACE_SUSPENDING) {
 			nfstracexid = rep->r_xid;
@@ -1747,9 +2038,20 @@ rescan:
 		    continue;
 		if (rep->r_mrep || (rep->r_flags & R_SOFTTERM))
 			continue;
-		if (nfs_sigintr(nmp, rep, rep->r_procp)) {
-			nfs_softterm(rep);
+		if (nfs_sigintr(nmp, rep, rep->r_procp))
 			continue;
+		if (nmp->nm_tprintf_initial_delay != 0 &&
+		    (rep->r_rexmit > 2 || (rep->r_flags & R_RESENDERR)) &&
+		    rep->r_lastmsg + nmp->nm_tprintf_delay < now.tv_sec) {
+			rep->r_lastmsg = now.tv_sec;
+			nfs_down(rep, "not responding", 0);
+			if (!(nmp->nm_state & NFSSTA_MOUNTED)) {
+				/* we're not yet completely mounted and */
+				/* we can't complete an RPC, so we fail */
+				nfsstats.rpctimeouts++;
+				nfs_softterm(rep);
+				continue;
+			}
 		}
 		if (rep->r_rtt >= 0) {
 			rep->r_rtt++;
@@ -1768,15 +2070,10 @@ rescan:
 				nmp->nm_timeouts++;
 		}
 		/*
-		 * Check for server not responding
+		 * Check for too many retransmits.  This is never true for
+		 * 'hard' mounts because we set r_retry to NFS_MAXREXMIT + 1
+		 * and never allow r_rexmit to be more than NFS_MAXREXMIT.
 		 */
-		if ((rep->r_flags & R_TPRINTFMSG) == 0 &&
-		     rep->r_rexmit > nmp->nm_deadthresh) {
-			nfs_msg(rep->r_procp,
-			    nmp->nm_mountp->mnt_stat.f_mntfromname,
-			    "not responding");
-			rep->r_flags |= R_TPRINTFMSG;
-		}
 		if (rep->r_rexmit >= rep->r_retry) {	/* too many */
 			nfsstats.rpctimeouts++;
 			nfs_softterm(rep);
@@ -1857,29 +2154,11 @@ rescan:
 			thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
 
 			FSDBG(535, xid, error, sent, cwnd);
-			/*
-			 * This is to fix "nfs_sigintr" DSI panics.
-			 * We may have slept during the send so the current
-			 * place in the request queue may have been released.
-			 * Due to zone_gc it may even be part of an
-			 * unrelated newly allocated data structure.
-			 * Restart the list scan from the top if needed...
-			 */
-			for (rp = nfs_reqq.tqh_first; rp;
-			     rp = rp->r_chain.tqe_next)
-				if (rp == rep && rp->r_xid == xid)
-					break;
-			if (!rp) {
-				if (!error)
-					goto rescan;
-				panic("nfs_timer: race error %d xid 0x%x\n",
-				      error, xid);
-			}
 
 			if (error) {
 				if (NFSIGNORE_SOERROR(nmp->nm_soflags, error))
 					so->so_error = 0;
-				rep->r_flags  = flags;
+				rep->r_flags  = flags | R_RESENDERR;
 				rep->r_rexmit = rexmit;
 				nmp->nm_cwnd = cwnd;
 				nmp->nm_sent = sent;
@@ -1893,8 +2172,9 @@ rescan:
 	/*
 	 * Call the nqnfs server timer once a second to handle leases.
 	 */
-	if (lasttime != time.tv_sec) {
-		lasttime = time.tv_sec;
+	microuptime(&now);
+	if (lasttime != now.tv_sec) {
+		lasttime = now.tv_sec;
 		nqnfs_serverd();
 	}
 
@@ -1902,10 +2182,10 @@ rescan:
 	 * Scan the write gathering queues for writes that need to be
 	 * completed now.
 	 */
-	cur_usec = (u_quad_t)time.tv_sec * 1000000 + (u_quad_t)time.tv_usec;
-	for (slp = nfssvc_sockhead.tqh_first; slp != 0;
-	    slp = slp->ns_chain.tqe_next) {
-	    if (slp->ns_tq.lh_first && slp->ns_tq.lh_first->nd_time<=cur_usec)
+	cur_usec = (u_quad_t)now.tv_sec * 1000000 + (u_quad_t)now.tv_usec;
+	TAILQ_FOREACH(slp, &nfssvc_sockhead, ns_chain) {
+	    if (LIST_FIRST(&slp->ns_tq) &&
+		LIST_FIRST(&slp->ns_tq)->nd_time <= cur_usec)
 		nfsrv_wakenfsd(slp);
 	}
 #endif /* NFS_NOSERVER */
@@ -1917,26 +2197,82 @@ rescan:
 
 /*
  * Test for a termination condition pending on the process.
- * This is used for NFSMNT_INT mounts.
+ * This is used to determine if we need to bail on a mount.
+ * EIO is returned if there has been a soft timeout.
+ * EINTR is returned if there is a signal pending that is not being ignored
+ * and the mount is interruptible, or if we are a thread that is in the process
+ * of cancellation (also SIGKILL posted).
  */
 int
 nfs_sigintr(nmp, rep, p)
 	struct nfsmount *nmp;
 	struct nfsreq *rep;
-	register struct proc *p;
+	struct proc *p;
 {
+	struct uthread *curr_td;
+	sigset_t pending_sigs;
+	int context_good = 0;
+	struct nfsmount *repnmp;
+
+	if (nmp == NULL)
+		return (ENXIO);
+	if (rep != NULL) {
+		repnmp = rep->r_nmp;
+		/* we've had a forced unmount. */
+		if (repnmp == NULL)
+			return (ENXIO);
+		/* request has timed out on a 'soft' mount. */
+		if (rep->r_flags & R_SOFTTERM)
+			return (EIO);
+		/*
+		 * A forced unmount is in progress and the mount has also
+		 * timed out: treat it as dead and fail the I/O.
+		 */
+		if ((repnmp->nm_state & (NFSSTA_FORCE|NFSSTA_TIMEO)) ==
+		   (NFSSTA_FORCE|NFSSTA_TIMEO))
+			return (EIO);
+		/* Someone is unmounting the request's mount: go soft, mark it. */
+		if ((repnmp->nm_mountp->mnt_kern_flag & MNTK_FRCUNMOUNT)) {
+			repnmp->nm_flag |= NFSMNT_SOFT;
+			repnmp->nm_state |= NFSSTA_FORCE;
+		}
+		/*
+		 * If the mount is hung and we've requested not to hang
+		 * on remote filesystems, then bail now.
+		 */
+		if (p != NULL && (p->p_flag & P_NOREMOTEHANG) != 0 &&
+		    (repnmp->nm_state & NFSSTA_TIMEO) != 0)
+			return (EIO);
+	}
+	/* XXX: is this valid?  this probably should be an assertion. */
+	if (p == NULL)
+		return (0);
 
-	struct uthread *ut;
-
-	ut = (struct uthread *)get_bsdthread_info(current_act());
-
-	if (rep && (rep->r_flags & R_SOFTTERM))
+	/*
+	 * XXX: Since nfs doesn't have a good shot at getting the current
+	 * thread we take a guess.  (only struct proc * are passed to VOPs)
+	 * What we do is look at the current thread, if it belongs to the
+	 * passed in proc pointer then we have a "good/accurate" context
+	 * and can make an accurate guess as to what to do.
+	 * However if we have a bad context we have to make do with what
+	 * is in the proc struct which may not be as up to date as we'd
+	 * like.
+	 * This is ok because the process will call us with the correct
+	 * context after a short timeout while waiting for a response.
+	 */
+	curr_td = (struct uthread *)get_bsdthread_info(current_act());
+	if (curr_td != NULL && curr_td->uu_proc == p)
+		context_good = 1;
+	if (context_good && current_thread_aborted())
 		return (EINTR);
-	if (!(nmp->nm_flag & NFSMNT_INT))
-		return (0);
-	if (p && ut &&  ut->uu_siglist &&
-	    (((ut->uu_siglist & ~ut->uu_sigmask) & ~p->p_sigignore) &
-	    NFSINT_SIGMASK))
+	/* good context: thread-pending minus thread-blocked; else proc-pending. */
+	if (context_good)
+		pending_sigs = curr_td->uu_siglist & ~curr_td->uu_sigmask;
+	else
+		pending_sigs = p->p_siglist;
+	/* drop process-ignored signals; keep only those in NFSINT_SIGMASK. */
+	pending_sigs &= ~p->p_sigignore & NFSINT_SIGMASK;
+	if (pending_sigs && (nmp->nm_flag & NFSMNT_INT) != 0)
 		return (EINTR);
 	return (0);
 }
@@ -1948,25 +2284,29 @@ nfs_sigintr(nmp, rep, p)
  * in progress when a reconnect is necessary.
  */
 int
-nfs_sndlock(flagp, rep)
-	register int *flagp;
+nfs_sndlock(rep)
 	struct nfsreq *rep;
 {
+	register int *statep;
 	struct proc *p;
-	int slpflag = 0, slptimeo = 0;
+	int error, slpflag = 0, slptimeo = 0;
 
-	if (rep) {
-		p = rep->r_procp;
-		if (rep->r_nmp->nm_flag & NFSMNT_INT)
-			slpflag = PCATCH;
-	} else
-		p = (struct proc *)0;
-	while (*flagp & NFSMNT_SNDLOCK) {
-		if (nfs_sigintr(rep->r_nmp, rep, p))
-			return (EINTR);
-		*flagp |= NFSMNT_WANTSND;
-		(void) tsleep((caddr_t)flagp, slpflag | (PZERO - 1), "nfsndlck",
-			slptimeo);
+	if (rep->r_nmp == NULL)
+		return (ENXIO);
+	statep = &rep->r_nmp->nm_state;
+
+	p = rep->r_procp;
+	if (rep->r_nmp->nm_flag & NFSMNT_INT)
+		slpflag = PCATCH;
+	while (*statep & NFSSTA_SNDLOCK) {
+		error = nfs_sigintr(rep->r_nmp, rep, p);
+		if (error)
+			return (error);
+		*statep |= NFSSTA_WANTSND;
+		if (p != NULL && (p->p_flag & P_NOREMOTEHANG) != 0)
+			slptimeo = hz;
+		(void) tsleep((caddr_t)statep, slpflag | (PZERO - 1),
+			"nfsndlck", slptimeo);
 		if (slpflag == PCATCH) {
 			slpflag = 0;
 			slptimeo = 2 * hz;
@@ -1976,9 +2316,9 @@ nfs_sndlock(flagp, rep)
 		 * nfs_sigintr and callers expect it in tact.
 		 */
 		if (!rep->r_nmp) 
-			return (ECONNABORTED); /* don't have lock until out of loop */
+			return (ENXIO); /* don't have lock until out of loop */
 	}
-	*flagp |= NFSMNT_SNDLOCK;
+	*statep |= NFSSTA_SNDLOCK;
 	return (0);
 }
 
@@ -1986,16 +2326,20 @@ nfs_sndlock(flagp, rep)
  * Unlock the stream socket for others.
  */
 void
-nfs_sndunlock(flagp)
-	register int *flagp;
+nfs_sndunlock(rep)
+	struct nfsreq *rep;	/* request whose mount holds the send lock */
 {
+	register int *statep;
 
-	if ((*flagp & NFSMNT_SNDLOCK) == 0)
+	if (rep->r_nmp == NULL)	/* request has no mount: nothing to unlock */
+		return;
+	statep = &rep->r_nmp->nm_state;	/* lock bits are kept in nm_state */
+	if ((*statep & NFSSTA_SNDLOCK) == 0)	/* unlock without a held lock */
 		panic("nfs sndunlock");
-	*flagp &= ~NFSMNT_SNDLOCK;
-	if (*flagp & NFSMNT_WANTSND) {
-		*flagp &= ~NFSMNT_WANTSND;
-		wakeup((caddr_t)flagp);
+	*statep &= ~NFSSTA_SNDLOCK;
+	if (*statep & NFSSTA_WANTSND) {	/* set by nfs_sndlock() before sleeping */
+		*statep &= ~NFSSTA_WANTSND;
+		wakeup((caddr_t)statep);	/* same channel nfs_sndlock() sleeps on */
 	}
 }
 
@@ -2003,26 +2347,26 @@ static int
 nfs_rcvlock(rep)
 	register struct nfsreq *rep;
 {
-	register int *flagp;
-	int slpflag, slptimeo = 0;
+	register int *statep;
+	int error, slpflag, slptimeo = 0;
 
 	/* make sure we still have our mountpoint */
 	if (!rep->r_nmp) {
 		if (rep->r_mrep != NULL)
 			return (EALREADY);
-		return (ECONNABORTED);
+		return (ENXIO);
 	}
 
-	flagp = &rep->r_nmp->nm_flag;
-	FSDBG_TOP(534, rep->r_xid, rep, rep->r_nmp, *flagp);
-	if (*flagp & NFSMNT_INT)
+	statep = &rep->r_nmp->nm_state;
+	FSDBG_TOP(534, rep->r_xid, rep, rep->r_nmp, *statep);
+	if (rep->r_nmp->nm_flag & NFSMNT_INT)
 		slpflag = PCATCH;
 	else
 		slpflag = 0;
-	while (*flagp & NFSMNT_RCVLOCK) {
-		if (nfs_sigintr(rep->r_nmp, rep, rep->r_procp)) {
+	while (*statep & NFSSTA_RCVLOCK) {
+		if ((error = nfs_sigintr(rep->r_nmp, rep, rep->r_procp))) {
 			FSDBG_BOT(534, rep->r_xid, rep, rep->r_nmp, 0x100);
-			return (EINTR);
+			return (error);
 		} else if (rep->r_mrep != NULL) {
 			/*
 			 * Don't bother sleeping if reply already arrived
@@ -2031,9 +2375,16 @@ nfs_rcvlock(rep)
 			return (EALREADY);
 		}
 		FSDBG(534, rep->r_xid, rep, rep->r_nmp, 0x102);
-		*flagp |= NFSMNT_WANTRCV;
-		(void) tsleep((caddr_t)flagp, slpflag | (PZERO - 1), "nfsrcvlk",
-			      slptimeo);
+		*statep |= NFSSTA_WANTRCV;
+		/*
+		 * We need to poll if we're P_NOREMOTEHANG so that we
+		 * call nfs_sigintr periodically above.
+		 */
+		if (rep->r_procp != NULL &&
+		    (rep->r_procp->p_flag & P_NOREMOTEHANG) != 0)
+			slptimeo = hz;
+		(void) tsleep((caddr_t)statep, slpflag | (PZERO - 1),
+			      "nfsrcvlk", slptimeo);
 		if (slpflag == PCATCH) {
 			slpflag = 0;
 			slptimeo = 2 * hz;
@@ -2044,15 +2395,15 @@ nfs_rcvlock(rep)
 		 */
 		if (!rep->r_nmp)  {
 			FSDBG_BOT(534, rep->r_xid, rep, rep->r_nmp, 0x103);
-			return (ECONNABORTED); /* don't have lock until out of loop */
+			return (ENXIO); /* don't have lock until out of loop */
 		}
 	}
 	/*
 	 * nfs_reply will handle it if reply already arrived.
 	 * (We may have slept or been preempted while on network funnel).
 	 */
-	FSDBG_BOT(534, rep->r_xid, rep, rep->r_nmp, *flagp);
-	*flagp |= NFSMNT_RCVLOCK;
+	FSDBG_BOT(534, rep->r_xid, rep, rep->r_nmp, *statep);
+	*statep |= NFSSTA_RCVLOCK;
 	return (0);
 }
 
@@ -2060,17 +2411,22 @@ nfs_rcvlock(rep)
  * Unlock the stream socket for others.
  */
 static void
-nfs_rcvunlock(flagp)
-	register int *flagp;
+nfs_rcvunlock(rep)
+	register struct nfsreq *rep;	/* request whose mount holds the rcv lock */
 {
+	register int *statep;
+	
+	if (rep->r_nmp == NULL)	/* request has no mount: nothing to unlock */
+		return;
+	statep = &rep->r_nmp->nm_state;	/* lock bits are kept in nm_state */
 
-	FSDBG(533, flagp, *flagp, 0, 0);
-	if ((*flagp & NFSMNT_RCVLOCK) == 0)
+	FSDBG(533, statep, *statep, 0, 0);
+	if ((*statep & NFSSTA_RCVLOCK) == 0)	/* unlock without a held lock */
 		panic("nfs rcvunlock");
-	*flagp &= ~NFSMNT_RCVLOCK;
-	if (*flagp & NFSMNT_WANTRCV) {
-		*flagp &= ~NFSMNT_WANTRCV;
-		wakeup((caddr_t)flagp);
+	*statep &= ~NFSSTA_RCVLOCK;
+	if (*statep & NFSSTA_WANTRCV) {	/* set by nfs_rcvlock() before sleeping */
+		*statep &= ~NFSSTA_WANTRCV;
+		wakeup((caddr_t)statep);	/* same channel nfs_rcvlock() sleeps on */
 	}
 }
 
@@ -2083,7 +2439,7 @@ nfs_rcvunlock(flagp)
  * be called with M_WAIT from an nfsd.
  */
  /* 
- * Needs to eun under network funnel 
+ * Needs to run under network funnel 
  */
 void
 nfsrv_rcv(so, arg, waitflag)
@@ -2094,9 +2450,9 @@ nfsrv_rcv(so, arg, waitflag)
 	register struct nfssvc_sock *slp = (struct nfssvc_sock *)arg;
 	register struct mbuf *m;
 	struct mbuf *mp, *mhck;
-	struct sockaddr *nam=0;
+	struct sockaddr *nam;
 	struct uio auio;
-	int flags, error;
+	int flags, ns_nflag=0, error;
 	struct sockaddr_in  *sin;
 
 	if ((slp->ns_flag & SLP_VALID) == 0)
@@ -2106,7 +2462,8 @@ nfsrv_rcv(so, arg, waitflag)
 	 * Define this to test for nfsds handling this under heavy load.
 	 */
 	if (waitflag == M_DONTWAIT) {
-		slp->ns_flag |= SLP_NEEDQ; goto dorecs;
+		ns_nflag = SLPN_NEEDQ;
+		goto dorecs;
 	}
 #endif
 	auio.uio_procp = NULL;
@@ -2117,7 +2474,7 @@ nfsrv_rcv(so, arg, waitflag)
 		 * the nfs servers are heavily loaded.
 		 */
 		if (slp->ns_rec && waitflag == M_DONTWAIT) {
-			slp->ns_flag |= SLP_NEEDQ;
+			ns_nflag = SLPN_NEEDQ;
 			goto dorecs;
 		}
 
@@ -2129,9 +2486,9 @@ nfsrv_rcv(so, arg, waitflag)
 		error = soreceive(so, (struct sockaddr **) 0, &auio, &mp, (struct mbuf **)0, &flags);
 		if (error || mp == (struct mbuf *)0) {
 			if (error == EWOULDBLOCK)
-				slp->ns_flag |= SLP_NEEDQ;
+				ns_nflag = SLPN_NEEDQ;
 			else
-				slp->ns_flag |= SLP_DISCONN;
+				ns_nflag = SLPN_DISCONN;
 			goto dorecs;
 		}
 		m = mp;
@@ -2152,15 +2509,16 @@ nfsrv_rcv(so, arg, waitflag)
 		error = nfsrv_getstream(slp, waitflag);
 		if (error) {
 			if (error == EPERM)
-				slp->ns_flag |= SLP_DISCONN;
+				ns_nflag = SLPN_DISCONN;
 			else
-				slp->ns_flag |= SLP_NEEDQ;
+				ns_nflag = SLPN_NEEDQ;
 		}
 	} else {
 		do {
 			auio.uio_resid = 1000000000;
-			flags = MSG_DONTWAIT;
+			flags = MSG_DONTWAIT | MSG_NEEDSA;
 			nam = 0;
+			mp = 0;
 			error = soreceive(so, &nam, &auio, &mp,
 						(struct mbuf **)0, &flags);
 			
@@ -2171,7 +2529,6 @@ nfsrv_rcv(so, arg, waitflag)
 					sin = mtod(mhck, struct sockaddr_in *);
 					bcopy(nam, sin, sizeof(struct sockaddr_in));
 					mhck->m_hdr.mh_len = sizeof(struct sockaddr_in);
-					FREE(nam, M_SONAME);
 
 					m = mhck;
 					m->m_next = mp;
@@ -2184,10 +2541,13 @@ nfsrv_rcv(so, arg, waitflag)
 				slp->ns_recend = m;
 				m->m_nextpkt = (struct mbuf *)0;
 			}
+			if (nam) {
+				FREE(nam, M_SONAME);
+			}
 			if (error) {
 				if ((so->so_proto->pr_flags & PR_CONNREQUIRED)
 					&& error != EWOULDBLOCK) {
-					slp->ns_flag |= SLP_DISCONN;
+					ns_nflag = SLPN_DISCONN;
 					goto dorecs;
 				}
 			}
@@ -2198,8 +2558,10 @@ nfsrv_rcv(so, arg, waitflag)
 	 * Now try and process the request records, non-blocking.
 	 */
 dorecs:
+	if (ns_nflag)
+		slp->ns_nflag |= ns_nflag;
 	if (waitflag == M_DONTWAIT &&
-		(slp->ns_rec || (slp->ns_flag & (SLP_NEEDQ | SLP_DISCONN)))) {
+		(slp->ns_rec || (slp->ns_nflag & (SLPN_NEEDQ | SLPN_DISCONN)))) {
 		thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
 		nfsrv_wakenfsd(slp);
 		thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
@@ -2222,13 +2584,13 @@ nfsrv_getstream(slp, waitflag)
 	struct mbuf *om, *m2, *recm;
 	u_long recmark;
 
-	if (slp->ns_flag & SLP_GETSTREAM)
+	if (slp->ns_nflag & SLPN_GETSTREAM)
 		panic("nfs getstream");
-	slp->ns_flag |= SLP_GETSTREAM;
+	slp->ns_nflag |= SLPN_GETSTREAM;
 	for (;;) {
 	    if (slp->ns_reclen == 0) {
 		if (slp->ns_cc < NFSX_UNSIGNED) {
-			slp->ns_flag &= ~SLP_GETSTREAM;
+			slp->ns_nflag &= ~SLPN_GETSTREAM;
 			return (0);
 		}
 		m = slp->ns_raw;
@@ -2253,11 +2615,11 @@ nfsrv_getstream(slp, waitflag)
 		recmark = ntohl(recmark);
 		slp->ns_reclen = recmark & ~0x80000000;
 		if (recmark & 0x80000000)
-			slp->ns_flag |= SLP_LASTFRAG;
+			slp->ns_nflag |= SLPN_LASTFRAG;
 		else
-			slp->ns_flag &= ~SLP_LASTFRAG;
+			slp->ns_nflag &= ~SLPN_LASTFRAG;
 		if (slp->ns_reclen < NFS_MINPACKET || slp->ns_reclen > NFS_MAXPACKET) {
-			slp->ns_flag &= ~SLP_GETSTREAM;
+			slp->ns_nflag &= ~SLPN_GETSTREAM;
 			return (EPERM);
 		}
 	    }
@@ -2291,7 +2653,7 @@ nfsrv_getstream(slp, waitflag)
 					m->m_len -= slp->ns_reclen - len;
 					len = slp->ns_reclen;
 				} else {
-					slp->ns_flag &= ~SLP_GETSTREAM;
+					slp->ns_nflag &= ~SLPN_GETSTREAM;
 					return (EWOULDBLOCK);
 				}
 			} else if ((len + m->m_len) == slp->ns_reclen) {
@@ -2310,7 +2672,7 @@ nfsrv_getstream(slp, waitflag)
 		slp->ns_cc -= len;
 		slp->ns_reclen = 0;
 	    } else {
-		slp->ns_flag &= ~SLP_GETSTREAM;
+		slp->ns_nflag &= ~SLPN_GETSTREAM;
 		return (0);
 	    }
 
@@ -2321,7 +2683,7 @@ nfsrv_getstream(slp, waitflag)
 	    while (*mpp)
 		mpp = &((*mpp)->m_next);
 	    *mpp = recm;
-	    if (slp->ns_flag & SLP_LASTFRAG) {
+	    if (slp->ns_nflag & SLPN_LASTFRAG) {
 		if (slp->ns_recend)
 		    slp->ns_recend->m_nextpkt = slp->ns_frag;
 		else
@@ -2368,8 +2730,9 @@ nfsrv_dorec(slp, nfsd, ndp)
 	nd->nd_dpos = mtod(m, caddr_t);
 	error = nfs_getreq(nd, nfsd, TRUE);
 	if (error) {
-		m_freem(nam);
-		_FREE_ZONE((caddr_t)nd,	sizeof *nd, M_NFSRVDESC);
+		if (nam)
+			m_freem(nam);
+		FREE_ZONE((caddr_t)nd,	sizeof *nd, M_NFSRVDESC);
 		return (error);
 	}
 	*ndp = nd;
@@ -2399,7 +2762,7 @@ nfs_getreq(nd, nfsd, has_header)
 	int error = 0, nqnfs = 0, ticklen;
 	struct mbuf *mrep, *md;
 	register struct nfsuid *nuidp;
-	struct timeval tvin, tvout;
+	struct timeval tvin, tvout, now;
 #if 0				/* until encrypted keys are implemented */
 	NFSKERBKEYSCHED_T keys;	/* stores key schedule */
 #endif
@@ -2585,7 +2948,8 @@ nfs_getreq(nd, nfsd, has_header)
 
 			tvout.tv_sec = fxdr_unsigned(long, tvout.tv_sec);
 			tvout.tv_usec = fxdr_unsigned(long, tvout.tv_usec);
-			if (nuidp->nu_expire < time.tv_sec ||
+			microtime(&now);
+			if (nuidp->nu_expire < now.tv_sec ||
 			    nuidp->nu_timestamp.tv_sec > tvout.tv_sec ||
 			    (nuidp->nu_timestamp.tv_sec == tvout.tv_sec &&
 			     nuidp->nu_timestamp.tv_usec > tvout.tv_usec)) {
@@ -2637,7 +3001,7 @@ nfsrv_wakenfsd(slp)
 
 	if ((slp->ns_flag & SLP_VALID) == 0)
 		return;
-	for (nd = nfsd_head.tqh_first; nd != 0; nd = nd->nfsd_chain.tqe_next) {
+	TAILQ_FOREACH(nd, &nfsd_head, nfsd_chain) {
 		if (nd->nfsd_flag & NFSD_WAITING) {
 			nd->nfsd_flag &= ~NFSD_WAITING;
 			if (nd->nfsd_slp)
@@ -2654,9 +3018,10 @@ nfsrv_wakenfsd(slp)
 #endif /* NFS_NOSERVER */
 
 static int
-nfs_msg(p, server, msg)
+nfs_msg(p, server, msg, error)
 	struct proc *p;
-	char *server, *msg;
+	const char *server, *msg;
+	int error;
 {
 	tpr_t tpr;
 
@@ -2664,7 +3029,50 @@ nfs_msg(p, server, msg)
 		tpr = tprintf_open(p);
 	else
 		tpr = NULL;
-	tprintf(tpr, "nfs server %s: %s\n", server, msg);
+	if (error)
+		tprintf(tpr, "nfs server %s: %s, error %d\n", server, msg,
+		    error);
+	else
+		tprintf(tpr, "nfs server %s: %s\n", server, msg);
 	tprintf_close(tpr);
 	return (0);
 }
+
+/* Flag the request's mount as timed out (signalled once) and print msg. */
+static void
+nfs_down(rep, msg, error)
+	struct nfsreq *rep;
+	const char *msg;
+	int error;
+{
+	if (rep == NULL || rep->r_nmp == NULL)
+		return;
+	/* post VQ_NOTRESP only on the transition into the TIMEO state */
+	if (!(rep->r_nmp->nm_state & NFSSTA_TIMEO)) {
+		vfs_event_signal(&rep->r_nmp->nm_mountp->mnt_stat.f_fsid,
+		    VQ_NOTRESP, 0);
+		rep->r_nmp->nm_state |= NFSSTA_TIMEO;
+	}
+	rep->r_flags |= R_TPRINTFMSG;
+	nfs_msg(rep->r_procp, rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname,
+	    msg, error);
+}
+
+static void
+nfs_up(rep, msg, error)
+	struct nfsreq *rep;
+	const char *msg;
+	int error;
+{
+	/* nothing to do on error, or without a request that still has a mount */
+	if (error || !rep || !rep->r_nmp)
+		return;
+	if (rep->r_flags & R_TPRINTFMSG)
+		nfs_msg(rep->r_procp,
+		    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname, msg, 0);
+	if (rep->r_nmp->nm_state & NFSSTA_TIMEO) {
+		rep->r_nmp->nm_state &= ~NFSSTA_TIMEO;
+		vfs_event_signal(&rep->r_nmp->nm_mountp->mnt_stat.f_fsid,
+		    VQ_NOTRESP, 1);
+	}
+}