X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/55e303ae13a4cf49d70f2294092726f2fffb9ef2..13f56ec4e58bf8687e2a68032c093c0213dd519b:/bsd/nfs/nfs_lock.c

diff --git a/bsd/nfs/nfs_lock.c b/bsd/nfs/nfs_lock.c
index 4edfce39a..f76a9b6d0 100644
--- a/bsd/nfs/nfs_lock.c
+++ b/bsd/nfs/nfs_lock.c
@@ -1,16 +1,19 @@
 /*
- * Copyright (c) 2002-2003 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2002-2010 Apple Inc. All rights reserved.
  *
- * @APPLE_LICENSE_HEADER_START@
- * 
- * Copyright (c) 1999-2003 Apple Computer, Inc.  All Rights Reserved.
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
  * This file contains Original Code and/or Modifications of Original Code
  * as defined in and that are subject to the Apple Public Source License
  * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this
- * file.
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
  * 
  * The Original Code and all software distributed under the License are
  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
@@ -20,7 +23,7 @@
  * Please see the License for the specific language governing rights and
  * limitations under the License.
  * 
- * @APPLE_LICENSE_HEADER_END@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 /*-
  * Copyright (c) 1997 Berkeley Software Design, Inc. All rights reserved.
@@ -57,22 +60,21 @@
 #include <sys/systm.h>
 #include <sys/fcntl.h>
 #include <sys/kernel.h>		/* for hz */
-#include <sys/file.h>
-#include <sys/lock.h>
+#include <sys/file_internal.h>
 #include <sys/malloc.h>
 #include <sys/lockf.h>		/* for hz */ /* Must come after sys/malloc.h */
-#include <sys/mbuf.h>
-#include <sys/mount.h>
-#include <sys/namei.h>
-#include <sys/proc.h>
+#include <sys/kpi_mbuf.h>
+#include <sys/mount_internal.h>
+#include <sys/proc_internal.h>	/* for p_start */
+#include <sys/kauth.h>
 #include <sys/resourcevar.h>
 #include <sys/socket.h>
-#include <sys/socket.h>
 #include <sys/unistd.h>
 #include <sys/user.h>
-#include <sys/vnode.h>
+#include <sys/vnode_internal.h>
 
-#include <kern/thread_act.h>
+#include <kern/thread.h>
+#include <kern/host.h>
 
 #include <machine/limits.h>
 
@@ -81,432 +83,920 @@
 #include <nfs/rpcv2.h>
 #include <nfs/nfsproto.h>
 #include <nfs/nfs.h>
+#include <nfs/nfs_gss.h>
 #include <nfs/nfsmount.h>
 #include <nfs/nfsnode.h>
 #include <nfs/nfs_lock.h>
-#include <nfs/nlminfo.h>
 
-#define OFF_MAX QUAD_MAX
+#include <mach/host_priv.h>
+#include <mach/mig_errors.h>
+#include <mach/host_special_ports.h>
+#include <lockd/lockd_mach.h>
+
+extern void ipc_port_release_send(ipc_port_t);
+
+/*
+ * pending lock request messages are kept in this queue which is
+ * kept sorted by transaction ID (xid).
+ */
+static uint64_t nfs_lockxid = 0;
+static LOCKD_MSG_QUEUE nfs_pendlockq;
+
+/* list of mounts that are (potentially) making lockd requests */
+TAILQ_HEAD(nfs_lockd_mount_list,nfsmount) nfs_lockd_mount_list;
 
-uint64_t nfsadvlocks = 0;
-struct timeval nfsadvlock_longest = {0, 0};
-struct timeval nfsadvlocks_time = {0, 0};
+static lck_grp_t *nfs_lock_lck_grp;
+static lck_mtx_t *nfs_lock_mutex;
 
-pid_t nfslockdpid = 0;
-struct file *nfslockdfp = 0;
-int nfslockdwaiting = 0;
-int nfslockdfifowritten = 0;
-int nfslockdfifolock = 0;
-#define NFSLOCKDFIFOLOCK_LOCKED	1
-#define NFSLOCKDFIFOLOCK_WANT	2
+void nfs_lockdmsg_enqueue(LOCKD_MSG_REQUEST *);
+void nfs_lockdmsg_dequeue(LOCKD_MSG_REQUEST *);
+int nfs_lockdmsg_compare_to_answer(LOCKD_MSG_REQUEST *, struct lockd_ans *);
+LOCKD_MSG_REQUEST *nfs_lockdmsg_find_by_answer(struct lockd_ans *);
+LOCKD_MSG_REQUEST *nfs_lockdmsg_find_by_xid(uint64_t);
+uint64_t nfs_lockxid_get(void);
+int nfs_lockd_send_request(LOCKD_MSG *, int);
 
 /*
- * XXX
- * We have to let the process know if the call succeeded.  I'm using an extra
- * field in the uu_nlminfo field in the uthread structure, as it is already for
- * lockd stuff.
+ * initialize global nfs lock state
  */
+void
+nfs_lockinit(void)
+{
+	/* the mutex is allocated from the lock group, so create the group first */
+	nfs_lock_lck_grp = lck_grp_alloc_init("nfs_lock", LCK_GRP_ATTR_NULL);
+	nfs_lock_mutex = lck_mtx_alloc_init(nfs_lock_lck_grp, LCK_ATTR_NULL);
+	TAILQ_INIT(&nfs_pendlockq);		/* no lockd requests pending yet */
+	TAILQ_INIT(&nfs_lockd_mount_list);	/* no mounts registered yet */
+}
 
 /*
- * nfs_advlock --
- *      NFS advisory byte-level locks.
+ * Register a mount as (potentially) making lockd requests.
  */
-int
-nfs_dolock(struct vop_advlock_args *ap)
-/* struct vop_advlock_args {
-        struct vnodeop_desc *a_desc;
-        struct vnode *a_vp;
-        caddr_t a_id;
-        int a_op;
-        struct flock *a_fl;
-        int a_flags;
-}; */
+void
+nfs_lockd_mount_register(struct nfsmount *nmp)
 {
-	LOCKD_MSG msg;
-	struct nameidata nd;
-	struct vnode *vp, *wvp;
-	struct nfsnode *np;
-	int error, error1;
-	struct flock *fl;
-	int fmode, ioflg;
-	struct proc *p;
-        struct uthread *ut;
-	struct timeval elapsed;
-	struct nfsmount *nmp;
-	struct vattr vattr;
-	off_t start, end;
+	lck_mtx_lock(nfs_lock_mutex);	/* the mount list and the mount count change together under this mutex */
+	TAILQ_INSERT_HEAD(&nfs_lockd_mount_list, nmp, nm_ldlink);
+	nfs_lockd_mounts++;	/* nfs_lockd_mount_unregister() tests this count for zero */
+	lck_mtx_unlock(nfs_lock_mutex);
+}
+
+/*
+ * Unregister a mount as (potentially) making lockd requests.
+ *
+ * When the lockd mount count drops to zero and at least one request has been
+ * sent to lockd, clear the "request sent" flag and send lockd a shutdown.
+ */
+void
+nfs_lockd_mount_unregister(struct nfsmount *nmp)
+{
+	int send_shutdown;
+	mach_port_t lockd_port = IPC_PORT_NULL;
+	kern_return_t kr;
 
-        ut = get_bsdthread_info(current_act());
-	p = current_proc();
+	lck_mtx_lock(nfs_lock_mutex);
+	TAILQ_REMOVE(&nfs_lockd_mount_list, nmp, nm_ldlink);
+	nfs_lockd_mounts--;
 
-	vp = ap->a_vp;
-	fl = ap->a_fl;
-	np = VTONFS(vp);
+	/* send a shutdown request if there are no more lockd mounts */
+	send_shutdown = ((nfs_lockd_mounts == 0) && nfs_lockd_request_sent);
+	if (send_shutdown)
+		nfs_lockd_request_sent = 0;	/* set again by the next request that gets sent */
 
-	nmp = VFSTONFS(vp->v_mount);
-	if (!nmp)
-		return (ENXIO);
-	if (nmp->nm_flag & NFSMNT_NOLOCKS)
-		return (EOPNOTSUPP);
+	lck_mtx_unlock(nfs_lock_mutex);	/* the decision is made; talk to lockd without the mutex */
+
+	if (!send_shutdown)
+		return;
 
 	/*
-	 * The NLM protocol doesn't allow the server to return an error
-	 * on ranges, so we do it.  Pre LFS (Large File Summit)
-	 * standards required EINVAL for the range errors.  More recent
-	 * standards use EOVERFLOW, but their EINVAL wording still
-	 * encompasses these errors.
-	 * Any code sensitive to this is either:
-	 *  1) written pre-LFS and so can handle only EINVAL, or
-	 *  2) written post-LFS and thus ought to be tolerant of pre-LFS
-	 *     implementations.
-	 * Since returning EOVERFLOW certainly breaks 1), we return EINVAL.
-	 */
-	if (fl->l_whence != SEEK_END) {
-		if ((fl->l_whence != SEEK_CUR && fl->l_whence != SEEK_SET) ||
-		    fl->l_start < 0 ||
-		    (fl->l_len > 0 && fl->l_len - 1 > OFF_MAX - fl->l_start) ||
-		    (fl->l_len < 0 && fl->l_start + fl->l_len < 0))
-			return (EINVAL);
-	}
-	/*
-	 * If daemon is running take a ref on its fifo
-	 */
-	if (!nfslockdfp || !(wvp = (struct vnode *)nfslockdfp->f_data)) {
-		if (!nfslockdwaiting)
-			return (EOPNOTSUPP);
-		/*
-		 * Don't wake lock daemon if it hasn't been started yet and
-		 * this is an unlock request (since we couldn't possibly
-		 * actually have a lock on the file).  This could be an
-		 * uninformed unlock request due to closef()'s behavior of doing
-		 * unlocks on all files if a process has had a lock on ANY file.
-		 */
-		if (!nfslockdfp && (fl->l_type == F_UNLCK))
-			return (EINVAL);
-		/* wake up lock daemon */
-		(void)wakeup((void *)&nfslockdwaiting);
-		/* wait on nfslockdfp for a while to allow daemon to start */
-		tsleep((void *)&nfslockdfp, PCATCH | PUSER, "lockd", 60*hz);
-		/* check for nfslockdfp and f_data */
-		if (!nfslockdfp || !(wvp = (struct vnode *)nfslockdfp->f_data))
-			return (EOPNOTSUPP);
-	}
-	VREF(wvp);
-	/*
-	 * if there is no nfsowner table yet, allocate one.
+	 * Let lockd know that it is no longer needed for any NFS mounts
 	 */
-	if (ut->uu_nlminfo == NULL) {
-		if (ap->a_op == F_UNLCK) {
-			vrele(wvp);
-			return (0);
-		}
-		MALLOC(ut->uu_nlminfo, struct nlminfo *,
-			sizeof(struct nlminfo), M_LOCKF, M_WAITOK | M_ZERO);
-		ut->uu_nlminfo->pid_start = p->p_stats->p_start;
+	kr = host_get_lockd_port(host_priv_self(), &lockd_port);
+	if ((kr != KERN_SUCCESS) || !IPC_PORT_VALID(lockd_port)) {
+		printf("nfs_lockd_mount_unregister: shutdown couldn't get port, kr %d, port %s\n",
+			kr, (lockd_port == IPC_PORT_NULL) ? "NULL" :
+			(lockd_port == IPC_PORT_DEAD) ? "DEAD" : "VALID");
+		return;
 	}
-	/*
-	 * Fill in the information structure.
-	 */
-	msg.lm_version = LOCKD_MSG_VERSION;
-	msg.lm_msg_ident.pid = p->p_pid;
-	msg.lm_msg_ident.ut = ut;
-	msg.lm_msg_ident.pid_start = ut->uu_nlminfo->pid_start;
-	msg.lm_msg_ident.msg_seq = ++(ut->uu_nlminfo->msg_seq);
 
-	/*
-	 * The NFS Lock Manager protocol doesn't directly handle
-	 * negative lengths or SEEK_END, so we need to normalize
-	 * things here where we have all the info.
-	 * (Note: SEEK_CUR is already adjusted for at this point)
-	 */
-	/* Convert the flock structure into a start and end. */
-	switch (fl->l_whence) {
-	case SEEK_SET:
-	case SEEK_CUR:
-		/*
-		 * Caller is responsible for adding any necessary offset
-		 * to fl->l_start when SEEK_CUR is used.
-		 */
-		start = fl->l_start;
-		break;
-	case SEEK_END:
-		/* need to flush, and refetch attributes to make */
-		/* sure we have the correct end of file offset   */
-		if (np->n_flag & NMODIFIED) {
-			np->n_attrstamp = 0;
-			error = nfs_vinvalbuf(vp, V_SAVE, p->p_ucred, p, 1);
-			if (error) {
-				vrele(wvp);
-				return (error);
-			}
-		}
-		np->n_attrstamp = 0;
-		error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
-		if (error) {
-			vrele(wvp);
-			return (error);
-		}
-		start = np->n_size + fl->l_start;
-		break;
-	default:
-		vrele(wvp);
-		return (EINVAL);
+	kr = lockd_shutdown(lockd_port);
+	if (kr != KERN_SUCCESS)
+		printf("nfs_lockd_mount_unregister: shutdown %d\n", kr);
+
+	ipc_port_release_send(lockd_port);	/* drop the send right obtained from host_get_lockd_port() */
+}
+
+/*
+ * insert a lock request message into the pending queue, keeping the queue
+ * sorted by ascending xid (nfs_lock_mutex must be held)
+ */
+void
+nfs_lockdmsg_enqueue(LOCKD_MSG_REQUEST *msgreq)
+{
+	LOCKD_MSG_REQUEST *mr;
+
+	mr = TAILQ_LAST(&nfs_pendlockq, nfs_lock_msg_queue);
+	if (!mr || (msgreq->lmr_msg.lm_xid > mr->lmr_msg.lm_xid)) {
+		/* fast path: empty queue or new largest xid */
+		TAILQ_INSERT_TAIL(&nfs_pendlockq, msgreq, lmr_next);
+		return;
 	}
-	if (fl->l_len == 0)
-		end = -1;
-	else if (fl->l_len > 0)
-		end = start + fl->l_len - 1;
-	else { /* l_len is negative */
-		end = start - 1;
-		start += fl->l_len;
+	/* slow path: walk back from the tail past every entry with a larger xid */
+	while (mr && (msgreq->lmr_msg.lm_xid < mr->lmr_msg.lm_xid)) {
+		mr = TAILQ_PREV(mr, nfs_lock_msg_queue, lmr_next);
 	}
-	if (start < 0) {
-		vrele(wvp);
-		return (EINVAL);
+	if (mr) {
+		TAILQ_INSERT_AFTER(&nfs_pendlockq, mr, msgreq, lmr_next);	/* mr's xid is not larger than the new one */
+	} else {
+		TAILQ_INSERT_HEAD(&nfs_pendlockq, msgreq, lmr_next);	/* every queued xid is larger */
 	}
+}
 
-	msg.lm_fl = *fl;
-	msg.lm_fl.l_start = start;
-	if (end != -1)
-		msg.lm_fl.l_len = end - start + 1;
+/*
+ * remove a lock request message from the pending queue; the message is only unlinked, not freed
+ * (nfs_lock_mutex must be held)
+ */
+void
+nfs_lockdmsg_dequeue(LOCKD_MSG_REQUEST *msgreq)
+{
+	TAILQ_REMOVE(&nfs_pendlockq, msgreq, lmr_next);	/* msgreq must currently be linked on nfs_pendlockq */
+}
 
-	msg.lm_wait = ap->a_flags & F_WAIT;
-	msg.lm_getlk = ap->a_op == F_GETLK;
+/*
+ * find a pending lock request message by xid; returns NULL if no pending request has that xid
+ *
+ * We search from the head of the list assuming that the message we're
+ * looking for is for an older request (because we have an answer to it).
+ * This assumes that lock request will be answered primarily in FIFO order.
+ * However, this may not be the case if there are blocked requests.  We may
+ * want to move blocked requests to a separate queue (but that'll complicate
+ * duplicate xid checking).
+ *
+ * (nfs_lock_mutex must be held)
+ */
+LOCKD_MSG_REQUEST *
+nfs_lockdmsg_find_by_xid(uint64_t lockxid)
+{
+	LOCKD_MSG_REQUEST *mr;
 
-	nmp = VFSTONFS(vp->v_mount);
-	if (!nmp) {
-		vrele(wvp);
-		return (ENXIO);
+	TAILQ_FOREACH(mr, &nfs_pendlockq, lmr_next) {
+		if (mr->lmr_msg.lm_xid == lockxid)
+			return mr;
+		if (mr->lmr_msg.lm_xid > lockxid)	/* queue is sorted by xid, so no later entry can match */
+			return NULL;
 	}
+	return mr;	/* the loop ran off the end of the queue, so mr is NULL here */
+}
 
-	bcopy(mtod(nmp->nm_nam, struct sockaddr *), &msg.lm_addr,
-	      min(sizeof msg.lm_addr,
-		  mtod(nmp->nm_nam, struct sockaddr *)->sa_len));
-	msg.lm_fh_len = NFS_ISV3(vp) ? VTONFS(vp)->n_fhsize : NFSX_V2FH;
-	bcopy(VTONFS(vp)->n_fhp, msg.lm_fh, msg.lm_fh_len);
-	msg.lm_nfsv3 = NFS_ISV3(vp);
-	cru2x(p->p_ucred, &msg.lm_cred);
+/*
+ * Because we can't depend on nlm_granted messages containing the same
+ * cookie we sent with the original lock request, we need code to test
+ * if an nlm_granted answer matches the lock request.  We also need code
+ * that can find a lockd message based solely on the nlm_granted answer.
+ */
 
-	microuptime(&ut->uu_nlminfo->nlm_lockstart);
+/*
+ * Test whether a lockd answer describes the same lock as a pending request.
+ *
+ * Returns 0 when they match and 1 when they differ.
+ */
+int
+nfs_lockdmsg_compare_to_answer(LOCKD_MSG_REQUEST *msgreq, struct lockd_ans *ansp)
+{
+	LOCKD_MSG *msg = &msgreq->lmr_msg;
+	/* an answer that carries no lock info cannot be matched to anything */
+	if (!(ansp->la_flags & LOCKD_ANS_LOCK_INFO))
+		return 1;
+	/* the owning pid and the byte range must all agree */
+	if ((msg->lm_fl.l_pid != ansp->la_pid) ||
+	    (msg->lm_fl.l_start != ansp->la_start) ||
+	    (msg->lm_fl.l_len != ansp->la_len))
+		return 1;
+	/* compare file handle lengths before comparing file handle bytes */
+	if (msg->lm_fh_len != ansp->la_fh_len)
+		return 1;
+	return bcmp(msg->lm_fh, ansp->la_fh, ansp->la_fh_len) ? 1 : 0;
+}
 
-	fmode = FFLAGS(O_WRONLY);
-	if ((error = VOP_OPEN(wvp, fmode, kernproc->p_ucred, p))) {
-		vrele(wvp);
-		return (error);
+/*
+ * find a pending lock request message based on the lock info provided
+ * in the lockd_ans/nlm_granted data.  We need this because we can't
+ * depend on nlm_granted messages containing the same cookie we sent
+ * with the original lock request.  Returns NULL when nothing matches.
+ *
+ * We search from the head of the list assuming that the message we're
+ * looking for is for an older request (because we have an answer to it).
+ * This assumes that lock request will be answered primarily in FIFO order.
+ * However, this may not be the case if there are blocked requests.  We may
+ * want to move blocked requests to a separate queue (but that'll complicate
+ * duplicate xid checking).
+ *
+ * (nfs_lock_mutex must be held)
+ */
+LOCKD_MSG_REQUEST *
+nfs_lockdmsg_find_by_answer(struct lockd_ans *ansp)
+{
+	LOCKD_MSG_REQUEST *mr;
+
+	if (!(ansp->la_flags & LOCKD_ANS_LOCK_INFO))
+		return NULL;	/* the answer has no lock info to match against */
+	TAILQ_FOREACH(mr, &nfs_pendlockq, lmr_next) {
+		if (!nfs_lockdmsg_compare_to_answer(mr, ansp))
+			break;	/* 0 means the answer matches this request */
 	}
-	++wvp->v_writecount;
+	return mr;	/* NULL if the loop ended without finding a match */
+}
 
-#define IO_NOMACCHECK 0;
-	ioflg = IO_UNIT | IO_NOMACCHECK;
-	for (;;) {
-		VOP_LEASE(wvp, p, kernproc->p_ucred, LEASE_WRITE);
+/*
+ * return the next unique lock request transaction ID; never returns 0
+ * (nfs_lock_mutex must be held)
+ */
+uint64_t
+nfs_lockxid_get(void)
+{
+	LOCKD_MSG_REQUEST *mr;
 
-		while (nfslockdfifolock & NFSLOCKDFIFOLOCK_LOCKED) {
-			nfslockdfifolock |= NFSLOCKDFIFOLOCK_WANT;
-			if (tsleep((void *)&nfslockdfifolock, PCATCH | PUSER, "lockdfifo", 20*hz))
-				break;
+	/* derive initial lock xid from system time */
+	if (!nfs_lockxid) {
+		/*
+		 * Note: it's OK if this code inits nfs_lockxid to 0 (for example,
+		 * due to a broken clock) because we immediately increment it
+		 * and we guarantee to never use xid 0.  So, nfs_lockxid should only
+		 * ever be 0 the first time this function is called.
+		 */
+		struct timeval tv;
+		microtime(&tv);
+		nfs_lockxid = (uint64_t)tv.tv_sec << 12;	/* seed only; later calls just increment */
+	}
+
+	/* make sure we get a unique xid */
+	do {
+		/* Skip zero xid if it should ever happen.  */
+		if (++nfs_lockxid == 0)
+			nfs_lockxid++;
+		if (!(mr = TAILQ_LAST(&nfs_pendlockq, nfs_lock_msg_queue)) ||
+		     (mr->lmr_msg.lm_xid < nfs_lockxid)) {
+			/* fast path: empty queue or new largest xid */
+			break;
 		}
-		nfslockdfifolock |= NFSLOCKDFIFOLOCK_LOCKED;
+		/* check if xid is already in use; if so, loop and try the next value */
+	} while (nfs_lockdmsg_find_by_xid(nfs_lockxid));
 
-		error = vn_rdwr(UIO_WRITE, wvp, (caddr_t)&msg, sizeof(msg), 0,
-		    UIO_SYSSPACE, ioflg, kernproc->p_ucred, NULL, p);
+	return nfs_lockxid;
+}
 
-		nfslockdfifowritten = 1;
+#define MACH_MAX_TRIES 3
 
-		nfslockdfifolock &= ~NFSLOCKDFIFOLOCK_LOCKED;
-		if (nfslockdfifolock & NFSLOCKDFIFOLOCK_WANT) {
-			nfslockdfifolock &= ~NFSLOCKDFIFOLOCK_WANT;
-			wakeup((void *)&nfslockdfifolock);
-		}
-		/* wake up lock daemon */
-		if (nfslockdwaiting)
-			(void)wakeup((void *)&nfslockdwaiting);
+/*
+ * Send one lock request message to lockd.  Returns ENOTSUP if the lockd
+ * port can't be obtained, EINTR if the send was interrupted, and EAGAIN
+ * for every other outcome (success included); the caller waits for the answer.
+ */
+int
+nfs_lockd_send_request(LOCKD_MSG *msg, int interruptable)
+{
+	kern_return_t kr;
+	int retries = 0;
+	mach_port_t lockd_port = IPC_PORT_NULL;
+
+	kr = host_get_lockd_port(host_priv_self(), &lockd_port);
+	if (kr != KERN_SUCCESS || !IPC_PORT_VALID(lockd_port))
+		return (ENOTSUP);
+
+	do {
+		/* In the kernel all mach messaging is interruptable */
+		do {
+			kr = lockd_request(
+				lockd_port,
+				msg->lm_version,
+				msg->lm_flags,
+				msg->lm_xid,
+				msg->lm_fl.l_start,
+				msg->lm_fl.l_len,
+				msg->lm_fl.l_pid,
+				msg->lm_fl.l_type,
+				msg->lm_fl.l_whence,
+				(uint32_t *)&msg->lm_addr,
+				(uint32_t *)&msg->lm_cred,
+				msg->lm_fh_len,
+				msg->lm_fh);
+			if (kr != KERN_SUCCESS)
+				printf("lockd_request received %d!\n", kr);
+		} while (!interruptable && kr == MACH_SEND_INTERRUPTED);	/* non-interruptable callers resend at once */
+	} while (kr == MIG_SERVER_DIED && retries++ < MACH_MAX_TRIES);	/* bounded resends when MIG reports the server died */
+
+	ipc_port_release_send(lockd_port);
+	/*
+	 * Only an interrupted send is reported to the caller as a failure.
+	 * Other MACH or MIG errors we will retry, so they (and success) are
+	 * returned as EAGAIN.  Eventually we will call nfs_down and allow
+	 * the user to disable locking.
+	 */
+	return ((kr == MACH_SEND_INTERRUPTED) ? EINTR : EAGAIN);
+}
+				
+
+/* NFS advisory byte-level locks (client): queue msgreq, send it to lockd and
+ * wait for the answer, resending (or cancelling a blocked request) on timeout.
+ * Returns 0 or an errno/NFSERR_* value; msgreq is off the queue on return. */
+int
+nfs3_lockd_request(
+	nfsnode_t np,
+	int type,
+	LOCKD_MSG_REQUEST *msgreq,
+	int flags,
+	thread_t thd)
+{
+	LOCKD_MSG *msg = &msgreq->lmr_msg;
+	int error, error2;
+	int interruptable, slpflag;
+	struct nfsmount *nmp;
+	struct timeval now;
+	int timeo, starttime, endtime, lastmsg, wentdown = 0;
+	struct timespec ts;
+	struct sockaddr *saddr;
 
-		if (error && (((ioflg & IO_NDELAY) == 0) || error != EAGAIN)) {
+	nmp = NFSTONMP(np);
+	if (!nmp || !nmp->nm_saddr)
+		return (ENXIO);
+
+	lck_mtx_lock(&nmp->nm_lock);
+	saddr = nmp->nm_saddr;
+	bcopy(saddr, &msg->lm_addr, min(sizeof msg->lm_addr, saddr->sa_len));
+	if (nmp->nm_vers == NFS_VER3)
+		msg->lm_flags |= LOCKD_MSG_NFSV3;
+#if 0 /* not yet */
+	if (nmp->nm_sotype != SOCK_DGRAM)
+		msg->lm_flags |= LOCKD_MSG_TCP;
+#endif
+
+	microuptime(&now);
+	starttime = now.tv_sec;
+	/* back-dated so the first "not responding" warning can fire once nm_tprintf_initial_delay has passed */
+	lastmsg = now.tv_sec - ((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay));
+	interruptable = NMFLAG(nmp, INTR);
+	lck_mtx_unlock(&nmp->nm_lock);
+
+	lck_mtx_lock(nfs_lock_mutex);
+
+	/* allocate unique xid */
+	msg->lm_xid = nfs_lockxid_get();
+	nfs_lockdmsg_enqueue(msgreq);
+
+	timeo = 4;	/* initial answer timeout, in seconds */
+
+	for (;;) {
+		nfs_lockd_request_sent = 1;	/* tells nfs_lockd_mount_unregister() a shutdown will be needed */
+
+		/* need to drop nfs_lock_mutex while calling nfs_lockd_send_request() */
+		lck_mtx_unlock(nfs_lock_mutex);
+		error = nfs_lockd_send_request(msg, interruptable);
+		lck_mtx_lock(nfs_lock_mutex);
+		if (error && error != EAGAIN)
 			break;
-		}
+
 		/*
-		 * If we're locking a file, wait for an answer.  Unlocks succeed
-		 * immediately.
+		 * Always wait for an answer.  Not waiting for unlocks could
+		 * cause a lock to be left if the unlock request gets dropped.
 		 */
-		if (fl->l_type == F_UNLCK)
-			/*
-			 * XXX this isn't exactly correct.  The client side
-			 * needs to continue sending it's unlock until
-			 * it gets a response back.
-			 */
-			break;
 
 		/*
-		 * retry after 20 seconds if we haven't gotten a response yet.
-		 * This number was picked out of thin air... but is longer
-		 * then even a reasonably loaded system should take (at least
-		 * on a local network).  XXX Probably should use a back-off
-		 * scheme.
+		 * Retry if it takes too long to get a response.
+		 *
+		 * The timeout numbers were picked out of thin air... they start
+		 * at 4 and double each timeout with a max of 30 seconds.
+		 *
+		 * In order to maintain responsiveness, we pass a small timeout
+		 * to msleep and calculate the timeouts ourselves.  This allows
+		 * us to pick up on mount changes quicker.
 		 */
-		if ((error = tsleep((void *)ut->uu_nlminfo,
-				    PCATCH | PUSER, "lockd", 20*hz)) != 0) {
-			if (error == EWOULDBLOCK) {
+wait_for_granted:
+		error = EWOULDBLOCK;
+		slpflag = (interruptable && (type != F_UNLCK)) ? PCATCH : 0;	/* unlock waits never use PCATCH */
+		ts.tv_sec = 2;	/* short msleep slices; the real deadline is endtime */
+		ts.tv_nsec = 0;
+		microuptime(&now);
+		endtime = now.tv_sec + timeo;
+		while (now.tv_sec < endtime) {
+			error = error2 = 0;
+			if (!msgreq->lmr_answered) {
+				error = msleep(msgreq, nfs_lock_mutex, slpflag | PUSER, "lockd", &ts);
+				slpflag = 0;	/* PCATCH is only passed to the first msleep of this wait */
+			}
+			if (msgreq->lmr_answered) {
 				/*
-				 * We timed out, so we rewrite the request
-				 * to the fifo, but only if it isn't already
-				 * full.
+				 * Note: it's possible to have a lock granted at
+				 * essentially the same time that we get interrupted.
+				 * Since the lock may be granted, we can't return an
+				 * error from this request or we might not unlock the
+				 * lock that's been granted.
 				 */
-				ioflg |= IO_NDELAY;
+				nmp = NFSTONMP(np);
+				if ((msgreq->lmr_errno == ENOTSUP) && nmp &&
+				    (nmp->nm_state & NFSSTA_LOCKSWORK)) {
+					/*
+					 * We have evidence that locks work, yet lockd
+					 * returned ENOTSUP.  This is probably because
+					 * it was unable to contact the server's lockd
+					 * to send it the request.
+					 *
+					 * Because we know locks work, we'll consider
+					 * this failure to be a timeout.
+					 */
+					error = EWOULDBLOCK;
+				} else {
+					error = 0;
+				}
+				break;
+			}
+			if (error != EWOULDBLOCK)
+				break;
+			/* check that we still have our mount... */
+			/* ...and that we still support locks */
+			/* ...and that there isn't a recovery pending */
+			nmp = NFSTONMP(np);
+			if ((error2 = nfs_sigintr(nmp, NULL, NULL, 0))) {
+				error = error2;
+				if (type == F_UNLCK)
+					printf("nfs3_lockd_request: aborting unlock request, error %d\n", error);
+				break;
+			}
+			lck_mtx_lock(&nmp->nm_lock);
+			if (nmp->nm_lockmode == NFS_LOCK_MODE_DISABLED) {
+				lck_mtx_unlock(&nmp->nm_lock);
+				break;
+			}
+			if ((nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) {
+				/* recovery pending... return an error that'll get this operation restarted */
+				error = NFSERR_GRACE;
+				lck_mtx_unlock(&nmp->nm_lock);
+				break;
+			}
+			interruptable = NMFLAG(nmp, INTR);
+			lck_mtx_unlock(&nmp->nm_lock);
+			microuptime(&now);
+		}
+		if (error) {
+			/* check that we still have our mount... */
+			nmp = NFSTONMP(np);
+			if ((error2 = nfs_sigintr(nmp, NULL, NULL, 0))) {
+				error = error2;
+				if (error2 != EINTR) {
+					if (type == F_UNLCK)
+						printf("nfs3_lockd_request: aborting unlock request, error %d\n", error);
+					break;
+				}
+			}
+			/* ...and that we still support locks */
+			lck_mtx_lock(&nmp->nm_lock);
+			if (nmp->nm_lockmode == NFS_LOCK_MODE_DISABLED) {
+				if (error == EWOULDBLOCK)
+					error = ENOTSUP;
+				lck_mtx_unlock(&nmp->nm_lock);
+				break;
+			}
+			/* ...and that there isn't a recovery pending */
+			if ((error == EWOULDBLOCK) && (nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) {
+				/* recovery pending... return to allow recovery to occur */
+				error = NFSERR_DENIED;
+				lck_mtx_unlock(&nmp->nm_lock);
+				break;
+			}
+			interruptable = NMFLAG(nmp, INTR);
+			if ((error != EWOULDBLOCK) ||
+			    ((nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) ||
+			    ((flags & R_RECOVER) && ((now.tv_sec - starttime) > 30))) {
+				if ((error == EWOULDBLOCK) && (flags & R_RECOVER)) {
+					/* give up if this is for recovery and taking too long */
+					error = ETIMEDOUT;
+				} else if ((nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) {
+					/* recovery pending... return an error that'll get this operation restarted */
+					error = NFSERR_GRACE;
+				}
+				lck_mtx_unlock(&nmp->nm_lock);
+				/*
+				 * We're going to bail on this request.
+				 * If we were a blocked lock request, send a cancel.
+				 */
+				if ((msgreq->lmr_errno == EINPROGRESS) &&
+				    !(msg->lm_flags & LOCKD_MSG_CANCEL)) {
+					/* set this request up as a cancel */
+					msg->lm_flags |= LOCKD_MSG_CANCEL;
+					nfs_lockdmsg_dequeue(msgreq);
+					msg->lm_xid = nfs_lockxid_get();
+					nfs_lockdmsg_enqueue(msgreq);
+					msgreq->lmr_saved_errno = error;
+					msgreq->lmr_errno = 0;
+					msgreq->lmr_answered = 0;
+					/* reset timeout */
+					timeo = 2;
+					/* send cancel request */
+					continue;
+				}
+				break;
+			}
+
+			/* warn if we're not getting any response */
+			microuptime(&now);
+			if ((msgreq->lmr_errno != EINPROGRESS) &&
+			    !(msg->lm_flags & LOCKD_MSG_DENIED_GRACE) &&
+			    (nmp->nm_tprintf_initial_delay != 0) &&
+			    ((lastmsg + nmp->nm_tprintf_delay) < now.tv_sec)) {
+				lck_mtx_unlock(&nmp->nm_lock);
+				lastmsg = now.tv_sec;
+				nfs_down(nmp, thd, 0, NFSSTA_LOCKTIMEO, "lockd not responding");
+				wentdown = 1;
+			} else
+				lck_mtx_unlock(&nmp->nm_lock);
+
+			if (msgreq->lmr_errno == EINPROGRESS) {
+				/*
+				 * We've got a blocked lock request that we are
+				 * going to retry.  First, we'll want to try to
+				 * send a cancel for the previous request.
+				 *
+				 * Clear errno so if we don't get a response
+				 * to the resend we'll call nfs_down().
+				 * Also reset timeout because we'll expect a
+				 * quick response to the cancel/resend (even if
+				 * it is NLM_BLOCKED).
+				 */
+				msg->lm_flags |= LOCKD_MSG_CANCEL;
+				nfs_lockdmsg_dequeue(msgreq);
+				msg->lm_xid = nfs_lockxid_get();
+				nfs_lockdmsg_enqueue(msgreq);
+				msgreq->lmr_saved_errno = msgreq->lmr_errno;
+				msgreq->lmr_errno = 0;
+				msgreq->lmr_answered = 0;
+				timeo = 2;
+				/* send cancel then resend request */
 				continue;
 			}
 
+			/*
+			 * We timed out, so we will resend the request.
+			 */
+			if (!(flags & R_RECOVER))
+				timeo *= 2;
+			if (timeo > 30)
+				timeo = 30;
+			/* resend request */
+			continue;
+		}
+
+		/* we got a response, so the server's lockd is OK */
+		nfs_up(NFSTONMP(np), thd, NFSSTA_LOCKTIMEO,
+			wentdown ? "lockd alive again" : NULL);
+		wentdown = 0;
+
+		if (msgreq->lmr_answered && (msg->lm_flags & LOCKD_MSG_DENIED_GRACE)) {
+			/*
+			 * The lock request was denied because the server lockd is
+			 * still in its grace period.  So, we need to try the
+			 * request again in a little bit.  Return the GRACE error so
+			 * the higher levels can perform the retry.
+			 */
+			msgreq->lmr_saved_errno = msgreq->lmr_errno = error = NFSERR_GRACE;
+		}
+
+		if (msgreq->lmr_errno == EINPROGRESS) {
+			/* got NLM_BLOCKED response */
+			/* need to wait for NLM_GRANTED */
+			timeo = 30;
+			msgreq->lmr_answered = 0;
+			goto wait_for_granted;
+		}
+
+		if ((msg->lm_flags & LOCKD_MSG_CANCEL) &&
+		    (msgreq->lmr_saved_errno == EINPROGRESS)) {
+			/*
+			 * We just got a successful reply to the
+			 * cancel of the previous blocked lock request.
+			 * Now, go ahead and return a DENIED error so the
+			 * higher levels can resend the request.
+			 */
+			msg->lm_flags &= ~LOCKD_MSG_CANCEL;
+			/* msgreq is dequeued exactly once, after the loop; don't dequeue it here too */
+			error = NFSERR_DENIED;
 			break;
 		}
 
-		if (msg.lm_getlk && ut->uu_nlminfo->retcode == 0) {
-			if (ut->uu_nlminfo->set_getlk) {
-				fl->l_pid = ut->uu_nlminfo->getlk_pid;
-				fl->l_start = ut->uu_nlminfo->getlk_start;
-				fl->l_len = ut->uu_nlminfo->getlk_len;
-				fl->l_whence = SEEK_SET;
-			} else {
-				fl->l_type = F_UNLCK;
+		/*
+		 * If the blocked lock request was cancelled.
+		 * Restore the error condition from when we
+		 * originally bailed on the request.
+		 */
+		if (msg->lm_flags & LOCKD_MSG_CANCEL) {
+			msg->lm_flags &= ~LOCKD_MSG_CANCEL;
+			error = msgreq->lmr_saved_errno;
+		} else {
+			error = msgreq->lmr_errno;
+		}
+
+		nmp = NFSTONMP(np);
+		if ((error == ENOTSUP) && nmp && !(nmp->nm_state & NFSSTA_LOCKSWORK)) {
+			/*
+			 * We have NO evidence that locks work and lockd
+			 * returned ENOTSUP.  Let's take this as a hint
+			 * that locks aren't supported and disable them
+			 * for this mount.
+			 */
+			nfs_lockdmsg_dequeue(msgreq);
+			lck_mtx_unlock(nfs_lock_mutex);
+			lck_mtx_lock(&nmp->nm_lock);
+			if (nmp->nm_lockmode == NFS_LOCK_MODE_ENABLED) {
+				nmp->nm_lockmode = NFS_LOCK_MODE_DISABLED;
+				nfs_lockd_mount_unregister(nmp);	/* NOTE(review): takes nfs_lock_mutex while nm_lock is held, the reverse of the wait loop's order - confirm this cannot deadlock */
+			}
+			nmp->nm_state &= ~NFSSTA_LOCKTIMEO;
+			lck_mtx_unlock(&nmp->nm_lock);
+			printf("lockd returned ENOTSUP, disabling locks for nfs server: %s\n",
+				vfs_statfs(nmp->nm_mountp)->f_mntfromname);
+			return (error);	/* nfs_lock_mutex was already dropped and msgreq dequeued above */
+		}
+		if (!error) {
+			/* record that NFS file locking has worked on this mount */
+			if (nmp) {
+				lck_mtx_lock(&nmp->nm_lock);
+				if (!(nmp->nm_state & NFSSTA_LOCKSWORK))
+					nmp->nm_state |= NFSSTA_LOCKSWORK;
+				lck_mtx_unlock(&nmp->nm_lock);
 			}
 		}
-		error = ut->uu_nlminfo->retcode;
 		break;
 	}
 
-	/* XXX stats */
-	nfsadvlocks++;
-	microuptime(&elapsed);
-	timevalsub(&elapsed, &ut->uu_nlminfo->nlm_lockstart);
-	if (timevalcmp(&elapsed, &nfsadvlock_longest, >))
-		nfsadvlock_longest = elapsed;
-	timevaladd(&nfsadvlocks_time, &elapsed);
-	timerclear(&ut->uu_nlminfo->nlm_lockstart);
-
-	error1 = vn_close(wvp, FWRITE, kernproc->p_ucred, p);
-	/* prefer any previous 'error' to our vn_close 'error1'. */
-	return (error != 0 ? error : error1);
+	nfs_lockdmsg_dequeue(msgreq);	/* single dequeue point for every path that breaks out of the loop */
+
+	lck_mtx_unlock(nfs_lock_mutex);
+
+	return (error);
 }
 
 /*
- * nfslockdans --
- *      NFS advisory byte-level locks answer from the lock daemon.
+ * Send an NLM LOCK message to the server (through lockd) for the lock described by nflp; returns 0 or an error
  */
 int
-nfslockdans(struct proc *p, struct lockd_ans *ansp)
+nfs3_setlock_rpc(
+	nfsnode_t np,
+	struct nfs_open_file *nofp,
+	struct nfs_file_lock *nflp,
+	int reclaim,
+	int flags,
+	thread_t thd,
+	kauth_cred_t cred)
 {
-	struct proc *targetp;
-	struct uthread *targetut, *uth;
+	struct nfs_lock_owner *nlop = nflp->nfl_owner;
+	struct nfsmount *nmp;
 	int error;
+	LOCKD_MSG_REQUEST msgreq;
+	LOCKD_MSG *msg;
 
-	/*
-	 * Let root, or someone who once was root (lockd generally
-	 * switches to the daemon uid once it is done setting up) make
-	 * this call.
-	 *
-	 * XXX This authorization check is probably not right.
-	 */
-	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0 &&
-	    p->p_cred->p_svuid != 0)
+	nmp = NFSTONMP(np);
+	if (!nmp)
+		return (ENXIO);
+
+	if (!nlop->nlo_open_owner) {
+		nfs_open_owner_ref(nofp->nof_owner);	/* reference taken for the pointer stored on the lock owner below */
+		nlop->nlo_open_owner = nofp->nof_owner;
+	}
+	if ((error = nfs_lock_owner_set_busy(nlop, thd)))
 		return (error);
 
-	/* the version should match, or we're out of sync */
-	if (ansp->la_vers != LOCKD_ANS_VERSION)
-		return (EINVAL);
+	/* set up lock message request structure */
+	bzero(&msgreq, sizeof(msgreq));
+	msg = &msgreq.lmr_msg;
+	msg->lm_version = LOCKD_MSG_VERSION;
+	if ((nflp->nfl_flags & NFS_FILE_LOCK_WAIT) && !reclaim)
+		msg->lm_flags |= LOCKD_MSG_BLOCK;	/* a reclaim is never sent as a blocking request */
+	if (reclaim)
+		msg->lm_flags |= LOCKD_MSG_RECLAIM;
+	msg->lm_fh_len = (nmp->nm_vers == NFS_VER2) ? NFSX_V2FH : np->n_fhsize;	/* NFSv2 uses the fixed NFSX_V2FH length */
+	bcopy(np->n_fhp, msg->lm_fh, msg->lm_fh_len);	/* NOTE(review): assumes lm_fh can hold lm_fh_len bytes - confirm against LOCKD_MSG */
+	cru2x(cred, &msg->lm_cred);
+
+	msg->lm_fl.l_whence = SEEK_SET;
+	msg->lm_fl.l_start = nflp->nfl_start;
+	msg->lm_fl.l_len = NFS_FLOCK_LENGTH(nflp->nfl_start, nflp->nfl_end);
+	msg->lm_fl.l_type = nflp->nfl_type;
+	msg->lm_fl.l_pid = nlop->nlo_pid;
+
+	error = nfs3_lockd_request(np, 0, &msgreq, flags, thd);	/* type 0: nfs3_lockd_request() only compares type against F_UNLCK */
+
+	nfs_lock_owner_clear_busy(nlop);	/* pairs with nfs_lock_owner_set_busy() above */
+	return (error);
+}
 
-	/* Find the process & thread */
-	if ((targetp = pfind(ansp->la_msg_ident.pid)) == NULL)
-		return (ESRCH);
-	targetut = ansp->la_msg_ident.ut;
-	TAILQ_FOREACH(uth, &targetp->p_uthlist, uu_list) {
-		if (uth == targetut)
-			break;
-	}
-	/*
-	 * Verify the pid hasn't been reused (if we can), and it isn't waiting
-	 * for an answer from a more recent request.  We return an EPIPE if
-	 * the match fails, because we've already used ESRCH above, and this
-	 * is sort of like writing on a pipe after the reader has closed it.
-	 * If only the seq# is off, don't return an error just return.  It could
-	 * just be a response to a retransmitted request.
-	 */
-	if (uth == NULL || uth != targetut || targetut->uu_nlminfo == NULL)
-		return (EPIPE);
-	if (ansp->la_msg_ident.msg_seq != -1) {
-		if (timevalcmp(&targetut->uu_nlminfo->pid_start,
-		               &ansp->la_msg_ident.pid_start, !=))
-			return (EPIPE);
-		if (targetut->uu_nlminfo->msg_seq != ansp->la_msg_ident.msg_seq)
-			return (0);
-	}
+/*
+ * Send an NLM UNLOCK message to the server
+ */
+int
+nfs3_unlock_rpc(
+	nfsnode_t np,
+	struct nfs_lock_owner *nlop,
+	__unused int type,
+	uint64_t start,
+	uint64_t end,
+	int flags,
+	thread_t thd,
+	kauth_cred_t cred)
+{
+	struct nfsmount *nmp;
+	LOCKD_MSG_REQUEST msgreq;
+	LOCKD_MSG *msg;
 
-	/* Found the thread, so set its return errno and wake it up. */
+	nmp = NFSTONMP(np);
+	if (!nmp)
+		return (ENXIO);
 
-	targetut->uu_nlminfo->retcode = ansp->la_errno;
-	targetut->uu_nlminfo->set_getlk = ansp->la_getlk_set;
-	targetut->uu_nlminfo->getlk_pid = ansp->la_getlk_pid;
-	targetut->uu_nlminfo->getlk_start = ansp->la_getlk_start;
-	targetut->uu_nlminfo->getlk_len = ansp->la_getlk_len;
+	/* set up lock message request structure */
+	bzero(&msgreq, sizeof(msgreq));
+	msg = &msgreq.lmr_msg;
+	msg->lm_version = LOCKD_MSG_VERSION;
+	msg->lm_fh_len = (nmp->nm_vers == NFS_VER2) ? NFSX_V2FH : np->n_fhsize;
+	bcopy(np->n_fhp, msg->lm_fh, msg->lm_fh_len);
+	cru2x(cred, &msg->lm_cred);
+
+	msg->lm_fl.l_whence = SEEK_SET;
+	msg->lm_fl.l_start = start;
+	msg->lm_fl.l_len = NFS_FLOCK_LENGTH(start, end);
+	msg->lm_fl.l_type = F_UNLCK;
+	msg->lm_fl.l_pid = nlop->nlo_pid;
+
+	return (nfs3_lockd_request(np, F_UNLCK, &msgreq, flags, thd));
+}
 
-	(void)wakeup((void *)targetut->uu_nlminfo);
+/*
+ * Send an NLM LOCK TEST message to the server
+ */
+int
+nfs3_getlock_rpc(
+	nfsnode_t np,
+	struct nfs_lock_owner *nlop,
+	struct flock *fl,
+	uint64_t start,
+	uint64_t end,
+	vfs_context_t ctx)
+{
+	struct nfsmount *nmp;
+	int error;
+	LOCKD_MSG_REQUEST msgreq;
+	LOCKD_MSG *msg;
 
-	return (0);
+	nmp = NFSTONMP(np);
+	if (!nmp)
+		return (ENXIO);
+
+	/* set up lock message request structure */
+	bzero(&msgreq, sizeof(msgreq));
+	msg = &msgreq.lmr_msg;
+	msg->lm_version = LOCKD_MSG_VERSION;
+	msg->lm_flags |= LOCKD_MSG_TEST;
+	msg->lm_fh_len = (nmp->nm_vers == NFS_VER2) ? NFSX_V2FH : np->n_fhsize;
+	bcopy(np->n_fhp, msg->lm_fh, msg->lm_fh_len);
+	cru2x(vfs_context_ucred(ctx), &msg->lm_cred);
+
+	msg->lm_fl.l_whence = SEEK_SET;
+	msg->lm_fl.l_start = start;
+	msg->lm_fl.l_len = NFS_FLOCK_LENGTH(start, end);
+	msg->lm_fl.l_type = fl->l_type;
+	msg->lm_fl.l_pid = nlop->nlo_pid;
+
+	error = nfs3_lockd_request(np, 0, &msgreq, 0, vfs_context_thread(ctx));
+
+	if (!error && (msg->lm_flags & LOCKD_MSG_TEST) && !msgreq.lmr_errno) {
+		if (msg->lm_fl.l_type != F_UNLCK) {
+			fl->l_type = msg->lm_fl.l_type;
+			fl->l_pid = msg->lm_fl.l_pid;
+			fl->l_start = msg->lm_fl.l_start;
+			fl->l_len = msg->lm_fl.l_len;
+			fl->l_whence = SEEK_SET;
+		} else
+			fl->l_type = F_UNLCK;
+	}
+
+	return (error);
 }
 
 /*
- * nfslockdfd --
- *      NFS advisory byte-level locks: fifo file# from the lock daemon.
+ * nfslockdans --
+ *      NFS advisory byte-level locks answer from the lock daemon.
  */
 int
-nfslockdfd(struct proc *p, int fd)
+nfslockdans(proc_t p, struct lockd_ans *ansp)
 {
+	LOCKD_MSG_REQUEST *msgreq;
 	int error;
-	struct file *fp, *ofp;
 
-	error = suser(p->p_ucred, &p->p_acflag);
+	/* Only root may make this call. */
+	error = proc_suser(p);
 	if (error)
 		return (error);
-	if (fd < 0) {
-		fp = 0;
-	} else {
-		error = getvnode(p, fd, &fp);
-		if (error)
-			return (error);
-		(void)fref(fp);
+
+	/* the version should match, or we're out of sync */
+	if (ansp->la_version != LOCKD_ANS_VERSION)
+		return (EINVAL);
+
+	lck_mtx_lock(nfs_lock_mutex);
+
+	/* try to find the lockd message by transaction id (cookie) */
+	msgreq = nfs_lockdmsg_find_by_xid(ansp->la_xid);
+	if (ansp->la_flags & LOCKD_ANS_GRANTED) {
+		/*
+		 * We can't depend on the granted message having our cookie,
+		 * so we check the answer against the lockd message found.
+		 * If no message was found or it doesn't match the answer,
+		 * we look for the lockd message by the answer's lock info.
+		 */
+		if (!msgreq || nfs_lockdmsg_compare_to_answer(msgreq, ansp))
+			msgreq = nfs_lockdmsg_find_by_answer(ansp);
+		/*
+		 * We need to make sure this request isn't being cancelled.
+		 * If it is, we don't want to accept the granted message.
+		 */
+		if (msgreq && (msgreq->lmr_msg.lm_flags & LOCKD_MSG_CANCEL))
+			msgreq = NULL;
+	}
+	if (!msgreq) {
+		lck_mtx_unlock(nfs_lock_mutex);
+		return (EPIPE);
+	}
+
+	msgreq->lmr_errno = ansp->la_errno;
+	if ((msgreq->lmr_msg.lm_flags & LOCKD_MSG_TEST) && msgreq->lmr_errno == 0) {
+		if (ansp->la_flags & LOCKD_ANS_LOCK_INFO) {
+			if (ansp->la_flags & LOCKD_ANS_LOCK_EXCL)
+				msgreq->lmr_msg.lm_fl.l_type = F_WRLCK;
+			else
+				msgreq->lmr_msg.lm_fl.l_type = F_RDLCK;
+			msgreq->lmr_msg.lm_fl.l_pid = ansp->la_pid;
+			msgreq->lmr_msg.lm_fl.l_start = ansp->la_start;
+			msgreq->lmr_msg.lm_fl.l_len = ansp->la_len;
+		} else {
+			msgreq->lmr_msg.lm_fl.l_type = F_UNLCK;
+		}
 	}
-	ofp = nfslockdfp;
-	nfslockdfp = fp;
-	if (ofp)
-		(void)frele(ofp);
-	nfslockdpid = nfslockdfp ? p->p_pid : 0;
-	(void)wakeup((void *)&nfslockdfp);
+	if (ansp->la_flags & LOCKD_ANS_DENIED_GRACE)
+		msgreq->lmr_msg.lm_flags |= LOCKD_MSG_DENIED_GRACE;
+
+	msgreq->lmr_answered = 1;
+	lck_mtx_unlock(nfs_lock_mutex);
+	wakeup(msgreq);
+
 	return (0);
 }
 
 /*
- * nfslockdwait --
- *      lock daemon waiting for lock request
+ * nfslockdnotify --
+ *      NFS host restart notification from the lock daemon.
+ *
+ * Used to initiate reclaiming of held locks when a server we
+ * have mounted reboots.
  */
 int
-nfslockdwait(struct proc *p)
+nfslockdnotify(proc_t p, user_addr_t argp)
 {
-	int error;
-	struct file *fp, *ofp;
+	int error, i, headsize;
+	struct lockd_notify ln;
+	struct nfsmount *nmp;
+	struct sockaddr *saddr;
 
-	if (p->p_pid != nfslockdpid) {
-		error = suser(p->p_ucred, &p->p_acflag);
+	/* Only root may make this call. */
+	error = proc_suser(p);
+	if (error)
+		return (error);
+
+	headsize = (char*)&ln.ln_addr[0] - (char*)&ln.ln_version;
+	error = copyin(argp, &ln, headsize);
+	if (error)
+		return (error);
+	if (ln.ln_version != LOCKD_NOTIFY_VERSION)
+		return (EINVAL);
+	if ((ln.ln_addrcount < 1) || (ln.ln_addrcount > 128))
+		return (EINVAL);
+	argp += headsize;
+	saddr = (struct sockaddr *)&ln.ln_addr[0];
+
+	lck_mtx_lock(nfs_lock_mutex);
+
+	for (i=0; i < ln.ln_addrcount; i++) {
+		error = copyin(argp, &ln.ln_addr[0], sizeof(ln.ln_addr[0]));
 		if (error)
-			return (error);
-	}
-	if (nfslockdwaiting)
-		return (EBUSY);
-	if (nfslockdfifowritten) {
-		nfslockdfifowritten = 0;
-		return (0);
+			break;
+		argp += sizeof(ln.ln_addr[0]);
+		/* scan lockd mount list for match to this address */
+		TAILQ_FOREACH(nmp, &nfs_lockd_mount_list, nm_ldlink) {
+			/* check if address matches this mount's server address */
+			if (!nmp->nm_saddr || nfs_sockaddr_cmp(saddr, nmp->nm_saddr))
+				continue;
+			/* We have a match!  Mark it as needing recovery. */
+			lck_mtx_lock(&nmp->nm_lock);
+			nfs_need_recover(nmp, 0);
+			lck_mtx_unlock(&nmp->nm_lock);
+		}
 	}
 
-	nfslockdwaiting = 1;
-	tsleep((void *)&nfslockdwaiting, PCATCH | PUSER, "lockd", 0);
-	nfslockdwaiting = 0;
+	lck_mtx_unlock(nfs_lock_mutex);
 
-	return (0);
+	return (error);
 }
+