diff --git a/bsd/kern/kern_lockf.c b/bsd/kern/kern_lockf.c
index b7775864c..d67a8f84b 100644
--- a/bsd/kern/kern_lockf.c
+++ b/bsd/kern/kern_lockf.c
@@ -1,8 +1,8 @@
 /*
- * Copyright (c) 2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2019 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- *
+ *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
@@ -11,10 +11,10 @@
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
- *
+ *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
- *
+ *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
@@ -22,7 +22,7 @@
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
- *
+ *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
 /*
@@ -75,42 +75,55 @@
 #include <sys/malloc.h>
 #include <sys/fcntl.h>
 #include <sys/lockf.h>
+#include <sys/sdt.h>
+#include <kern/policy_internal.h>
 
-/*
- * This variable controls the maximum number of processes that will
- * be checked in doing deadlock detection.
- */
-static int maxlockdepth = MAXDEPTH;
+#include <sys/file_internal.h>
+
+#if (DEVELOPMENT || DEBUG)
+#define LOCKF_DEBUGGING 1
+#endif
 
 #ifdef LOCKF_DEBUGGING
 #include <sys/sysctl.h>
-#include <ufs/ufs/quota.h>
-#include <ufs/ufs/inode.h>
 void lf_print(const char *tag, struct lockf *lock);
 void lf_printlist(const char *tag, struct lockf *lock);
-static int lockf_debug = 2;
+
+#define LF_DBG_LOCKOP (1 << 0) /* setlk, getlk, clearlk */
+#define LF_DBG_LIST (1 << 1) /* split, coalesce */
+#define LF_DBG_IMPINH (1 << 2) /* importance inheritance */
+#define LF_DBG_TRACE (1 << 3) /* errors, exit */
+#define LF_DBG_DEADLOCK (1 << 4) /* deadlock detection */
+
+static int lockf_debug = 0; /* was 2, could be 3 ;-) */
 SYSCTL_INT(_debug, OID_AUTO, lockf_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &lockf_debug, 0, "");
 
 /*
- * If there is no mask bit selector, or there is on, and the selector is
+ * If there is no mask bit selector, or there is one, and the selector is
 * set, then output the debugging diagnostic.
 */
-#define LOCKF_DEBUG(mask, ...) \
- do { \
- if( !(mask) || ((mask) & lockf_debug)) { \
- printf(__VA_ARGS__); \
- } \
+#define LOCKF_DEBUG(mask, ...) \
+ do { \
+ if (!(mask) || ((mask) & lockf_debug)) { \
+ printf("%s>", __FUNCTION__); \
+ printf(__VA_ARGS__); \
+ } \
 } while(0)
-#else /* !LOCKF_DEBUGGING */
-#define LOCKF_DEBUG(mask, ...) /* mask */
-#endif /* !LOCKF_DEBUGGING */
+
+#define LOCKF_DEBUGP(mask) \
+ ({ \
+ ((mask) & lockf_debug); \
+ })
+#else /* !LOCKF_DEBUGGING */
+#define LOCKF_DEBUG(mask, ...)
/* mask */ +#endif /* !LOCKF_DEBUGGING */ MALLOC_DEFINE(M_LOCKF, "lockf", "Byte-range locking structures"); #define NOLOCKF (struct lockf *)0 -#define SELF 0x1 -#define OTHERS 0x2 -#define OFF_MAX 0x7fffffffffffffffULL /* max off_t */ +#define SELF 0x1 +#define OTHERS 0x2 +#define OFF_MAX 0x7fffffffffffffffULL /* max off_t */ /* * Overlapping lock states @@ -124,17 +137,31 @@ typedef enum { OVERLAP_ENDS_AFTER_LOCK } overlap_t; -static int lf_clearlock(struct lockf *); +static int lf_clearlock(struct lockf *); static overlap_t lf_findoverlap(struct lockf *, - struct lockf *, int, struct lockf ***, struct lockf **); -static struct lockf *lf_getblock(struct lockf *); -static int lf_getlock(struct lockf *, struct flock *); -#if CONFIG_EMBEDDED -static int lf_getlockpid(struct vnode *, struct flock *); -#endif -static int lf_setlock(struct lockf *); -static int lf_split(struct lockf *, struct lockf *); -static void lf_wakelock(struct lockf *, boolean_t); + struct lockf *, int, struct lockf ***, struct lockf **); +static struct lockf *lf_getblock(struct lockf *, pid_t); +static int lf_getlock(struct lockf *, struct flock *, pid_t); +static int lf_setlock(struct lockf *, struct timespec *); +static int lf_split(struct lockf *, struct lockf *); +static void lf_wakelock(struct lockf *, boolean_t); +#if IMPORTANCE_INHERITANCE +static void lf_hold_assertion(task_t, struct lockf *); +static void lf_jump_to_queue_head(struct lockf *, struct lockf *); +static void lf_drop_assertion(struct lockf *); +static void lf_boost_blocking_proc(struct lockf *, struct lockf *); +static void lf_adjust_assertion(struct lockf *block); +#endif /* IMPORTANCE_INHERITANCE */ + +static lck_mtx_t lf_dead_lock; +static lck_grp_t *lf_dead_lock_grp; + +void +lf_init(void) +{ + lf_dead_lock_grp = lck_grp_alloc_init("lf_dead_lock", LCK_GRP_ATTR_NULL); + lck_mtx_init(&lf_dead_lock, lf_dead_lock_grp, LCK_ATTR_NULL); +} /* * lf_advlock @@ -153,6 +180,7 @@ static void lf_wakelock(struct lockf *, boolean_t); * lf_setlock:EDEADLK * lf_setlock:EINTR * lf_setlock:ENOLCK + * lf_setlock:ETIMEDOUT * lf_clearlock:ENOLCK * vnode_size:??? * @@ -174,19 +202,16 @@ lf_advlock(struct vnop_advlock_args *ap) /* XXX HFS may need a !vnode_isreg(vp) EISDIR error here */ -#if CONFIG_EMBEDDED - if (ap->a_op == F_GETLKPID) - return lf_getlockpid(vp, fl); -#endif - /* * Avoid the common case of unlocking when inode has no locks. */ if (*head == (struct lockf *)0) { if (ap->a_op != F_SETLK) { fl->l_type = F_UNLCK; - LOCKF_DEBUG(0, "lf_advlock: '%s' unlock without lock\n", vfs_context_proc(context)->p_comm); - return (0); + LOCKF_DEBUG(LF_DBG_TRACE, + "lf_advlock: '%s' unlock without lock\n", + vfs_context_proc(context)->p_comm); + return 0; } } @@ -194,7 +219,6 @@ lf_advlock(struct vnop_advlock_args *ap) * Convert the flock structure into a start and end. */ switch (fl->l_whence) { - case SEEK_SET: case SEEK_CUR: /* @@ -213,43 +237,49 @@ lf_advlock(struct vnop_advlock_args *ap) * do this because we will use size to force range checks. 
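
An aside on the conversion this switch implements: the (l_whence, l_start, l_len) triple denotes a closed byte range [start, end], where end == -1 stands for "to end of file" and a negative l_len selects the bytes *before* the computed position. A minimal userland-style model of the same arithmetic, not the kernel code: `base` stands in for the caller-folded current offset and `filesize` for vnode_size(), and the EOVERFLOW checks are elided for brevity.

    #include <fcntl.h>
    #include <sys/types.h>
    #include <unistd.h>

    /* Model of lf_advlock()'s flock -> [start, end] conversion. */
    static int
    flock_to_range(const struct flock *fl, off_t base, off_t filesize,
        off_t *start, off_t *end)
    {
        off_t s;

        switch (fl->l_whence) {
        case SEEK_SET: s = fl->l_start;            break;
        case SEEK_CUR: s = base + fl->l_start;     break;
        case SEEK_END: s = filesize + fl->l_start; break;
        default:       return -1;                  /* EINVAL */
        }
        if (s < 0)
            return -1;                             /* EINVAL */
        if (fl->l_len < 0) {
            if (s == 0)
                return -1;                         /* EINVAL */
            *end = s - 1;                          /* range ends just before s */
            s += fl->l_len;                        /* ... and starts l_len earlier */
            if (s < 0)
                return -1;                         /* EINVAL */
        } else if (fl->l_len == 0) {
            *end = -1;                             /* "to end of file" */
        } else {
            *end = s + (fl->l_len - 1);            /* closed interval */
        }
        *start = s;
        return 0;
    }
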
*/ if ((error = vnode_size(vp, (off_t *)&size, context))) { - LOCKF_DEBUG(0, "lf_advlock: vnode_getattr failed: %d\n", error); - return (error); + LOCKF_DEBUG(LF_DBG_TRACE, + "lf_advlock: vnode_getattr failed: %d\n", error); + return error; } if (size > OFF_MAX || (fl->l_start > 0 && - size > (u_quad_t)(OFF_MAX - fl->l_start))) - return (EOVERFLOW); + size > (u_quad_t)(OFF_MAX - fl->l_start))) { + return EOVERFLOW; + } start = size + fl->l_start; break; default: - LOCKF_DEBUG(0, "lf_advlock: unknown whence %d\n", fl->l_whence); - return (EINVAL); + LOCKF_DEBUG(LF_DBG_TRACE, "lf_advlock: unknown whence %d\n", + fl->l_whence); + return EINVAL; } if (start < 0) { - LOCKF_DEBUG(0, "lf_advlock: start < 0 (%qd)\n", start); - return (EINVAL); + LOCKF_DEBUG(LF_DBG_TRACE, "lf_advlock: start < 0 (%qd)\n", + start); + return EINVAL; } if (fl->l_len < 0) { if (start == 0) { - LOCKF_DEBUG(0, "lf_advlock: len < 0 & start == 0\n"); - return (EINVAL); + LOCKF_DEBUG(LF_DBG_TRACE, + "lf_advlock: len < 0 & start == 0\n"); + return EINVAL; } end = start - 1; start += fl->l_len; if (start < 0) { - LOCKF_DEBUG(0, "lf_advlock: start < 0 (%qd)\n", start); - return (EINVAL); + LOCKF_DEBUG(LF_DBG_TRACE, + "lf_advlock: start < 0 (%qd)\n", start); + return EINVAL; } - } else if (fl->l_len == 0) + } else if (fl->l_len == 0) { end = -1; - else { + } else { oadd = fl->l_len - 1; if (oadd > (off_t)(OFF_MAX - start)) { - LOCKF_DEBUG(0, "lf_advlock: overflow\n"); - return (EOVERFLOW); + LOCKF_DEBUG(LF_DBG_TRACE, "lf_advlock: overflow\n"); + return EOVERFLOW; } end = start + oadd; } @@ -257,8 +287,9 @@ lf_advlock(struct vnop_advlock_args *ap) * Create the lockf structure */ MALLOC(lock, struct lockf *, sizeof *lock, M_LOCKF, M_WAITOK); - if (lock == NULL) - return (ENOLCK); + if (lock == NULL) { + return ENOLCK; + } lock->lf_start = start; lock->lf_end = end; lock->lf_id = ap->a_id; @@ -268,17 +299,40 @@ lf_advlock(struct vnop_advlock_args *ap) lock->lf_next = (struct lockf *)0; TAILQ_INIT(&lock->lf_blkhd); lock->lf_flags = ap->a_flags; +#if IMPORTANCE_INHERITANCE + lock->lf_boosted = LF_NOT_BOOSTED; +#endif + if (ap->a_flags & F_POSIX) { + lock->lf_owner = (struct proc *)lock->lf_id; + } else { + lock->lf_owner = NULL; + } - if (ap->a_flags & F_FLOCK) - lock->lf_flags |= F_WAKE1_SAFE; + if (ap->a_flags & F_FLOCK) { + lock->lf_flags |= F_WAKE1_SAFE; + } - lck_mtx_lock(&vp->v_lock); /* protect the lockf list */ + lck_mtx_lock(&vp->v_lock); /* protect the lockf list */ /* * Do the requested operation. */ - switch(ap->a_op) { + switch (ap->a_op) { case F_SETLK: - error = lf_setlock(lock); + /* + * For F_OFD_* locks, lf_id is the fileglob. + * Record an "lf_owner" iff this is a confined fd + * i.e. it cannot escape this process and will be + * F_UNLCKed before the owner exits. (This is + * the implicit guarantee needed to ensure lf_owner + * remains a valid reference here.) 
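
The fileglob-as-owner idea is easiest to see from userland: open file description (OFD) locks attach to the open file description, not the process. The sketch below uses Linux's public F_OFD_SETLK (glibc, _GNU_SOURCE), since xnu's F_OFD_* fcntl commands are not in the public SDK headers; the ownership semantics are the same.

    #define _GNU_SOURCE                /* Linux: exposes F_OFD_SETLK */
    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    int
    main(void)
    {
        int fd = open("/tmp/ofd-demo", O_RDWR | O_CREAT, 0644);
        struct flock fl;

        if (fd == -1) {
            perror("open");
            return 1;
        }
        memset(&fl, 0, sizeof(fl));    /* OFD locks require l_pid == 0 */
        fl.l_type = F_WRLCK;
        fl.l_whence = SEEK_SET;
        fl.l_start = 0;
        fl.l_len = 0;                  /* whole file */

        /* The lock belongs to this open file description (the
         * "fileglob" in xnu terms), not to the process: it travels
         * with the fd across fork()/dup() and dies with the last
         * close() of the description. */
        if (fcntl(fd, F_OFD_SETLK, &fl) == -1)
            perror("F_OFD_SETLK");

        close(fd);
        return 0;
    }
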
+ */
+ if (ap->a_flags & F_OFD_LOCK) {
+ struct fileglob *fg = (void *)lock->lf_id;
+ if (fg->fg_lflags & FG_CONFINED) {
+ lock->lf_owner = current_proc();
+ }
+ }
+ error = lf_setlock(lock, ap->a_timeout);
 break;
 
 case F_UNLCK:
@@ -287,7 +341,12 @@ lf_advlock(struct vnop_advlock_args *ap)
 break;
 
 case F_GETLK:
- error = lf_getlock(lock, fl);
+ error = lf_getlock(lock, fl, -1);
+ FREE(lock, M_LOCKF);
+ break;
+
+ case F_GETLKPID:
+ error = lf_getlock(lock, fl, fl->l_pid);
 FREE(lock, M_LOCKF);
 break;
 
@@ -296,12 +355,43 @@ lf_advlock(struct vnop_advlock_args *ap)
 error = EINVAL;
 break;
 }
- lck_mtx_unlock(&vp->v_lock); /* done manipulating the list */
+ lck_mtx_unlock(&vp->v_lock); /* done manipulating the list */
 
- LOCKF_DEBUG(0, "lf_advlock: normal exit: %d\n\n", error);
- return (error);
+ LOCKF_DEBUG(LF_DBG_TRACE, "lf_advlock: normal exit: %d\n", error);
+ return error;
 }
 
+/*
+ * Empty the queue of msleeping requests for a lock on the given vnode.
+ * Called with the vnode already locked. Used for forced unmount, where
+ * a flock(2) invoker sleeping on a blocked lock holds an iocount reference
+ * that prevents the vnode from ever being drained. Force unmounting wins.
+ */
+void
+lf_abort_advlocks(vnode_t vp)
+{
+ struct lockf *lock;
+
+ if ((lock = vp->v_lockf) == NULL) {
+ return;
+ }
+
+ lck_mtx_assert(&vp->v_lock, LCK_MTX_ASSERT_OWNED);
+
+ if (!TAILQ_EMPTY(&lock->lf_blkhd)) {
+ struct lockf *tlock;
+
+ TAILQ_FOREACH(tlock, &lock->lf_blkhd, lf_block) {
+ /*
+ * Setting this flag should cause all
+ * currently blocked F_SETLK requests to
+ * return to userland with an errno.
+ */
+ tlock->lf_flags |= F_ABORT;
+ }
+ lf_wakelock(lock, TRUE);
+ }
+}
 
 /*
 * Take any lock attempts which are currently blocked by a given lock ("from")
@@ -348,16 +438,14 @@ lf_coalesce_adjacent(struct lockf *lock)
 }
 
 /*
- * NOTE: Assumes that if two locks are adjacent on the number line
+ * NOTE: Assumes that if two locks are adjacent on the number line
 * and belong to the same owner, then they are adjacent on the list.
 */
-
-
 /* If the lock ends adjacent to us, we can coalesce it */
 if ((*lf)->lf_end != -1 &&
 ((*lf)->lf_end + 1) == lock->lf_start) {
 struct lockf *adjacent = *lf;
 
- LOCKF_DEBUG(0, "lf_coalesce_adjacent: coalesce adjacent previous\n");
+ LOCKF_DEBUG(LF_DBG_LIST, "lf_coalesce_adjacent: coalesce adjacent previous\n");
 lock->lf_start = (*lf)->lf_start;
 *lf = lock;
 lf = &(*lf)->lf_next;
@@ -372,7 +460,7 @@ lf_coalesce_adjacent(struct lockf *lock)
 (lock->lf_end + 1) == (*lf)->lf_start) {
 struct lockf *adjacent = *lf;
 
- LOCKF_DEBUG(0, "lf_coalesce_adjacent: coalesce adjacent following\n");
+ LOCKF_DEBUG(LF_DBG_LIST, "lf_coalesce_adjacent: coalesce adjacent following\n");
 lock->lf_end = (*lf)->lf_end;
 lock->lf_next = (*lf)->lf_next;
 lf = &lock->lf_next;
@@ -388,7 +476,6 @@ lf_coalesce_adjacent(struct lockf *lock)
 }
 }
 
-
 /*
 * lf_setlock
 *
@@ -400,108 +487,178 @@ lf_coalesce_adjacent(struct lockf *lock)
 * the set is successful, and freed if the
 * set is unsuccessful.
 *
+ * timeout Timeout specified in the case of
+ * SETLKWTIMEOUT.
+ *
 * Returns: 0 Success
 * EAGAIN
 * EDEADLK
 * lf_split:ENOLCK
 * lf_clearlock:ENOLCK
 * msleep:EINTR
+ * msleep:ETIMEDOUT
 *
 * Notes: We add the lock to the provisional lock list. We do not
 * coalesce at this time; this has implications for other lock
 * requestors in the blocker search mechanism.
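
The timeout argument threaded through here is what backs the F_SETLKWTIMEOUT command. A sketch of the userland side, assuming Darwin's F_SETLKWTIMEOUT and struct flocktimeout (a struct flock plus a relative struct timespec) as declared in xnu's <sys/fcntl.h>; this is not a portable POSIX interface.

    #include <errno.h>
    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>

    /* Try to write-lock the whole file, giving up after two seconds.
     * In the kernel, msleep()'s EWOULDBLOCK is remapped to ETIMEDOUT
     * on this path (see lf_setlock() below). */
    int
    lock_with_timeout(int fd)
    {
        struct flocktimeout lt;

        memset(&lt, 0, sizeof(lt));
        lt.fl.l_type = F_WRLCK;
        lt.fl.l_whence = SEEK_SET;
        lt.fl.l_start = 0;
        lt.fl.l_len = 0;               /* whole file */
        lt.timeout.tv_sec = 2;         /* relative timeout */
        lt.timeout.tv_nsec = 0;

        if (fcntl(fd, F_SETLKWTIMEOUT, &lt) == -1) {
            if (errno == ETIMEDOUT)
                fprintf(stderr, "lock timed out\n");
            return -1;
        }
        return 0;
    }
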
*/ static int -lf_setlock(struct lockf *lock) +lf_setlock(struct lockf *lock, struct timespec *timeout) { struct lockf *block; struct lockf **head = lock->lf_head; struct lockf **prev, *overlap, *ltmp; - static char lockstr[] = "lockf"; + static const char lockstr[] = "lockf"; int priority, needtolink, error; struct vnode *vp = lock->lf_vnode; overlap_t ovcase; #ifdef LOCKF_DEBUGGING - if (lockf_debug & 1) { + if (LOCKF_DEBUGP(LF_DBG_LOCKOP)) { lf_print("lf_setlock", lock); lf_printlist("lf_setlock(in)", lock); } #endif /* LOCKF_DEBUGGING */ + LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p Looking for deadlock, vnode %p\n", lock, lock->lf_vnode); /* * Set the priority */ priority = PLOCK; - if (lock->lf_type == F_WRLCK) + if (lock->lf_type == F_WRLCK) { priority += 4; + } priority |= PCATCH; +scan: /* * Scan lock list for this file looking for locks that would block us. */ - while ((block = lf_getblock(lock))) { + while ((block = lf_getblock(lock, -1))) { /* * Free the structure and return if nonblocking. */ if ((lock->lf_flags & F_WAIT) == 0) { + DTRACE_FSINFO(advlock__nowait, vnode_t, vp); FREE(lock, M_LOCKF); - return (EAGAIN); + return EAGAIN; } + LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p found blocking lock %p\n", lock, block); + /* * We are blocked. Since flock style locks cover * the whole file, there is no chance for deadlock. - * For byte-range locks we must check for deadlock. + * + * OFD byte-range locks currently do NOT support + * deadlock detection. + * + * For POSIX byte-range locks we must check for deadlock. * * Deadlock detection is done by looking through the * wait channels to see if there are any cycles that - * involve us. MAXDEPTH is set just to make sure we - * do not go off into neverland. + * involve us. */ if ((lock->lf_flags & F_POSIX) && (block->lf_flags & F_POSIX)) { - struct proc *wproc, *bproc; - struct uthread *ut; - struct lockf *waitblock; - int i = 0; + lck_mtx_lock(&lf_dead_lock); - /* The block is waiting on something */ - wproc = (struct proc *)block->lf_id; + /* The blocked process is waiting on something */ + struct proc *wproc = block->lf_owner; proc_lock(wproc); + + LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p owned by pid %d\n", lock, proc_pid(wproc)); + + struct uthread *ut; TAILQ_FOREACH(ut, &wproc->p_uthlist, uu_list) { /* - * While the thread is asleep (uu_wchan != 0) - * in this code (uu_wmesg == lockstr) - * and we have not exceeded the maximum cycle - * depth (i < maxlockdepth), then check for a - * cycle to see if the lock is blocked behind + * If the thread is (a) asleep (uu_wchan != 0) + * and (b) in this code (uu_wmesg == lockstr) + * then check to see if the lock is blocked behind * someone blocked behind us. + * + * Note: (i) vp->v_lock is held, preventing other + * threads from mutating the blocking list for our vnode. + * and (ii) the proc_lock is held i.e the thread list + * is stable. + * + * HOWEVER some thread in wproc might be sleeping on a lockf + * structure for a different vnode, and be woken at any + * time. Thus the waitblock list could mutate while + * it's being inspected by this thread, and what + * ut->uu_wchan was just pointing at could even be freed. + * + * Nevertheless this is safe here because of lf_dead_lock; if + * any thread blocked with uu_wmesg == lockstr wakes (see below) + * it will try to acquire lf_dead_lock which is already held + * here. Holding that lock prevents the lockf structure being + * pointed at by ut->uu_wchan from going away. 
Thus the vnode + * involved can be found and locked, and the corresponding + * blocking chain can then be examined safely. */ - while (((waitblock = (struct lockf *)ut->uu_wchan) != NULL) && - ut->uu_wmesg == lockstr && - (i++ < maxlockdepth)) { - waitblock = (struct lockf *)ut->uu_wchan; + const struct lockf *waitblock = (const void *)ut->uu_wchan; + if ((waitblock != NULL) && (ut->uu_wmesg == lockstr)) { + LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p which is also blocked on lock %p vnode %p\n", lock, waitblock, waitblock->lf_vnode); + + vnode_t othervp = NULL; + if (waitblock->lf_vnode != vp) { + /* + * This thread in wproc is waiting for a lock + * on a different vnode; grab the lock on it + * that protects lf_next while we examine it. + */ + othervp = waitblock->lf_vnode; + if (!lck_mtx_try_lock(&othervp->v_lock)) { + /* + * avoid kernel deadlock: drop all + * locks, pause for a bit to let the + * other thread do what it needs to do, + * then (because we drop and retake + * v_lock) retry the scan. + */ + proc_unlock(wproc); + lck_mtx_unlock(&lf_dead_lock); + static struct timespec ts = { + .tv_sec = 0, + .tv_nsec = 2 * NSEC_PER_MSEC, + }; + static const char pausestr[] = "lockf:pause"; + (void) msleep(lock, &vp->v_lock, priority, pausestr, &ts); + LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p contention for vp %p => restart\n", lock, othervp); + goto scan; + } + } + /* * Get the lock blocking the lock * which would block us, and make - * certain it hasn't come unblocked + * certain it hasn't become unblocked * (been granted, e.g. between the time * we called lf_getblock, and the time * we successfully acquired the * proc_lock). */ - waitblock = waitblock->lf_next; - if (waitblock == NULL) - break; + const struct lockf *nextblock = waitblock->lf_next; + if (nextblock == NULL) { + if (othervp) { + lck_mtx_unlock(&othervp->v_lock); + } + LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p with waitblock %p and no lf_next; othervp %p\n", lock, waitblock, othervp); + continue; + } + LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p which is also blocked on lock %p vnode %p\n", lock, nextblock, nextblock->lf_vnode); /* * Make sure it's an advisory range - * lock and not an overall file lock; + * lock and not any other kind of lock; * if we mix lock types, it's our own * fault. */ - if ((waitblock->lf_flags & F_POSIX) == 0) - break; + if ((nextblock->lf_flags & F_POSIX) == 0) { + if (othervp) { + lck_mtx_unlock(&othervp->v_lock); + } + continue; + } /* * If the owner of the lock that's @@ -509,15 +666,25 @@ lf_setlock(struct lockf *lock) * getting the requested lock, then we * would deadlock, so error out. 
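
Reduced to its essentials, the check is a walk of the waits-for chain. A self-contained toy model follows; the real walk is spread across threads, wait channels, and per-vnode locks as described above (one lf_next hop per blocked thread, re-scanned on v_lock contention), whereas this flattens it to a single chain.

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    /* Each blocked lock records the lock it waits behind (lf_next) and
     * an owner (lf_owner). Starting from the lock we are about to sleep
     * on, follow the chain; if it leads back to a lock owned by us,
     * granting the request would deadlock: return EDEADLK instead. */
    struct toy_lock {
        int              owner;   /* stand-in for lf_owner */
        struct toy_lock *next;    /* stand-in for lf_next  */
    };

    static bool
    would_deadlock(const struct toy_lock *blocker, int me)
    {
        for (const struct toy_lock *l = blocker; l != NULL; l = l->next) {
            if (l->owner == me)
                return true;      /* cycle back to us */
        }
        return false;
    }

    int
    main(void)
    {
        /* Owner 2's lock blocks us (owner 1), and owner 2 is itself
         * waiting behind a lock owned by 1: a two-party cycle. */
        struct toy_lock a = { .owner = 1, .next = NULL };
        struct toy_lock b = { .owner = 2, .next = &a };

        printf("deadlock: %s\n", would_deadlock(&b, 1) ? "yes" : "no");
        return 0;
    }
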
*/ - bproc = (struct proc *)waitblock->lf_id; - if (bproc == (struct proc *)lock->lf_id) { + struct proc *bproc = nextblock->lf_owner; + const boolean_t deadlocked = bproc == lock->lf_owner; + + if (othervp) { + lck_mtx_unlock(&othervp->v_lock); + } + LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p owned by pid %d\n", lock, proc_pid(bproc)); + if (deadlocked) { + LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p which is me, so EDEADLK\n", lock); proc_unlock(wproc); + lck_mtx_unlock(&lf_dead_lock); FREE(lock, M_LOCKF); - return (EDEADLK); + return EDEADLK; } } + LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p bottom of thread loop\n", lock); } proc_unlock(wproc); + lck_mtx_unlock(&lf_dead_lock); } /* @@ -530,7 +697,7 @@ lf_setlock(struct lockf *lock) lock->lf_type = F_UNLCK; if ((error = lf_clearlock(lock)) != 0) { FREE(lock, M_LOCKF); - return (error); + return error; } lock->lf_type = F_WRLCK; } @@ -541,42 +708,107 @@ lf_setlock(struct lockf *lock) lock->lf_next = block; TAILQ_INSERT_TAIL(&block->lf_blkhd, lock, lf_block); - if ( !(lock->lf_flags & F_FLOCK)) - block->lf_flags &= ~F_WAKE1_SAFE; + if (!(lock->lf_flags & F_FLOCK)) { + block->lf_flags &= ~F_WAKE1_SAFE; + } + +#if IMPORTANCE_INHERITANCE + /* + * Importance donation is done only for cases where the + * owning task can be unambiguously determined. + * + * POSIX type locks are not inherited by child processes; + * we maintain a 1:1 mapping between a lock and its owning + * process. + * + * Flock type locks are inherited across fork() and there is + * no 1:1 mapping in the general case. However, the fileglobs + * used by OFD locks *may* be confined to the process that + * created them, and thus have an "owner", in which case + * we also attempt importance donation. + */ + if ((lock->lf_flags & block->lf_flags & F_POSIX) != 0) { + lf_boost_blocking_proc(lock, block); + } else if ((lock->lf_flags & block->lf_flags & F_OFD_LOCK) && + lock->lf_owner != block->lf_owner && + NULL != lock->lf_owner && NULL != block->lf_owner) { + lf_boost_blocking_proc(lock, block); + } +#endif /* IMPORTANCE_INHERITANCE */ #ifdef LOCKF_DEBUGGING - if (lockf_debug & 1) { + if (LOCKF_DEBUGP(LF_DBG_LOCKOP)) { lf_print("lf_setlock: blocking on", block); lf_printlist("lf_setlock(block)", block); } #endif /* LOCKF_DEBUGGING */ - error = msleep(lock, &vp->v_lock, priority, lockstr, 0); + DTRACE_FSINFO(advlock__wait, vnode_t, vp); + + if (lock->lf_flags & F_POSIX) { + error = msleep(lock, &vp->v_lock, priority, lockstr, timeout); + /* + * Ensure that 'lock' doesn't get mutated or freed if a + * wakeup occurs while hunting for deadlocks (and holding + * lf_dead_lock - see above) + */ + lck_mtx_lock(&lf_dead_lock); + lck_mtx_unlock(&lf_dead_lock); + } else { + static const char lockstr_np[] = "lockf:np"; + error = msleep(lock, &vp->v_lock, priority, lockstr_np, timeout); + } + + if (error == 0 && (lock->lf_flags & F_ABORT) != 0) { + error = EBADF; + } + + if (lock->lf_next) { + /* + * lf_wakelock() always sets wakelock->lf_next to + * NULL before a wakeup; so we've been woken early + * - perhaps by a debugger, signal or other event. + * + * Remove 'lock' from the block list (avoids double-add + * in the spurious case, which would create a cycle) + */ + TAILQ_REMOVE(&lock->lf_next->lf_blkhd, lock, lf_block); +#if IMPORTANCE_INHERITANCE + /* + * Adjust the boost on lf_next. 
+ */ + lf_adjust_assertion(lock->lf_next); +#endif /* IMPORTANCE_INHERITANCE */ + lock->lf_next = NULL; + + if (error == 0) { + /* + * If this was a spurious wakeup, retry + */ + printf("%s: spurious wakeup, retrying lock\n", + __func__); + continue; + } + } if (!TAILQ_EMPTY(&lock->lf_blkhd)) { - if ((block = lf_getblock(lock))) { + if ((block = lf_getblock(lock, -1)) != NULL) { lf_move_blocked(block, lock); } } - if (error) { /* XXX */ - /* - * We may have been awakened by a signal and/or by a - * debugger continuing us (in which cases we must remove - * ourselves from the blocked list) and/or by another - * process releasing a lock (in which case we have - * already been removed from the blocked list and our - * lf_next field set to NOLOCKF). - */ - if (lock->lf_next) { - TAILQ_REMOVE(&lock->lf_next->lf_blkhd, lock, lf_block); - lock->lf_next = NOLOCKF; + + if (error) { + if (!TAILQ_EMPTY(&lock->lf_blkhd)) { + lf_wakelock(lock, TRUE); } - if (!TAILQ_EMPTY(&lock->lf_blkhd)) - lf_wakelock(lock, TRUE); - FREE(lock, M_LOCKF); - return (error); - } /* XXX */ + /* Return ETIMEDOUT if timeout occoured. */ + if (error == EWOULDBLOCK) { + error = ETIMEDOUT; + } + return error; + } } + /* * No blocks!! Add the lock. Note that we will * downgrade or upgrade any overlapping locks this @@ -590,8 +822,9 @@ lf_setlock(struct lockf *lock) needtolink = 1; for (;;) { ovcase = lf_findoverlap(block, lock, SELF, &prev, &overlap); - if (ovcase) + if (ovcase) { block = overlap->lf_next; + } /* * Six cases: * 0) no overlap @@ -615,8 +848,9 @@ lf_setlock(struct lockf *lock) * able to acquire it. */ if (lock->lf_type == F_RDLCK && - overlap->lf_type == F_WRLCK) - lf_wakelock(overlap, TRUE); + overlap->lf_type == F_WRLCK) { + lf_wakelock(overlap, TRUE); + } overlap->lf_type = lock->lf_type; FREE(lock, M_LOCKF); lock = overlap; /* for lf_coalesce_adjacent() */ @@ -643,7 +877,7 @@ lf_setlock(struct lockf *lock) */ if (lf_split(overlap, lock)) { FREE(lock, M_LOCKF); - return (ENOLCK); + return ENOLCK; } } lf_wakelock(overlap, TRUE); @@ -656,7 +890,7 @@ lf_setlock(struct lockf *lock) */ if (lock->lf_type == F_RDLCK && overlap->lf_type == F_WRLCK) { - lf_wakelock(overlap, TRUE); + lf_wakelock(overlap, TRUE); } else { while (!TAILQ_EMPTY(&overlap->lf_blkhd)) { ltmp = TAILQ_FIRST(&overlap->lf_blkhd); @@ -675,8 +909,9 @@ lf_setlock(struct lockf *lock) lock->lf_next = overlap->lf_next; prev = &lock->lf_next; needtolink = 0; - } else + } else { *prev = overlap->lf_next; + } FREE(overlap, M_LOCKF); continue; @@ -709,12 +944,12 @@ lf_setlock(struct lockf *lock) /* Coalesce adjacent locks with identical attributes */ lf_coalesce_adjacent(lock); #ifdef LOCKF_DEBUGGING - if (lockf_debug & 1) { + if (LOCKF_DEBUGP(LF_DBG_LOCKOP)) { lf_print("lf_setlock: got the lock", lock); lf_printlist("lf_setlock(out)", lock); } #endif /* LOCKF_DEBUGGING */ - return (0); + return 0; } @@ -742,23 +977,31 @@ lf_clearlock(struct lockf *unlock) struct lockf *overlap, **prev; overlap_t ovcase; - if (lf == NOLOCKF) - return (0); + if (lf == NOLOCKF) { + return 0; + } #ifdef LOCKF_DEBUGGING - if (unlock->lf_type != F_UNLCK) + if (unlock->lf_type != F_UNLCK) { panic("lf_clearlock: bad type"); - if (lockf_debug & 1) + } + if (LOCKF_DEBUGP(LF_DBG_LOCKOP)) { lf_print("lf_clearlock", unlock); + } #endif /* LOCKF_DEBUGGING */ prev = head; while ((ovcase = lf_findoverlap(lf, unlock, SELF, &prev, &overlap)) != OVERLAP_NONE) { /* * Wakeup the list of locks to be retried. 
*/ - lf_wakelock(overlap, FALSE); + lf_wakelock(overlap, FALSE); +#if IMPORTANCE_INHERITANCE + if (overlap->lf_boosted == LF_BOOSTED) { + lf_drop_assertion(overlap); + } +#endif /* IMPORTANCE_INHERITANCE */ switch (ovcase) { - case OVERLAP_NONE: /* satisfy compiler enum/switch */ + case OVERLAP_NONE: /* satisfy compiler enum/switch */ break; case OVERLAP_EQUALS_LOCK: @@ -775,8 +1018,9 @@ lf_clearlock(struct lockf *unlock) * If we can't split the lock, we can't grant it. * Claim a system limit for the resource shortage. */ - if (lf_split(overlap, unlock)) - return (ENOLCK); + if (lf_split(overlap, unlock)) { + return ENOLCK; + } overlap->lf_next = unlock->lf_next; break; @@ -799,10 +1043,11 @@ lf_clearlock(struct lockf *unlock) break; } #ifdef LOCKF_DEBUGGING - if (lockf_debug & 1) + if (LOCKF_DEBUGP(LF_DBG_LOCKOP)) { lf_printlist("lf_clearlock", unlock); + } #endif /* LOCKF_DEBUGGING */ - return (0); + return 0; } @@ -816,6 +1061,7 @@ lf_clearlock(struct lockf *unlock) * fl Pointer to flock structure to receive * the blocking lock information, if a * blocking lock is found. + * matchpid -1, or pid value to match in lookup. * * Returns: 0 Success * @@ -828,83 +1074,40 @@ lf_clearlock(struct lockf *unlock) * the blocking process ID for advisory record locks. */ static int -lf_getlock(struct lockf *lock, struct flock *fl) +lf_getlock(struct lockf *lock, struct flock *fl, pid_t matchpid) { struct lockf *block; #ifdef LOCKF_DEBUGGING - if (lockf_debug & 1) + if (LOCKF_DEBUGP(LF_DBG_LOCKOP)) { lf_print("lf_getlock", lock); + } #endif /* LOCKF_DEBUGGING */ - if ((block = lf_getblock(lock))) { + if ((block = lf_getblock(lock, matchpid))) { fl->l_type = block->lf_type; fl->l_whence = SEEK_SET; fl->l_start = block->lf_start; - if (block->lf_end == -1) + if (block->lf_end == -1) { fl->l_len = 0; - else + } else { fl->l_len = block->lf_end - block->lf_start + 1; - if (block->lf_flags & F_POSIX) - fl->l_pid = proc_pid((struct proc *)(block->lf_id)); - else + } + if (NULL != block->lf_owner) { + /* + * lf_owner is only non-NULL when the lock + * "owner" can be unambiguously determined + */ + fl->l_pid = proc_pid(block->lf_owner); + } else { fl->l_pid = -1; + } } else { fl->l_type = F_UNLCK; } - return (0); + return 0; } -#if CONFIG_EMBEDDED -int lf_getlockpid(struct vnode *vp, struct flock *fl) -{ - struct lockf *lf, *blk; - - if (vp == 0) - return EINVAL; - - fl->l_type = F_UNLCK; - - lck_mtx_lock(&vp->v_lock); - - for (lf = vp->v_lockf; lf; lf = lf->lf_next) { - - if (lf->lf_flags & F_POSIX) { - if ((((struct proc *)lf->lf_id)->p_pid) == fl->l_pid) { - fl->l_type = lf->lf_type; - fl->l_whence = SEEK_SET; - fl->l_start = lf->lf_start; - if (lf->lf_end == -1) - fl->l_len = 0; - else - fl->l_len = lf->lf_end - lf->lf_start + 1; - - break; - } - } - - TAILQ_FOREACH(blk, &lf->lf_blkhd, lf_block) { - if (blk->lf_flags & F_POSIX) { - if ((((struct proc *)blk->lf_id)->p_pid) == fl->l_pid) { - fl->l_type = blk->lf_type; - fl->l_whence = SEEK_SET; - fl->l_start = blk->lf_start; - if (blk->lf_end == -1) - fl->l_len = 0; - else - fl->l_len = blk->lf_end - blk->lf_start + 1; - - break; - } - } - } - } - - lck_mtx_unlock(&vp->v_lock); - return (0); -} -#endif - /* * lf_getblock * @@ -915,31 +1118,41 @@ int lf_getlockpid(struct vnode *vp, struct flock *fl) * * Parameters: lock The lock for which we are interested * in obtaining the blocking lock, if any + * matchpid -1, or pid value to match in lookup. * * Returns: NOLOCKF No blocking lock exists * !NOLOCKF The address of the blocking lock's * struct lockf. 
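
The userland view of lf_getlock() is the standard F_GETLK probe. Note the ownership caveat documented above: l_pid is filled in only when the kernel can unambiguously name an owner (lf_owner != NULL), and comes back -1 otherwise.

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/types.h>

    /* Ask "who would block a write lock on the whole file?". On return,
     * *either* l_type is F_UNLCK (no conflict) or the struct describes
     * the first blocking lock (l_len == 0 meaning "to end of file"). */
    int
    who_blocks(int fd)
    {
        struct flock fl;

        memset(&fl, 0, sizeof(fl));
        fl.l_type = F_WRLCK;           /* the probe */
        fl.l_whence = SEEK_SET;
        fl.l_start = 0;
        fl.l_len = 0;                  /* whole file */

        if (fcntl(fd, F_GETLK, &fl) == -1)
            return -1;
        if (fl.l_type == F_UNLCK)
            printf("no conflicting lock\n");
        else
            printf("blocked by pid %d, start %lld, len %lld\n",
                (int)fl.l_pid, (long long)fl.l_start, (long long)fl.l_len);
        return 0;
    }
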
*/ static struct lockf * -lf_getblock(struct lockf *lock) +lf_getblock(struct lockf *lock, pid_t matchpid) { struct lockf **prev, *overlap, *lf = *(lock->lf_head); - int ovcase; - prev = lock->lf_head; - while ((ovcase = lf_findoverlap(lf, lock, OTHERS, &prev, &overlap)) != OVERLAP_NONE) { + for (prev = lock->lf_head; + lf_findoverlap(lf, lock, OTHERS, &prev, &overlap) != OVERLAP_NONE; + lf = overlap->lf_next) { /* - * We've found an overlap, see if it blocks us + * Found an overlap. + * + * If we're matching pids, and it's a record lock, + * or it's an OFD lock on a process-confined fd, + * but the pid doesn't match, then keep on looking .. */ - if ((lock->lf_type == F_WRLCK || overlap->lf_type == F_WRLCK)) - return (overlap); + if (matchpid != -1 && + (overlap->lf_flags & (F_POSIX | F_OFD_LOCK)) != 0 && + proc_pid(overlap->lf_owner) != matchpid) { + continue; + } + /* - * Nope, point to the next one on the list and - * see if it blocks us + * does it block us? */ - lf = overlap->lf_next; + if ((lock->lf_type == F_WRLCK || overlap->lf_type == F_WRLCK)) { + return overlap; + } } - return (NOLOCKF); + return NOLOCKF; } @@ -970,7 +1183,7 @@ lf_getblock(struct lockf *lock) * this is generally used to relink the * lock list, avoiding a second iteration. * *overlap The pointer to the overlapping lock - * itself; this is ussed to return data in + * itself; this is used to return data in * the check == OTHERS case, and for the * caller to modify the overlapping lock, * in the check == SELF case @@ -989,30 +1202,32 @@ lf_getblock(struct lockf *lock) */ static overlap_t lf_findoverlap(struct lockf *lf, struct lockf *lock, int type, - struct lockf ***prev, struct lockf **overlap) + struct lockf ***prev, struct lockf **overlap) { off_t start, end; int found_self = 0; *overlap = lf; - if (lf == NOLOCKF) - return (0); + if (lf == NOLOCKF) { + return 0; + } #ifdef LOCKF_DEBUGGING - if (lockf_debug & 2) + if (LOCKF_DEBUGP(LF_DBG_LIST)) { lf_print("lf_findoverlap: looking for overlap in", lock); + } #endif /* LOCKF_DEBUGGING */ start = lock->lf_start; end = lock->lf_end; while (lf != NOLOCKF) { if (((type & SELF) && lf->lf_id != lock->lf_id) || ((type & OTHERS) && lf->lf_id == lock->lf_id)) { - /* + /* * Locks belonging to one process are adjacent on the * list, so if we've found any locks belonging to us, * and we're now seeing something else, then we've * examined all "self" locks. Note that bailing out - * here is quite important; for coalescing, we assume - * numerically adjacent locks from the same owner to + * here is quite important; for coalescing, we assume + * numerically adjacent locks from the same owner to * be adjacent on the list. */ if ((type & SELF) && found_self) { @@ -1029,8 +1244,9 @@ lf_findoverlap(struct lockf *lf, struct lockf *lock, int type, } #ifdef LOCKF_DEBUGGING - if (lockf_debug & 2) + if (LOCKF_DEBUGP(LF_DBG_LIST)) { lf_print("\tchecking", lf); + } #endif /* LOCKF_DEBUGGING */ /* * OK, check for overlap @@ -1038,48 +1254,49 @@ lf_findoverlap(struct lockf *lf, struct lockf *lock, int type, if ((lf->lf_end != -1 && start > lf->lf_end) || (end != -1 && lf->lf_start > end)) { /* Case 0 */ - LOCKF_DEBUG(2, "no overlap\n"); + LOCKF_DEBUG(LF_DBG_LIST, "no overlap\n"); /* - * NOTE: assumes that locks for the same process are + * NOTE: assumes that locks for the same process are * nonintersecting and ordered. 
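
The six-way classification lf_findoverlap() performs can be restated as a pure function over closed ranges; the model below uses the same predicates as the code that follows, with end == -1 meaning "to end of file".

    #include <stdio.h>
    #include <sys/types.h>

    typedef enum {
        OVERLAP_NONE = 0,
        OVERLAP_EQUALS_LOCK,
        OVERLAP_CONTAINS_LOCK,
        OVERLAP_CONTAINED_BY_LOCK,
        OVERLAP_STARTS_BEFORE_LOCK,
        OVERLAP_ENDS_AFTER_LOCK
    } overlap_t;

    /* lf is the existing lock, [start, end] the incoming request. */
    static overlap_t
    classify(off_t lf_start, off_t lf_end, off_t start, off_t end)
    {
        if ((lf_end != -1 && start > lf_end) || (end != -1 && lf_start > end))
            return OVERLAP_NONE;
        if (lf_start == start && lf_end == end)
            return OVERLAP_EQUALS_LOCK;
        if (lf_start <= start && end != -1 && (lf_end >= end || lf_end == -1))
            return OVERLAP_CONTAINS_LOCK;
        if (start <= lf_start && (end == -1 || (lf_end != -1 && end >= lf_end)))
            return OVERLAP_CONTAINED_BY_LOCK;
        if (lf_start < start && (lf_end >= start || lf_end == -1))
            return OVERLAP_STARTS_BEFORE_LOCK;
        return OVERLAP_ENDS_AFTER_LOCK;
    }

    int
    main(void)
    {
        /* Existing [100,199] vs request [150,249]: case 4,
         * "overlap starts before lock". */
        printf("%d\n", classify(100, 199, 150, 249));
        return 0;
    }
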
*/ - if ((type & SELF) && end != -1 && lf->lf_start > end) - return (OVERLAP_NONE); + if ((type & SELF) && end != -1 && lf->lf_start > end) { + return OVERLAP_NONE; + } *prev = &lf->lf_next; *overlap = lf = lf->lf_next; continue; } if ((lf->lf_start == start) && (lf->lf_end == end)) { - LOCKF_DEBUG(2, "overlap == lock\n"); - return (OVERLAP_EQUALS_LOCK); + LOCKF_DEBUG(LF_DBG_LIST, "overlap == lock\n"); + return OVERLAP_EQUALS_LOCK; } if ((lf->lf_start <= start) && (end != -1) && ((lf->lf_end >= end) || (lf->lf_end == -1))) { - LOCKF_DEBUG(2, "overlap contains lock\n"); - return (OVERLAP_CONTAINS_LOCK); + LOCKF_DEBUG(LF_DBG_LIST, "overlap contains lock\n"); + return OVERLAP_CONTAINS_LOCK; } if (start <= lf->lf_start && - (end == -1 || - (lf->lf_end != -1 && end >= lf->lf_end))) { - LOCKF_DEBUG(2, "lock contains overlap\n"); - return (OVERLAP_CONTAINED_BY_LOCK); + (end == -1 || + (lf->lf_end != -1 && end >= lf->lf_end))) { + LOCKF_DEBUG(LF_DBG_LIST, "lock contains overlap\n"); + return OVERLAP_CONTAINED_BY_LOCK; } if ((lf->lf_start < start) && - ((lf->lf_end >= start) || (lf->lf_end == -1))) { - LOCKF_DEBUG(2, "overlap starts before lock\n"); - return (OVERLAP_STARTS_BEFORE_LOCK); + ((lf->lf_end >= start) || (lf->lf_end == -1))) { + LOCKF_DEBUG(LF_DBG_LIST, "overlap starts before lock\n"); + return OVERLAP_STARTS_BEFORE_LOCK; } if ((lf->lf_start > start) && - (end != -1) && - ((lf->lf_end > end) || (lf->lf_end == -1))) { - LOCKF_DEBUG(2, "overlap ends after lock\n"); - return (OVERLAP_ENDS_AFTER_LOCK); + (end != -1) && + ((lf->lf_end > end) || (lf->lf_end == -1))) { + LOCKF_DEBUG(LF_DBG_LIST, "overlap ends after lock\n"); + return OVERLAP_ENDS_AFTER_LOCK; } panic("lf_findoverlap: default"); } - return (OVERLAP_NONE); + return OVERLAP_NONE; } @@ -1112,32 +1329,33 @@ lf_split(struct lockf *lock1, struct lockf *lock2) struct lockf *splitlock; #ifdef LOCKF_DEBUGGING - if (lockf_debug & 2) { + if (LOCKF_DEBUGP(LF_DBG_LIST)) { lf_print("lf_split", lock1); lf_print("splitting from", lock2); } #endif /* LOCKF_DEBUGGING */ /* - * Check to see if spliting into only two pieces. + * Check to see if splitting into only two pieces. 
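
A worked example of what lf_split() must produce, including the two edge-sharing trims referred to above: unlocking [400,499] out of a lock covering [0,999] leaves [0,399] and a new piece [500,999]. A standalone model (ENOLCK handling and list linkage omitted):

    #include <stdio.h>
    #include <sys/types.h>

    struct range { off_t start, end; };

    /* Carve *lock2 out of *lock1. Returns the number of fragments of
     * the original lock: 1 if it was merely trimmed (shared edge), 2 if
     * a trailing piece had to be created in *tail. */
    static int
    split(struct range *lock1, const struct range *lock2, struct range *tail)
    {
        if (lock1->start == lock2->start) {  /* shared left edge: trim */
            lock1->start = lock2->end + 1;
            return 1;
        }
        if (lock1->end == lock2->end) {      /* shared right edge: trim */
            lock1->end = lock2->start - 1;
            return 1;
        }
        tail->start = lock2->end + 1;        /* true split: make the tail */
        tail->end = lock1->end;
        lock1->end = lock2->start - 1;
        return 2;
    }

    int
    main(void)
    {
        struct range l = { 0, 999 }, req = { 400, 499 }, tail;

        if (split(&l, &req, &tail) == 2)
            printf("[%lld,%lld] and [%lld,%lld]\n",
                (long long)l.start, (long long)l.end,
                (long long)tail.start, (long long)tail.end);
        return 0;
    }
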
*/ if (lock1->lf_start == lock2->lf_start) { lock1->lf_start = lock2->lf_end + 1; lock2->lf_next = lock1; - return (0); + return 0; } if (lock1->lf_end == lock2->lf_end) { lock1->lf_end = lock2->lf_start - 1; lock2->lf_next = lock1->lf_next; lock1->lf_next = lock2; - return (0); + return 0; } /* * Make a new lock consisting of the last part of * the encompassing lock */ MALLOC(splitlock, struct lockf *, sizeof *splitlock, M_LOCKF, M_WAITOK); - if (splitlock == NULL) - return (ENOLCK); + if (splitlock == NULL) { + return ENOLCK; + } bcopy(lock1, splitlock, sizeof *splitlock); splitlock->lf_start = lock2->lf_end + 1; TAILQ_INIT(&splitlock->lf_blkhd); @@ -1149,7 +1367,7 @@ lf_split(struct lockf *lock1, struct lockf *lock2) lock2->lf_next = splitlock; lock1->lf_next = lock2; - return (0); + return 0; } @@ -1177,8 +1395,9 @@ lf_wakelock(struct lockf *listhead, boolean_t force_all) struct lockf *wakelock; boolean_t wake_all = TRUE; - if (force_all == FALSE && (listhead->lf_flags & F_WAKE1_SAFE)) - wake_all = FALSE; + if (force_all == FALSE && (listhead->lf_flags & F_WAKE1_SAFE)) { + wake_all = FALSE; + } while (!TAILQ_EMPTY(&listhead->lf_blkhd)) { wakelock = TAILQ_FIRST(&listhead->lf_blkhd); @@ -1186,8 +1405,9 @@ lf_wakelock(struct lockf *listhead, boolean_t force_all) wakelock->lf_next = NOLOCKF; #ifdef LOCKF_DEBUGGING - if (lockf_debug & 2) + if (LOCKF_DEBUGP(LF_DBG_LOCKOP)) { lf_print("lf_wakelock: awakening", wakelock); + } #endif /* LOCKF_DEBUGGING */ if (wake_all == FALSE) { /* @@ -1198,22 +1418,29 @@ lf_wakelock(struct lockf *listhead, boolean_t force_all) if (!TAILQ_EMPTY(&listhead->lf_blkhd)) { TAILQ_CONCAT(&wakelock->lf_blkhd, &listhead->lf_blkhd, lf_block); - struct lockf *tlock; + struct lockf *tlock; - TAILQ_FOREACH(tlock, &wakelock->lf_blkhd, lf_block) { - tlock->lf_next = wakelock; + TAILQ_FOREACH(tlock, &wakelock->lf_blkhd, lf_block) { + if (TAILQ_NEXT(tlock, lf_block) == tlock) { + /* See rdar://10887303 */ + panic("cycle in wakelock list"); + } + tlock->lf_next = wakelock; } } } wakeup(wakelock); - if (wake_all == FALSE) - break; + if (wake_all == FALSE) { + break; + } } } #ifdef LOCKF_DEBUGGING +#define GET_LF_OWNER_PID(lf) (proc_pid((lf)->lf_owner)) + /* * lf_print DEBUG * @@ -1229,27 +1456,34 @@ void lf_print(const char *tag, struct lockf *lock) { printf("%s: lock %p for ", tag, (void *)lock); - if (lock->lf_flags & F_POSIX) - printf("proc %ld", (long)((struct proc *)lock->lf_id)->p_pid); - else + if (lock->lf_flags & F_POSIX) { + printf("proc %p (owner %d)", + lock->lf_id, GET_LF_OWNER_PID(lock)); + } else if (lock->lf_flags & F_OFD_LOCK) { + printf("fg %p (owner %d)", + lock->lf_id, GET_LF_OWNER_PID(lock)); + } else { printf("id %p", (void *)lock->lf_id); - if (lock->lf_vnode != 0) + } + if (lock->lf_vnode != 0) { printf(" in vno %p, %s, start 0x%016llx, end 0x%016llx", lock->lf_vnode, lock->lf_type == F_RDLCK ? "shared" : lock->lf_type == F_WRLCK ? "exclusive" : lock->lf_type == F_UNLCK ? "unlock" : "unknown", (intmax_t)lock->lf_start, (intmax_t)lock->lf_end); - else + } else { printf(" %s, start 0x%016llx, end 0x%016llx", lock->lf_type == F_RDLCK ? "shared" : lock->lf_type == F_WRLCK ? "exclusive" : lock->lf_type == F_UNLCK ? 
"unlock" : "unknown", (intmax_t)lock->lf_start, (intmax_t)lock->lf_end); - if (!TAILQ_EMPTY(&lock->lf_blkhd)) + } + if (!TAILQ_EMPTY(&lock->lf_blkhd)) { printf(" block %p\n", (void *)TAILQ_FIRST(&lock->lf_blkhd)); - else + } else { printf("\n"); + } } @@ -1270,18 +1504,23 @@ lf_printlist(const char *tag, struct lockf *lock) { struct lockf *lf, *blk; - if (lock->lf_vnode == 0) + if (lock->lf_vnode == 0) { return; + } printf("%s: Lock list for vno %p:\n", tag, lock->lf_vnode); for (lf = lock->lf_vnode->v_lockf; lf; lf = lf->lf_next) { - printf("\tlock %p for ",(void *)lf); - if (lf->lf_flags & F_POSIX) - printf("proc %ld", - (long)((struct proc *)lf->lf_id)->p_pid); - else + printf("\tlock %p for ", (void *)lf); + if (lf->lf_flags & F_POSIX) { + printf("proc %p (owner %d)", + lf->lf_id, GET_LF_OWNER_PID(lf)); + } else if (lf->lf_flags & F_OFD_LOCK) { + printf("fg %p (owner %d)", + lf->lf_id, GET_LF_OWNER_PID(lf)); + } else { printf("id %p", (void *)lf->lf_id); + } printf(", %s, start 0x%016llx, end 0x%016llx", lf->lf_type == F_RDLCK ? "shared" : lf->lf_type == F_WRLCK ? "exclusive" : @@ -1289,21 +1528,172 @@ lf_printlist(const char *tag, struct lockf *lock) "unknown", (intmax_t)lf->lf_start, (intmax_t)lf->lf_end); TAILQ_FOREACH(blk, &lf->lf_blkhd, lf_block) { printf("\n\t\tlock request %p for ", (void *)blk); - if (blk->lf_flags & F_POSIX) - printf("proc %ld", - (long)((struct proc *)blk->lf_id)->p_pid); - else + if (blk->lf_flags & F_POSIX) { + printf("proc %p (owner %d)", + blk->lf_id, GET_LF_OWNER_PID(blk)); + } else if (blk->lf_flags & F_OFD_LOCK) { + printf("fg %p (owner %d)", + blk->lf_id, GET_LF_OWNER_PID(blk)); + } else { printf("id %p", (void *)blk->lf_id); + } printf(", %s, start 0x%016llx, end 0x%016llx", blk->lf_type == F_RDLCK ? "shared" : blk->lf_type == F_WRLCK ? "exclusive" : blk->lf_type == F_UNLCK ? "unlock" : "unknown", (intmax_t)blk->lf_start, (intmax_t)blk->lf_end); - if (!TAILQ_EMPTY(&blk->lf_blkhd)) + if (!TAILQ_EMPTY(&blk->lf_blkhd)) { panic("lf_printlist: bad list"); + } } printf("\n"); } } #endif /* LOCKF_DEBUGGING */ + +#if IMPORTANCE_INHERITANCE + +/* + * lf_hold_assertion + * + * Call task importance hold assertion on the owner of the lock. + * + * Parameters: block_task Owner of the lock blocking + * current thread. + * + * block lock on which the current thread + * is blocking on. + * + * Returns: + * + * Notes: The task reference on block_task is not needed to be hold since + * the current thread has vnode lock and block_task has a file + * lock, thus removing file lock in exit requires block_task to + * grab the vnode lock. + */ +static void +lf_hold_assertion(task_t block_task, struct lockf *block) +{ + if (task_importance_hold_file_lock_assertion(block_task, 1) == 0) { + block->lf_boosted = LF_BOOSTED; + LOCKF_DEBUG(LF_DBG_IMPINH, + "lf: importance hold file lock assert on pid %d lock %p\n", + proc_pid(block->lf_owner), block); + } +} + + +/* + * lf_jump_to_queue_head + * + * Jump the lock from the tail of the block queue to the head of + * the queue. + * + * Parameters: block lockf struct containing the + * block queue. + * lock lockf struct to be jumped to the + * front. + * + * Returns: + */ +static void +lf_jump_to_queue_head(struct lockf *block, struct lockf *lock) +{ + /* Move the lock to the head of the block queue. */ + TAILQ_REMOVE(&block->lf_blkhd, lock, lf_block); + TAILQ_INSERT_HEAD(&block->lf_blkhd, lock, lf_block); +} + + +/* + * lf_drop_assertion + * + * Drops the task hold assertion. 
+ * + * Parameters: block lockf struct holding the assertion. + * + * Returns: + */ +static void +lf_drop_assertion(struct lockf *block) +{ + LOCKF_DEBUG(LF_DBG_IMPINH, "lf: %d: dropping assertion for lock %p\n", + proc_pid(block->lf_owner), block); + + task_t current_task = proc_task(block->lf_owner); + task_importance_drop_file_lock_assertion(current_task, 1); + block->lf_boosted = LF_NOT_BOOSTED; +} + +/* + * lf_adjust_assertion + * + * Adjusts importance assertion of file lock. Goes through + * all the blocking locks and checks if the file lock needs + * to be boosted anymore. + * + * Parameters: block lockf structure which needs to be adjusted. + * + * Returns: + */ +static void +lf_adjust_assertion(struct lockf *block) +{ + boolean_t drop_boost = TRUE; + struct lockf *next; + + /* Return if the lock is not boosted */ + if (block->lf_boosted == LF_NOT_BOOSTED) { + return; + } + + TAILQ_FOREACH(next, &block->lf_blkhd, lf_block) { + /* Check if block and next are same type of locks */ + if (((block->lf_flags & next->lf_flags & F_POSIX) != 0) || + ((block->lf_flags & next->lf_flags & F_OFD_LOCK) && + (block->lf_owner != next->lf_owner) && + (NULL != block->lf_owner && NULL != next->lf_owner))) { + /* Check if next would be boosting block */ + if (task_is_importance_donor(proc_task(next->lf_owner)) && + task_is_importance_receiver_type(proc_task(block->lf_owner))) { + /* Found a lock boosting block */ + drop_boost = FALSE; + break; + } + } + } + + if (drop_boost) { + lf_drop_assertion(block); + } +} + +static void +lf_boost_blocking_proc(struct lockf *lock, struct lockf *block) +{ + task_t ltask = proc_task(lock->lf_owner); + task_t btask = proc_task(block->lf_owner); + + /* + * Check if ltask can donate importance. The + * check of imp_donor bit is done without holding + * any lock. The value may change after you read it, + * but it is ok to boost a task while someone else is + * unboosting you. + * + * TODO: Support live inheritance on file locks. + */ + if (task_is_importance_donor(ltask)) { + LOCKF_DEBUG(LF_DBG_IMPINH, + "lf: %d: attempt to boost pid %d that holds lock %p\n", + proc_pid(lock->lf_owner), proc_pid(block->lf_owner), block); + + if (block->lf_boosted != LF_BOOSTED && + task_is_importance_receiver_type(btask)) { + lf_hold_assertion(btask, block); + } + lf_jump_to_queue_head(block, lock); + } +} +#endif /* IMPORTANCE_INHERITANCE */