xnu-6153.81.5.tar.gz

[apple/xnu.git] / bsd / kern / kern_lockf.c
diff --git a/bsd/kern/kern_lockf.c b/bsd/kern/kern_lockf.c

index 13e4c97dbe02c6d8c58a58f3a8ac4f9a891a21c2..d67a8f84bb982aad3fb861bf1cb0c5c84263e890 100644 (file)
--- a/bsd/kern/kern_lockf.c
+++ b/bsd/kern/kern_lockf.c
@@ -1,8 +1,8 @@
  /*
- * Copyright (c) 2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2019 Apple Computer, Inc. All rights reserved.
   *
   * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
+ *
   * This file contains Original Code and/or Modifications of Original Code
   * as defined in and that are subject to the Apple Public Source License
   * Version 2.0 (the 'License'). You may not use this file except in
@@ -11,10 +11,10 @@
   * unlawful or unlicensed copies of an Apple operating system, or to
   * circumvent, violate, or enable the circumvention or violation of, any
   * terms of an Apple operating system software license agreement.
- * 
+ *
   * Please obtain a copy of the License at
   * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
+ *
   * The Original Code and all software distributed under the License are
   * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
   * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
@@ -22,7 +22,7 @@
   * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
   * Please see the License for the specific language governing rights and
   * limitations under the License.
- * 
+ *
   * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
   */
  /*
@@ -75,42 +75,55 @@
  #include <sys/malloc.h>
  #include <sys/fcntl.h>
  #include <sys/lockf.h>
+#include <sys/sdt.h>
+#include <kern/policy_internal.h>
  
-/*
- * This variable controls the maximum number of processes that will
- * be checked in doing deadlock detection.
- */
-static int maxlockdepth = MAXDEPTH;
+#include <sys/file_internal.h>
+
+#if (DEVELOPMENT || DEBUG)
+#define LOCKF_DEBUGGING 1
+#endif
  
  #ifdef LOCKF_DEBUGGING
  #include <sys/sysctl.h>
-#include <ufs/ufs/quota.h>
-#include <ufs/ufs/inode.h>
  void lf_print(const char *tag, struct lockf *lock);
  void lf_printlist(const char *tag, struct lockf *lock);
-static int     lockf_debug = 2;
+
+#define LF_DBG_LOCKOP   (1 << 0)        /* setlk, getlk, clearlk */
+#define LF_DBG_LIST     (1 << 1)        /* split, coalesce */
+#define LF_DBG_IMPINH   (1 << 2)        /* importance inheritance */
+#define LF_DBG_TRACE    (1 << 3)        /* errors, exit */
+#define LF_DBG_DEADLOCK (1 << 4)        /* deadlock detection */
+
+static int      lockf_debug = 0;        /* was 2, could be 3 ;-) */
  SYSCTL_INT(_debug, OID_AUTO, lockf_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &lockf_debug, 0, "");
  
  /*
- * If there is no mask bit selector, or there is on, and the selector is
+ * If there is no mask bit selector, or there is one, and the selector is
   * set, then output the debugging diagnostic.
   */
-#define LOCKF_DEBUG(mask, ...)                                 \
-       do {                                                    \
-               if( !(mask) || ((mask) & lockf_debug)) {        \
-                       printf(__VA_ARGS__);                    \
-               }                                               \
+#define LOCKF_DEBUG(mask, ...)                                  \
+       do {                                                    \
+               if (!(mask) || ((mask) & lockf_debug)) {        \
+                       printf("%s>", __FUNCTION__);            \
+                       printf(__VA_ARGS__);                    \
+               }                                               \
         } while(0)
-#else  /* !LOCKF_DEBUGGING */
-#define LOCKF_DEBUG(mask, ...)         /* mask */
-#endif /* !LOCKF_DEBUGGING */
+
+#define LOCKF_DEBUGP(mask)                                      \
+       ({                                                      \
+               ((mask) & lockf_debug);                         \
+       })
+#else   /* !LOCKF_DEBUGGING */
+#define LOCKF_DEBUG(mask, ...)          /* mask */
+#endif  /* !LOCKF_DEBUGGING */
  
  MALLOC_DEFINE(M_LOCKF, "lockf", "Byte-range locking structures");
  
  #define NOLOCKF (struct lockf *)0
-#define SELF   0x1
-#define OTHERS 0x2
-#define OFF_MAX        0x7fffffffffffffffULL   /* max off_t */
+#define SELF    0x1
+#define OTHERS  0x2
+#define OFF_MAX 0x7fffffffffffffffULL   /* max off_t */
  
  /*
   * Overlapping lock states
@@ -124,14 +137,31 @@ typedef enum {
         OVERLAP_ENDS_AFTER_LOCK
  } overlap_t;
  
-static int      lf_clearlock(struct lockf *);
+static int       lf_clearlock(struct lockf *);
  static overlap_t lf_findoverlap(struct lockf *,
-           struct lockf *, int, struct lockf ***, struct lockf **);
+    struct lockf *, int, struct lockf ***, struct lockf **);
  static struct lockf *lf_getblock(struct lockf *, pid_t);
-static int      lf_getlock(struct lockf *, struct flock *, pid_t);
-static int      lf_setlock(struct lockf *);
-static int      lf_split(struct lockf *, struct lockf *);
-static void     lf_wakelock(struct lockf *, boolean_t);
+static int       lf_getlock(struct lockf *, struct flock *, pid_t);
+static int       lf_setlock(struct lockf *, struct timespec *);
+static int       lf_split(struct lockf *, struct lockf *);
+static void      lf_wakelock(struct lockf *, boolean_t);
+#if IMPORTANCE_INHERITANCE
+static void      lf_hold_assertion(task_t, struct lockf *);
+static void      lf_jump_to_queue_head(struct lockf *, struct lockf *);
+static void      lf_drop_assertion(struct lockf *);
+static void      lf_boost_blocking_proc(struct lockf *, struct lockf *);
+static void      lf_adjust_assertion(struct lockf *block);
+#endif /* IMPORTANCE_INHERITANCE */
+
+static lck_mtx_t lf_dead_lock;
+static lck_grp_t *lf_dead_lock_grp;
+
+void
+lf_init(void)  /* Initializes lf_dead_lock and the lock group it belongs to. */
+{
+       lf_dead_lock_grp = lck_grp_alloc_init("lf_dead_lock", LCK_GRP_ATTR_NULL);       /* group must exist before the mutex is initialized against it */
+       lck_mtx_init(&lf_dead_lock, lf_dead_lock_grp, LCK_ATTR_NULL);   /* mutex held by lf_setlock around its POSIX deadlock scan */
+}
  
  /*
   * lf_advlock
@@ -150,6 +180,7 @@ static void  lf_wakelock(struct lockf *, boolean_t);
   *     lf_setlock:EDEADLK
   *     lf_setlock:EINTR
   *     lf_setlock:ENOLCK
+ *     lf_setlock:ETIMEDOUT
   *     lf_clearlock:ENOLCK
   *     vnode_size:???
   *
@@ -177,8 +208,10 @@ lf_advlock(struct vnop_advlock_args *ap)
         if (*head == (struct lockf *)0) {
                 if (ap->a_op != F_SETLK) {
                         fl->l_type = F_UNLCK;
-                       LOCKF_DEBUG(0, "lf_advlock: '%s' unlock without lock\n", vfs_context_proc(context)->p_comm);
-                       return (0);
+                       LOCKF_DEBUG(LF_DBG_TRACE,
+                           "lf_advlock: '%s' unlock without lock\n",
+                           vfs_context_proc(context)->p_comm);
+                       return 0;
                 }
         }
  
@@ -186,7 +219,6 @@ lf_advlock(struct vnop_advlock_args *ap)
          * Convert the flock structure into a start and end.
          */
         switch (fl->l_whence) {
-
         case SEEK_SET:
         case SEEK_CUR:
                 /*
@@ -205,43 +237,49 @@ lf_advlock(struct vnop_advlock_args *ap)
                  * do this because we will use size to force range checks.
                  */
                 if ((error = vnode_size(vp, (off_t *)&size, context))) {
-                       LOCKF_DEBUG(0, "lf_advlock: vnode_getattr failed: %d\n", error);
-                       return (error);
+                       LOCKF_DEBUG(LF_DBG_TRACE,
+                           "lf_advlock: vnode_getattr failed: %d\n", error);
+                       return error;
                 }
  
                 if (size > OFF_MAX ||
                     (fl->l_start > 0 &&
-                    size > (u_quad_t)(OFF_MAX - fl->l_start)))
-                       return (EOVERFLOW);
+                   size > (u_quad_t)(OFF_MAX - fl->l_start))) {
+                       return EOVERFLOW;
+               }
                 start = size + fl->l_start;
                 break;
  
         default:
-               LOCKF_DEBUG(0, "lf_advlock: unknown whence %d\n", fl->l_whence);
-               return (EINVAL);
+               LOCKF_DEBUG(LF_DBG_TRACE, "lf_advlock: unknown whence %d\n",
+                   fl->l_whence);
+               return EINVAL;
         }
         if (start < 0) {
-               LOCKF_DEBUG(0, "lf_advlock: start < 0 (%qd)\n", start);
-               return (EINVAL);
+               LOCKF_DEBUG(LF_DBG_TRACE, "lf_advlock: start < 0 (%qd)\n",
+                   start);
+               return EINVAL;
         }
         if (fl->l_len < 0) {
                 if (start == 0) {
-                       LOCKF_DEBUG(0, "lf_advlock: len < 0 & start == 0\n");
-                       return (EINVAL);
+                       LOCKF_DEBUG(LF_DBG_TRACE,
+                           "lf_advlock: len < 0 & start == 0\n");
+                       return EINVAL;
                 }
                 end = start - 1;
                 start += fl->l_len;
                 if (start < 0) {
-                       LOCKF_DEBUG(0, "lf_advlock: start < 0 (%qd)\n", start);
-                       return (EINVAL);
+                       LOCKF_DEBUG(LF_DBG_TRACE,
+                           "lf_advlock: start < 0 (%qd)\n", start);
+                       return EINVAL;
                 }
-       } else if (fl->l_len == 0)
+       } else if (fl->l_len == 0) {
                 end = -1;
-       else {
+       } else {
                 oadd = fl->l_len - 1;
                 if (oadd > (off_t)(OFF_MAX - start)) {
-                       LOCKF_DEBUG(0, "lf_advlock: overflow\n");
-                       return (EOVERFLOW);
+                       LOCKF_DEBUG(LF_DBG_TRACE, "lf_advlock: overflow\n");
+                       return EOVERFLOW;
                 }
                 end = start + oadd;
         }
@@ -249,8 +287,9 @@ lf_advlock(struct vnop_advlock_args *ap)
          * Create the lockf structure
          */
         MALLOC(lock, struct lockf *, sizeof *lock, M_LOCKF, M_WAITOK);
-       if (lock == NULL)
-               return (ENOLCK);
+       if (lock == NULL) {
+               return ENOLCK;
+       }
         lock->lf_start = start;
         lock->lf_end = end;
         lock->lf_id = ap->a_id;
@@ -260,17 +299,40 @@ lf_advlock(struct vnop_advlock_args *ap)
         lock->lf_next = (struct lockf *)0;
         TAILQ_INIT(&lock->lf_blkhd);
         lock->lf_flags = ap->a_flags;
+#if IMPORTANCE_INHERITANCE
+       lock->lf_boosted = LF_NOT_BOOSTED;
+#endif
+       if (ap->a_flags & F_POSIX) {
+               lock->lf_owner = (struct proc *)lock->lf_id;
+       } else {
+               lock->lf_owner = NULL;
+       }
  
-       if (ap->a_flags & F_FLOCK)
-               lock->lf_flags |= F_WAKE1_SAFE;
+       if (ap->a_flags & F_FLOCK) {
+               lock->lf_flags |= F_WAKE1_SAFE;
+       }
  
-       lck_mtx_lock(&vp->v_lock);      /* protect the lockf list */
+       lck_mtx_lock(&vp->v_lock);      /* protect the lockf list */
         /*
          * Do the requested operation.
          */
-       switch(ap->a_op) {
+       switch (ap->a_op) {
         case F_SETLK:
-               error = lf_setlock(lock);
+               /*
+                * For F_OFD_* locks, lf_id is the fileglob.
+                * Record an "lf_owner" iff this is a confined fd
+                * i.e. it cannot escape this process and will be
+                * F_UNLCKed before the owner exits.  (This is
+                * the implicit guarantee needed to ensure lf_owner
+                * remains a valid reference here.)
+                */
+               if (ap->a_flags & F_OFD_LOCK) {
+                       struct fileglob *fg = (void *)lock->lf_id;
+                       if (fg->fg_lflags & FG_CONFINED) {
+                               lock->lf_owner = current_proc();
+                       }
+               }
+               error = lf_setlock(lock, ap->a_timeout);
                 break;
  
         case F_UNLCK:
@@ -283,22 +345,20 @@ lf_advlock(struct vnop_advlock_args *ap)
                 FREE(lock, M_LOCKF);
                 break;
  
-#if CONFIG_EMBEDDED
         case F_GETLKPID:
                 error = lf_getlock(lock, fl, fl->l_pid);
                 FREE(lock, M_LOCKF);
                 break;
-#endif
  
         default:
                 FREE(lock, M_LOCKF);
                 error = EINVAL;
                 break;
         }
-       lck_mtx_unlock(&vp->v_lock);    /* done manipulating the list */
+       lck_mtx_unlock(&vp->v_lock);    /* done manipulating the list */
  
-       LOCKF_DEBUG(0, "lf_advlock: normal exit: %d\n\n", error);
-       return (error);
+       LOCKF_DEBUG(LF_DBG_TRACE, "lf_advlock: normal exit: %d\n", error);
+       return error;
  }
  
  /*
@@ -312,8 +372,9 @@ lf_abort_advlocks(vnode_t vp)
  {
         struct lockf *lock;
  
-       if ((lock = vp->v_lockf) == NULL)
-               return; 
+       if ((lock = vp->v_lockf) == NULL) {
+               return;
+       }
  
         lck_mtx_assert(&vp->v_lock, LCK_MTX_ASSERT_OWNED);
  
@@ -377,14 +438,14 @@ lf_coalesce_adjacent(struct lockf *lock)
                 }
  
                 /*
-                * NOTE: Assumes that if two locks are adjacent on the number line 
+                * NOTE: Assumes that if two locks are adjacent on the number line
                  * and belong to the same owner, then they are adjacent on the list.
                  */
                 if ((*lf)->lf_end != -1 &&
                     ((*lf)->lf_end + 1) == lock->lf_start) {
                         struct lockf *adjacent = *lf;
  
-                       LOCKF_DEBUG(0, "lf_coalesce_adjacent: coalesce adjacent previous\n");
+                       LOCKF_DEBUG(LF_DBG_LIST, "lf_coalesce_adjacent: coalesce adjacent previous\n");
                         lock->lf_start = (*lf)->lf_start;
                         *lf = lock;
                         lf = &(*lf)->lf_next;
@@ -399,7 +460,7 @@ lf_coalesce_adjacent(struct lockf *lock)
                     (lock->lf_end + 1) == (*lf)->lf_start) {
                         struct lockf *adjacent = *lf;
  
-                       LOCKF_DEBUG(0, "lf_coalesce_adjacent: coalesce adjacent following\n");
+                       LOCKF_DEBUG(LF_DBG_LIST, "lf_coalesce_adjacent: coalesce adjacent following\n");
                         lock->lf_end = (*lf)->lf_end;
                         lock->lf_next = (*lf)->lf_next;
                         lf = &lock->lf_next;
@@ -415,7 +476,6 @@ lf_coalesce_adjacent(struct lockf *lock)
         }
  }
  
-
  /*
   * lf_setlock
   *
@@ -427,42 +487,49 @@ lf_coalesce_adjacent(struct lockf *lock)
   *                                     the set is successful, and freed if the
   *                                     set is unsuccessful.
   *
+ *             timeout                 Timeout specified in the case of
+ *                                      SETLKWTIMEOUT.
+ *
   * Returns:    0                       Success
   *             EAGAIN
   *             EDEADLK
   *     lf_split:ENOLCK
   *     lf_clearlock:ENOLCK
   *     msleep:EINTR
+ *     msleep:ETIMEDOUT
   *
   * Notes:      We add the lock to the provisional lock list.  We do not
   *             coalesce at this time; this has implications for other lock
   *             requestors in the blocker search mechanism.
   */
  static int
-lf_setlock(struct lockf *lock)
+lf_setlock(struct lockf *lock, struct timespec *timeout)
  {
         struct lockf *block;
         struct lockf **head = lock->lf_head;
         struct lockf **prev, *overlap, *ltmp;
-       static char lockstr[] = "lockf";
+       static const char lockstr[] = "lockf";
         int priority, needtolink, error;
         struct vnode *vp = lock->lf_vnode;
         overlap_t ovcase;
  
  #ifdef LOCKF_DEBUGGING
-       if (lockf_debug & 1) {
+       if (LOCKF_DEBUGP(LF_DBG_LOCKOP)) {
                 lf_print("lf_setlock", lock);
                 lf_printlist("lf_setlock(in)", lock);
         }
  #endif /* LOCKF_DEBUGGING */
+       LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p Looking for deadlock, vnode %p\n", lock, lock->lf_vnode);
  
         /*
          * Set the priority
          */
         priority = PLOCK;
-       if (lock->lf_type == F_WRLCK)
+       if (lock->lf_type == F_WRLCK) {
                 priority += 4;
+       }
         priority |= PCATCH;
+scan:
         /*
          * Scan lock list for this file looking for locks that would block us.
          */
@@ -471,64 +538,127 @@ lf_setlock(struct lockf *lock)
                  * Free the structure and return if nonblocking.
                  */
                 if ((lock->lf_flags & F_WAIT) == 0) {
+                       DTRACE_FSINFO(advlock__nowait, vnode_t, vp);
                         FREE(lock, M_LOCKF);
-                       return (EAGAIN);
+                       return EAGAIN;
                 }
  
+               LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p found blocking lock %p\n", lock, block);
+
                 /*
                  * We are blocked. Since flock style locks cover
                  * the whole file, there is no chance for deadlock.
-                * For byte-range locks we must check for deadlock.
+                *
+                * OFD byte-range locks currently do NOT support
+                * deadlock detection.
+                *
+                * For POSIX byte-range locks we must check for deadlock.
                  *
                  * Deadlock detection is done by looking through the
                  * wait channels to see if there are any cycles that
-                * involve us. MAXDEPTH is set just to make sure we
-                * do not go off into neverland.
+                * involve us.
                  */
                 if ((lock->lf_flags & F_POSIX) &&
                     (block->lf_flags & F_POSIX)) {
-                       struct proc *wproc, *bproc;
-                       struct uthread *ut;
-                       struct lockf *waitblock;
-                       int i = 0;
+                       lck_mtx_lock(&lf_dead_lock);
  
-                       /* The block is waiting on something */
-                       wproc = (struct proc *)block->lf_id;
+                       /* The blocked process is waiting on something */
+                       struct proc *wproc = block->lf_owner;
                         proc_lock(wproc);
+
+                       LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p owned by pid %d\n", lock, proc_pid(wproc));
+
+                       struct uthread *ut;
                         TAILQ_FOREACH(ut, &wproc->p_uthlist, uu_list) {
                                 /*
-                                * While the thread is asleep (uu_wchan != 0)
-                                * in this code (uu_wmesg == lockstr)
-                                * and we have not exceeded the maximum cycle
-                                * depth (i < maxlockdepth), then check for a
-                                * cycle to see if the lock is blocked behind
+                                * If the thread is (a) asleep (uu_wchan != 0)
+                                * and (b) in this code (uu_wmesg == lockstr)
+                                * then check to see if the lock is blocked behind
                                  * someone blocked behind us.
+                                *
+                                * Note: (i) vp->v_lock is held, preventing other
+                                * threads from mutating the blocking list for our vnode,
+                                * and (ii) the proc_lock is held, i.e. the thread list
+                                * is stable.
+                                *
+                                * HOWEVER some thread in wproc might be sleeping on a lockf
+                                * structure for a different vnode, and be woken at any
+                                * time. Thus the waitblock list could mutate while
+                                * it's being inspected by this thread, and what
+                                * ut->uu_wchan was just pointing at could even be freed.
+                                *
+                                * Nevertheless this is safe here because of lf_dead_lock; if
+                                * any thread blocked with uu_wmesg == lockstr wakes (see below)
+                                * it will try to acquire lf_dead_lock which is already held
+                                * here. Holding that lock prevents the lockf structure being
+                                * pointed at by ut->uu_wchan from going away. Thus the vnode
+                                * involved can be found and locked, and the corresponding
+                                * blocking chain can then be examined safely.
                                  */
-                               while (((waitblock = (struct lockf *)ut->uu_wchan) != NULL) &&
-                                   ut->uu_wmesg == lockstr &&
-                                   (i++ < maxlockdepth)) {
-                                       waitblock = (struct lockf *)ut->uu_wchan;
+                               const struct lockf *waitblock = (const void *)ut->uu_wchan;
+                               if ((waitblock != NULL) && (ut->uu_wmesg == lockstr)) {
+                                       LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p which is also blocked on lock %p vnode %p\n", lock, waitblock, waitblock->lf_vnode);
+
+                                       vnode_t othervp = NULL;
+                                       if (waitblock->lf_vnode != vp) {
+                                               /*
+                                                * This thread in wproc is waiting for a lock
+                                                * on a different vnode; grab the lock on it
+                                                * that protects lf_next while we examine it.
+                                                */
+                                               othervp = waitblock->lf_vnode;
+                                               if (!lck_mtx_try_lock(&othervp->v_lock)) {
+                                                       /*
+                                                        * avoid kernel deadlock: drop all
+                                                        * locks, pause for a bit to let the
+                                                        * other thread do what it needs to do,
+                                                        * then (because we drop and retake
+                                                        * v_lock) retry the scan.
+                                                        */
+                                                       proc_unlock(wproc);
+                                                       lck_mtx_unlock(&lf_dead_lock);
+                                                       static struct timespec ts = {
+                                                               .tv_sec = 0,
+                                                               .tv_nsec = 2 * NSEC_PER_MSEC,
+                                                       };
+                                                       static const char pausestr[] = "lockf:pause";
+                                                       (void) msleep(lock, &vp->v_lock, priority, pausestr, &ts);
+                                                       LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p contention for vp %p => restart\n", lock, othervp);
+                                                       goto scan;
+                                               }
+                                       }
+
                                         /*
                                          * Get the lock blocking the lock
                                          * which would block us, and make
-                                        * certain it hasn't come unblocked
+                                        * certain it hasn't become unblocked
                                          * (been granted, e.g. between the time
                                          * we called lf_getblock, and the time
                                          * we successfully acquired the
                                          * proc_lock).
                                          */
-                                       waitblock = waitblock->lf_next;
-                                       if (waitblock == NULL)
-                                               break;
+                                       const struct lockf *nextblock = waitblock->lf_next;
+                                       if (nextblock == NULL) {
+                                               if (othervp) {
+                                                       lck_mtx_unlock(&othervp->v_lock);
+                                               }
+                                               LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p with waitblock %p and no lf_next; othervp %p\n", lock, waitblock, othervp);
+                                               continue;
+                                       }
+                                       LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p which is also blocked on lock %p vnode %p\n", lock, nextblock, nextblock->lf_vnode);
  
                                         /*
                                          * Make sure it's an advisory range
-                                        * lock and not an overall file lock;
+                                        * lock and not any other kind of lock;
                                          * if we mix lock types, it's our own
                                          * fault.
                                          */
-                                       if ((waitblock->lf_flags & F_POSIX) == 0)
-                                               break;
+                                       if ((nextblock->lf_flags & F_POSIX) == 0) {
+                                               if (othervp) {
+                                                       lck_mtx_unlock(&othervp->v_lock);
+                                               }
+                                               continue;
+                                       }
  
                                         /*
                                          * If the owner of the lock that's
@@ -536,15 +666,25 @@ lf_setlock(struct lockf *lock)
                                          * getting the requested lock, then we
                                          * would deadlock, so error out.
                                          */
-                                       bproc = (struct proc *)waitblock->lf_id;
-                                       if (bproc == (struct proc *)lock->lf_id) {
+                                       struct proc *bproc = nextblock->lf_owner;
+                                       const boolean_t deadlocked = bproc == lock->lf_owner;
+
+                                       if (othervp) {
+                                               lck_mtx_unlock(&othervp->v_lock);
+                                       }
+                                       LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p owned by pid %d\n", lock, proc_pid(bproc));
+                                       if (deadlocked) {
+                                               LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p which is me, so EDEADLK\n", lock);
                                                 proc_unlock(wproc);
+                                               lck_mtx_unlock(&lf_dead_lock);
                                                 FREE(lock, M_LOCKF);
-                                               return (EDEADLK);
+                                               return EDEADLK;
                                         }
                                 }
+                               LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p bottom of thread loop\n", lock);
                         }
                         proc_unlock(wproc);
+                       lck_mtx_unlock(&lf_dead_lock);
                 }
  
                 /*
@@ -557,7 +697,7 @@ lf_setlock(struct lockf *lock)
                         lock->lf_type = F_UNLCK;
                         if ((error = lf_clearlock(lock)) != 0) {
                                 FREE(lock, M_LOCKF);
-                               return (error);
+                               return error;
                         }
                         lock->lf_type = F_WRLCK;
                 }
@@ -568,46 +708,107 @@ lf_setlock(struct lockf *lock)
                 lock->lf_next = block;
                 TAILQ_INSERT_TAIL(&block->lf_blkhd, lock, lf_block);
  
-               if ( !(lock->lf_flags & F_FLOCK))
-                       block->lf_flags &= ~F_WAKE1_SAFE;
+               if (!(lock->lf_flags & F_FLOCK)) {
+                       block->lf_flags &= ~F_WAKE1_SAFE;
+               }
+
+#if IMPORTANCE_INHERITANCE
+               /*
+                * Importance donation is done only for cases where the
+                * owning task can be unambiguously determined.
+                *
+                * POSIX type locks are not inherited by child processes;
+                * we maintain a 1:1 mapping between a lock and its owning
+                * process.
+                *
+                * Flock type locks are inherited across fork() and there is
+                * no 1:1 mapping in the general case.  However, the fileglobs
+                * used by OFD locks *may* be confined to the process that
+                * created them, and thus have an "owner", in which case
+                * we also attempt importance donation.
+                */
+               if ((lock->lf_flags & block->lf_flags & F_POSIX) != 0) {
+                       lf_boost_blocking_proc(lock, block);
+               } else if ((lock->lf_flags & block->lf_flags & F_OFD_LOCK) &&
+                   lock->lf_owner != block->lf_owner &&
+                   NULL != lock->lf_owner && NULL != block->lf_owner) {
+                       lf_boost_blocking_proc(lock, block);
+               }
+#endif /* IMPORTANCE_INHERITANCE */
  
  #ifdef LOCKF_DEBUGGING
-               if (lockf_debug & 1) {
+               if (LOCKF_DEBUGP(LF_DBG_LOCKOP)) {
                         lf_print("lf_setlock: blocking on", block);
                         lf_printlist("lf_setlock(block)", block);
                 }
  #endif /* LOCKF_DEBUGGING */
-               error = msleep(lock, &vp->v_lock, priority, lockstr, 0);
+               DTRACE_FSINFO(advlock__wait, vnode_t, vp);
  
-               if (!TAILQ_EMPTY(&lock->lf_blkhd)) {
-                       if ((block = lf_getblock(lock, -1))) {
-                               lf_move_blocked(block, lock);
-                       }
+               if (lock->lf_flags & F_POSIX) {
+                       error = msleep(lock, &vp->v_lock, priority, lockstr, timeout);
+                       /*
+                        * Ensure that 'lock' doesn't get mutated or freed if a
+                        * wakeup occurs while hunting for deadlocks (and holding
+                        * lf_dead_lock - see above)
+                        */
+                       lck_mtx_lock(&lf_dead_lock);
+                       lck_mtx_unlock(&lf_dead_lock);
+               } else {
+                       static const char lockstr_np[] = "lockf:np";
+                       error = msleep(lock, &vp->v_lock, priority, lockstr_np, timeout);
                 }
  
-               if (error == 0 && (lock->lf_flags & F_ABORT) != 0)
+               if (error == 0 && (lock->lf_flags & F_ABORT) != 0) {
                         error = EBADF;
+               }
  
-               if (error) {    /* XXX */
+               if (lock->lf_next) {
                         /*
-                        * We may have been awakened by a signal and/or by a
-                        * debugger continuing us (in which cases we must remove
-                        * ourselves from the blocked list) and/or by another
-                        * process releasing a lock (in which case we have
-                        * already been removed from the blocked list and our
-                        * lf_next field set to NOLOCKF).
+                        * lf_wakelock() always sets wakelock->lf_next to
+                        * NULL before a wakeup; so we've been woken early
+                        * - perhaps by a debugger, signal or other event.
+                        *
+                        * Remove 'lock' from the block list (avoids double-add
+                        * in the spurious case, which would create a cycle)
                          */
-                       if (lock->lf_next) {
-                               TAILQ_REMOVE(&lock->lf_next->lf_blkhd, lock, lf_block);
-                               lock->lf_next = NOLOCKF;
+                       TAILQ_REMOVE(&lock->lf_next->lf_blkhd, lock, lf_block);
+#if IMPORTANCE_INHERITANCE
+                       /*
+                        * Adjust the boost on lf_next.
+                        */
+                       lf_adjust_assertion(lock->lf_next);
+#endif /* IMPORTANCE_INHERITANCE */
+                       lock->lf_next = NULL;
+
+                       if (error == 0) {
+                               /*
+                                * If this was a spurious wakeup, retry
+                                */
+                               printf("%s: spurious wakeup, retrying lock\n",
+                                   __func__);
+                               continue;
+                       }
+               }
+
+               if (!TAILQ_EMPTY(&lock->lf_blkhd)) {
+                       if ((block = lf_getblock(lock, -1)) != NULL) {
+                               lf_move_blocked(block, lock);
+                       }
+               }
+
+               if (error) {
+                       if (!TAILQ_EMPTY(&lock->lf_blkhd)) {
+                               lf_wakelock(lock, TRUE);
                         }
-                       if (!TAILQ_EMPTY(&lock->lf_blkhd))
-                               lf_wakelock(lock, TRUE);
-                         
                         FREE(lock, M_LOCKF);
-                       return (error);
-               }       /* XXX */
+                       /* Return ETIMEDOUT if timeout occurred. */
+                       if (error == EWOULDBLOCK) {
+                               error = ETIMEDOUT;
+                       }
+                       return error;
+               }
         }
+
         /*
          * No blocks!!  Add the lock.  Note that we will
          * downgrade or upgrade any overlapping locks this
@@ -621,8 +822,9 @@ lf_setlock(struct lockf *lock)
         needtolink = 1;
         for (;;) {
                 ovcase = lf_findoverlap(block, lock, SELF, &prev, &overlap);
-               if (ovcase)
+               if (ovcase) {
                         block = overlap->lf_next;
+               }
                 /*
                  * Six cases:
                  *      0) no overlap
@@ -646,8 +848,9 @@ lf_setlock(struct lockf *lock)
                          * able to acquire it.
                          */
                         if (lock->lf_type == F_RDLCK &&
-                           overlap->lf_type == F_WRLCK)
-                               lf_wakelock(overlap, TRUE);
+                           overlap->lf_type == F_WRLCK) {
+                               lf_wakelock(overlap, TRUE);
+                       }
                         overlap->lf_type = lock->lf_type;
                         FREE(lock, M_LOCKF);
                         lock = overlap; /* for lf_coalesce_adjacent() */
@@ -674,7 +877,7 @@ lf_setlock(struct lockf *lock)
                                  */
                                 if (lf_split(overlap, lock)) {
                                         FREE(lock, M_LOCKF);
-                                       return (ENOLCK);
+                                       return ENOLCK;
                                 }
                         }
                         lf_wakelock(overlap, TRUE);
@@ -687,7 +890,7 @@ lf_setlock(struct lockf *lock)
                          */
                         if (lock->lf_type == F_RDLCK &&
                             overlap->lf_type == F_WRLCK) {
-                               lf_wakelock(overlap, TRUE);
+                               lf_wakelock(overlap, TRUE);
                         } else {
                                 while (!TAILQ_EMPTY(&overlap->lf_blkhd)) {
                                         ltmp = TAILQ_FIRST(&overlap->lf_blkhd);
@@ -706,8 +909,9 @@ lf_setlock(struct lockf *lock)
                                 lock->lf_next = overlap->lf_next;
                                 prev = &lock->lf_next;
                                 needtolink = 0;
-                       } else
+                       } else {
                                 *prev = overlap->lf_next;
+                       }
                         FREE(overlap, M_LOCKF);
                         continue;
  
@@ -740,12 +944,12 @@ lf_setlock(struct lockf *lock)
         /* Coalesce adjacent locks with identical attributes */
         lf_coalesce_adjacent(lock);
  #ifdef LOCKF_DEBUGGING
-       if (lockf_debug & 1) {
+       if (LOCKF_DEBUGP(LF_DBG_LOCKOP)) {
                 lf_print("lf_setlock: got the lock", lock);
                 lf_printlist("lf_setlock(out)", lock);
         }
  #endif /* LOCKF_DEBUGGING */
-       return (0);
+       return 0;
  }
  
  
@@ -773,23 +977,31 @@ lf_clearlock(struct lockf *unlock)
         struct lockf *overlap, **prev;
         overlap_t ovcase;
  
-       if (lf == NOLOCKF)
-               return (0);
+       if (lf == NOLOCKF) {
+               return 0;
+       }
  #ifdef LOCKF_DEBUGGING
-       if (unlock->lf_type != F_UNLCK)
+       if (unlock->lf_type != F_UNLCK) {
                 panic("lf_clearlock: bad type");
-       if (lockf_debug & 1)
+       }
+       if (LOCKF_DEBUGP(LF_DBG_LOCKOP)) {
                 lf_print("lf_clearlock", unlock);
+       }
  #endif /* LOCKF_DEBUGGING */
         prev = head;
         while ((ovcase = lf_findoverlap(lf, unlock, SELF, &prev, &overlap)) != OVERLAP_NONE) {
                 /*
                  * Wakeup the list of locks to be retried.
                  */
-               lf_wakelock(overlap, FALSE);
+               lf_wakelock(overlap, FALSE);
+#if IMPORTANCE_INHERITANCE
+               if (overlap->lf_boosted == LF_BOOSTED) {
+                       lf_drop_assertion(overlap);
+               }
+#endif /* IMPORTANCE_INHERITANCE */
  
                 switch (ovcase) {
-               case OVERLAP_NONE:      /* satisfy compiler enum/switch */
+               case OVERLAP_NONE:      /* satisfy compiler enum/switch */
                         break;
  
                 case OVERLAP_EQUALS_LOCK:
@@ -806,8 +1018,9 @@ lf_clearlock(struct lockf *unlock)
                          * If we can't split the lock, we can't grant it.
                          * Claim a system limit for the resource shortage.
                          */
-                       if (lf_split(overlap, unlock))
-                               return (ENOLCK);
+                       if (lf_split(overlap, unlock)) {
+                               return ENOLCK;
+                       }
                         overlap->lf_next = unlock->lf_next;
                         break;
  
@@ -830,10 +1043,11 @@ lf_clearlock(struct lockf *unlock)
                 break;
         }
  #ifdef LOCKF_DEBUGGING
-       if (lockf_debug & 1)
+       if (LOCKF_DEBUGP(LF_DBG_LOCKOP)) {
                 lf_printlist("lf_clearlock", unlock);
+       }
  #endif /* LOCKF_DEBUGGING */
-       return (0);
+       return 0;
  }
  
  
@@ -865,26 +1079,33 @@ lf_getlock(struct lockf *lock, struct flock *fl, pid_t matchpid)
         struct lockf *block;
  
  #ifdef LOCKF_DEBUGGING
-       if (lockf_debug & 1)
+       if (LOCKF_DEBUGP(LF_DBG_LOCKOP)) {
                 lf_print("lf_getlock", lock);
+       }
  #endif /* LOCKF_DEBUGGING */
  
         if ((block = lf_getblock(lock, matchpid))) {
                 fl->l_type = block->lf_type;
                 fl->l_whence = SEEK_SET;
                 fl->l_start = block->lf_start;
-               if (block->lf_end == -1)
+               if (block->lf_end == -1) {
                         fl->l_len = 0;
-               else
+               } else {
                         fl->l_len = block->lf_end - block->lf_start + 1;
-               if (block->lf_flags & F_POSIX)
-                       fl->l_pid = proc_pid((struct proc *)(block->lf_id));
-               else
+               }
+               if (NULL != block->lf_owner) {
+                       /*
+                        * lf_owner is only non-NULL when the lock
+                        * "owner" can be unambiguously determined
+                        */
+                       fl->l_pid = proc_pid(block->lf_owner);
+               } else {
                         fl->l_pid = -1;
+               }
         } else {
                 fl->l_type = F_UNLCK;
         }
-       return (0);
+       return 0;
  }
  
  /*
@@ -915,19 +1136,23 @@ lf_getblock(struct lockf *lock, pid_t matchpid)
                  * Found an overlap.
                  *
                  * If we're matching pids, and it's a record lock,
+                * or it's an OFD lock on a process-confined fd,
                  * but the pid doesn't match, then keep on looking ..
                  */
                 if (matchpid != -1 &&
-                   (overlap->lf_flags & F_POSIX) != 0 &&
-                   proc_pid((struct proc *)(overlap->lf_id)) != matchpid)
+                   (overlap->lf_flags & (F_POSIX | F_OFD_LOCK)) != 0 &&
+                   proc_pid(overlap->lf_owner) != matchpid) {
                         continue;
+               }
+
                 /*
                  * does it block us?
                  */
-               if ((lock->lf_type == F_WRLCK || overlap->lf_type == F_WRLCK))
-                       return (overlap);
+               if ((lock->lf_type == F_WRLCK || overlap->lf_type == F_WRLCK)) {
+                       return overlap;
+               }
         }
-       return (NOLOCKF);
+       return NOLOCKF;
  }
  
  
@@ -977,30 +1202,32 @@ lf_getblock(struct lockf *lock, pid_t matchpid)
   */
  static overlap_t
  lf_findoverlap(struct lockf *lf, struct lockf *lock, int type,
-              struct lockf ***prev, struct lockf **overlap)
+    struct lockf ***prev, struct lockf **overlap)
  {
         off_t start, end;
         int found_self = 0;
  
         *overlap = lf;
-       if (lf == NOLOCKF)
-               return (0);
+       if (lf == NOLOCKF) {
+               return 0;
+       }
  #ifdef LOCKF_DEBUGGING
-       if (lockf_debug & 2)
+       if (LOCKF_DEBUGP(LF_DBG_LIST)) {
                 lf_print("lf_findoverlap: looking for overlap in", lock);
+       }
  #endif /* LOCKF_DEBUGGING */
         start = lock->lf_start;
         end = lock->lf_end;
         while (lf != NOLOCKF) {
                 if (((type & SELF) && lf->lf_id != lock->lf_id) ||
                     ((type & OTHERS) && lf->lf_id == lock->lf_id)) {
-                       /* 
+                       /*
                          * Locks belonging to one process are adjacent on the
                          * list, so if we've found any locks belonging to us,
                          * and we're now seeing something else, then we've
                          * examined all "self" locks.  Note that bailing out
-                        * here is quite important; for coalescing, we assume 
-                        * numerically adjacent locks from the same owner to 
+                        * here is quite important; for coalescing, we assume
+                        * numerically adjacent locks from the same owner to
                          * be adjacent on the list.
                          */
                         if ((type & SELF) && found_self) {
@@ -1017,8 +1244,9 @@ lf_findoverlap(struct lockf *lf, struct lockf *lock, int type,
                 }
  
  #ifdef LOCKF_DEBUGGING
-               if (lockf_debug & 2)
+               if (LOCKF_DEBUGP(LF_DBG_LIST)) {
                         lf_print("\tchecking", lf);
+               }
  #endif /* LOCKF_DEBUGGING */
                 /*
                  * OK, check for overlap
@@ -1026,48 +1254,49 @@ lf_findoverlap(struct lockf *lf, struct lockf *lock, int type,
                 if ((lf->lf_end != -1 && start > lf->lf_end) ||
                     (end != -1 && lf->lf_start > end)) {
                         /* Case 0 */
-                       LOCKF_DEBUG(2, "no overlap\n");
+                       LOCKF_DEBUG(LF_DBG_LIST, "no overlap\n");
  
                         /*
-                        * NOTE: assumes that locks for the same process are 
+                        * NOTE: assumes that locks for the same process are
                          * nonintersecting and ordered.
                          */
-                       if ((type & SELF) && end != -1 && lf->lf_start > end)
-                               return (OVERLAP_NONE);
+                       if ((type & SELF) && end != -1 && lf->lf_start > end) {
+                               return OVERLAP_NONE;
+                       }
                         *prev = &lf->lf_next;
                         *overlap = lf = lf->lf_next;
                         continue;
                 }
                 if ((lf->lf_start == start) && (lf->lf_end == end)) {
-                       LOCKF_DEBUG(2, "overlap == lock\n");
-                       return (OVERLAP_EQUALS_LOCK);
+                       LOCKF_DEBUG(LF_DBG_LIST, "overlap == lock\n");
+                       return OVERLAP_EQUALS_LOCK;
                 }
                 if ((lf->lf_start <= start) &&
                     (end != -1) &&
                     ((lf->lf_end >= end) || (lf->lf_end == -1))) {
-                       LOCKF_DEBUG(2, "overlap contains lock\n");
-                       return (OVERLAP_CONTAINS_LOCK);
+                       LOCKF_DEBUG(LF_DBG_LIST, "overlap contains lock\n");
+                       return OVERLAP_CONTAINS_LOCK;
                 }
                 if (start <= lf->lf_start &&
-                          (end == -1 ||
-                          (lf->lf_end != -1 && end >= lf->lf_end))) {
-                       LOCKF_DEBUG(2, "lock contains overlap\n");
-                       return (OVERLAP_CONTAINED_BY_LOCK);
+                   (end == -1 ||
+                   (lf->lf_end != -1 && end >= lf->lf_end))) {
+                       LOCKF_DEBUG(LF_DBG_LIST, "lock contains overlap\n");
+                       return OVERLAP_CONTAINED_BY_LOCK;
                 }
                 if ((lf->lf_start < start) &&
-                       ((lf->lf_end >= start) || (lf->lf_end == -1))) {
-                       LOCKF_DEBUG(2, "overlap starts before lock\n");
-                       return (OVERLAP_STARTS_BEFORE_LOCK);
+                   ((lf->lf_end >= start) || (lf->lf_end == -1))) {
+                       LOCKF_DEBUG(LF_DBG_LIST, "overlap starts before lock\n");
+                       return OVERLAP_STARTS_BEFORE_LOCK;
                 }
                 if ((lf->lf_start > start) &&
-                       (end != -1) &&
-                       ((lf->lf_end > end) || (lf->lf_end == -1))) {
-                       LOCKF_DEBUG(2, "overlap ends after lock\n");
-                       return (OVERLAP_ENDS_AFTER_LOCK);
+                   (end != -1) &&
+                   ((lf->lf_end > end) || (lf->lf_end == -1))) {
+                       LOCKF_DEBUG(LF_DBG_LIST, "overlap ends after lock\n");
+                       return OVERLAP_ENDS_AFTER_LOCK;
                 }
                 panic("lf_findoverlap: default");
         }
-       return (OVERLAP_NONE);
+       return OVERLAP_NONE;
  }
  
  
@@ -1100,32 +1329,33 @@ lf_split(struct lockf *lock1, struct lockf *lock2)
         struct lockf *splitlock;
  
  #ifdef LOCKF_DEBUGGING
-       if (lockf_debug & 2) {
+       if (LOCKF_DEBUGP(LF_DBG_LIST)) {
                 lf_print("lf_split", lock1);
                 lf_print("splitting from", lock2);
         }
  #endif /* LOCKF_DEBUGGING */
         /*
-        * Check to see if spliting into only two pieces.
+        * Check to see if splitting into only two pieces.
          */
         if (lock1->lf_start == lock2->lf_start) {
                 lock1->lf_start = lock2->lf_end + 1;
                 lock2->lf_next = lock1;
-               return (0);
+               return 0;
         }
         if (lock1->lf_end == lock2->lf_end) {
                 lock1->lf_end = lock2->lf_start - 1;
                 lock2->lf_next = lock1->lf_next;
                 lock1->lf_next = lock2;
-               return (0);
+               return 0;
         }
         /*
          * Make a new lock consisting of the last part of
          * the encompassing lock
          */
         MALLOC(splitlock, struct lockf *, sizeof *splitlock, M_LOCKF, M_WAITOK);
-       if (splitlock == NULL)
-               return (ENOLCK);
+       if (splitlock == NULL) {
+               return ENOLCK;
+       }
         bcopy(lock1, splitlock, sizeof *splitlock);
         splitlock->lf_start = lock2->lf_end + 1;
         TAILQ_INIT(&splitlock->lf_blkhd);
@@ -1137,7 +1367,7 @@ lf_split(struct lockf *lock1, struct lockf *lock2)
         lock2->lf_next = splitlock;
         lock1->lf_next = lock2;
  
-       return (0);
+       return 0;
  }
  
  
@@ -1165,8 +1395,9 @@ lf_wakelock(struct lockf *listhead, boolean_t force_all)
         struct lockf *wakelock;
         boolean_t wake_all = TRUE;
  
-       if (force_all == FALSE && (listhead->lf_flags & F_WAKE1_SAFE))
-               wake_all = FALSE;
+       if (force_all == FALSE && (listhead->lf_flags & F_WAKE1_SAFE)) {
+               wake_all = FALSE;
+       }
  
         while (!TAILQ_EMPTY(&listhead->lf_blkhd)) {
                 wakelock = TAILQ_FIRST(&listhead->lf_blkhd);
@@ -1174,8 +1405,9 @@ lf_wakelock(struct lockf *listhead, boolean_t force_all)
  
                 wakelock->lf_next = NOLOCKF;
  #ifdef LOCKF_DEBUGGING
-               if (lockf_debug & 2)
+               if (LOCKF_DEBUGP(LF_DBG_LOCKOP)) {
                         lf_print("lf_wakelock: awakening", wakelock);
+               }
  #endif /* LOCKF_DEBUGGING */
                 if (wake_all == FALSE) {
                         /*
@@ -1186,22 +1418,29 @@ lf_wakelock(struct lockf *listhead, boolean_t force_all)
                         if (!TAILQ_EMPTY(&listhead->lf_blkhd)) {
                                 TAILQ_CONCAT(&wakelock->lf_blkhd, &listhead->lf_blkhd, lf_block);
  
-                               struct lockf *tlock;
+                               struct lockf *tlock;
  
-                               TAILQ_FOREACH(tlock, &wakelock->lf_blkhd, lf_block) {
-                                       tlock->lf_next = wakelock;
+                               TAILQ_FOREACH(tlock, &wakelock->lf_blkhd, lf_block) {
+                                       if (TAILQ_NEXT(tlock, lf_block) == tlock) {
+                                               /* See rdar://10887303 */
+                                               panic("cycle in wakelock list");
+                                       }
+                                       tlock->lf_next = wakelock;
                                 }
                         }
                 }
                 wakeup(wakelock);
  
-               if (wake_all == FALSE)
-                       break;
+               if (wake_all == FALSE) {
+                       break;
+               }
         }
  }
  
  
  #ifdef LOCKF_DEBUGGING
+#define GET_LF_OWNER_PID(lf)    (proc_pid((lf)->lf_owner))
+
  /*
   * lf_print DEBUG
   *
@@ -1217,27 +1456,34 @@ void
  lf_print(const char *tag, struct lockf *lock)
  {
         printf("%s: lock %p for ", tag, (void *)lock);
-       if (lock->lf_flags & F_POSIX)
-               printf("proc %ld", (long)((struct proc *)lock->lf_id)->p_pid);
-       else
+       if (lock->lf_flags & F_POSIX) {
+               printf("proc %p (owner %d)",
+                   lock->lf_id, GET_LF_OWNER_PID(lock));
+       } else if (lock->lf_flags & F_OFD_LOCK) {
+               printf("fg %p (owner %d)",
+                   lock->lf_id, GET_LF_OWNER_PID(lock));
+       } else {
                 printf("id %p", (void *)lock->lf_id);
-       if (lock->lf_vnode != 0)
+       }
+       if (lock->lf_vnode != 0) {
                 printf(" in vno %p, %s, start 0x%016llx, end 0x%016llx",
                     lock->lf_vnode,
                     lock->lf_type == F_RDLCK ? "shared" :
                     lock->lf_type == F_WRLCK ? "exclusive" :
                     lock->lf_type == F_UNLCK ? "unlock" : "unknown",
                     (intmax_t)lock->lf_start, (intmax_t)lock->lf_end);
-       else
+       } else {
                 printf(" %s, start 0x%016llx, end 0x%016llx",
                     lock->lf_type == F_RDLCK ? "shared" :
                     lock->lf_type == F_WRLCK ? "exclusive" :
                     lock->lf_type == F_UNLCK ? "unlock" : "unknown",
                     (intmax_t)lock->lf_start, (intmax_t)lock->lf_end);
-       if (!TAILQ_EMPTY(&lock->lf_blkhd))
+       }
+       if (!TAILQ_EMPTY(&lock->lf_blkhd)) {
                 printf(" block %p\n", (void *)TAILQ_FIRST(&lock->lf_blkhd));
-       else
+       } else {
                 printf("\n");
+       }
  }
  
  
@@ -1258,18 +1504,23 @@ lf_printlist(const char *tag, struct lockf *lock)
  {
         struct lockf *lf, *blk;
  
-       if (lock->lf_vnode == 0)
+       if (lock->lf_vnode == 0) {
                 return;
+       }
  
         printf("%s: Lock list for vno %p:\n",
             tag, lock->lf_vnode);
         for (lf = lock->lf_vnode->v_lockf; lf; lf = lf->lf_next) {
-               printf("\tlock %p for ",(void *)lf);
-               if (lf->lf_flags & F_POSIX)
-                       printf("proc %ld",
-                           (long)((struct proc *)lf->lf_id)->p_pid);
-               else
+               printf("\tlock %p for ", (void *)lf);
+               if (lf->lf_flags & F_POSIX) {
+                       printf("proc %p (owner %d)",
+                           lf->lf_id, GET_LF_OWNER_PID(lf));
+               } else if (lf->lf_flags & F_OFD_LOCK) {
+                       printf("fg %p (owner %d)",
+                           lf->lf_id, GET_LF_OWNER_PID(lf));
+               } else {
                         printf("id %p", (void *)lf->lf_id);
+               }
                 printf(", %s, start 0x%016llx, end 0x%016llx",
                     lf->lf_type == F_RDLCK ? "shared" :
                     lf->lf_type == F_WRLCK ? "exclusive" :
@@ -1277,21 +1528,172 @@ lf_printlist(const char *tag, struct lockf *lock)
                     "unknown", (intmax_t)lf->lf_start, (intmax_t)lf->lf_end);
                 TAILQ_FOREACH(blk, &lf->lf_blkhd, lf_block) {
                         printf("\n\t\tlock request %p for ", (void *)blk);
-                       if (blk->lf_flags & F_POSIX)
-                               printf("proc %ld",
-                                   (long)((struct proc *)blk->lf_id)->p_pid);
-                       else
+                       if (blk->lf_flags & F_POSIX) {
+                               printf("proc %p (owner %d)",
+                                   blk->lf_id, GET_LF_OWNER_PID(blk));
+                       } else if (blk->lf_flags & F_OFD_LOCK) {
+                               printf("fg %p (owner %d)",
+                                   blk->lf_id, GET_LF_OWNER_PID(blk));
+                       } else {
                                 printf("id %p", (void *)blk->lf_id);
+                       }
                         printf(", %s, start 0x%016llx, end 0x%016llx",
                             blk->lf_type == F_RDLCK ? "shared" :
                             blk->lf_type == F_WRLCK ? "exclusive" :
                             blk->lf_type == F_UNLCK ? "unlock" :
                             "unknown", (intmax_t)blk->lf_start,
                             (intmax_t)blk->lf_end);
-                       if (!TAILQ_EMPTY(&blk->lf_blkhd))
+                       if (!TAILQ_EMPTY(&blk->lf_blkhd)) {
                                 panic("lf_printlist: bad list");
+                       }
                 }
                 printf("\n");
         }
  }
  #endif /* LOCKF_DEBUGGING */
+
+#if IMPORTANCE_INHERITANCE
+
+/*
+ * lf_hold_assertion
+ *
+ * Take a task importance hold assertion on the owner of a blocking
+ * lock; if the hold call returns 0, mark the lock as LF_BOOSTED.
+ *
+ * Parameters: block_task               Task owning the lock that blocks
+ *                                      the current thread.
+ *             block                    Lock on which the current thread
+ *                                      is blocking.
+ *
+ * Returns:    <void>
+ *
+ * Notes: No task reference on block_task needs to be held, since
+ *        the current thread has the vnode lock and block_task has a file
+ *        lock, thus removing the file lock in exit requires block_task to
+ *        grab the vnode lock.
+ */
+static void
+lf_hold_assertion(task_t block_task, struct lockf *block)
+{
+       if (task_importance_hold_file_lock_assertion(block_task, 1) == 0) {
+               block->lf_boosted = LF_BOOSTED;
+               LOCKF_DEBUG(LF_DBG_IMPINH,
+                   "lf: importance hold file lock assert on pid %d lock %p\n",
+                   proc_pid(block->lf_owner), block);
+       }
+}
+
+
+/*
+ * lf_jump_to_queue_head
+ *
+ * Move a waiting lock to the head of the block queue (lf_blkhd) of
+ * the lock it is queued on, wherever in that queue it currently sits.
+ *
+ * Parameters: block                    lockf struct whose lf_blkhd queue
+ *                                      contains 'lock'.
+ *             lock                     lockf struct to be moved to the
+ *                                      front of that queue.
+ *
+ * Returns:    <void>
+ */
+static void
+lf_jump_to_queue_head(struct lockf *block, struct lockf *lock)
+{
+       /* Unlink 'lock' from its current position, then re-insert it first. */
+       TAILQ_REMOVE(&block->lf_blkhd, lock, lf_block);
+       TAILQ_INSERT_HEAD(&block->lf_blkhd, lock, lf_block);
+}
+
+
+/*
+ * lf_drop_assertion
+ *
+ * Drop the file lock assertion on block's owner; set LF_NOT_BOOSTED.
+ *
+ * Parameters: block                    lockf struct holding the assertion.
+ *
+ * Returns:    <void>
+ */
+static void
+lf_drop_assertion(struct lockf *block)
+{
+       LOCKF_DEBUG(LF_DBG_IMPINH, "lf: %d: dropping assertion for lock %p\n",
+           proc_pid(block->lf_owner), block);
+
+       task_t current_task = proc_task(block->lf_owner);
+       task_importance_drop_file_lock_assertion(current_task, 1);
+       block->lf_boosted = LF_NOT_BOOSTED;
+}
+
+/*
+ * lf_adjust_assertion
+ *
+ * Re-evaluate the importance assertion on a boosted lock: walk the
+ * locks queued on block->lf_blkhd and drop the assertion if none
+ * of them is in a position to boost the owner of 'block'.
+ *
+ * Parameters: block   lockf structure which needs to be adjusted.
+ *
+ * Returns:    <void>
+ */
+static void
+lf_adjust_assertion(struct lockf *block)
+{
+       boolean_t drop_boost = TRUE;
+       struct lockf *next;
+
+       /* Nothing to adjust if the lock is not boosted */
+       if (block->lf_boosted == LF_NOT_BOOSTED) {
+               return;
+       }
+
+       TAILQ_FOREACH(next, &block->lf_blkhd, lf_block) {
+               /* Both POSIX, or both OFD with distinct non-NULL owners */
+               if (((block->lf_flags & next->lf_flags & F_POSIX) != 0) ||
+                   ((block->lf_flags & next->lf_flags & F_OFD_LOCK) &&
+                   (block->lf_owner != next->lf_owner) &&
+                   (NULL != block->lf_owner && NULL != next->lf_owner))) {
+                       /* Donor waiter and receiver-type owner: a boost applies */
+                       if (task_is_importance_donor(proc_task(next->lf_owner)) &&
+                           task_is_importance_receiver_type(proc_task(block->lf_owner))) {
+                               /* A queued waiter still boosts block; keep the boost */
+                               drop_boost = FALSE;
+                               break;
+                       }
+               }
+       }
+
+       if (drop_boost) {
+               lf_drop_assertion(block);
+       }
+}
+
+static void
+lf_boost_blocking_proc(struct lockf *lock, struct lockf *block)
+{
+       task_t ltask = proc_task(lock->lf_owner);
+       task_t btask = proc_task(block->lf_owner);
+
+       /*
+        * If the waiter's task (ltask) is an importance donor, boost
+        * the blocker's task (unless 'block' is already boosted or
+        * btask is not a receiver type) and move 'lock' to the head
+        * of block's queue. The imp_donor bit is checked without
+        * holding any lock, so the value may change after it is read,
+        * but it is ok to boost a task while it is being unboosted.
+        * TODO: Support live inheritance on file locks.
+        */
+       if (task_is_importance_donor(ltask)) {
+               LOCKF_DEBUG(LF_DBG_IMPINH,
+                   "lf: %d: attempt to boost pid %d that holds lock %p\n",
+                   proc_pid(lock->lf_owner), proc_pid(block->lf_owner), block);
+
+               if (block->lf_boosted != LF_BOOSTED &&
+                   task_is_importance_receiver_type(btask)) {
+                       lf_hold_assertion(btask, block);
+               }
+               lf_jump_to_queue_head(block, lock);
+       }
+}
+#endif /* IMPORTANCE_INHERITANCE */