/*
- * Copyright (c) 2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2019 Apple Computer, Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- *
+ *
* This file contains Original Code and/or Modifications of Original Code
* as defined in and that are subject to the Apple Public Source License
* Version 2.0 (the 'License'). You may not use this file except in
* unlawful or unlicensed copies of an Apple operating system, or to
* circumvent, violate, or enable the circumvention or violation of, any
* terms of an Apple operating system software license agreement.
- *
+ *
* Please obtain a copy of the License at
* http://www.opensource.apple.com/apsl/ and read it before using this file.
- *
+ *
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
* Please see the License for the specific language governing rights and
* limitations under the License.
- *
+ *
* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
*/
/*
#include <sys/malloc.h>
#include <sys/fcntl.h>
#include <sys/lockf.h>
+#include <sys/sdt.h>
+#include <kern/policy_internal.h>
-/*
- * This variable controls the maximum number of processes that will
- * be checked in doing deadlock detection.
- */
-static int maxlockdepth = MAXDEPTH;
+#include <sys/file_internal.h>
+
+#if (DEVELOPMENT || DEBUG)
+#define LOCKF_DEBUGGING 1
+#endif
#ifdef LOCKF_DEBUGGING
#include <sys/sysctl.h>
-#include <ufs/ufs/quota.h>
-#include <ufs/ufs/inode.h>
void lf_print(const char *tag, struct lockf *lock);
void lf_printlist(const char *tag, struct lockf *lock);
-static int lockf_debug = 2;
+
+#define LF_DBG_LOCKOP (1 << 0) /* setlk, getlk, clearlk */
+#define LF_DBG_LIST (1 << 1) /* split, coalesce */
+#define LF_DBG_IMPINH (1 << 2) /* importance inheritance */
+#define LF_DBG_TRACE (1 << 3) /* errors, exit */
+#define LF_DBG_DEADLOCK (1 << 4) /* deadlock detection */
+
+static int lockf_debug = 0; /* was 2, could be 3 ;-) */
SYSCTL_INT(_debug, OID_AUTO, lockf_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &lockf_debug, 0, "");
/*
- * If there is no mask bit selector, or there is on, and the selector is
+ * If there is no mask bit selector, or there is one, and the selector is
* set, then output the debugging diagnostic.
+ *
+ * Each diagnostic is emitted by two printf() calls: first the name of
+ * the calling function followed by '>', then the caller's own format
+ * string and arguments.
*/
-#define LOCKF_DEBUG(mask, ...) \
- do { \
- if( !(mask) || ((mask) & lockf_debug)) { \
- printf(__VA_ARGS__); \
- } \
+#define LOCKF_DEBUG(mask, ...) \
+ do { \
+ if (!(mask) || ((mask) & lockf_debug)) { \
+ printf("%s>", __FUNCTION__); \
+ printf(__VA_ARGS__); \
+ } \
} while(0)
-#else /* !LOCKF_DEBUGGING */
-#define LOCKF_DEBUG(mask, ...) /* mask */
-#endif /* !LOCKF_DEBUGGING */
+/* Non-zero when any bit of 'mask' is set in lockf_debug. The !LOCKF_DEBUGGING branch defines no fallback, so use it only inside #ifdef LOCKF_DEBUGGING. */
+#define LOCKF_DEBUGP(mask) \
+ ({ \
+ ((mask) & lockf_debug); \
+ })
+#else /* !LOCKF_DEBUGGING */
+#define LOCKF_DEBUG(mask, ...) /* mask */
+#endif /* !LOCKF_DEBUGGING */
MALLOC_DEFINE(M_LOCKF, "lockf", "Byte-range locking structures");
#define NOLOCKF (struct lockf *)0
-#define SELF 0x1
-#define OTHERS 0x2
-#define OFF_MAX 0x7fffffffffffffffULL /* max off_t */
+#define SELF 0x1
+#define OTHERS 0x2
+#define OFF_MAX 0x7fffffffffffffffULL /* max off_t */
/*
* Overlapping lock states
OVERLAP_ENDS_AFTER_LOCK
} overlap_t;
-static int lf_clearlock(struct lockf *);
+static int lf_clearlock(struct lockf *);
static overlap_t lf_findoverlap(struct lockf *,
- struct lockf *, int, struct lockf ***, struct lockf **);
-static struct lockf *lf_getblock(struct lockf *);
-static int lf_getlock(struct lockf *, struct flock *);
-#if CONFIG_EMBEDDED
-static int lf_getlockpid(struct vnode *, struct flock *);
-#endif
-static int lf_setlock(struct lockf *);
-static int lf_split(struct lockf *, struct lockf *);
-static void lf_wakelock(struct lockf *, boolean_t);
+ struct lockf *, int, struct lockf ***, struct lockf **);
+static struct lockf *lf_getblock(struct lockf *, pid_t);
+static int lf_getlock(struct lockf *, struct flock *, pid_t);
+static int lf_setlock(struct lockf *, struct timespec *);
+static int lf_split(struct lockf *, struct lockf *);
+static void lf_wakelock(struct lockf *, boolean_t);
+#if IMPORTANCE_INHERITANCE
+static void lf_hold_assertion(task_t, struct lockf *);
+static void lf_jump_to_queue_head(struct lockf *, struct lockf *);
+static void lf_drop_assertion(struct lockf *);
+static void lf_boost_blocking_proc(struct lockf *, struct lockf *);
+static void lf_adjust_assertion(struct lockf *block);
+#endif /* IMPORTANCE_INHERITANCE */
+
+static lck_mtx_t lf_dead_lock;
+static lck_grp_t *lf_dead_lock_grp;
+/*
+ * lf_init
+ *
+ * Description: Allocate the "lf_dead_lock" lock group and initialize the
+ *              lf_dead_lock mutex in it.  lf_setlock() holds that mutex
+ *              while it walks another process's threads looking for a
+ *              POSIX lock deadlock cycle, and POSIX lock waiters take
+ *              and release it right after waking from msleep().
+ *
+ * Parameters:  (void)
+ *
+ * Returns:     (void)
+ *
+ * NOTE(review): presumably called once during startup, before the first
+ *              advisory lock request; the caller is not visible here.
+ */
+void
+lf_init(void)
+{
+ lf_dead_lock_grp = lck_grp_alloc_init("lf_dead_lock", LCK_GRP_ATTR_NULL);
+ lck_mtx_init(&lf_dead_lock, lf_dead_lock_grp, LCK_ATTR_NULL);
+}
/*
* lf_advlock
* lf_setlock:EDEADLK
* lf_setlock:EINTR
* lf_setlock:ENOLCK
+ * lf_setlock:ETIMEDOUT
* lf_clearlock:ENOLCK
* vnode_size:???
*
/* XXX HFS may need a !vnode_isreg(vp) EISDIR error here */
-#if CONFIG_EMBEDDED
- if (ap->a_op == F_GETLKPID)
- return lf_getlockpid(vp, fl);
-#endif
-
/*
* Avoid the common case of unlocking when inode has no locks.
*/
if (*head == (struct lockf *)0) {
if (ap->a_op != F_SETLK) {
fl->l_type = F_UNLCK;
- LOCKF_DEBUG(0, "lf_advlock: '%s' unlock without lock\n", vfs_context_proc(context)->p_comm);
- return (0);
+ LOCKF_DEBUG(LF_DBG_TRACE,
+ "lf_advlock: '%s' unlock without lock\n",
+ vfs_context_proc(context)->p_comm);
+ return 0;
}
}
* Convert the flock structure into a start and end.
*/
switch (fl->l_whence) {
-
case SEEK_SET:
case SEEK_CUR:
/*
* do this because we will use size to force range checks.
*/
if ((error = vnode_size(vp, (off_t *)&size, context))) {
- LOCKF_DEBUG(0, "lf_advlock: vnode_getattr failed: %d\n", error);
- return (error);
+ LOCKF_DEBUG(LF_DBG_TRACE,
+ "lf_advlock: vnode_getattr failed: %d\n", error);
+ return error;
}
if (size > OFF_MAX ||
(fl->l_start > 0 &&
- size > (u_quad_t)(OFF_MAX - fl->l_start)))
- return (EOVERFLOW);
+ size > (u_quad_t)(OFF_MAX - fl->l_start))) {
+ return EOVERFLOW;
+ }
start = size + fl->l_start;
break;
default:
- LOCKF_DEBUG(0, "lf_advlock: unknown whence %d\n", fl->l_whence);
- return (EINVAL);
+ LOCKF_DEBUG(LF_DBG_TRACE, "lf_advlock: unknown whence %d\n",
+ fl->l_whence);
+ return EINVAL;
}
if (start < 0) {
- LOCKF_DEBUG(0, "lf_advlock: start < 0 (%qd)\n", start);
- return (EINVAL);
+ LOCKF_DEBUG(LF_DBG_TRACE, "lf_advlock: start < 0 (%qd)\n",
+ start);
+ return EINVAL;
}
if (fl->l_len < 0) {
if (start == 0) {
- LOCKF_DEBUG(0, "lf_advlock: len < 0 & start == 0\n");
- return (EINVAL);
+ LOCKF_DEBUG(LF_DBG_TRACE,
+ "lf_advlock: len < 0 & start == 0\n");
+ return EINVAL;
}
end = start - 1;
start += fl->l_len;
if (start < 0) {
- LOCKF_DEBUG(0, "lf_advlock: start < 0 (%qd)\n", start);
- return (EINVAL);
+ LOCKF_DEBUG(LF_DBG_TRACE,
+ "lf_advlock: start < 0 (%qd)\n", start);
+ return EINVAL;
}
- } else if (fl->l_len == 0)
+ } else if (fl->l_len == 0) {
end = -1;
- else {
+ } else {
oadd = fl->l_len - 1;
if (oadd > (off_t)(OFF_MAX - start)) {
- LOCKF_DEBUG(0, "lf_advlock: overflow\n");
- return (EOVERFLOW);
+ LOCKF_DEBUG(LF_DBG_TRACE, "lf_advlock: overflow\n");
+ return EOVERFLOW;
}
end = start + oadd;
}
* Create the lockf structure
*/
MALLOC(lock, struct lockf *, sizeof *lock, M_LOCKF, M_WAITOK);
- if (lock == NULL)
- return (ENOLCK);
+ if (lock == NULL) {
+ return ENOLCK;
+ }
lock->lf_start = start;
lock->lf_end = end;
lock->lf_id = ap->a_id;
lock->lf_next = (struct lockf *)0;
TAILQ_INIT(&lock->lf_blkhd);
lock->lf_flags = ap->a_flags;
+#if IMPORTANCE_INHERITANCE
+ lock->lf_boosted = LF_NOT_BOOSTED;
+#endif
+ if (ap->a_flags & F_POSIX) {
+ lock->lf_owner = (struct proc *)lock->lf_id;
+ } else {
+ lock->lf_owner = NULL;
+ }
- if (ap->a_flags & F_FLOCK)
- lock->lf_flags |= F_WAKE1_SAFE;
+ if (ap->a_flags & F_FLOCK) {
+ lock->lf_flags |= F_WAKE1_SAFE;
+ }
- lck_mtx_lock(&vp->v_lock); /* protect the lockf list */
+ lck_mtx_lock(&vp->v_lock); /* protect the lockf list */
/*
* Do the requested operation.
*/
- switch(ap->a_op) {
+ switch (ap->a_op) {
case F_SETLK:
- error = lf_setlock(lock);
+ /*
+ * For F_OFD_* locks, lf_id is the fileglob.
+ * Record an "lf_owner" iff this is a confined fd
+ * i.e. it cannot escape this process and will be
+ * F_UNLCKed before the owner exits. (This is
+ * the implicit guarantee needed to ensure lf_owner
+ * remains a valid reference here.)
+ */
+ if (ap->a_flags & F_OFD_LOCK) {
+ struct fileglob *fg = (void *)lock->lf_id;
+ if (fg->fg_lflags & FG_CONFINED) {
+ lock->lf_owner = current_proc();
+ }
+ }
+ error = lf_setlock(lock, ap->a_timeout);
break;
case F_UNLCK:
break;
case F_GETLK:
- error = lf_getlock(lock, fl);
+ error = lf_getlock(lock, fl, -1);
+ FREE(lock, M_LOCKF);
+ break;
+
+ case F_GETLKPID:
+ error = lf_getlock(lock, fl, fl->l_pid);
FREE(lock, M_LOCKF);
break;
error = EINVAL;
break;
}
- lck_mtx_unlock(&vp->v_lock); /* done manipulating the list */
+ lck_mtx_unlock(&vp->v_lock); /* done manipulating the list */
- LOCKF_DEBUG(0, "lf_advlock: normal exit: %d\n\n", error);
- return (error);
+ LOCKF_DEBUG(LF_DBG_TRACE, "lf_advlock: normal exit: %d\n", error);
+ return error;
}
+/*
+ * Empty the queue of msleeping requests for a lock on the given vnode.
+ * Called with the vnode already locked. Used for forced unmount, where
+ * a flock(2) invoker sleeping on a blocked lock holds an iocount reference
+ * that prevents the vnode from ever being drained. Force unmounting wins.
+ *
+ * Every request queued on the blocked list of vp->v_lockf is tagged with
+ * F_ABORT and then woken through lf_wakelock(); lf_setlock() turns an
+ * otherwise successful wakeup that carries F_ABORT into EBADF.
+ *
+ * NOTE(review): only the first lock on the vnode's list (vp->v_lockf) has
+ * its waiters aborted; locks reachable through lf_next are not visited
+ * here - confirm that this is sufficient for the forced unmount case.
+ */
+void
+lf_abort_advlocks(vnode_t vp)
+{
+ struct lockf *lock;
+
+ if ((lock = vp->v_lockf) == NULL) { /* no locks on this vnode: nothing to abort */
+ return;
+ }
+
+ lck_mtx_assert(&vp->v_lock, LCK_MTX_ASSERT_OWNED); /* caller must hold vp->v_lock */
+
+ if (!TAILQ_EMPTY(&lock->lf_blkhd)) {
+ struct lockf *tlock;
+
+ TAILQ_FOREACH(tlock, &lock->lf_blkhd, lf_block) {
+ /*
+ * Setting this flag should cause all
+ * currently blocked F_SETLK requests to
+ * return to userland with an errno.
+ */
+ tlock->lf_flags |= F_ABORT;
+ }
+ lf_wakelock(lock, TRUE); /* force_all == TRUE: wake every waiter, not just one */
+ }
+}
/*
* Take any lock attempts which are currently blocked by a given lock ("from")
}
/*
- * NOTE: Assumes that if two locks are adjacent on the number line
+ * NOTE: Assumes that if two locks are adjacent on the number line
* and belong to the same owner, then they are adjacent on the list.
*/
-
- /* If the lock ends adjacent to us, we can coelesce it */
if ((*lf)->lf_end != -1 &&
((*lf)->lf_end + 1) == lock->lf_start) {
struct lockf *adjacent = *lf;
- LOCKF_DEBUG(0, "lf_coalesce_adjacent: coalesce adjacent previous\n");
+ LOCKF_DEBUG(LF_DBG_LIST, "lf_coalesce_adjacent: coalesce adjacent previous\n");
lock->lf_start = (*lf)->lf_start;
*lf = lock;
lf = &(*lf)->lf_next;
(lock->lf_end + 1) == (*lf)->lf_start) {
struct lockf *adjacent = *lf;
- LOCKF_DEBUG(0, "lf_coalesce_adjacent: coalesce adjacent following\n");
+ LOCKF_DEBUG(LF_DBG_LIST, "lf_coalesce_adjacent: coalesce adjacent following\n");
lock->lf_end = (*lf)->lf_end;
lock->lf_next = (*lf)->lf_next;
lf = &lock->lf_next;
}
}
-
/*
* lf_setlock
*
* the set is successful, and freed if the
* set is unsuccessful.
*
+ * timeout Timeout specified in the case of
+ * SETLKWTIMEOUT.
+ *
* Returns: 0 Success
* EAGAIN
* EDEADLK
* lf_split:ENOLCK
* lf_clearlock:ENOLCK
* msleep:EINTR
+ * msleep:ETIMEDOUT
*
* Notes: We add the lock to the provisional lock list. We do not
* coalesce at this time; this has implications for other lock
* requestors in the blocker search mechanism.
*/
static int
-lf_setlock(struct lockf *lock)
+lf_setlock(struct lockf *lock, struct timespec *timeout)
{
struct lockf *block;
struct lockf **head = lock->lf_head;
struct lockf **prev, *overlap, *ltmp;
- static char lockstr[] = "lockf";
+ static const char lockstr[] = "lockf";
int priority, needtolink, error;
struct vnode *vp = lock->lf_vnode;
overlap_t ovcase;
#ifdef LOCKF_DEBUGGING
- if (lockf_debug & 1) {
+ if (LOCKF_DEBUGP(LF_DBG_LOCKOP)) {
lf_print("lf_setlock", lock);
lf_printlist("lf_setlock(in)", lock);
}
#endif /* LOCKF_DEBUGGING */
+ LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p Looking for deadlock, vnode %p\n", lock, lock->lf_vnode);
/*
* Set the priority
*/
priority = PLOCK;
- if (lock->lf_type == F_WRLCK)
+ if (lock->lf_type == F_WRLCK) {
priority += 4;
+ }
priority |= PCATCH;
+scan:
/*
* Scan lock list for this file looking for locks that would block us.
*/
- while ((block = lf_getblock(lock))) {
+ while ((block = lf_getblock(lock, -1))) {
/*
* Free the structure and return if nonblocking.
*/
if ((lock->lf_flags & F_WAIT) == 0) {
+ DTRACE_FSINFO(advlock__nowait, vnode_t, vp);
FREE(lock, M_LOCKF);
- return (EAGAIN);
+ return EAGAIN;
}
+ LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p found blocking lock %p\n", lock, block);
+
/*
* We are blocked. Since flock style locks cover
* the whole file, there is no chance for deadlock.
- * For byte-range locks we must check for deadlock.
+ *
+ * OFD byte-range locks currently do NOT support
+ * deadlock detection.
+ *
+ * For POSIX byte-range locks we must check for deadlock.
*
* Deadlock detection is done by looking through the
* wait channels to see if there are any cycles that
- * involve us. MAXDEPTH is set just to make sure we
- * do not go off into neverland.
+ * involve us.
*/
if ((lock->lf_flags & F_POSIX) &&
(block->lf_flags & F_POSIX)) {
- struct proc *wproc, *bproc;
- struct uthread *ut;
- struct lockf *waitblock;
- int i = 0;
+ lck_mtx_lock(&lf_dead_lock);
- /* The block is waiting on something */
- wproc = (struct proc *)block->lf_id;
+ /* The blocked process is waiting on something */
+ struct proc *wproc = block->lf_owner;
proc_lock(wproc);
+
+ LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p owned by pid %d\n", lock, proc_pid(wproc));
+
+ struct uthread *ut;
TAILQ_FOREACH(ut, &wproc->p_uthlist, uu_list) {
/*
- * While the thread is asleep (uu_wchan != 0)
- * in this code (uu_wmesg == lockstr)
- * and we have not exceeded the maximum cycle
- * depth (i < maxlockdepth), then check for a
- * cycle to see if the lock is blocked behind
+ * If the thread is (a) asleep (uu_wchan != 0)
+ * and (b) in this code (uu_wmesg == lockstr)
+ * then check to see if the lock is blocked behind
* someone blocked behind us.
+ *
+ * Note: (i) vp->v_lock is held, preventing other
+ * threads from mutating the blocking list for our vnode.
+ * and (ii) the proc_lock is held i.e. the thread list
+ * is stable.
+ *
+ * HOWEVER some thread in wproc might be sleeping on a lockf
+ * structure for a different vnode, and be woken at any
+ * time. Thus the waitblock list could mutate while
+ * it's being inspected by this thread, and what
+ * ut->uu_wchan was just pointing at could even be freed.
+ *
+ * Nevertheless this is safe here because of lf_dead_lock; if
+ * any thread blocked with uu_wmesg == lockstr wakes (see below)
+ * it will try to acquire lf_dead_lock which is already held
+ * here. Holding that lock prevents the lockf structure being
+ * pointed at by ut->uu_wchan from going away. Thus the vnode
+ * involved can be found and locked, and the corresponding
+ * blocking chain can then be examined safely.
*/
- while (((waitblock = (struct lockf *)ut->uu_wchan) != NULL) &&
- ut->uu_wmesg == lockstr &&
- (i++ < maxlockdepth)) {
- waitblock = (struct lockf *)ut->uu_wchan;
+ const struct lockf *waitblock = (const void *)ut->uu_wchan;
+ if ((waitblock != NULL) && (ut->uu_wmesg == lockstr)) {
+ LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p which is also blocked on lock %p vnode %p\n", lock, waitblock, waitblock->lf_vnode);
+
+ vnode_t othervp = NULL;
+ if (waitblock->lf_vnode != vp) {
+ /*
+ * This thread in wproc is waiting for a lock
+ * on a different vnode; grab the lock on it
+ * that protects lf_next while we examine it.
+ */
+ othervp = waitblock->lf_vnode;
+ if (!lck_mtx_try_lock(&othervp->v_lock)) {
+ /*
+ * avoid kernel deadlock: drop all
+ * locks, pause for a bit to let the
+ * other thread do what it needs to do,
+ * then (because we drop and retake
+ * v_lock) retry the scan.
+ */
+ proc_unlock(wproc);
+ lck_mtx_unlock(&lf_dead_lock);
+ static struct timespec ts = {
+ .tv_sec = 0,
+ .tv_nsec = 2 * NSEC_PER_MSEC,
+ };
+ static const char pausestr[] = "lockf:pause";
+ (void) msleep(lock, &vp->v_lock, priority, pausestr, &ts);
+ LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p contention for vp %p => restart\n", lock, othervp);
+ goto scan;
+ }
+ }
+
/*
* Get the lock blocking the lock
* which would block us, and make
- * certain it hasn't come unblocked
+ * certain it hasn't become unblocked
* (been granted, e.g. between the time
* we called lf_getblock, and the time
* we successfully acquired the
* proc_lock).
*/
- waitblock = waitblock->lf_next;
- if (waitblock == NULL)
- break;
+ const struct lockf *nextblock = waitblock->lf_next;
+ if (nextblock == NULL) {
+ if (othervp) {
+ lck_mtx_unlock(&othervp->v_lock);
+ }
+ LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p with waitblock %p and no lf_next; othervp %p\n", lock, waitblock, othervp);
+ continue;
+ }
+ LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p which is also blocked on lock %p vnode %p\n", lock, nextblock, nextblock->lf_vnode);
/*
* Make sure it's an advisory range
- * lock and not an overall file lock;
+ * lock and not any other kind of lock;
* if we mix lock types, it's our own
* fault.
*/
- if ((waitblock->lf_flags & F_POSIX) == 0)
- break;
+ if ((nextblock->lf_flags & F_POSIX) == 0) {
+ if (othervp) {
+ lck_mtx_unlock(&othervp->v_lock);
+ }
+ continue;
+ }
/*
* If the owner of the lock that's
* getting the requested lock, then we
* would deadlock, so error out.
*/
- bproc = (struct proc *)waitblock->lf_id;
- if (bproc == (struct proc *)lock->lf_id) {
+ struct proc *bproc = nextblock->lf_owner;
+ const boolean_t deadlocked = bproc == lock->lf_owner;
+
+ if (othervp) {
+ lck_mtx_unlock(&othervp->v_lock);
+ }
+ LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p owned by pid %d\n", lock, proc_pid(bproc));
+ if (deadlocked) {
+ LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p which is me, so EDEADLK\n", lock);
proc_unlock(wproc);
+ lck_mtx_unlock(&lf_dead_lock);
FREE(lock, M_LOCKF);
- return (EDEADLK);
+ return EDEADLK;
}
}
+ LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p bottom of thread loop\n", lock);
}
proc_unlock(wproc);
+ lck_mtx_unlock(&lf_dead_lock);
}
/*
lock->lf_type = F_UNLCK;
if ((error = lf_clearlock(lock)) != 0) {
FREE(lock, M_LOCKF);
- return (error);
+ return error;
}
lock->lf_type = F_WRLCK;
}
lock->lf_next = block;
TAILQ_INSERT_TAIL(&block->lf_blkhd, lock, lf_block);
- if ( !(lock->lf_flags & F_FLOCK))
- block->lf_flags &= ~F_WAKE1_SAFE;
+ if (!(lock->lf_flags & F_FLOCK)) {
+ block->lf_flags &= ~F_WAKE1_SAFE;
+ }
+
+#if IMPORTANCE_INHERITANCE
+ /*
+ * Importance donation is done only for cases where the
+ * owning task can be unambiguously determined.
+ *
+ * POSIX type locks are not inherited by child processes;
+ * we maintain a 1:1 mapping between a lock and its owning
+ * process.
+ *
+ * Flock type locks are inherited across fork() and there is
+ * no 1:1 mapping in the general case. However, the fileglobs
+ * used by OFD locks *may* be confined to the process that
+ * created them, and thus have an "owner", in which case
+ * we also attempt importance donation.
+ */
+ if ((lock->lf_flags & block->lf_flags & F_POSIX) != 0) {
+ lf_boost_blocking_proc(lock, block);
+ } else if ((lock->lf_flags & block->lf_flags & F_OFD_LOCK) &&
+ lock->lf_owner != block->lf_owner &&
+ NULL != lock->lf_owner && NULL != block->lf_owner) {
+ lf_boost_blocking_proc(lock, block);
+ }
+#endif /* IMPORTANCE_INHERITANCE */
#ifdef LOCKF_DEBUGGING
- if (lockf_debug & 1) {
+ if (LOCKF_DEBUGP(LF_DBG_LOCKOP)) {
lf_print("lf_setlock: blocking on", block);
lf_printlist("lf_setlock(block)", block);
}
#endif /* LOCKF_DEBUGGING */
- error = msleep(lock, &vp->v_lock, priority, lockstr, 0);
+ DTRACE_FSINFO(advlock__wait, vnode_t, vp);
+
+ if (lock->lf_flags & F_POSIX) {
+ error = msleep(lock, &vp->v_lock, priority, lockstr, timeout);
+ /*
+ * Ensure that 'lock' doesn't get mutated or freed if a
+ * wakeup occurs while hunting for deadlocks (and holding
+ * lf_dead_lock - see above)
+ */
+ lck_mtx_lock(&lf_dead_lock);
+ lck_mtx_unlock(&lf_dead_lock);
+ } else {
+ static const char lockstr_np[] = "lockf:np";
+ error = msleep(lock, &vp->v_lock, priority, lockstr_np, timeout);
+ }
+
+ if (error == 0 && (lock->lf_flags & F_ABORT) != 0) {
+ error = EBADF;
+ }
+
+ if (lock->lf_next) {
+ /*
+ * lf_wakelock() always sets wakelock->lf_next to
+ * NULL before a wakeup; so we've been woken early
+ * - perhaps by a debugger, signal or other event.
+ *
+ * Remove 'lock' from the block list (avoids double-add
+ * in the spurious case, which would create a cycle)
+ */
+ TAILQ_REMOVE(&lock->lf_next->lf_blkhd, lock, lf_block);
+#if IMPORTANCE_INHERITANCE
+ /*
+ * Adjust the boost on lf_next.
+ */
+ lf_adjust_assertion(lock->lf_next);
+#endif /* IMPORTANCE_INHERITANCE */
+ lock->lf_next = NULL;
+
+ if (error == 0) {
+ /*
+ * If this was a spurious wakeup, retry
+ */
+ printf("%s: spurious wakeup, retrying lock\n",
+ __func__);
+ continue;
+ }
+ }
if (!TAILQ_EMPTY(&lock->lf_blkhd)) {
- if ((block = lf_getblock(lock))) {
+ if ((block = lf_getblock(lock, -1)) != NULL) {
lf_move_blocked(block, lock);
}
}
- if (error) { /* XXX */
- /*
- * We may have been awakened by a signal and/or by a
- * debugger continuing us (in which cases we must remove
- * ourselves from the blocked list) and/or by another
- * process releasing a lock (in which case we have
- * already been removed from the blocked list and our
- * lf_next field set to NOLOCKF).
- */
- if (lock->lf_next) {
- TAILQ_REMOVE(&lock->lf_next->lf_blkhd, lock, lf_block);
- lock->lf_next = NOLOCKF;
+
+ if (error) {
+ if (!TAILQ_EMPTY(&lock->lf_blkhd)) {
+ lf_wakelock(lock, TRUE);
}
- if (!TAILQ_EMPTY(&lock->lf_blkhd))
- lf_wakelock(lock, TRUE);
-
FREE(lock, M_LOCKF);
- return (error);
- } /* XXX */
+ /* Return ETIMEDOUT if timeout occurred. */
+ if (error == EWOULDBLOCK) {
+ error = ETIMEDOUT;
+ }
+ return error;
+ }
}
+
/*
* No blocks!! Add the lock. Note that we will
* downgrade or upgrade any overlapping locks this
needtolink = 1;
for (;;) {
ovcase = lf_findoverlap(block, lock, SELF, &prev, &overlap);
- if (ovcase)
+ if (ovcase) {
block = overlap->lf_next;
+ }
/*
* Six cases:
* 0) no overlap
* able to acquire it.
*/
if (lock->lf_type == F_RDLCK &&
- overlap->lf_type == F_WRLCK)
- lf_wakelock(overlap, TRUE);
+ overlap->lf_type == F_WRLCK) {
+ lf_wakelock(overlap, TRUE);
+ }
overlap->lf_type = lock->lf_type;
FREE(lock, M_LOCKF);
lock = overlap; /* for lf_coalesce_adjacent() */
*/
if (lf_split(overlap, lock)) {
FREE(lock, M_LOCKF);
- return (ENOLCK);
+ return ENOLCK;
}
}
lf_wakelock(overlap, TRUE);
*/
if (lock->lf_type == F_RDLCK &&
overlap->lf_type == F_WRLCK) {
- lf_wakelock(overlap, TRUE);
+ lf_wakelock(overlap, TRUE);
} else {
while (!TAILQ_EMPTY(&overlap->lf_blkhd)) {
ltmp = TAILQ_FIRST(&overlap->lf_blkhd);
lock->lf_next = overlap->lf_next;
prev = &lock->lf_next;
needtolink = 0;
- } else
+ } else {
*prev = overlap->lf_next;
+ }
FREE(overlap, M_LOCKF);
continue;
/* Coalesce adjacent locks with identical attributes */
lf_coalesce_adjacent(lock);
#ifdef LOCKF_DEBUGGING
- if (lockf_debug & 1) {
+ if (LOCKF_DEBUGP(LF_DBG_LOCKOP)) {
lf_print("lf_setlock: got the lock", lock);
lf_printlist("lf_setlock(out)", lock);
}
#endif /* LOCKF_DEBUGGING */
- return (0);
+ return 0;
}
struct lockf *overlap, **prev;
overlap_t ovcase;
- if (lf == NOLOCKF)
- return (0);
+ if (lf == NOLOCKF) {
+ return 0;
+ }
#ifdef LOCKF_DEBUGGING
- if (unlock->lf_type != F_UNLCK)
+ if (unlock->lf_type != F_UNLCK) {
panic("lf_clearlock: bad type");
- if (lockf_debug & 1)
+ }
+ if (LOCKF_DEBUGP(LF_DBG_LOCKOP)) {
lf_print("lf_clearlock", unlock);
+ }
#endif /* LOCKF_DEBUGGING */
prev = head;
while ((ovcase = lf_findoverlap(lf, unlock, SELF, &prev, &overlap)) != OVERLAP_NONE) {
/*
* Wakeup the list of locks to be retried.
*/
- lf_wakelock(overlap, FALSE);
+ lf_wakelock(overlap, FALSE);
+#if IMPORTANCE_INHERITANCE
+ if (overlap->lf_boosted == LF_BOOSTED) {
+ lf_drop_assertion(overlap);
+ }
+#endif /* IMPORTANCE_INHERITANCE */
switch (ovcase) {
- case OVERLAP_NONE: /* satisfy compiler enum/switch */
+ case OVERLAP_NONE: /* satisfy compiler enum/switch */
break;
case OVERLAP_EQUALS_LOCK:
* If we can't split the lock, we can't grant it.
* Claim a system limit for the resource shortage.
*/
- if (lf_split(overlap, unlock))
- return (ENOLCK);
+ if (lf_split(overlap, unlock)) {
+ return ENOLCK;
+ }
overlap->lf_next = unlock->lf_next;
break;
break;
}
#ifdef LOCKF_DEBUGGING
- if (lockf_debug & 1)
+ if (LOCKF_DEBUGP(LF_DBG_LOCKOP)) {
lf_printlist("lf_clearlock", unlock);
+ }
#endif /* LOCKF_DEBUGGING */
- return (0);
+ return 0;
}
* fl Pointer to flock structure to receive
* the blocking lock information, if a
* blocking lock is found.
+ * matchpid -1, or pid value to match in lookup.
*
* Returns: 0 Success
*
* the blocking process ID for advisory record locks.
*/
static int
-lf_getlock(struct lockf *lock, struct flock *fl)
+lf_getlock(struct lockf *lock, struct flock *fl, pid_t matchpid)
{
struct lockf *block;
#ifdef LOCKF_DEBUGGING
- if (lockf_debug & 1)
+ if (LOCKF_DEBUGP(LF_DBG_LOCKOP)) {
lf_print("lf_getlock", lock);
+ }
#endif /* LOCKF_DEBUGGING */
- if ((block = lf_getblock(lock))) {
+ if ((block = lf_getblock(lock, matchpid))) { /* a conflicting lock exists: describe it in *fl */
fl->l_type = block->lf_type;
fl->l_whence = SEEK_SET;
fl->l_start = block->lf_start;
- if (block->lf_end == -1)
+ if (block->lf_end == -1) {
fl->l_len = 0;
- else
+ } else {
fl->l_len = block->lf_end - block->lf_start + 1;
- if (block->lf_flags & F_POSIX)
- fl->l_pid = proc_pid((struct proc *)(block->lf_id));
- else
+ }
+ if (NULL != block->lf_owner) {
+ /*
+ * lf_owner is only non-NULL when the lock
+ * "owner" can be unambiguously determined
+ * (lf_advlock sets it for F_POSIX locks and for
+ * F_OFD_LOCK requests on an FG_CONFINED fileglob);
+ * otherwise the pid is reported as -1.
+ */
+ fl->l_pid = proc_pid(block->lf_owner);
+ } else {
fl->l_pid = -1;
+ }
} else {
fl->l_type = F_UNLCK;
}
- return (0);
+ return 0;
}
-#if CONFIG_EMBEDDED
-int lf_getlockpid(struct vnode *vp, struct flock *fl)
-{
- struct lockf *lf, *blk;
-
- if (vp == 0)
- return EINVAL;
-
- fl->l_type = F_UNLCK;
-
- lck_mtx_lock(&vp->v_lock);
-
- for (lf = vp->v_lockf; lf; lf = lf->lf_next) {
-
- if (lf->lf_flags & F_POSIX) {
- if ((((struct proc *)lf->lf_id)->p_pid) == fl->l_pid) {
- fl->l_type = lf->lf_type;
- fl->l_whence = SEEK_SET;
- fl->l_start = lf->lf_start;
- if (lf->lf_end == -1)
- fl->l_len = 0;
- else
- fl->l_len = lf->lf_end - lf->lf_start + 1;
-
- break;
- }
- }
-
- TAILQ_FOREACH(blk, &lf->lf_blkhd, lf_block) {
- if (blk->lf_flags & F_POSIX) {
- if ((((struct proc *)blk->lf_id)->p_pid) == fl->l_pid) {
- fl->l_type = blk->lf_type;
- fl->l_whence = SEEK_SET;
- fl->l_start = blk->lf_start;
- if (blk->lf_end == -1)
- fl->l_len = 0;
- else
- fl->l_len = blk->lf_end - blk->lf_start + 1;
-
- break;
- }
- }
- }
- }
-
- lck_mtx_unlock(&vp->v_lock);
- return (0);
-}
-#endif
-
/*
* lf_getblock
*
*
* Parameters: lock The lock for which we are interested
* in obtaining the blocking lock, if any
+ * matchpid -1, or pid value to match in lookup.
*
* Returns: NOLOCKF No blocking lock exists
* !NOLOCKF The address of the blocking lock's
* struct lockf.
*/
static struct lockf *
-lf_getblock(struct lockf *lock)
+lf_getblock(struct lockf *lock, pid_t matchpid)
{
struct lockf **prev, *overlap, *lf = *(lock->lf_head);
- int ovcase;
- prev = lock->lf_head;
- while ((ovcase = lf_findoverlap(lf, lock, OTHERS, &prev, &overlap)) != OVERLAP_NONE) {
+ for (prev = lock->lf_head;
+ lf_findoverlap(lf, lock, OTHERS, &prev, &overlap) != OVERLAP_NONE;
+ lf = overlap->lf_next) {
/*
- * We've found an overlap, see if it blocks us
+ * Found an overlap.
+ *
+ * If we're matching pids, and it's a record lock,
+ * or it's an OFD lock on a process-confined fd,
+ * but the pid doesn't match, then keep on looking ..
+ *
+ * NOTE(review): the test keys off F_OFD_LOCK alone, so it
+ * also reaches OFD locks whose lf_owner was left NULL (fd
+ * not confined); that relies on proc_pid() accepting a NULL
+ * proc - confirm.  Locks with neither F_POSIX nor
+ * F_OFD_LOCK set are not pid-filtered at all.
*/
- if ((lock->lf_type == F_WRLCK || overlap->lf_type == F_WRLCK))
- return (overlap);
+ if (matchpid != -1 &&
+ (overlap->lf_flags & (F_POSIX | F_OFD_LOCK)) != 0 &&
+ proc_pid(overlap->lf_owner) != matchpid) {
+ continue;
+ }
+
/*
- * Nope, point to the next one on the list and
- * see if it blocks us
+ * does it block us?
+ * (only if at least one of the two locks is a write lock)
*/
- lf = overlap->lf_next;
+ if ((lock->lf_type == F_WRLCK || overlap->lf_type == F_WRLCK)) {
+ return overlap;
+ }
}
- return (NOLOCKF);
+ return NOLOCKF;
}
* this is generally used to relink the
* lock list, avoiding a second iteration.
* *overlap The pointer to the overlapping lock
- * itself; this is ussed to return data in
+ * itself; this is used to return data in
* the check == OTHERS case, and for the
* caller to modify the overlapping lock,
* in the check == SELF case
*/
static overlap_t
lf_findoverlap(struct lockf *lf, struct lockf *lock, int type,
- struct lockf ***prev, struct lockf **overlap)
+ struct lockf ***prev, struct lockf **overlap)
{
off_t start, end;
int found_self = 0;
*overlap = lf;
- if (lf == NOLOCKF)
- return (0);
+ if (lf == NOLOCKF) {
+ return 0;
+ }
#ifdef LOCKF_DEBUGGING
- if (lockf_debug & 2)
+ if (LOCKF_DEBUGP(LF_DBG_LIST)) {
lf_print("lf_findoverlap: looking for overlap in", lock);
+ }
#endif /* LOCKF_DEBUGGING */
start = lock->lf_start;
end = lock->lf_end;
while (lf != NOLOCKF) {
if (((type & SELF) && lf->lf_id != lock->lf_id) ||
((type & OTHERS) && lf->lf_id == lock->lf_id)) {
- /*
+ /*
* Locks belonging to one process are adjacent on the
* list, so if we've found any locks belonging to us,
* and we're now seeing something else, then we've
* examined all "self" locks. Note that bailing out
- * here is quite important; for coalescing, we assume
- * numerically adjacent locks from the same owner to
+ * here is quite important; for coalescing, we assume
+ * numerically adjacent locks from the same owner to
* be adjacent on the list.
*/
if ((type & SELF) && found_self) {
}
#ifdef LOCKF_DEBUGGING
- if (lockf_debug & 2)
+ if (LOCKF_DEBUGP(LF_DBG_LIST)) {
lf_print("\tchecking", lf);
+ }
#endif /* LOCKF_DEBUGGING */
/*
* OK, check for overlap
if ((lf->lf_end != -1 && start > lf->lf_end) ||
(end != -1 && lf->lf_start > end)) {
/* Case 0 */
- LOCKF_DEBUG(2, "no overlap\n");
+ LOCKF_DEBUG(LF_DBG_LIST, "no overlap\n");
/*
- * NOTE: assumes that locks for the same process are
+ * NOTE: assumes that locks for the same process are
* nonintersecting and ordered.
*/
- if ((type & SELF) && end != -1 && lf->lf_start > end)
- return (OVERLAP_NONE);
+ if ((type & SELF) && end != -1 && lf->lf_start > end) {
+ return OVERLAP_NONE;
+ }
*prev = &lf->lf_next;
*overlap = lf = lf->lf_next;
continue;
}
if ((lf->lf_start == start) && (lf->lf_end == end)) {
- LOCKF_DEBUG(2, "overlap == lock\n");
- return (OVERLAP_EQUALS_LOCK);
+ LOCKF_DEBUG(LF_DBG_LIST, "overlap == lock\n");
+ return OVERLAP_EQUALS_LOCK;
}
if ((lf->lf_start <= start) &&
(end != -1) &&
((lf->lf_end >= end) || (lf->lf_end == -1))) {
- LOCKF_DEBUG(2, "overlap contains lock\n");
- return (OVERLAP_CONTAINS_LOCK);
+ LOCKF_DEBUG(LF_DBG_LIST, "overlap contains lock\n");
+ return OVERLAP_CONTAINS_LOCK;
}
if (start <= lf->lf_start &&
- (end == -1 ||
- (lf->lf_end != -1 && end >= lf->lf_end))) {
- LOCKF_DEBUG(2, "lock contains overlap\n");
- return (OVERLAP_CONTAINED_BY_LOCK);
+ (end == -1 ||
+ (lf->lf_end != -1 && end >= lf->lf_end))) {
+ LOCKF_DEBUG(LF_DBG_LIST, "lock contains overlap\n");
+ return OVERLAP_CONTAINED_BY_LOCK;
}
if ((lf->lf_start < start) &&
- ((lf->lf_end >= start) || (lf->lf_end == -1))) {
- LOCKF_DEBUG(2, "overlap starts before lock\n");
- return (OVERLAP_STARTS_BEFORE_LOCK);
+ ((lf->lf_end >= start) || (lf->lf_end == -1))) {
+ LOCKF_DEBUG(LF_DBG_LIST, "overlap starts before lock\n");
+ return OVERLAP_STARTS_BEFORE_LOCK;
}
if ((lf->lf_start > start) &&
- (end != -1) &&
- ((lf->lf_end > end) || (lf->lf_end == -1))) {
- LOCKF_DEBUG(2, "overlap ends after lock\n");
- return (OVERLAP_ENDS_AFTER_LOCK);
+ (end != -1) &&
+ ((lf->lf_end > end) || (lf->lf_end == -1))) {
+ LOCKF_DEBUG(LF_DBG_LIST, "overlap ends after lock\n");
+ return OVERLAP_ENDS_AFTER_LOCK;
}
panic("lf_findoverlap: default");
}
- return (OVERLAP_NONE);
+ return OVERLAP_NONE;
}
struct lockf *splitlock;
#ifdef LOCKF_DEBUGGING
- if (lockf_debug & 2) {
+ if (LOCKF_DEBUGP(LF_DBG_LIST)) {
lf_print("lf_split", lock1);
lf_print("splitting from", lock2);
}
#endif /* LOCKF_DEBUGGING */
/*
- * Check to see if spliting into only two pieces.
+ * Check to see if splitting into only two pieces.
*/
if (lock1->lf_start == lock2->lf_start) {
lock1->lf_start = lock2->lf_end + 1;
lock2->lf_next = lock1;
- return (0);
+ return 0;
}
if (lock1->lf_end == lock2->lf_end) {
lock1->lf_end = lock2->lf_start - 1;
lock2->lf_next = lock1->lf_next;
lock1->lf_next = lock2;
- return (0);
+ return 0;
}
/*
* Make a new lock consisting of the last part of
* the encompassing lock
*/
MALLOC(splitlock, struct lockf *, sizeof *splitlock, M_LOCKF, M_WAITOK);
- if (splitlock == NULL)
- return (ENOLCK);
+ if (splitlock == NULL) {
+ return ENOLCK;
+ }
bcopy(lock1, splitlock, sizeof *splitlock);
splitlock->lf_start = lock2->lf_end + 1;
TAILQ_INIT(&splitlock->lf_blkhd);
lock2->lf_next = splitlock;
lock1->lf_next = lock2;
- return (0);
+ return 0;
}
struct lockf *wakelock;
boolean_t wake_all = TRUE;
- if (force_all == FALSE && (listhead->lf_flags & F_WAKE1_SAFE))
- wake_all = FALSE;
+ if (force_all == FALSE && (listhead->lf_flags & F_WAKE1_SAFE)) {
+ wake_all = FALSE;
+ }
while (!TAILQ_EMPTY(&listhead->lf_blkhd)) {
wakelock = TAILQ_FIRST(&listhead->lf_blkhd);
wakelock->lf_next = NOLOCKF;
#ifdef LOCKF_DEBUGGING
- if (lockf_debug & 2)
+ if (LOCKF_DEBUGP(LF_DBG_LOCKOP)) {
lf_print("lf_wakelock: awakening", wakelock);
+ }
#endif /* LOCKF_DEBUGGING */
if (wake_all == FALSE) {
/*
if (!TAILQ_EMPTY(&listhead->lf_blkhd)) {
TAILQ_CONCAT(&wakelock->lf_blkhd, &listhead->lf_blkhd, lf_block);
- struct lockf *tlock;
+ struct lockf *tlock;
- TAILQ_FOREACH(tlock, &wakelock->lf_blkhd, lf_block) {
- tlock->lf_next = wakelock;
+ TAILQ_FOREACH(tlock, &wakelock->lf_blkhd, lf_block) {
+ if (TAILQ_NEXT(tlock, lf_block) == tlock) {
+ /* See rdar://10887303 */
+ panic("cycle in wakelock list");
+ }
+ tlock->lf_next = wakelock;
}
}
}
wakeup(wakelock);
- if (wake_all == FALSE)
- break;
+ if (wake_all == FALSE) {
+ break;
+ }
}
}
#ifdef LOCKF_DEBUGGING
+#define GET_LF_OWNER_PID(lf) (proc_pid((lf)->lf_owner))
+
/*
* lf_print DEBUG
*
lf_print(const char *tag, struct lockf *lock)
{
printf("%s: lock %p for ", tag, (void *)lock);
- if (lock->lf_flags & F_POSIX)
- printf("proc %ld", (long)((struct proc *)lock->lf_id)->p_pid);
- else
+ if (lock->lf_flags & F_POSIX) {
+ printf("proc %p (owner %d)",
+ lock->lf_id, GET_LF_OWNER_PID(lock));
+ } else if (lock->lf_flags & F_OFD_LOCK) {
+ printf("fg %p (owner %d)",
+ lock->lf_id, GET_LF_OWNER_PID(lock));
+ } else {
printf("id %p", (void *)lock->lf_id);
- if (lock->lf_vnode != 0)
+ }
+ if (lock->lf_vnode != 0) {
printf(" in vno %p, %s, start 0x%016llx, end 0x%016llx",
lock->lf_vnode,
lock->lf_type == F_RDLCK ? "shared" :
lock->lf_type == F_WRLCK ? "exclusive" :
lock->lf_type == F_UNLCK ? "unlock" : "unknown",
(intmax_t)lock->lf_start, (intmax_t)lock->lf_end);
- else
+ } else {
printf(" %s, start 0x%016llx, end 0x%016llx",
lock->lf_type == F_RDLCK ? "shared" :
lock->lf_type == F_WRLCK ? "exclusive" :
lock->lf_type == F_UNLCK ? "unlock" : "unknown",
(intmax_t)lock->lf_start, (intmax_t)lock->lf_end);
- if (!TAILQ_EMPTY(&lock->lf_blkhd))
+ }
+ if (!TAILQ_EMPTY(&lock->lf_blkhd)) {
printf(" block %p\n", (void *)TAILQ_FIRST(&lock->lf_blkhd));
- else
+ } else {
printf("\n");
+ }
}
{
struct lockf *lf, *blk;
- if (lock->lf_vnode == 0)
+ if (lock->lf_vnode == 0) {
return;
+ }
printf("%s: Lock list for vno %p:\n",
tag, lock->lf_vnode);
for (lf = lock->lf_vnode->v_lockf; lf; lf = lf->lf_next) {
- printf("\tlock %p for ",(void *)lf);
- if (lf->lf_flags & F_POSIX)
- printf("proc %ld",
- (long)((struct proc *)lf->lf_id)->p_pid);
- else
+ printf("\tlock %p for ", (void *)lf);
+ if (lf->lf_flags & F_POSIX) {
+ printf("proc %p (owner %d)",
+ lf->lf_id, GET_LF_OWNER_PID(lf));
+ } else if (lf->lf_flags & F_OFD_LOCK) {
+ printf("fg %p (owner %d)",
+ lf->lf_id, GET_LF_OWNER_PID(lf));
+ } else {
printf("id %p", (void *)lf->lf_id);
+ }
printf(", %s, start 0x%016llx, end 0x%016llx",
lf->lf_type == F_RDLCK ? "shared" :
lf->lf_type == F_WRLCK ? "exclusive" :
"unknown", (intmax_t)lf->lf_start, (intmax_t)lf->lf_end);
TAILQ_FOREACH(blk, &lf->lf_blkhd, lf_block) {
printf("\n\t\tlock request %p for ", (void *)blk);
- if (blk->lf_flags & F_POSIX)
- printf("proc %ld",
- (long)((struct proc *)blk->lf_id)->p_pid);
- else
+ if (blk->lf_flags & F_POSIX) {
+ printf("proc %p (owner %d)",
+ blk->lf_id, GET_LF_OWNER_PID(blk));
+ } else if (blk->lf_flags & F_OFD_LOCK) {
+ printf("fg %p (owner %d)",
+ blk->lf_id, GET_LF_OWNER_PID(blk));
+ } else {
printf("id %p", (void *)blk->lf_id);
+ }
printf(", %s, start 0x%016llx, end 0x%016llx",
blk->lf_type == F_RDLCK ? "shared" :
blk->lf_type == F_WRLCK ? "exclusive" :
blk->lf_type == F_UNLCK ? "unlock" :
"unknown", (intmax_t)blk->lf_start,
(intmax_t)blk->lf_end);
- if (!TAILQ_EMPTY(&blk->lf_blkhd))
+ if (!TAILQ_EMPTY(&blk->lf_blkhd)) {
panic("lf_printlist: bad list");
+ }
}
printf("\n");
}
}
#endif /* LOCKF_DEBUGGING */
+
+#if IMPORTANCE_INHERITANCE
+
+/*
+ * lf_hold_assertion
+ *
+ * Take a task importance file-lock hold assertion on the owner of a
+ * blocking lock and, when that succeeds, mark the lock as boosted.
+ *
+ * Parameters: block_task Task owning the lock that blocks the
+ * current thread.
+ *
+ * block Lock on which the current thread
+ * is blocking.
+ *
+ * Returns: <void>
+ *
+ * Notes: task_importance_hold_file_lock_assertion() is called with a
+ * second argument of 1 (presumably the number of assertions to
+ * take - confirm against its declaration). Only when it returns
+ * 0 is block->lf_boosted set to LF_BOOSTED and the LF_DBG_IMPINH
+ * debug message emitted; for any other return value the lock is
+ * left untouched and nothing is reported back to the caller.
+ *
+ * No task reference needs to be held on block_task: the current
+ * thread holds the vnode lock and block_task holds a file lock,
+ * so removing that file lock at exit requires block_task to
+ * grab the vnode lock first.
+ */
+static void
+lf_hold_assertion(task_t block_task, struct lockf *block)
+{
+ if (task_importance_hold_file_lock_assertion(block_task, 1) == 0) {
+ block->lf_boosted = LF_BOOSTED;
+ LOCKF_DEBUG(LF_DBG_IMPINH,
+ "lf: importance hold file lock assert on pid %d lock %p\n",
+ proc_pid(block->lf_owner), block);
+ }
+}
+
+
+/*
+ * lf_jump_to_queue_head
+ *
+ * Move a blocked lock request to the head of the queue of requests
+ * blocked on another lock.
+ *
+ * Parameters: block lockf struct containing the
+ * block queue (block->lf_blkhd).
+ * lock lockf struct to be jumped to the
+ * front.
+ *
+ * Returns: <void>
+ *
+ * Notes: The request is unlinked with TAILQ_REMOVE and re-linked with
+ * TAILQ_INSERT_HEAD, so it is moved from whatever position it
+ * currently occupies, not only from the tail. lock must already
+ * be linked on block->lf_blkhd through its lf_block entry; this
+ * is not checked here.
+ */
+static void
+lf_jump_to_queue_head(struct lockf *block, struct lockf *lock)
+{
+ /* Unlink the request, then re-insert it as the first waiter. */
+ TAILQ_REMOVE(&block->lf_blkhd, lock, lf_block);
+ TAILQ_INSERT_HEAD(&block->lf_blkhd, lock, lf_block);
+}
+
+
+/*
+ * lf_drop_assertion
+ *
+ * Drops the task importance file-lock hold assertion taken for a lock
+ * and marks the lock as no longer boosted.
+ *
+ * Parameters: block lockf struct holding the assertion.
+ *
+ * Returns: <void>
+ *
+ * Notes: The assertion is dropped on the task of block->lf_owner, with
+ * a second argument of 1 (presumably the number of assertions to
+ * drop - confirm against the declaration of
+ * task_importance_drop_file_lock_assertion()).
+ *
+ * The drop is unconditional: block->lf_boosted is not examined
+ * here, it is only reset to LF_NOT_BOOSTED afterwards. Callers
+ * are expected to check it first, as lf_adjust_assertion() does.
+ *
+ * The local variable named current_task is the lock owner's
+ * task, which need not be the task of the running thread.
+ * NOTE(review): the name looks like it shadows the kernel's
+ * current_task() accessor inside this function - confirm.
+ */
+static void
+lf_drop_assertion(struct lockf *block)
+{
+ LOCKF_DEBUG(LF_DBG_IMPINH, "lf: %d: dropping assertion for lock %p\n",
+ proc_pid(block->lf_owner), block);
+
+ task_t current_task = proc_task(block->lf_owner);
+ task_importance_drop_file_lock_assertion(current_task, 1);
+ block->lf_boosted = LF_NOT_BOOSTED;
+}
+
+/*
+ * lf_adjust_assertion
+ *
+ * Adjusts importance assertion of file lock. Goes through
+ * all the requests blocked on the lock and checks whether any of
+ * them still justifies the boost; if none does, the assertion is
+ * dropped through lf_drop_assertion().
+ *
+ * Parameters: block lockf structure which needs to be adjusted.
+ *
+ * Returns: <void>
+ *
+ * Notes: Nothing is done when block->lf_boosted is LF_NOT_BOOSTED.
+ *
+ * A waiter on block->lf_blkhd keeps the boost alive when both
+ * of the following hold:
+ * - it is the same kind of lock as block: either both have
+ * F_POSIX set, or both have F_OFD_LOCK set and their
+ * lf_owner pointers are different and both non-NULL;
+ * - the waiter's task is an importance donor and block's task
+ * is an importance receiver type.
+ * The scan stops at the first such waiter.
+ *
+ * NOTE(review): in the F_POSIX case lf_owner is passed to
+ * proc_task() without a NULL check; presumably POSIX locks
+ * always carry an owner - confirm.
+ */
+static void
+lf_adjust_assertion(struct lockf *block)
+{
+ boolean_t drop_boost = TRUE;
+ struct lockf *next;
+
+ /* Nothing to adjust if the lock is not boosted */
+ if (block->lf_boosted == LF_NOT_BOOSTED) {
+ return;
+ }
+
+ TAILQ_FOREACH(next, &block->lf_blkhd, lf_block) {
+ /* Check if block and next are the same type of lock */
+ if (((block->lf_flags & next->lf_flags & F_POSIX) != 0) ||
+ ((block->lf_flags & next->lf_flags & F_OFD_LOCK) &&
+ (block->lf_owner != next->lf_owner) &&
+ (NULL != block->lf_owner && NULL != next->lf_owner))) {
+ /* Check if next would be boosting block */
+ if (task_is_importance_donor(proc_task(next->lf_owner)) &&
+ task_is_importance_receiver_type(proc_task(block->lf_owner))) {
+ /* Found a waiter still boosting block; keep the assertion */
+ drop_boost = FALSE;
+ break;
+ }
+ }
+ }
+
+ if (drop_boost) {
+ lf_drop_assertion(block);
+ }
+}
+
+static void
+lf_boost_blocking_proc(struct lockf *lock, struct lockf *block)
+{
+ task_t ltask = proc_task(lock->lf_owner);
+ task_t btask = proc_task(block->lf_owner);
+
+ /*
+ * Importance inheritance for a blocked request: ltask is the task
+ * owning the waiting request (lock) and btask is the task owning
+ * the lock it is queued on (block). Returns nothing.
+ *
+ * Check if ltask can donate importance. The
+ * check of imp_donor bit is done without holding
+ * any lock. The value may change after you read it,
+ * but it is ok to boost a task while someone else is
+ * unboosting you.
+ *
+ * When ltask is a donor, a hold assertion is taken on btask only
+ * if block is not already marked LF_BOOSTED and btask is an
+ * importance receiver type. Independently of that, lock is moved
+ * to the head of block's queue of blocked requests, so lock must
+ * already be linked on block->lf_blkhd when this is called.
+ * When ltask is not a donor, nothing is changed.
+ *
+ * TODO: Support live inheritance on file locks.
+ */
+ if (task_is_importance_donor(ltask)) {
+ LOCKF_DEBUG(LF_DBG_IMPINH,
+ "lf: %d: attempt to boost pid %d that holds lock %p\n",
+ proc_pid(lock->lf_owner), proc_pid(block->lf_owner), block);
+
+ if (block->lf_boosted != LF_BOOSTED &&
+ task_is_importance_receiver_type(btask)) {
+ lf_hold_assertion(btask, block);
+ }
+ lf_jump_to_queue_head(block, lock);
+ }
+}
+#endif /* IMPORTANCE_INHERITANCE */