+/*
+ * Routine: hw_lock_bit_to
+ *
+ * Disables preemption and then forwards lock, bit, timeout (and the lock
+ * group, when LCK_GRP_ARG expands to it) to hw_lock_bit_to_internal(),
+ * returning that routine's result unchanged. Callers in this file treat a
+ * non-zero result as "lock acquired".
+ *
+ * NOTE(review): the name is parenthesized, presumably so that a function-like
+ * macro of the same name is not expanded at the definition -- confirm.
+ * NOTE(review): whether preemption is re-enabled on failure is decided by
+ * hw_lock_bit_to_internal(), which is not visible here -- confirm.
+ */
+unsigned
+int
+(hw_lock_bit_to)(hw_lock_bit_t * lock, unsigned int bit, uint32_t timeout LCK_GRP_ARG(lck_grp_t *grp))
+{
+ /* Preemption must be off before the acquire attempt starts. */
+ _disable_preemption();
+ return hw_lock_bit_to_internal(lock, bit, timeout LCK_GRP_ARG(grp));
+}
+
+#if __SMP__
+/*
+ * Routine: hw_lock_bit_to_contended
+ *
+ * Spinning acquire path for a bit lock (only built when __SMP__ is set).
+ * Repeatedly attempts hw_atomic_test_and_set32() on `mask`, in batches of
+ * LOCK_SNOOP_SPINS attempts, until an attempt succeeds or the timeout
+ * elapses.
+ *
+ * Returns 1 if the lock bit was acquired, 0 if the deadline passed first.
+ * When CONFIG_DTRACE or LOCK_STATS is set, a successful acquire also records
+ * a miss and, if spin statistics are enabled for the lock, the time spent
+ * spinning.
+ */
+static unsigned int NOINLINE
+hw_lock_bit_to_contended(hw_lock_bit_t *lock, uint32_t mask, uint32_t timeout LCK_GRP_ARG(lck_grp_t *grp))
+{
+ /* 0 means "deadline not computed yet"; it is set after the first batch. */
+ uint64_t end = 0;
+ int i;
+#if CONFIG_DTRACE || LOCK_STATS
+ uint64_t begin = 0;
+ boolean_t stat_enabled = lck_grp_spin_spin_enabled(lock LCK_GRP_ARG(grp));
+#endif /* CONFIG_DTRACE || LOCK_STATS */
+
+#if LOCK_STATS || CONFIG_DTRACE
+ /* Only pay for the timestamp when spin statistics are being collected. */
+ if (__improbable(stat_enabled)) {
+ begin = mach_absolute_time();
+ }
+#endif /* LOCK_STATS || CONFIG_DTRACE */
+ for (;;) {
+ for (i = 0; i < LOCK_SNOOP_SPINS; i++) {
+ // Always load-exclusive before wfe
+ // This grabs the monitor and wakes up on a release event
+ if (hw_atomic_test_and_set32(lock, mask, mask, memory_order_acquire, TRUE)) {
+ goto end;
+ }
+ }
+ /*
+ * A whole batch failed. The first time through, start the timeout
+ * clock; on later passes, give up once the deadline has been reached.
+ */
+ if (end == 0) {
+ end = ml_get_timebase() + timeout;
+ } else if (ml_get_timebase() >= end) {
+ break;
+ }
+ }
+ /* Timed out without acquiring the bit. */
+ return 0;
+end:
+#if CONFIG_DTRACE || LOCK_STATS
+ if (__improbable(stat_enabled)) {
+ lck_grp_spin_update_spin(lock LCK_GRP_ARG(grp), mach_absolute_time() - begin);
+ }
+ /* Reaching this routine at all counts as a miss, independent of stat_enabled. */
+ lck_grp_spin_update_miss(lock LCK_GRP_ARG(grp));
+#endif /* CONFIG_DTRACE || LOCK_STATS */
+
+ return 1;
+}
+#endif // __SMP__
+
+/*
+ * Routine: hw_lock_bit
+ *
+ * Acquire the given bit of the lock word through hw_lock_bit_to() with
+ * LOCK_PANIC_TIMEOUT. A failed acquire is fatal: the routine panics with
+ * a message that depends on whether this is an SMP build.
+ */
+void
+(hw_lock_bit)(hw_lock_bit_t * lock, unsigned int bit LCK_GRP_ARG(lck_grp_t *grp))
+{
+	if (!hw_lock_bit_to(lock, bit, LOCK_PANIC_TIMEOUT, LCK_GRP_PROBEARG(grp))) {
+#if __SMP__
+		panic("hw_lock_bit(): timed out (%p)", lock);
+#else
+		panic("hw_lock_bit(): interlock held (%p)", lock);
+#endif
+	}
+}
+
+/*
+ * Routine: hw_lock_bit_nopreempt
+ *
+ * Variant of hw_lock_bit() for callers that already run with preemption
+ * disabled: it panics if the preemption level is 0, then goes straight to
+ * hw_lock_bit_to_internal() with LOCK_PANIC_TIMEOUT and panics if that
+ * reports failure.
+ */
+void
+(hw_lock_bit_nopreempt)(hw_lock_bit_t * lock, unsigned int bit LCK_GRP_ARG(lck_grp_t *grp))
+{
+	if (__improbable(get_preemption_level() == 0)) {
+		panic("Attempt to take no-preempt bitlock %p in preemptible context", lock);
+	}
+
+	if (!hw_lock_bit_to_internal(lock, bit, LOCK_PANIC_TIMEOUT LCK_GRP_ARG(grp))) {
+#if __SMP__
+		panic("hw_lock_bit_nopreempt(): timed out (%p)", lock);
+#else
+		panic("hw_lock_bit_nopreempt(): interlock held (%p)", lock);
+#endif
+	}
+}
+
+/*
+ * Routine: hw_lock_bit_try
+ *
+ * Make a single attempt to take bit `bit` of the lock word.
+ *
+ * Preemption is disabled before the attempt. On success it stays disabled,
+ * the held statistic is updated, and a non-zero value is returned. On
+ * failure preemption is re-enabled and 0 is returned.
+ */
+unsigned
+int
+(hw_lock_bit_try)(hw_lock_bit_t * lock, unsigned int bit LCK_GRP_ARG(lck_grp_t *grp))
+{
+	/* Unsigned constant: shifting a signed 1 into bit 31 is undefined behavior. */
+	uint32_t mask = (1U << bit);
+#if !__SMP__
+	uint32_t state;
+#endif
+	boolean_t success = FALSE;
+
+	_disable_preemption();
+#if __SMP__
+	// TODO: consider weak (non-looping) atomic test-and-set
+	success = hw_atomic_test_and_set32(lock, mask, mask, memory_order_acquire, FALSE);
+#else
+	state = ordered_load_bit(lock);
+	if (!(mask & state)) {
+		ordered_store_bit(lock, state | mask);
+		success = TRUE;
+	}
+#endif // __SMP__
+
+	if (success) {
+		lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
+	} else {
+		/* Lock not taken: undo the preemption disable from above. */
+		_enable_preemption();
+	}
+
+	return success;
+}
+
+/*
+ * Routine: hw_unlock_bit_internal
+ *
+ * Clear bit `bit` of the lock word without touching the preemption level.
+ * On SMP builds the clear is an atomic and-not with release ordering
+ * (followed by set_event() on __arm__); otherwise it is an ordered
+ * load/store pair. Fires the spin-unlock lockstat probe under CONFIG_DTRACE.
+ */
+static inline void
+hw_unlock_bit_internal(hw_lock_bit_t *lock, unsigned int bit)
+{
+	/* Unsigned constant: shifting a signed 1 into bit 31 is undefined behavior. */
+	uint32_t mask = (1U << bit);
+#if !__SMP__
+	uint32_t state;
+#endif
+
+#if __SMP__
+	os_atomic_andnot(lock, mask, release);
+#if __arm__
+	set_event();
+#endif
+#else // __SMP__
+	state = ordered_load_bit(lock);
+	ordered_store_bit(lock, state & ~mask);
+#endif // __SMP__
+#if CONFIG_DTRACE
+	LOCKSTAT_RECORD(LS_LCK_SPIN_UNLOCK_RELEASE, lock, bit);
+#endif
+}
+
+/*
+ * Routine: hw_unlock_bit
+ *
+ * Release a bit spin-lock. The second parameter is the number of the bit
+ * to clear in the lock word.
+ * After the bit is cleared, preemption is re-enabled via
+ * _enable_preemption().
+ */
+void
+hw_unlock_bit(hw_lock_bit_t * lock, unsigned int bit)
+{
+ /* Drop the lock bit first, then allow preemption again. */
+ hw_unlock_bit_internal(lock, bit);
+ _enable_preemption();
+}
+
+/*
+ * Routine: hw_unlock_bit_nopreempt
+ *
+ * Release a bit spin-lock without changing the preemption level.
+ * Panics if called with a preemption level of 0; otherwise it only clears
+ * the bit through hw_unlock_bit_internal().
+ */
+void
+hw_unlock_bit_nopreempt(hw_lock_bit_t * lock, unsigned int bit)
+{
+ if (__improbable(get_preemption_level() == 0)) {
+ panic("Attempt to release no-preempt bitlock %p in preemptible context", lock);
+ }
+ hw_unlock_bit_internal(lock, bit);
+}
+
+/*
+ * Routine: lck_spin_sleep_grp
+ *
+ * Wait on `event` while giving up the spin lock `lck`.
+ *
+ * If assert_wait() reports THREAD_WAITING, the spin lock is dropped, the
+ * thread blocks, and the lock is re-taken with lck_spin_lock_grp() unless
+ * LCK_SLEEP_UNLOCK was requested. If the wait could not be asserted, the
+ * lock is dropped only when LCK_SLEEP_UNLOCK was requested.
+ *
+ * Panics on sleep-action bits outside LCK_SLEEP_MASK.
+ * Returns the wait result from assert_wait() or thread_block().
+ */
+wait_result_t
+lck_spin_sleep_grp(
+	lck_spin_t *lck,
+	lck_sleep_action_t lck_sleep_action,
+	event_t event,
+	wait_interrupt_t interruptible,
+	lck_grp_t *grp)
+{
+	wait_result_t wres;
+
+	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
+		panic("Invalid lock sleep action %x\n", lck_sleep_action);
+	}
+
+	wres = assert_wait(event, interruptible);
+	if (wres != THREAD_WAITING) {
+		/* Not going to sleep: only honour an explicit unlock request. */
+		if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
+			lck_spin_unlock(lck);
+		}
+		return wres;
+	}
+
+	lck_spin_unlock(lck);
+	wres = thread_block(THREAD_CONTINUE_NULL);
+	if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
+		lck_spin_lock_grp(lck, grp);
+	}
+
+	return wres;
+}
+
+/*
+ * Routine: lck_spin_sleep
+ *
+ * Convenience wrapper: same as lck_spin_sleep_grp() with LCK_GRP_NULL
+ * passed as the lock group.
+ */
+wait_result_t
+lck_spin_sleep(
+ lck_spin_t *lck,
+ lck_sleep_action_t lck_sleep_action,
+ event_t event,
+ wait_interrupt_t interruptible)
+{
+ return lck_spin_sleep_grp(lck, lck_sleep_action, event, interruptible, LCK_GRP_NULL);
+}
+
+/*
+ * Routine: lck_spin_sleep_deadline
+ *
+ * Deadline flavour of lck_spin_sleep(): the wait is asserted with
+ * assert_wait_deadline(). When the wait is asserted (THREAD_WAITING) the
+ * spin lock is released, the thread blocks, and the lock is re-acquired
+ * with lck_spin_lock() unless LCK_SLEEP_UNLOCK is set. Otherwise the lock
+ * is released only if LCK_SLEEP_UNLOCK is set.
+ *
+ * Panics on sleep-action bits outside LCK_SLEEP_MASK.
+ * Returns the wait result.
+ */
+wait_result_t
+lck_spin_sleep_deadline(
+	lck_spin_t *lck,
+	lck_sleep_action_t lck_sleep_action,
+	event_t event,
+	wait_interrupt_t interruptible,
+	uint64_t deadline)
+{
+	wait_result_t wres;
+
+	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
+		panic("Invalid lock sleep action %x\n", lck_sleep_action);
+	}
+
+	wres = assert_wait_deadline(event, interruptible, deadline);
+	if (wres != THREAD_WAITING) {
+		/* No sleep will happen: drop the lock only on request. */
+		if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
+			lck_spin_unlock(lck);
+		}
+		return wres;
+	}
+
+	lck_spin_unlock(lck);
+	wres = thread_block(THREAD_CONTINUE_NULL);
+	if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
+		lck_spin_lock(lck);
+	}
+
+	return wres;
+}
+
+/*
+ * Routine: lck_mtx_sleep
+ *
+ * Wait on `event` while giving up the mutex `lck`.
+ *
+ * lck_sleep_action selects what happens around the sleep:
+ *   LCK_SLEEP_UNLOCK       - return with the mutex unlocked
+ *   LCK_SLEEP_SPIN         - re-acquire with lck_mtx_lock_spin()
+ *   LCK_SLEEP_SPIN_ALWAYS  - re-acquire with lck_mtx_lock_spin_always()
+ *   LCK_SLEEP_PROMOTED_PRI - hold a rwlock_count reference across the sleep
+ *   (none of the above)    - re-acquire with lck_mtx_lock()
+ * Bits outside LCK_SLEEP_MASK cause a panic.
+ *
+ * Returns the result of assert_wait(), or of thread_block() if the thread
+ * actually slept.
+ */
+wait_result_t
+lck_mtx_sleep(
+ lck_mtx_t *lck,
+ lck_sleep_action_t lck_sleep_action,
+ event_t event,
+ wait_interrupt_t interruptible)
+{
+ wait_result_t res;
+ thread_t thread = current_thread();
+
+ KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_START,
+ VM_KERNEL_UNSLIDE_OR_PERM(lck), (int)lck_sleep_action, VM_KERNEL_UNSLIDE_OR_PERM(event), (int)interruptible, 0);
+
+ if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
+ panic("Invalid lock sleep action %x\n", lck_sleep_action);
+ }
+
+ if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
+ /*
+ * We overload the RW lock promotion to give us a priority ceiling
+ * during the time that this thread is asleep, so that when it
+ * is re-awakened (and not yet contending on the mutex), it is
+ * runnable at a reasonably high priority.
+ */
+ thread->rwlock_count++;
+ }
+
+ res = assert_wait(event, interruptible);
+ if (res == THREAD_WAITING) {
+ /* The wait is queued, so it is safe to drop the mutex and block. */
+ lck_mtx_unlock(lck);
+ res = thread_block(THREAD_CONTINUE_NULL);
+ if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
+ if ((lck_sleep_action & LCK_SLEEP_SPIN)) {
+ lck_mtx_lock_spin(lck);
+ } else if ((lck_sleep_action & LCK_SLEEP_SPIN_ALWAYS)) {
+ lck_mtx_lock_spin_always(lck);
+ } else {
+ lck_mtx_lock(lck);
+ }
+ }
+ } else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
+ /* No sleep happened, but the caller still asked for the mutex to be dropped. */
+ lck_mtx_unlock(lck);
+ }
+
+ if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
+ /* Drop the reference taken above; the decrement happens regardless of the flag test. */
+ if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
+ /* sched_flags checked without lock, but will be rechecked while clearing */
+ lck_rw_clear_promotion(thread, unslide_for_kdebug(event));
+ }
+ }
+
+ KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0);
+
+ return res;
+}
+
+
+/*
+ * Routine: lck_mtx_sleep_deadline
+ *
+ * Deadline flavour of lck_mtx_sleep(): wait on `event` until it is posted
+ * or `deadline` passes, giving up the mutex `lck` while asleep.
+ *
+ * lck_sleep_action selects what happens around the sleep:
+ *   LCK_SLEEP_UNLOCK       - return with the mutex unlocked
+ *   LCK_SLEEP_SPIN         - re-acquire with lck_mtx_lock_spin()
+ *   LCK_SLEEP_SPIN_ALWAYS  - re-acquire with lck_mtx_lock_spin_always()
+ *   LCK_SLEEP_PROMOTED_PRI - hold a rwlock_count reference across the sleep
+ *   (none of the above)    - re-acquire with lck_mtx_lock()
+ * Bits outside LCK_SLEEP_MASK cause a panic.
+ *
+ * Returns the result of assert_wait_deadline(), or of thread_block() if the
+ * thread actually slept.
+ */
+wait_result_t
+lck_mtx_sleep_deadline(
+	lck_mtx_t *lck,
+	lck_sleep_action_t lck_sleep_action,
+	event_t event,
+	wait_interrupt_t interruptible,
+	uint64_t deadline)
+{
+	wait_result_t res;
+	thread_t thread = current_thread();
+
+	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_START,
+	    VM_KERNEL_UNSLIDE_OR_PERM(lck), (int)lck_sleep_action, VM_KERNEL_UNSLIDE_OR_PERM(event), (int)interruptible, 0);
+
+	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
+		panic("Invalid lock sleep action %x\n", lck_sleep_action);
+	}
+
+	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
+		/*
+		 * See lck_mtx_sleep().
+		 */
+		thread->rwlock_count++;
+	}
+
+	res = assert_wait_deadline(event, interruptible, deadline);
+	if (res == THREAD_WAITING) {
+		/* The wait is queued, so it is safe to drop the mutex and block. */
+		lck_mtx_unlock(lck);
+		res = thread_block(THREAD_CONTINUE_NULL);
+		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
+			if ((lck_sleep_action & LCK_SLEEP_SPIN)) {
+				lck_mtx_lock_spin(lck);
+			} else if ((lck_sleep_action & LCK_SLEEP_SPIN_ALWAYS)) {
+				/* Same re-acquire mode as lck_mtx_sleep() for this flag. */
+				lck_mtx_lock_spin_always(lck);
+			} else {
+				lck_mtx_lock(lck);
+			}
+		}
+	} else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
+		/* No sleep happened, but the caller still asked for the mutex to be dropped. */
+		lck_mtx_unlock(lck);
+	}
+
+	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
+		/* Drop the reference taken above; the decrement happens regardless of the flag test. */
+		if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
+			/* sched_flags checked without lock, but will be rechecked while clearing */
+			lck_rw_clear_promotion(thread, unslide_for_kdebug(event));
+		}
+	}
+
+	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0);
+
+	return res;
+}
+
+/*
+ * Lock Boosting Invariants:
+ *
+ * The lock owner is always promoted to the max priority of all its waiters.
+ * Max priority is capped at MAXPRI_PROMOTE.
+ *
+ * The last waiter is not given a promotion when it wakes up or acquires the lock.
+ * When the last waiter is waking up, a new contender can always come in and
+ * steal the lock without having to wait for the last waiter to make forward progress.
+ */
+
+/*
+ * Routine: lck_mtx_lock_wait
+ *
+ * Invoked in order to wait on contention.
+ *
+ * Called with the interlock locked and
+ * returns it unlocked.
+ *
+ * Parameters:
+ *   lck    - the contended mutex (may be an indirect mutex)
+ *   holder - thread passed to the turnstile as the inheritor
+ *   ts     - in/out turnstile pointer; if *ts is NULL on entry a turnstile
+ *            is prepared here and stored back through it
+ *
+ * Always aggressively sets the owning thread to promoted,
+ * even if it's the same or higher priority
+ * This prevents it from lowering its own priority while holding a lock
+ *
+ * TODO: Come up with a more efficient way to handle same-priority promotions
+ * <rdar://problem/30737670> ARM mutex contention logic could avoid taking the thread lock
+ */
+void
+lck_mtx_lock_wait(
+ lck_mtx_t *lck,
+ thread_t holder,
+ struct turnstile **ts)
+{
+ thread_t thread = current_thread();
+ lck_mtx_t *mutex;
+ __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(lck);
+
+#if CONFIG_DTRACE
+ uint64_t sleep_start = 0;
+
+ /* Only timestamp when one of the block probes is enabled. */
+ if (lockstat_probemap[LS_LCK_MTX_LOCK_BLOCK] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_BLOCK]) {
+ sleep_start = mach_absolute_time();
+ }
+#endif
+
+ /* Resolve an indirect mutex to the lck_mtx_t it points at. */
+ if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) {
+ mutex = lck;
+ } else {
+ mutex = &lck->lck_mtx_ptr->lck_mtx;
+ }
+
+ KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START,
+ trace_lck, (uintptr_t)thread_tid(thread), 0, 0, 0);
+
+ /* Register this thread as a waiter on the mutex. */
+ assert(thread->waiting_for_mutex == NULL);
+ thread->waiting_for_mutex = mutex;
+ mutex->lck_mtx_waiters++;
+
+ if (*ts == NULL) {
+ *ts = turnstile_prepare((uintptr_t)mutex, NULL, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
+ }
+
+ struct turnstile *turnstile = *ts;
+ thread_set_pending_block_hint(thread, kThreadWaitKernelMutex);
+ /* Name the holder as inheritor now; the update is completed after the interlock is dropped. */
+ turnstile_update_inheritor(turnstile, holder, (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));
+
+ waitq_assert_wait64(&turnstile->ts_waitq, CAST_EVENT64_T(LCK_MTX_EVENT(mutex)), THREAD_UNINT | THREAD_WAIT_NOREPORT_USER, TIMEOUT_WAIT_FOREVER);
+
+ /* The wait is asserted, so the interlock can be released before blocking. */
+ lck_mtx_ilk_unlock(mutex);
+
+ turnstile_update_inheritor_complete(turnstile, TURNSTILE_INTERLOCK_NOT_HELD);
+
+ thread_block(THREAD_CONTINUE_NULL);
+
+ thread->waiting_for_mutex = NULL;
+
+ KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_END, 0, 0, 0, 0, 0);
+#if CONFIG_DTRACE
+ /*
+ * Record the DTrace lockstat probe for blocking, block time
+ * measured from when we were entered.
+ */
+ if (sleep_start) {
+ if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) {
+ LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_BLOCK, lck,
+ mach_absolute_time() - sleep_start);
+ } else {
+ LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_BLOCK, lck,
+ mach_absolute_time() - sleep_start);
+ }
+ }
+#endif
+}
+
+/*
+ * Routine: lck_mtx_lock_acquire
+ *
+ * Invoked on acquiring the mutex when there is
+ * contention.
+ *
+ * If the mutex still has waiters, the current thread is made the
+ * inheritor of the mutex's turnstile (preparing one if `ts` is NULL).
+ * If a turnstile was passed in or prepared here, turnstile_complete()
+ * is called before returning.
+ *
+ * Returns the current number of waiters.
+ *
+ * Called with the interlock locked.
+ */
+int
+lck_mtx_lock_acquire(
+ lck_mtx_t *lck,
+ struct turnstile *ts)
+{
+ thread_t thread = current_thread();
+ lck_mtx_t *mutex;
+
+ /* Resolve an indirect mutex to the lck_mtx_t it points at. */
+ if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) {
+ mutex = lck;
+ } else {
+ mutex = &lck->lck_mtx_ptr->lck_mtx;
+ }
+
+ assert(thread->waiting_for_mutex == NULL);
+
+ if (mutex->lck_mtx_waiters > 0) {
+ if (ts == NULL) {
+ ts = turnstile_prepare((uintptr_t)mutex, NULL, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
+ }
+
+ /* The new owner inherits the push from the remaining waiters. */
+ turnstile_update_inheritor(ts, thread, (TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD));
+ turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
+ }
+
+ /* Pairs with the turnstile_prepare() done here or by the caller. */
+ if (ts != NULL) {
+ turnstile_complete((uintptr_t)mutex, NULL, NULL, TURNSTILE_KERNEL_MUTEX);
+ }
+
+ return mutex->lck_mtx_waiters;
+}
+
+/*
+ * Routine: lck_mtx_unlock_wakeup
+ *
+ * Invoked on unlock when there is contention.
+ *
+ * Wakes one thread waiting on the mutex's turnstile and decrements the
+ * waiter count. Panics if `holder` is not the current thread.
+ *
+ * Returns TRUE if waiters remain after the wakeup, FALSE otherwise.
+ *
+ * Called with the interlock locked.
+ *
+ * NOTE: callers should call turnstile_cleanup after
+ * dropping the interlock.
+ */
+boolean_t
+lck_mtx_unlock_wakeup(
+ lck_mtx_t *lck,
+ thread_t holder)
+{
+ thread_t thread = current_thread();
+ lck_mtx_t *mutex;
+ __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(lck);
+ struct turnstile *ts;
+ kern_return_t did_wake;
+
+ /* Resolve an indirect mutex to the lck_mtx_t it points at. */
+ if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) {
+ mutex = lck;
+ } else {
+ mutex = &lck->lck_mtx_ptr->lck_mtx;
+ }
+
+ if (thread != holder) {
+ panic("lck_mtx_unlock_wakeup: mutex %p holder %p\n", mutex, holder);
+ }
+
+ KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_START,
+ trace_lck, (uintptr_t)thread_tid(thread), 0, 0, 0);
+
+ assert(mutex->lck_mtx_waiters > 0);
+ assert(thread->waiting_for_mutex == NULL);
+
+ ts = turnstile_prepare((uintptr_t)mutex, NULL, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
+
+ if (mutex->lck_mtx_waiters > 1) {
+ /* WAITQ_PROMOTE_ON_WAKE will call turnstile_update_inheritor on the wokenup thread */
+ did_wake = waitq_wakeup64_one(&ts->ts_waitq, CAST_EVENT64_T(LCK_MTX_EVENT(mutex)), THREAD_AWAKENED, WAITQ_PROMOTE_ON_WAKE);
+ } else {
+ /* Last waiter: wake it without promotion and clear the turnstile's inheritor. */
+ did_wake = waitq_wakeup64_one(&ts->ts_waitq, CAST_EVENT64_T(LCK_MTX_EVENT(mutex)), THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
+ turnstile_update_inheritor(ts, NULL, TURNSTILE_IMMEDIATE_UPDATE);
+ }
+ assert(did_wake == KERN_SUCCESS);
+
+ turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
+ turnstile_complete((uintptr_t)mutex, NULL, NULL, TURNSTILE_KERNEL_MUTEX);
+
+ mutex->lck_mtx_waiters--;
+
+ KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_END, 0, 0, 0, 0, 0);
+
+ return mutex->lck_mtx_waiters > 0;
+}
+
+/*
+ * Routine: mutex_pause
+ *
+ * Called by former callers of simple_lock_pause().
+ *
+ * Sleeps for a short back-off interval chosen by the collision count:
+ * the count is clamped to the histogram size and tallied in
+ * max_collision_count[], then clamped again to index collision_backoffs[]
+ * (values are passed to assert_wait_timeout() with an NSEC_PER_USEC scale).
+ */
+#define MAX_COLLISION_COUNTS 32
+#define MAX_COLLISION 8
+
+/* Histogram of observed collision counts, indexed by clamped count. */
+unsigned int max_collision_count[MAX_COLLISION_COUNTS];
+
+/* Back-off interval per collision level. */
+uint32_t collision_backoffs[MAX_COLLISION] = {
+	10, 50, 100, 200, 400, 600, 800, 1000
+};
+
+
+void
+mutex_pause(uint32_t collisions)
+{
+	wait_result_t wres;
+	uint32_t stat_slot;
+	uint32_t delay_slot;
+
+	/* Clamp into the histogram and record this pause. */
+	stat_slot = (collisions < MAX_COLLISION_COUNTS) ? collisions : (MAX_COLLISION_COUNTS - 1);
+	max_collision_count[stat_slot]++;
+
+	/* Clamp further into the (smaller) back-off table. */
+	delay_slot = (stat_slot < MAX_COLLISION) ? stat_slot : (MAX_COLLISION - 1);
+
+	wres = assert_wait_timeout((event_t)mutex_pause, THREAD_UNINT, collision_backoffs[delay_slot], NSEC_PER_USEC);
+	assert(wres == THREAD_WAITING);
+
+	wres = thread_block(THREAD_CONTINUE_NULL);
+	assert(wres == THREAD_TIMED_OUT);
+}
+
+
+/* Counters: yields that dropped the mutex vs. yields that found no waiters. */
+unsigned int mutex_yield_wait = 0;
+unsigned int mutex_yield_no_wait = 0;
+
+/*
+ * Routine: lck_mtx_yield
+ *
+ * If the mutex (or the mutex an indirect lock points at) has waiters,
+ * drop it, pause via mutex_pause(0), and take it again. With no waiters
+ * the mutex is left untouched. Either way the matching counter is bumped.
+ */
+void
+lck_mtx_yield(
+	lck_mtx_t *lck)
+{
+	lck_mtx_t *target;
+	int waiters;
+
+#if DEBUG
+	lck_mtx_assert(lck, LCK_MTX_ASSERT_OWNED);
+#endif /* DEBUG */
+
+	target = (lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT) ? &lck->lck_mtx_ptr->lck_mtx : lck;
+	waiters = target->lck_mtx_waiters;
+
+	if (waiters == 0) {
+		mutex_yield_no_wait++;
+		return;
+	}
+
+	mutex_yield_wait++;
+	lck_mtx_unlock(lck);
+	mutex_pause(0);
+	lck_mtx_lock(lck);
+}
+
+
+/*
+ * Routine: lck_rw_sleep
+ *
+ * Wait on `event` while giving up the reader-writer lock `lck`.
+ *
+ * lck_sleep_action selects what happens around the sleep:
+ *   LCK_SLEEP_UNLOCK       - return with the lock released
+ *   LCK_SLEEP_EXCLUSIVE    - re-acquire exclusive
+ *   LCK_SLEEP_SHARED       - re-acquire shared
+ *   (neither of the two)   - re-acquire in the mode lck_rw_done() reported
+ *   LCK_SLEEP_PROMOTED_PRI - hold a rwlock_count reference across the sleep
+ * Bits outside LCK_SLEEP_MASK cause a panic.
+ *
+ * Returns the result of assert_wait(), or of thread_block() if the thread
+ * actually slept.
+ */
+wait_result_t
+lck_rw_sleep(
+ lck_rw_t *lck,
+ lck_sleep_action_t lck_sleep_action,
+ event_t event,
+ wait_interrupt_t interruptible)
+{
+ wait_result_t res;
+ lck_rw_type_t lck_rw_type;
+ thread_t thread = current_thread();
+
+ if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
+ panic("Invalid lock sleep action %x\n", lck_sleep_action);
+ }
+
+ if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
+ /*
+ * Although we are dropping the RW lock, the intent in most cases
+ * is that this thread remains as an observer, since it may hold
+ * some secondary resource, but must yield to avoid deadlock. In
+ * this situation, make sure that the thread is boosted to the
+ * RW lock ceiling while blocked, so that it can re-acquire the
+ * RW lock at that priority.
+ */
+ thread->rwlock_count++;
+ }
+
+ res = assert_wait(event, interruptible);
+ if (res == THREAD_WAITING) {
+ /* Remember the mode the lock was held in so it can be restored below. */
+ lck_rw_type = lck_rw_done(lck);
+ res = thread_block(THREAD_CONTINUE_NULL);
+ if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
+ if (!(lck_sleep_action & (LCK_SLEEP_SHARED | LCK_SLEEP_EXCLUSIVE))) {
+ lck_rw_lock(lck, lck_rw_type);
+ } else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE) {
+ lck_rw_lock_exclusive(lck);
+ } else {
+ lck_rw_lock_shared(lck);
+ }
+ }
+ } else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
+ /* No sleep happened, but the caller still asked for the lock to be dropped. */
+ (void)lck_rw_done(lck);
+ }
+
+ if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
+ /* Drop the reference taken above; the decrement happens regardless of the flag test. */
+ if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
+ /* sched_flags checked without lock, but will be rechecked while clearing */
+
+ /* Only if the caller wanted the lck_rw_t returned unlocked should we drop to 0 */
+ assert(lck_sleep_action & LCK_SLEEP_UNLOCK);
+
+ lck_rw_clear_promotion(thread, unslide_for_kdebug(event));
+ }
+ }
+
+ return res;
+}
+
+
+/*
+ * Routine: lck_rw_sleep_deadline
+ *
+ * Deadline flavour of lck_rw_sleep(): the wait is asserted with
+ * assert_wait_deadline(). If the thread sleeps, the RW lock is released
+ * first and afterwards re-taken exclusive, shared, or in the mode it was
+ * previously held in, unless LCK_SLEEP_UNLOCK asks for it to stay released.
+ * With LCK_SLEEP_PROMOTED_PRI a rwlock_count reference is held across the
+ * sleep. Panics on sleep-action bits outside LCK_SLEEP_MASK.
+ *
+ * Returns the wait result.
+ */
+wait_result_t
+lck_rw_sleep_deadline(
+	lck_rw_t *lck,
+	lck_sleep_action_t lck_sleep_action,
+	event_t event,
+	wait_interrupt_t interruptible,
+	uint64_t deadline)
+{
+	wait_result_t wres;
+	thread_t self = current_thread();
+
+	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
+		panic("Invalid lock sleep action %x\n", lck_sleep_action);
+	}
+
+	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
+		self->rwlock_count++;
+	}
+
+	wres = assert_wait_deadline(event, interruptible, deadline);
+	if (wres != THREAD_WAITING) {
+		/* Not sleeping: release only when explicitly requested. */
+		if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
+			(void)lck_rw_done(lck);
+		}
+	} else {
+		lck_rw_type_t held_as = lck_rw_done(lck);
+
+		wres = thread_block(THREAD_CONTINUE_NULL);
+		if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
+			/* Caller wants the lock left released. */
+		} else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE) {
+			lck_rw_lock_exclusive(lck);
+		} else if (lck_sleep_action & LCK_SLEEP_SHARED) {
+			lck_rw_lock_shared(lck);
+		} else {
+			lck_rw_lock(lck, held_as);
+		}
+	}
+
+	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
+		if ((self->rwlock_count-- == 1 /* field now 0 */) && (self->sched_flags & TH_SFLAG_RW_PROMOTED)) {
+			/* sched_flags checked without lock, but will be rechecked while clearing */
+
+			/* Only if the caller wanted the lck_rw_t returned unlocked should we drop to 0 */
+			assert(lck_sleep_action & LCK_SLEEP_UNLOCK);
+
+			lck_rw_clear_promotion(self, unslide_for_kdebug(event));
+		}
+	}
+
+	return wres;
+}
+
+/*
+ * Reader-writer lock promotion
+ *
+ * We support a limited form of reader-writer
+ * lock promotion whose effects are:
+ *
+ * * Qualifying threads have decay disabled
+ * * Scheduler priority is reset to a floor of
+ * their statically assigned priority
+ * or MINPRI_RWLOCK
+ *
+ * The rationale is that lck_rw_ts do not have
+ * a single owner, so we cannot apply a directed
+ * priority boost from all waiting threads
+ * to all holding threads without maintaining
+ * lists of all shared owners and all waiting
+ * threads for every lock.
+ *
+ * Instead (and to preserve the uncontended fast-
+ * path), acquiring (or attempting to acquire)
+ * a RW lock in shared or exclusive lock increments
+ * a per-thread counter. Only if that thread stops
+ * making forward progress (for instance blocking
+ * on a mutex, or being preempted) do we consult
+ * the counter and apply the priority floor.
+ * When the thread becomes runnable again (or in
+ * the case of preemption it never stopped being
+ * runnable), it has the priority boost and should
+ * be in a good position to run on the CPU and
+ * release all RW locks (at which point the priority
+ * boost is cleared).
+ *
+ * Care must be taken to ensure that priority
+ * boosts are not retained indefinitely, since unlike
+ * mutex priority boosts (where the boost is tied
+ * to the mutex lifecycle), the boost is tied
+ * to the thread and independent of any particular
+ * lck_rw_t. Assertions are in place on return
+ * to userspace so that the boost is not held
+ * indefinitely.
+ *
+ * The routines that increment/decrement the
+ * per-thread counter should err on the side of
+ * incrementing any time a preemption is possible
+ * and the lock would be visible to the rest of the
+ * system as held (so it should be incremented before
+ * interlocks are dropped/preemption is enabled, or
+ * before a CAS is executed to acquire the lock).
+ *
+ */
+
+/*
+ * lck_rw_clear_promotion: Undo priority promotions when the last RW
+ * lock is released by a thread (if a promotion was active)
+ *
+ * Expects thread->rwlock_count to already be 0 (asserted). trace_obj is
+ * passed through to sched_thread_unpromote_reason().
+ */
+void
+lck_rw_clear_promotion(thread_t thread, uintptr_t trace_obj)
+{
+ assert(thread->rwlock_count == 0);
+
+ /* Cancel any promotions if the thread had actually blocked while holding a RW lock */
+ spl_t s = splsched();
+ thread_lock(thread);
+
+ /* Re-test under the thread lock; callers check this flag without it. */
+ if (thread->sched_flags & TH_SFLAG_RW_PROMOTED) {
+ sched_thread_unpromote_reason(thread, TH_SFLAG_RW_PROMOTED, trace_obj);
+ }
+
+ thread_unlock(thread);
+ splx(s);
+}
+
+/*
+ * Callout from context switch if the thread goes
+ * off core with a positive rwlock_count
+ *
+ * Applies the RW promotion to the thread unless it is already promoted
+ * or the disLkRWPrio option is set in LcksOpts.
+ *
+ * Called at splsched with the thread locked
+ */
+void
+lck_rw_set_promotion_locked(thread_t thread)
+{
+	if (LcksOpts & disLkRWPrio) {
+		return;
+	}
+
+	assert(thread->rwlock_count > 0);
+
+	/* Nothing to do when the promotion is already in place. */
+	if (thread->sched_flags & TH_SFLAG_RW_PROMOTED) {
+		return;
+	}
+
+	sched_thread_promote_reason(thread, TH_SFLAG_RW_PROMOTED, 0);
+}
+
+/*
+ * Routine: host_lockgroup_info
+ *
+ * Build an array with one lockgroup_info_t per lock group on
+ * lck_grp_queue and hand it back as a vm_map_copy_t.
+ *
+ * Parameters:
+ *   host               - must not be HOST_NULL
+ *   lockgroup_infop    - out: the copy object, cast to lockgroup_info_t *
+ *   lockgroup_infoCntp - out: number of entries (lck_grp_cnt)
+ *
+ * Returns KERN_INVALID_HOST for a null host, the kmem_alloc_pageable()
+ * error if the buffer cannot be allocated, KERN_SUCCESS otherwise.
+ */
+kern_return_t
+host_lockgroup_info(
+	host_t host,
+	lockgroup_info_array_t *lockgroup_infop,
+	mach_msg_type_number_t *lockgroup_infoCntp)
+{
+	lockgroup_info_t *lockgroup_info_base;
+	lockgroup_info_t *lockgroup_info;
+	vm_offset_t lockgroup_info_addr;
+	vm_size_t lockgroup_info_size;
+	vm_size_t lockgroup_info_vmsize;
+	lck_grp_t *lck_grp;
+	unsigned int i;
+	vm_map_copy_t copy;
+	kern_return_t kr;
+
+	if (host == HOST_NULL) {
+		return KERN_INVALID_HOST;
+	}
+
+	/* The group list and lck_grp_cnt are read under lck_grp_lock. */
+	lck_mtx_lock(&lck_grp_lock);
+
+	lockgroup_info_size = lck_grp_cnt * sizeof(*lockgroup_info);
+	lockgroup_info_vmsize = round_page(lockgroup_info_size);
+	kr = kmem_alloc_pageable(ipc_kernel_map,
+	    &lockgroup_info_addr, lockgroup_info_vmsize, VM_KERN_MEMORY_IPC);
+	if (kr != KERN_SUCCESS) {
+		lck_mtx_unlock(&lck_grp_lock);
+		return kr;
+	}
+
+	lockgroup_info_base = (lockgroup_info_t *) lockgroup_info_addr;
+	lck_grp = (lck_grp_t *)queue_first(&lck_grp_queue);
+	lockgroup_info = lockgroup_info_base;
+
+	for (i = 0; i < lck_grp_cnt; i++) {
+		lockgroup_info->lock_spin_cnt = lck_grp->lck_grp_spincnt;
+		lockgroup_info->lock_rw_cnt = lck_grp->lck_grp_rwcnt;
+		lockgroup_info->lock_mtx_cnt = lck_grp->lck_grp_mtxcnt;
+
+#if LOCK_STATS
+		lockgroup_info->lock_spin_held_cnt = lck_grp->lck_grp_stats.lgss_spin_held.lgs_count;
+		lockgroup_info->lock_spin_miss_cnt = lck_grp->lck_grp_stats.lgss_spin_miss.lgs_count;
+#endif /* LOCK_STATS */
+
+		// Historically on x86, held was used for "direct wait" and util for "held"
+		lockgroup_info->lock_mtx_util_cnt = lck_grp->lck_grp_stats.lgss_mtx_held.lgs_count;
+		lockgroup_info->lock_mtx_held_cnt = lck_grp->lck_grp_stats.lgss_mtx_direct_wait.lgs_count;
+		lockgroup_info->lock_mtx_miss_cnt = lck_grp->lck_grp_stats.lgss_mtx_miss.lgs_count;
+		lockgroup_info->lock_mtx_wait_cnt = lck_grp->lck_grp_stats.lgss_mtx_wait.lgs_count;
+
+		/*
+		 * NOTE(review): strncpy leaves the destination unterminated if the
+		 * source fills LOCKGROUP_MAX_NAME bytes -- confirm group names are
+		 * always shorter than that.
+		 */
+		(void) strncpy(lockgroup_info->lockgroup_name, lck_grp->lck_grp_name, LOCKGROUP_MAX_NAME);
+
+		lck_grp = (lck_grp_t *)(queue_next((queue_entry_t)(lck_grp)));
+		lockgroup_info++;
+	}
+
+	*lockgroup_infoCntp = lck_grp_cnt;
+	lck_mtx_unlock(&lck_grp_lock);
+
+	/*
+	 * lockgroup_info now points just past the last entry written, so this
+	 * zeroes the unused tail of the page-rounded allocation.
+	 */
+	if (lockgroup_info_size != lockgroup_info_vmsize) {
+		bzero((char *)lockgroup_info, lockgroup_info_vmsize - lockgroup_info_size);
+	}
+
+	/* Pass the address of `copy` so vm_map_copyin() can return the copy object. */
+	kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)lockgroup_info_addr,
+	    (vm_map_size_t)lockgroup_info_size, TRUE, &copy);
+	assert(kr == KERN_SUCCESS);
+
+	*lockgroup_infop = (lockgroup_info_t *) copy;
+
+	return KERN_SUCCESS;
+}
+
+/*
+ * sleep_with_inheritor and wakeup_with_inheritor KPI
+ *
+ * Functions that allow to sleep on an event and use turnstile to propagate the priority of the sleeping threads to
+ * the latest thread specified as inheritor.
+ *
+ * The inheritor management is delegated to the caller: the caller needs to store a thread identifier and provide it to these functions to specify upon whom
+ * to direct the push. The inheritor cannot run in user space while holding a push from an event. Therefore it is the caller's responsibility to call
+ * wakeup_with_inheritor from the inheritor before running in userspace, or to specify another inheritor before letting the old inheritor run in userspace.
+ *
+ * sleep_with_inheritor requires to hold a locking primitive while invoked, but wakeup_with_inheritor and change_sleep_inheritor don't require it.
+ *
+ * Turnstile requires a non blocking primitive as interlock to synchronize the turnstile data structure manipulation, therefore sleep_with_inheritor, change_sleep_inheritor and
+ * wakeup_with_inheritor will require the same interlock to manipulate turnstiles.
+ * If sleep_with_inheritor is associated with a locking primitive that can block (like lck_mtx_t or lck_rw_t), a handoff to a non blocking primitive is required before
+ * invoking any turnstile operation.
+ *
+ * All functions will save the turnstile associated with the event on the turnstile kernel hash table and will use the turnstile kernel hash table bucket
+ * spinlock as the turnstile interlock. Because we do not want to hold interrupts disabled while holding the bucket interlock, a new turnstile kernel hash table
+ * is instantiated for this KPI to manage the hash without interrupts disabled.
+ * Also:
+ * - all events on the system that hash on the same bucket will contend on the same spinlock.
+ * - every event will have a dedicated wait_queue.
+ *
+ * Different locking primitives can be associated with sleep_with_inheritor as long as the primitive_lock() and primitive_unlock() functions are provided to
+ * sleep_with_inheritor_turnstile to perform the handoff with the bucket spinlock.
+ */
+
+kern_return_t
+wakeup_with_inheritor_and_turnstile_type(event_t event, turnstile_type_t type, wait_result_t result, bool wake_one, lck_wake_action_t action, thread_t *thread_wokenup)
+{
+ uint32_t index;
+ struct turnstile *ts = NULL;
+ kern_return_t ret = KERN_NOT_WAITING;
+ int priority;
+ thread_t wokeup;
+
+ /*
+ * the hash bucket spinlock is used as turnstile interlock
+ */
+ turnstile_hash_bucket_lock((uintptr_t)event, &index, type);
+
+ ts = turnstile_prepare((uintptr_t)event, NULL, TURNSTILE_NULL, type);
+
+ if (wake_one) {
+ if (action == LCK_WAKE_DEFAULT) {
+ priority = WAITQ_PROMOTE_ON_WAKE;
+ } else {
+ assert(action == LCK_WAKE_DO_NOT_TRANSFER_PUSH);
+ priority = WAITQ_ALL_PRIORITIES;
+ }
+
+ /*
+ * WAITQ_PROMOTE_ON_WAKE will call turnstile_update_inheritor
+ * if it finds a thread
+ */
+ wokeup = waitq_wakeup64_identify(&ts->ts_waitq, CAST_EVENT64_T(event), result, priority);
+ if (wokeup != NULL) {
+ if (thread_wokenup != NULL) {
+ *thread_wokenup = wokeup;
+ } else {
+ thread_deallocate_safe(wokeup);
+ }
+ ret = KERN_SUCCESS;
+ if (action == LCK_WAKE_DO_NOT_TRANSFER_PUSH) {
+ goto complete;
+ }
+ } else {
+ if (thread_wokenup != NULL) {
+ *thread_wokenup = NULL;