/*
- * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- *
+ *
* This file contains Original Code and/or Modifications of Original Code
* as defined in and that are subject to the Apple Public Source License
* Version 2.0 (the 'License'). You may not use this file except in
* unlawful or unlicensed copies of an Apple operating system, or to
* circumvent, violate, or enable the circumvention or violation of, any
* terms of an Apple operating system software license agreement.
- *
+ *
* Please obtain a copy of the License at
* http://www.opensource.apple.com/apsl/ and read it before using this file.
- *
+ *
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
* Please see the License for the specific language governing rights and
* limitations under the License.
- *
+ *
* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
*/
/*
* @OSF_COPYRIGHT@
*/
-/*
+/*
* Mach Operating System
* Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
* All Rights Reserved.
- *
+ *
* Permission to use, copy, modify and distribute this software and its
* documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
- *
+ *
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
* ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
+ *
* Carnegie Mellon requests users of this software to return to
- *
+ *
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
- *
+ *
* any improvements or extensions that they make and grant Carnegie Mellon
* the rights to redistribute these changes.
*/
* Locking primitives implementation
*/
+#define LOCK_PRIVATE 1
+
#include <mach_ldebug.h>
+#include <kern/lock_stat.h>
#include <kern/locks.h>
#include <kern/kalloc.h>
#include <kern/misc_protos.h>
#include <kern/cpu_data.h>
#include <kern/cpu_number.h>
#include <kern/sched_prim.h>
-#include <kern/xpr.h>
#include <kern/debug.h>
#include <string.h>
#include <i386/machine_routines.h> /* machine_timeout_suspended() */
+#include <machine/atomic.h>
#include <machine/machine_cpu.h>
#include <i386/mp.h>
-
+#include <machine/atomic.h>
#include <sys/kdebug.h>
-#include <mach/branch_predicates.h>
+#include <i386/locks_i386_inlines.h>
-/*
- * We need only enough declarations from the BSD-side to be able to
- * test if our probe is active, and to call __dtrace_probe(). Setting
- * NEED_DTRACE_DEFS gets a local copy of those definitions pulled in.
- */
#if CONFIG_DTRACE
-#define NEED_DTRACE_DEFS
-#include <../bsd/sys/lockstat.h>
-#endif
+#define DTRACE_RW_SHARED 0x0 //reader
+#define DTRACE_RW_EXCL 0x1 //writer
+#define DTRACE_NO_FLAG 0x0 //not applicable
+#endif /* CONFIG_DTRACE */
#define LCK_RW_LCK_EXCLUSIVE_CODE 0x100
#define LCK_RW_LCK_EXCLUSIVE1_CODE 0x101
unsigned int LcksOpts=0;
+#if DEVELOPMENT || DEBUG
+unsigned int LckDisablePreemptCheck = 0;
+#endif
+
/* Forwards */
#if USLOCK_DEBUG
*/
int uslock_check = 1;
int max_lock_loops = 100000000;
-decl_simple_lock_data(extern , printf_lock)
-decl_simple_lock_data(extern , panic_lock)
+decl_simple_lock_data(extern , printf_lock);
+decl_simple_lock_data(extern , panic_lock);
#endif /* USLOCK_DEBUG */
extern unsigned int not_in_kdp;
#endif /* lint */
#endif /* USLOCK_DEBUG */
+/*
+ * atomic exchange API is a low level abstraction of the operations
+ * to atomically read, modify, and write a pointer. This abstraction works
+ * for both Intel and ARMv8.1 compare and exchange atomic instructions as
+ * well as the ARM exclusive instructions.
+ *
+ * atomic_exchange_begin() - begin exchange and retrieve current value
+ * atomic_exchange_complete() - conclude an exchange
+ * atomic_exchange_abort() - cancel an exchange started with atomic_exchange_begin()
+ */
+static uint32_t
+atomic_exchange_begin32(uint32_t *target, uint32_t *previous, enum memory_order ord)
+{
+ uint32_t val;
+
+ (void)ord; // Memory order not used
+ val = os_atomic_load(target, relaxed);
+ *previous = val;
+ return val;
+}
+
+static boolean_t
+atomic_exchange_complete32(uint32_t *target, uint32_t previous, uint32_t newval, enum memory_order ord)
+{
+ return __c11_atomic_compare_exchange_strong((_Atomic uint32_t *)target, &previous, newval, ord, memory_order_relaxed);
+}
+
+static void
+atomic_exchange_abort(void) { }
+
+static boolean_t
+atomic_test_and_set32(uint32_t *target, uint32_t test_mask, uint32_t set_mask, enum memory_order ord, boolean_t wait)
+{
+ uint32_t value, prev;
+
+ for ( ; ; ) {
+ value = atomic_exchange_begin32(target, &prev, ord);
+ if (value & test_mask) {
+ if (wait)
+ cpu_pause();
+ else
+ atomic_exchange_abort();
+ return FALSE;
+ }
+ value |= set_mask;
+ if (atomic_exchange_complete32(target, prev, value, ord))
+ return TRUE;
+ }
+}
+
+inline boolean_t
+hw_atomic_test_and_set32(uint32_t *target, uint32_t test_mask, uint32_t set_mask, enum memory_order ord, boolean_t wait)
+{
+ return atomic_test_and_set32(target, test_mask, set_mask, ord, wait);
+}
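/*
 * Illustrative sketch (editor's note, not part of the xnu change above): callers
 * of the exchange API wrap it in a compare-and-swap loop. "SOME_BUSY_BIT" and
 * "SOME_OWNED_BIT" below are placeholder names, not real lock bits.
 *
 *	uint32_t data, prev;
 *
 *	for ( ; ; ) {
 *		data = atomic_exchange_begin32(&lock->data, &prev, memory_order_relaxed);
 *		if (data & SOME_BUSY_BIT) {
 *			atomic_exchange_abort();	// nothing was written; just bail out
 *			return FALSE;
 *		}
 *		data |= SOME_OWNED_BIT;
 *		if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_relaxed))
 *			return TRUE;			// CAS against the snapshot in prev succeeded
 *		cpu_pause();				// lost the race; re-read and retry
 *	}
 *
 * lck_rw_grab_want() and lck_rw_grab_shared() below are real instances of this pattern.
 */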
/*
* Portable lock package implementation of usimple_locks.
#define USLDBG(stmt)
#endif /* USLOCK_DEBUG */
-
-extern int lck_rw_grab_want(lck_rw_t *lck);
-extern int lck_rw_grab_shared(lck_rw_t *lck);
-extern int lck_rw_held_read_or_upgrade(lck_rw_t *lck);
-
-
/*
* Forward definitions
*/
-void lck_rw_lock_shared_gen(
- lck_rw_t *lck);
-
-void lck_rw_lock_exclusive_gen(
- lck_rw_t *lck);
-
-boolean_t lck_rw_lock_shared_to_exclusive_success(
- lck_rw_t *lck);
-
-boolean_t lck_rw_lock_shared_to_exclusive_failure(
- lck_rw_t *lck,
- int prior_lock_state);
-
-void lck_rw_lock_exclusive_to_shared_gen(
- lck_rw_t *lck,
- int prior_lock_state);
-
-lck_rw_type_t lck_rw_done_gen(
- lck_rw_t *lck,
- int prior_lock_state);
-
+static void lck_rw_lock_shared_gen(lck_rw_t *lck);
+static void lck_rw_lock_exclusive_gen(lck_rw_t *lck);
+static boolean_t lck_rw_lock_shared_to_exclusive_success(lck_rw_t *lck);
+static boolean_t lck_rw_lock_shared_to_exclusive_failure(lck_rw_t *lck, uint32_t prior_lock_state);
+static void lck_rw_lock_exclusive_to_shared_gen(lck_rw_t *lck, uint32_t prior_lock_state);
+static lck_rw_type_t lck_rw_done_gen(lck_rw_t *lck, uint32_t prior_lock_state);
void lck_rw_clear_promotions_x86(thread_t thread);
+static boolean_t lck_rw_held_read_or_upgrade(lck_rw_t *lock);
+static boolean_t lck_rw_grab_want(lck_rw_t *lock);
+static boolean_t lck_rw_grab_shared(lck_rw_t *lock);
+static void lck_mtx_unlock_wakeup_tail(lck_mtx_t *mutex, uint32_t state, boolean_t indirect);
+static void lck_mtx_interlock_lock(lck_mtx_t *mutex, uint32_t *new_state);
+static void lck_mtx_interlock_lock_clear_flags(lck_mtx_t *mutex, uint32_t and_flags, uint32_t *new_state);
+static int lck_mtx_interlock_try_lock(lck_mtx_t *mutex, uint32_t *new_state);
+static int lck_mtx_interlock_try_lock_set_flags(lck_mtx_t *mutex, uint32_t or_flags, uint32_t *new_state);
+static boolean_t lck_mtx_lock_wait_interlock_to_clear(lck_mtx_t *lock, uint32_t *new_state);
+static boolean_t lck_mtx_try_lock_wait_interlock_to_clear(lck_mtx_t *lock, uint32_t *new_state);
+
/*
* Routine: lck_spin_alloc_init
__unused lck_attr_t *attr)
{
usimple_lock_init((usimple_lock_t) lck, 0);
- lck_grp_reference(grp);
- lck_grp_lckcnt_incr(grp, LCK_TYPE_SPIN);
+ if (grp) {
+ lck_grp_reference(grp);
+ lck_grp_lckcnt_incr(grp, LCK_TYPE_SPIN);
+ }
}
/*
if (lck->interlock == LCK_SPIN_TAG_DESTROYED)
return;
lck->interlock = LCK_SPIN_TAG_DESTROYED;
- lck_grp_lckcnt_decr(grp, LCK_TYPE_SPIN);
- lck_grp_deallocate(grp);
+ if (grp) {
+ lck_grp_lckcnt_decr(grp, LCK_TYPE_SPIN);
+ lck_grp_deallocate(grp);
+ }
return;
}
/*
* Routine: lck_spin_lock
*/
+void
+lck_spin_lock_grp(
+ lck_spin_t *lck,
+ lck_grp_t *grp)
+{
+#pragma unused(grp)
+ usimple_lock((usimple_lock_t) lck, grp);
+}
+
void
lck_spin_lock(
lck_spin_t *lck)
{
- usimple_lock((usimple_lock_t) lck);
+ usimple_lock((usimple_lock_t) lck, NULL);
}
/*
usimple_unlock((usimple_lock_t) lck);
}
+boolean_t
+lck_spin_try_lock_grp(
+ lck_spin_t *lck,
+ lck_grp_t *grp)
+{
+#pragma unused(grp)
+ boolean_t lrval = (boolean_t)usimple_lock_try((usimple_lock_t) lck, grp);
+#if DEVELOPMENT || DEBUG
+ if (lrval) {
+ pltrace(FALSE);
+ }
+#endif
+ return(lrval);
+}
+
/*
* Routine: lck_spin_try_lock
lck_spin_try_lock(
lck_spin_t *lck)
{
- boolean_t lrval = (boolean_t)usimple_lock_try((usimple_lock_t) lck);
+ boolean_t lrval = (boolean_t)usimple_lock_try((usimple_lock_t) lck, LCK_GRP_NULL);
#if DEVELOPMENT || DEBUG
if (lrval) {
pltrace(FALSE);
if (__improbable(holder != THREAD_NULL)) {
if (holder == thread) {
panic("Lock owned by current thread %p = %lx", lock, state);
- } else {
- panic("Lock %p owned by thread %p", lock, holder);
}
}
}
volatile usimple_lock_t spinlock_timed_out;
uint32_t spinlock_timeout_NMI(uintptr_t thread_addr) {
- uint64_t deadline;
uint32_t i;
for (i = 0; i < real_ncpus; i++) {
- if ((uintptr_t)cpu_data_ptr[i]->cpu_active_thread == thread_addr) {
+ if ((cpu_data_ptr[i] != NULL) && ((uintptr_t)cpu_data_ptr[i]->cpu_active_thread == thread_addr)) {
spinlock_owner_cpu = i;
- if ((uint32_t) cpu_number() == i)
- break;
- cpu_datap(i)->cpu_NMI_acknowledged = FALSE;
- cpu_NMI_interrupt(i);
- deadline = mach_absolute_time() + (LockTimeOut * 2);
- while (mach_absolute_time() < deadline && cpu_datap(i)->cpu_NMI_acknowledged == FALSE)
- cpu_pause();
+ if ((uint32_t) cpu_number() != i) {
+ /* Cause NMI and panic on the owner's cpu */
+ NMIPI_panic(cpu_to_cpumask(i), SPINLOCK_TIMEOUT);
+ }
break;
}
}
* maintaining preemption state.
*/
void
-usimple_lock(
- usimple_lock_t l)
+(usimple_lock)(
+ usimple_lock_t l
+ LCK_GRP_ARG(lck_grp_t *grp))
{
#ifndef MACHINE_SIMPLE_LOCK
DECL_PC(pc);
OBTAIN_PC(pc);
USLDBG(usld_lock_pre(l, pc));
- if(__improbable(hw_lock_to(&l->interlock, LockTimeOutTSC) == 0)) {
+ if(__improbable(hw_lock_to(&l->interlock, LockTimeOutTSC, grp) == 0)) {
boolean_t uslock_acquired = FALSE;
while (machine_timeout_suspended()) {
enable_preemption();
- if ((uslock_acquired = hw_lock_to(&l->interlock, LockTimeOutTSC)))
+ if ((uslock_acquired = hw_lock_to(&l->interlock, LockTimeOutTSC, grp)))
break;
}
uintptr_t lowner = (uintptr_t)l->interlock.lock_data;
spinlock_timed_out = l;
lock_cpu = spinlock_timeout_NMI(lowner);
- panic("Spinlock acquisition timed out: lock=%p, lock owner thread=0x%lx, current_thread: %p, lock owner active on CPU 0x%x, current owner: 0x%lx", l, lowner, current_thread(), lock_cpu, (uintptr_t)l->interlock.lock_data);
+ panic("Spinlock acquisition timed out: lock=%p, lock owner thread=0x%lx, current_thread: %p, lock owner active on CPU 0x%x, current owner: 0x%lx, time: %llu",
+ l, lowner, current_thread(), lock_cpu, (uintptr_t)l->interlock.lock_data, mach_absolute_time());
}
}
#if DEVELOPMENT || DEBUG
USLDBG(usld_lock_post(l, pc));
#else
- simple_lock((simple_lock_t)l);
+ simple_lock((simple_lock_t)l, grp);
+#endif
+#if CONFIG_DTRACE
+ LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE, l, 0, (uintptr_t)LCK_GRP_PROBEARG(grp));
#endif
}
*/
unsigned int
usimple_lock_try(
- usimple_lock_t l)
+ usimple_lock_t l,
+ lck_grp_t *grp)
{
#ifndef MACHINE_SIMPLE_LOCK
unsigned int success;
OBTAIN_PC(pc);
USLDBG(usld_lock_try_pre(l, pc));
- if ((success = hw_lock_try(&l->interlock))) {
+ if ((success = hw_lock_try(&l->interlock, grp))) {
#if DEVELOPMENT || DEBUG
pltrace(FALSE);
#endif
}
return success;
#else
- return(simple_lock_try((simple_lock_t)l));
+ return(simple_lock_try((simple_lock_t)l, grp));
#endif
}
/*
- * Acquire a usimple_lock while polling for pending TLB flushes
+ * Acquire a usimple_lock while polling for pending cpu signals
* and spinning on a lock.
*
*/
-void
-usimple_lock_try_lock_loop(usimple_lock_t l)
+unsigned int
+(usimple_lock_try_lock_mp_signal_safe_loop_deadline)(usimple_lock_t l,
+ uint64_t deadline
+ LCK_GRP_ARG(lck_grp_t *grp))
{
boolean_t istate = ml_get_interrupts_enabled();
- while (!simple_lock_try((l))) {
+
+ if (deadline < mach_absolute_time()) {
+ return 0;
+ }
+
+ while (!simple_lock_try(l, grp)) {
if (!istate)
- handle_pending_TLB_flushes();
+ cpu_signal_handler(NULL);
+
+ if (deadline < mach_absolute_time()) {
+ return 0;
+ }
+
cpu_pause();
}
+
+ return 1;
+}
+
+void
+(usimple_lock_try_lock_loop)(usimple_lock_t l
+ LCK_GRP_ARG(lck_grp_t *grp))
+{
+ usimple_lock_try_lock_mp_signal_safe_loop_deadline(l, ULLONG_MAX, grp);
+}
+
+unsigned int
+(usimple_lock_try_lock_mp_signal_safe_loop_duration)(usimple_lock_t l,
+ uint64_t duration
+ LCK_GRP_ARG(lck_grp_t *grp))
+{
+ uint64_t deadline;
+ uint64_t base_at = mach_absolute_time();
+ uint64_t duration_at;
+
+ nanoseconds_to_absolutetime(duration, &duration_at);
+ deadline = base_at + duration_at;
+ if (deadline < base_at) {
+ /* deadline has overflowed, make it saturate */
+ deadline = ULLONG_MAX;
+ }
+
+ return usimple_lock_try_lock_mp_signal_safe_loop_deadline(l, deadline, grp);
}
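/*
 * Illustrative sketch (editor's note, not part of the xnu change above): a caller
 * willing to spin for at most a bounded time can use the duration variant and fall
 * back if the lock is not obtained. The lock, group, and 100us budget are placeholders,
 * and the grp argument is shown assuming lock-group instrumentation is compiled in.
 *
 *	if (!usimple_lock_try_lock_mp_signal_safe_loop_duration(&my_lock,
 *	        100 * NSEC_PER_USEC, my_grp)) {
 *		// timed out without acquiring the lock: take a slower fallback path
 *		return;
 *	}
 *	// ... critical section ...
 *	usimple_unlock(&my_lock);
 */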
#if USLOCK_DEBUG
#define USLOCK_CHECKING(l) (uslock_check && \
((l)->debug.state & USLOCK_CHECKED))
-/*
- * Trace activities of a particularly interesting lock.
- */
-void usl_trace(usimple_lock_t, int, pc_t, const char *);
-
-
/*
* Initialize the debugging information contained
* in a usimple_lock.
panic("%s", caller);
}
mp_disable_preemption();
- usl_trace(l, cpu_number(), pc, caller);
mp_enable_preemption();
}
l->debug.state |= USLOCK_TAKEN;
l->debug.lock_pc = pc;
l->debug.lock_cpu = mycpu;
-
- usl_trace(l, mycpu, pc, caller);
}
printf(" (acquired on cpu 0x%x)\n", l->debug.lock_cpu);
panic("%s", caller);
}
- usl_trace(l, mycpu, pc, caller);
l->debug.unlock_thread = l->debug.lock_thread;
l->debug.lock_thread = INVALID_PC;
void
usld_lock_try_pre(
usimple_lock_t l,
- pc_t pc)
+ __unused pc_t pc)
{
char caller[] = "usimple_lock_try";
if (!usld_lock_common_checks(l, caller))
return;
- mp_disable_preemption();
- usl_trace(l, cpu_number(), pc, caller);
- mp_enable_preemption();
}
l->debug.state |= USLOCK_TAKEN;
l->debug.lock_pc = pc;
l->debug.lock_cpu = mycpu;
-
- usl_trace(l, mycpu, pc, caller);
-}
-
-
-/*
- * For very special cases, set traced_lock to point to a
- * specific lock of interest. The result is a series of
- * XPRs showing lock operations on that lock. The lock_seq
- * value is used to show the order of those operations.
- */
-usimple_lock_t traced_lock;
-unsigned int lock_seq;
-
-void
-usl_trace(
- usimple_lock_t l,
- int mycpu,
- pc_t pc,
- const char * op_name)
-{
- if (traced_lock == l) {
- XPR(XPR_SLOCK,
- "seq %d, cpu %d, %s @ %x\n",
- (uintptr_t) lock_seq, (uintptr_t) mycpu,
- (uintptr_t) op_name, (uintptr_t) pc, 0);
- lock_seq++;
- }
}
-
-
#endif /* USLOCK_DEBUG */
/*
#define DECREMENTER_TIMEOUT 1000000
-#define RW_LOCK_READER_EVENT(x) \
- ((event_t) (((unsigned char*) (x)) + (offsetof(lck_rw_t, lck_rw_tag))))
-
-#define RW_LOCK_WRITER_EVENT(x) \
- ((event_t) (((unsigned char*) (x)) + (offsetof(lck_rw_t, lck_rw_pad8))))
-
/*
* We disable interrupts while holding the RW interlock to prevent an
* interrupt from exacerbating hold time.
* Hence, local helper functions lck_interlock_lock()/lck_interlock_unlock().
*/
-static boolean_t
+static inline boolean_t
lck_interlock_lock(lck_rw_t *lck)
{
boolean_t istate;
- istate = ml_set_interrupts_enabled(FALSE);
+ istate = ml_set_interrupts_enabled(FALSE);
hw_lock_byte_lock(&lck->lck_rw_interlock);
-
return istate;
}
-static void
+static inline void
lck_interlock_unlock(lck_rw_t *lck, boolean_t istate)
-{
+{
hw_lock_byte_unlock(&lck->lck_rw_interlock);
ml_set_interrupts_enabled(istate);
}
cpu_pause();
}
+static inline boolean_t
+lck_rw_held_read_or_upgrade(lck_rw_t *lock)
+{
+ if (ordered_load(&lock->data) & (LCK_RW_SHARED_MASK | LCK_RW_INTERLOCK | LCK_RW_WANT_UPGRADE))
+ return TRUE;
+ return FALSE;
+}
/*
* compute the deadline to spin against when
}
+/*
+ * Spin while interlock is held.
+ */
+
+static inline void
+lck_rw_interlock_spin(lck_rw_t *lock)
+{
+ while (ordered_load(&lock->data) & LCK_RW_INTERLOCK) {
+ cpu_pause();
+ }
+}
+
+static boolean_t
+lck_rw_grab_want(lck_rw_t *lock)
+{
+ uint32_t data, prev;
+
+ for ( ; ; ) {
+ data = atomic_exchange_begin32(&lock->data, &prev, memory_order_relaxed);
+ if ((data & LCK_RW_INTERLOCK) == 0)
+ break;
+ atomic_exchange_abort();
+ lck_rw_interlock_spin(lock);
+ }
+ if (data & LCK_RW_WANT_WRITE) {
+ atomic_exchange_abort();
+ return FALSE;
+ }
+ data |= LCK_RW_WANT_WRITE;
+ return atomic_exchange_complete32(&lock->data, prev, data, memory_order_relaxed);
+}
+
+static boolean_t
+lck_rw_grab_shared(lck_rw_t *lock)
+{
+ uint32_t data, prev;
+
+ for ( ; ; ) {
+ data = atomic_exchange_begin32(&lock->data, &prev, memory_order_acquire_smp);
+ if ((data & LCK_RW_INTERLOCK) == 0)
+ break;
+ atomic_exchange_abort();
+ lck_rw_interlock_spin(lock);
+ }
+ if (data & (LCK_RW_WANT_WRITE | LCK_RW_WANT_UPGRADE)) {
+ if (((data & LCK_RW_SHARED_MASK) == 0) || (data & LCK_RW_PRIV_EXCL)) {
+ atomic_exchange_abort();
+ return FALSE;
+ }
+ }
+ data += LCK_RW_SHARED_READER;
+ return atomic_exchange_complete32(&lock->data, prev, data, memory_order_acquire_smp);
+}
+
/*
* Routine: lck_rw_lock_exclusive
*/
-void
+static void
lck_rw_lock_exclusive_gen(
lck_rw_t *lck)
{
- __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
+ __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(lck);
uint64_t deadline = 0;
int slept = 0;
int gotlock = 0;
deadline = lck_rw_deadline_for_spin(lck);
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
-
+
while (((gotlock = lck_rw_grab_want(lck)) == 0) && mach_absolute_time() < deadline)
lck_rw_lock_pause(istate);
lck->lck_w_waiting = TRUE;
- res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT);
+ thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite);
+ res = assert_wait(RW_LOCK_WRITER_EVENT(lck),
+ THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
lck_interlock_unlock(lck, istate);
if (res == THREAD_WAITING) {
lck->lck_w_waiting = TRUE;
- res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT);
+ thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite);
+ res = assert_wait(RW_LOCK_WRITER_EVENT(lck),
+ THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
lck_interlock_unlock(lck, istate);
if (res == THREAD_WAITING) {
*/
if (dtrace_ls_enabled == TRUE) {
if (slept == 0) {
- LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_EXCL_SPIN, lck,
+ LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_SPIN, lck,
mach_absolute_time() - wait_interval, 1);
} else {
/*
* Notice that above we recorded this before we dropped
* the interlock so the count is accurate.
*/
- LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_EXCL_BLOCK, lck,
+ LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_BLOCK, lck,
mach_absolute_time() - wait_interval, 1,
(readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
}
#endif
}
+/*
+ * Routine: lck_rw_done
+ */
+
+lck_rw_type_t lck_rw_done(lck_rw_t *lock)
+{
+ uint32_t data, prev;
+
+ for ( ; ; ) {
+ data = atomic_exchange_begin32(&lock->data, &prev, memory_order_release_smp);
+ if (data & LCK_RW_INTERLOCK) { /* wait for interlock to clear */
+ atomic_exchange_abort();
+ lck_rw_interlock_spin(lock);
+ continue;
+ }
+ if (data & LCK_RW_SHARED_MASK) {
+ data -= LCK_RW_SHARED_READER;
+ if ((data & LCK_RW_SHARED_MASK) == 0) /* if reader count has now gone to 0, check for waiters */
+ goto check_waiters;
+ } else { /* if reader count == 0, must be exclusive lock */
+ if (data & LCK_RW_WANT_UPGRADE) {
+ data &= ~(LCK_RW_WANT_UPGRADE);
+ } else {
+ if (data & LCK_RW_WANT_WRITE)
+ data &= ~(LCK_RW_WANT_EXCL);
+ else /* lock is not 'owned', panic */
+ panic("Releasing non-exclusive RW lock without a reader refcount!");
+ }
+check_waiters:
+ if (prev & LCK_RW_W_WAITING) {
+ data &= ~(LCK_RW_W_WAITING);
+ if ((prev & LCK_RW_PRIV_EXCL) == 0)
+ data &= ~(LCK_RW_R_WAITING);
+ } else
+ data &= ~(LCK_RW_R_WAITING);
+ }
+ if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_release_smp))
+ break;
+ cpu_pause();
+ }
+ return lck_rw_done_gen(lock, prev);
+}
/*
* Routine: lck_rw_done_gen
*
- * called from the assembly language wrapper...
+ * called from lck_rw_done()
* prior_lock_state is the value in the 1st
* word of the lock at the time of a successful
* atomic compare and exchange with the new value...
* this by examining the state of the lock before
* we changed it
*/
-lck_rw_type_t
+static lck_rw_type_t
lck_rw_done_gen(
- lck_rw_t *lck,
- int prior_lock_state)
+ lck_rw_t *lck,
+ uint32_t prior_lock_state)
{
- lck_rw_t *fake_lck;
- lck_rw_type_t lock_type;
- thread_t thread;
- uint32_t rwlock_count;
+ lck_rw_t *fake_lck;
+ lck_rw_type_t lock_type;
+ thread_t thread;
+ uint32_t rwlock_count;
- /*
- * prior_lock state is a snapshot of the 1st word of the
- * lock in question... we'll fake up a pointer to it
- * and carefully not access anything beyond whats defined
- * in the first word of a lck_rw_t
- */
+ thread = current_thread();
+ rwlock_count = thread->rwlock_count--;
fake_lck = (lck_rw_t *)&prior_lock_state;
- if (fake_lck->lck_rw_shared_count <= 1) {
- if (fake_lck->lck_w_waiting)
- thread_wakeup(RW_LOCK_WRITER_EVENT(lck));
+ if (lck->lck_rw_can_sleep) {
+ /*
+ * prior_lock state is a snapshot of the 1st word of the
+ * lock in question... we'll fake up a pointer to it
+		 * and carefully not access anything beyond what's defined
+ * in the first word of a lck_rw_t
+ */
- if (!(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting) && fake_lck->lck_r_waiting)
- thread_wakeup(RW_LOCK_READER_EVENT(lck));
- }
- if (fake_lck->lck_rw_shared_count)
- lock_type = LCK_RW_TYPE_SHARED;
- else
- lock_type = LCK_RW_TYPE_EXCLUSIVE;
+ if (fake_lck->lck_rw_shared_count <= 1) {
+ if (fake_lck->lck_w_waiting) {
+ thread_wakeup(RW_LOCK_WRITER_EVENT(lck));
+ }
- /* Check if dropping the lock means that we need to unpromote */
- thread = current_thread();
- rwlock_count = thread->rwlock_count--;
+ if (!(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting) && fake_lck->lck_r_waiting) {
+ thread_wakeup(RW_LOCK_READER_EVENT(lck));
+ }
+ }
#if MACH_LDEBUG
- if (rwlock_count == 0) {
- panic("rw lock count underflow for thread %p", thread);
- }
+ if (rwlock_count == 0) {
+ panic("rw lock count underflow for thread %p", thread);
+ }
#endif
- if ((rwlock_count == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
- /* sched_flags checked without lock, but will be rechecked while clearing */
- lck_rw_clear_promotion(thread);
+ /* Check if dropping the lock means that we need to unpromote */
+
+ if ((rwlock_count == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
+ /* sched_flags checked without lock, but will be rechecked while clearing */
+ lck_rw_clear_promotion(thread, unslide_for_kdebug(lck));
+ }
+ }
+ if (fake_lck->lck_rw_shared_count) {
+ lock_type = LCK_RW_TYPE_SHARED;
+ } else {
+ lock_type = LCK_RW_TYPE_EXCLUSIVE;
}
#if CONFIG_DTRACE
LOCKSTAT_RECORD(LS_LCK_RW_DONE_RELEASE, lck, lock_type == LCK_RW_TYPE_SHARED ? 0 : 1);
#endif
- return(lock_type);
+ return lock_type;
}
{
lck_rw_type_t ret;
+ assertf(lck->lck_rw_shared_count > 0, "lck %p has shared_count=0x%x", lck, lck->lck_rw_shared_count);
ret = lck_rw_done(lck);
if (ret != LCK_RW_TYPE_SHARED)
panic("lck_rw_lock(): Invalid RW lock type: %x\n", lck_rw_type);
}
+/*
+ * Routine: lck_rw_lock_shared
+ */
+void
+lck_rw_lock_shared(lck_rw_t *lock)
+{
+ uint32_t data, prev;
+
+ current_thread()->rwlock_count++;
+ for ( ; ; ) {
+ data = atomic_exchange_begin32(&lock->data, &prev, memory_order_acquire_smp);
+ if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK)) {
+ atomic_exchange_abort();
+ if (lock->lck_rw_can_sleep) {
+ lck_rw_lock_shared_gen(lock);
+ } else {
+ cpu_pause();
+ continue;
+ }
+ break;
+ }
+ data += LCK_RW_SHARED_READER;
+ if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_acquire_smp))
+ break;
+ cpu_pause();
+ }
+#if CONFIG_DTRACE
+ LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED);
+#endif /* CONFIG_DTRACE */
+ return;
+}
/*
* Routine: lck_rw_lock_shared_gen
* is held exclusively... this is where we spin/block
* until we can acquire the lock in the shared mode
*/
-void
+static void
lck_rw_lock_shared_gen(
lck_rw_t *lck)
{
- __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
+ __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(lck);
uint64_t deadline = 0;
int gotlock = 0;
int slept = 0;
lck->lck_r_waiting = TRUE;
- res = assert_wait(RW_LOCK_READER_EVENT(lck), THREAD_UNINT);
+ thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockRead);
+ res = assert_wait(RW_LOCK_READER_EVENT(lck),
+ THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
lck_interlock_unlock(lck, istate);
if (res == THREAD_WAITING) {
#if CONFIG_DTRACE
if (dtrace_ls_enabled == TRUE) {
if (slept == 0) {
- LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_SPIN, lck, mach_absolute_time() - wait_interval, 0);
+ LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_SPIN, lck, mach_absolute_time() - wait_interval, 0);
} else {
- LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_BLOCK, lck,
+ LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_BLOCK, lck,
mach_absolute_time() - wait_interval, 0,
(readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
}
/*
- * Routine: lck_rw_lock_shared_to_exclusive_failure
- * Function:
- * assembly fast path code has already dropped our read
- * count and determined that someone else owns 'lck_rw_want_upgrade'
- * if 'lck_rw_shared_count' == 0, its also already dropped 'lck_w_waiting'
- * all we need to do here is determine if a wakeup is needed
+ * Routine: lck_rw_lock_exclusive
*/
-boolean_t
-lck_rw_lock_shared_to_exclusive_failure(
- lck_rw_t *lck,
- int prior_lock_state)
+
+void
+lck_rw_lock_exclusive(lck_rw_t *lock)
{
- lck_rw_t *fake_lck;
- thread_t thread = current_thread();
- uint32_t rwlock_count;
+ current_thread()->rwlock_count++;
+ if (atomic_test_and_set32(&lock->data,
+ (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK),
+ LCK_RW_WANT_EXCL, memory_order_acquire_smp, FALSE)) {
+#if CONFIG_DTRACE
+ LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
+#endif /* CONFIG_DTRACE */
+ } else
+ lck_rw_lock_exclusive_gen(lock);
+}
- /* Check if dropping the lock means that we need to unpromote */
- rwlock_count = thread->rwlock_count--;
-#if MACH_LDEBUG
- if (rwlock_count == 0) {
- panic("rw lock count underflow for thread %p", thread);
- }
-#endif
- if ((rwlock_count == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
- /* sched_flags checked without lock, but will be rechecked while clearing */
- lck_rw_clear_promotion(thread);
+
+/*
+ * Routine: lck_rw_lock_shared_to_exclusive
+ *
+ * FALSE is returned upon failure; in this case the shared lock is dropped.
+ */
+
+boolean_t
+lck_rw_lock_shared_to_exclusive(lck_rw_t *lock)
+{
+ uint32_t data, prev;
+
+ for ( ; ; ) {
+ data = atomic_exchange_begin32(&lock->data, &prev, memory_order_acquire_smp);
+ if (data & LCK_RW_INTERLOCK) {
+ atomic_exchange_abort();
+ lck_rw_interlock_spin(lock);
+ continue;
+ }
+ if (data & LCK_RW_WANT_UPGRADE) {
+ data -= LCK_RW_SHARED_READER;
+ if ((data & LCK_RW_SHARED_MASK) == 0) /* we were the last reader */
+ data &= ~(LCK_RW_W_WAITING); /* so clear the wait indicator */
+ if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_acquire_smp))
+ return lck_rw_lock_shared_to_exclusive_failure(lock, prev);
+ } else {
+ data |= LCK_RW_WANT_UPGRADE; /* ask for WANT_UPGRADE */
+ data -= LCK_RW_SHARED_READER; /* and shed our read count */
+ if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_acquire_smp))
+ break;
+ }
+ cpu_pause();
}
+ /* we now own the WANT_UPGRADE */
+ if (data & LCK_RW_SHARED_MASK) /* check to see if all of the readers are drained */
+ lck_rw_lock_shared_to_exclusive_success(lock); /* if not, we need to go wait */
+#if CONFIG_DTRACE
+ LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lock, 0);
+#endif
+ return TRUE;
+}
- /*
- * prior_lock state is a snapshot of the 1st word of the
- * lock in question... we'll fake up a pointer to it
- * and carefully not access anything beyond whats defined
- * in the first word of a lck_rw_t
- */
+
+/*
+ * Routine: lck_rw_lock_shared_to_exclusive_failure
+ * Function:
+ * assembly fast path code has already dropped our read
+ * count and determined that someone else owns 'lck_rw_want_upgrade'
+ *	if 'lck_rw_shared_count' == 0, it's also already dropped 'lck_w_waiting'
+ * all we need to do here is determine if a wakeup is needed
+ */
+static boolean_t
+lck_rw_lock_shared_to_exclusive_failure(
+ lck_rw_t *lck,
+ uint32_t prior_lock_state)
+{
+ lck_rw_t *fake_lck;
+ thread_t thread = current_thread();
+ uint32_t rwlock_count;
+
+ /* Check if dropping the lock means that we need to unpromote */
+ rwlock_count = thread->rwlock_count--;
+#if MACH_LDEBUG
+ if (rwlock_count == 0) {
+ panic("rw lock count underflow for thread %p", thread);
+ }
+#endif
fake_lck = (lck_rw_t *)&prior_lock_state;
if (fake_lck->lck_w_waiting && fake_lck->lck_rw_shared_count == 1) {
*/
thread_wakeup(RW_LOCK_WRITER_EVENT(lck));
}
+
+ if ((rwlock_count == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
+ /* sched_flags checked without lock, but will be rechecked while clearing */
+ lck_rw_clear_promotion(thread, unslide_for_kdebug(lck));
+ }
+
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_NONE,
VM_KERNEL_UNSLIDE_OR_PERM(lck), lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0);
* we just need to wait for the rest of the readers to drain
* and then we can return as the exclusive holder of this lock
*/
-boolean_t
+static boolean_t
lck_rw_lock_shared_to_exclusive_success(
lck_rw_t *lck)
{
- __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
+ __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(lck);
uint64_t deadline = 0;
int slept = 0;
int still_shared = 0;
* check to see if we're allowed to do a thread_block
*/
if (lck->lck_rw_can_sleep) {
-
+
istate = lck_interlock_lock(lck);
-
+
if (lck->lck_rw_shared_count != 0) {
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_START,
trace_lck, lck->lck_rw_shared_count, 0, 0, 0);
lck->lck_w_waiting = TRUE;
- res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT);
+ thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockUpgrade);
+ res = assert_wait(RW_LOCK_WRITER_EVENT(lck),
+ THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
lck_interlock_unlock(lck, istate);
if (res == THREAD_WAITING) {
*/
if (dtrace_ls_enabled == TRUE) {
if (slept == 0) {
- LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN, lck, mach_absolute_time() - wait_interval, 0);
+ LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN, lck, mach_absolute_time() - wait_interval, 0);
} else {
- LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK, lck,
+ LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK, lck,
mach_absolute_time() - wait_interval, 1,
(readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
}
return (TRUE);
}
+/*
+ * Routine: lck_rw_lock_exclusive_to_shared
+ */
+
+void lck_rw_lock_exclusive_to_shared(lck_rw_t *lock)
+{
+ uint32_t data, prev;
+
+ for ( ; ; ) {
+ data = atomic_exchange_begin32(&lock->data, &prev, memory_order_release_smp);
+ if (data & LCK_RW_INTERLOCK) {
+ atomic_exchange_abort();
+ lck_rw_interlock_spin(lock); /* wait for interlock to clear */
+ continue;
+ }
+ data += LCK_RW_SHARED_READER;
+ if (data & LCK_RW_WANT_UPGRADE)
+ data &= ~(LCK_RW_WANT_UPGRADE);
+ else
+ data &= ~(LCK_RW_WANT_EXCL);
+ if (!((prev & LCK_RW_W_WAITING) && (prev & LCK_RW_PRIV_EXCL)))
+ data &= ~(LCK_RW_W_WAITING);
+ if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_release_smp))
+ break;
+ cpu_pause();
+ }
+ return lck_rw_lock_exclusive_to_shared_gen(lock, prev);
+}
+
/*
- * Routine: lck_rw_lock_exclusive_to_shared
+ * Routine: lck_rw_lock_exclusive_to_shared_gen
* Function:
* assembly fast path has already dropped
* our exclusive state and bumped lck_rw_shared_count
* all we need to do here is determine if anyone
* needs to be awakened.
*/
-void
+static void
lck_rw_lock_exclusive_to_shared_gen(
lck_rw_t *lck,
- int prior_lock_state)
+ uint32_t prior_lock_state)
{
- __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
+ __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(lck);
lck_rw_t *fake_lck;
- /*
- * prior_lock state is a snapshot of the 1st word of the
- * lock in question... we'll fake up a pointer to it
- * and carefully not access anything beyond whats defined
- * in the first word of a lck_rw_t
- */
fake_lck = (lck_rw_t *)&prior_lock_state;
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_START,
return(FALSE);
}
+/*
+ * Routine: lck_rw_try_lock_shared
+ */
+
+boolean_t lck_rw_try_lock_shared(lck_rw_t *lock)
+{
+ uint32_t data, prev;
+
+ for ( ; ; ) {
+ data = atomic_exchange_begin32(&lock->data, &prev, memory_order_acquire_smp);
+ if (data & LCK_RW_INTERLOCK) {
+ atomic_exchange_abort();
+ lck_rw_interlock_spin(lock);
+ continue;
+ }
+ if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) {
+ atomic_exchange_abort();
+ return FALSE; /* lock is busy */
+ }
+ data += LCK_RW_SHARED_READER; /* Increment reader refcount */
+ if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_acquire_smp))
+ break;
+ cpu_pause();
+ }
+ current_thread()->rwlock_count++;
+ /* There is a 3 instr window where preemption may not notice rwlock_count after cmpxchg */
+#if CONFIG_DTRACE
+ LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED);
+#endif /* CONFIG_DTRACE */
+ return TRUE;
+}
+
+
+/*
+ * Routine: lck_rw_try_lock_exclusive
+ */
+
+boolean_t lck_rw_try_lock_exclusive(lck_rw_t *lock)
+{
+ uint32_t data, prev;
+
+ for ( ; ; ) {
+ data = atomic_exchange_begin32(&lock->data, &prev, memory_order_acquire_smp);
+ if (data & LCK_RW_INTERLOCK) {
+ atomic_exchange_abort();
+ lck_rw_interlock_spin(lock);
+ continue;
+ }
+ if (data & (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) {
+ atomic_exchange_abort();
+ return FALSE; /* can't get it */
+ }
+ data |= LCK_RW_WANT_EXCL;
+ if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_acquire_smp))
+ break;
+ cpu_pause();
+ }
+
+ current_thread()->rwlock_count++;
+#if CONFIG_DTRACE
+ LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
+#endif /* CONFIG_DTRACE */
+ return TRUE;
+}
+
void
lck_rw_assert(
}
/* On return to userspace, this routine is called if the rwlock_count is somehow imbalanced */
+#if MACH_LDEBUG
+__dead2
+#endif
void
lck_rw_clear_promotions_x86(thread_t thread)
{
#else
/* Paper over the issue */
thread->rwlock_count = 0;
- lck_rw_clear_promotion(thread);
+ lck_rw_clear_promotion(thread, 0);
#endif
}
+boolean_t
+lck_rw_lock_yield_shared(lck_rw_t *lck, boolean_t force_yield)
+{
+ lck_rw_assert(lck, LCK_RW_ASSERT_SHARED);
+
+ if (lck->lck_rw_want_write || lck->lck_rw_want_upgrade || force_yield) {
+ lck_rw_unlock_shared(lck);
+ mutex_pause(2);
+ lck_rw_lock_shared(lck);
+ return TRUE;
+ }
+
+ return FALSE;
+}
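/*
 * Illustrative sketch (editor's note, not part of the xnu change above): a long
 * scan under a shared lock can periodically yield to pending writers. The loop
 * body and names are placeholders; a TRUE return means the lock was dropped and
 * re-acquired, so any state it protected must be revalidated.
 *
 *	lck_rw_lock_shared(&table_lock);
 *	while (more_work) {
 *		do_one_step();
 *		if (lck_rw_lock_yield_shared(&table_lock, FALSE)) {
 *			// lock was released and re-taken: re-validate cached state
 *		}
 *	}
 *	lck_rw_unlock_shared(&table_lock);
 */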
/*
* Routine: kdp_lck_rw_lock_is_acquired_exclusive
return ((lck->lck_rw_want_upgrade || lck->lck_rw_want_write) && (lck->lck_rw_shared_count == 0)) ? TRUE : FALSE;
}
+/*
+ * Slow path routines for lck_mtx locking and unlocking functions.
+ *
+ * These functions were previously implemented in x86 assembly,
+ * and some optimizations are in place in this C code so that the compiled code
+ * is as performant and compact as the assembly version.
+ *
+ * To avoid inlining these functions on the fast path, all functions directly called by
+ * the fast paths are marked __attribute__((noinline)). They are also implemented
+ * in such a way that the fast path can tail call into them, so the return address
+ * does not need to be pushed on the caller's stack and stack optimization can happen in the caller.
+ *
+ * Slow path code is structured so that there are no calls to functions that return
+ * into the context of the caller, i.e. every function called is either a tail call
+ * or an inline function. The tail call functions take fewer than six arguments,
+ * so that they can be passed in registers and do not need to be pushed on the stack.
+ * This allows the compiler to avoid creating a stack frame for these functions.
+ *
+ * __improbable and __probable are used to compile the slow path code in such a way
+ * that the fast path case ends up on a sequence of instructions with as few jumps as possible,
+ * keeping that case the most optimized even when it falls through to the slow path.
+ */
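/*
 * Illustrative sketch (editor's note, not part of the xnu change above): the fast
 * path shape assumed by the comment above, greatly simplified. The real fast paths
 * live outside this file (see the locks_i386_inlines.h include), so the body here
 * is only an approximation.
 *
 *	void lck_mtx_lock(lck_mtx_t *lock)
 *	{
 *		uint32_t state = ordered_load_mtx_state(lock);
 *
 *		if (__improbable(state & (LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK)))
 *			return lck_mtx_lock_slow(lock);	// tail call into the noinline slow path
 *
 *		// ...uncontended case: take the lock with a single CAS and set the owner...
 *	}
 */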
+
+/*
+ * Intel lock invariants:
+ *
+ * lck_mtx_waiters: contains the count of threads currently in the mutex waitqueue
+ *
+ * The lock owner is promoted to the max priority of all its waiters only if it
+ * was a lower priority when it acquired or was an owner when a waiter waited.
+ * Max priority is capped at MAXPRI_PROMOTE.
+ *
+ * The last waiter will not be promoted as it is woken up, but the last
+ * lock owner may not have been the last thread to have been woken up depending on the
+ * luck of the draw. Therefore a last-owner may still have the promoted-on-wakeup
+ * flag set.
+ *
+ * TODO: Figure out an algorithm for stopping a lock holder which is already at the right
+ * priority from dropping priority in the future without having to take thread lock
+ * on acquire.
+ */
#ifdef MUTEX_ZONE
extern zone_t lck_mtx_zone;
#endif
+
/*
* Routine: lck_mtx_alloc_init
*/
#else
if ((lck = (lck_mtx_t *)kalloc(sizeof(lck_mtx_t))) != 0)
lck_mtx_init(lck, grp, attr);
-#endif
+#endif
return(lck);
}
if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
if ((lck_ext = (lck_mtx_ext_t *)kalloc(sizeof(lck_mtx_ext_t))) != 0) {
- lck_mtx_ext_init(lck_ext, grp, lck_attr);
+ lck_mtx_ext_init(lck_ext, grp, lck_attr);
lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
lck->lck_mtx_ptr = lck_ext;
}
lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
}
+static void
+lck_mtx_lock_mark_destroyed(
+ lck_mtx_t *mutex,
+ boolean_t indirect)
+{
+ uint32_t state;
+
+ if (indirect) {
+ /* convert to destroyed state */
+ ordered_store_mtx_state_release(mutex, LCK_MTX_TAG_DESTROYED);
+ return;
+ }
+
+ state = ordered_load_mtx_state(mutex);
+ lck_mtx_interlock_lock(mutex, &state);
+
+ ordered_store_mtx_state_release(mutex, LCK_MTX_TAG_DESTROYED);
+
+ enable_preemption();
+}
+
/*
* Routine: lck_mtx_destroy
*/
lck_mtx_t *lck,
lck_grp_t *grp)
{
- boolean_t lck_is_indirect;
-
+ boolean_t indirect;
+
if (lck->lck_mtx_tag == LCK_MTX_TAG_DESTROYED)
return;
#if MACH_LDEBUG
lck_mtx_assert(lck, LCK_MTX_ASSERT_NOTOWNED);
#endif
- lck_is_indirect = (lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT);
+ indirect = (lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT);
- lck_mtx_lock_mark_destroyed(lck);
+ lck_mtx_lock_mark_destroyed(lck, indirect);
- if (lck_is_indirect)
+ if (indirect)
kfree(lck->lck_mtx_ptr, sizeof(lck_mtx_ext_t));
lck_grp_lckcnt_decr(grp, LCK_TYPE_MTX);
lck_grp_deallocate(grp);
}
-#define LCK_MTX_LCK_WAIT_CODE 0x20
-#define LCK_MTX_LCK_WAKEUP_CODE 0x21
-#define LCK_MTX_LCK_SPIN_CODE 0x22
-#define LCK_MTX_LCK_ACQUIRE_CODE 0x23
-#define LCK_MTX_LCK_DEMOTE_CODE 0x24
+#if DEVELOPMENT | DEBUG
+__attribute__((noinline))
+void
+lck_mtx_owner_check_panic(
+ lck_mtx_t *lock)
+{
+ thread_t owner = (thread_t)lock->lck_mtx_owner;
+ panic("Mutex unlock attempted from non-owner thread. Owner=%p lock=%p", owner, lock);
+}
+#endif
+__attribute__((always_inline))
+static boolean_t
+get_indirect_mutex(
+ lck_mtx_t **lock,
+ uint32_t *state)
+{
+ *lock = &((*lock)->lck_mtx_ptr->lck_mtx);
+ *state = ordered_load_mtx_state(*lock);
+ return TRUE;
+}
/*
- * Routine: lck_mtx_unlock_wakeup_x86
+ * Routine: lck_mtx_unlock_slow
*
- * Invoked on unlock when there is
- * contention (i.e. the assembly routine sees that
- * that mutex->lck_mtx_waiters != 0 or
- * that mutex->lck_mtx_promoted != 0...
+ * Unlocks a mutex held by current thread.
*
- * neither the mutex or interlock is held
+ * It will wake up waiters if necessary.
+ *
+ * Interlock can be held.
*/
+__attribute__((noinline))
void
-lck_mtx_unlock_wakeup_x86 (
- lck_mtx_t *mutex,
- int prior_lock_state)
+lck_mtx_unlock_slow(
+ lck_mtx_t *lock)
{
- __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(mutex);
- lck_mtx_t fake_lck;
+ thread_t thread;
+ uint32_t state, prev;
+ boolean_t indirect = FALSE;
- /*
- * prior_lock state is a snapshot of the 2nd word of the
- * lock in question... we'll fake up a lock with the bits
- * copied into place and carefully not access anything
- * beyond whats defined in the second word of a lck_mtx_t
- */
- fake_lck.lck_mtx_state = prior_lock_state;
+ state = ordered_load_mtx_state(lock);
- KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_START,
- trace_lck, fake_lck.lck_mtx_promoted, fake_lck.lck_mtx_waiters, fake_lck.lck_mtx_pri, 0);
+ /* Is this an indirect mutex? */
+ if (__improbable(state == LCK_MTX_TAG_INDIRECT)) {
+ indirect = get_indirect_mutex(&lock, &state);
+ }
- if (__probable(fake_lck.lck_mtx_waiters)) {
- if (fake_lck.lck_mtx_waiters > 1)
- thread_wakeup_one_with_pri(LCK_MTX_EVENT(mutex), fake_lck.lck_mtx_pri);
- else
- thread_wakeup_one(LCK_MTX_EVENT(mutex));
+ thread = current_thread();
+
+#if DEVELOPMENT | DEBUG
+ thread_t owner = (thread_t)lock->lck_mtx_owner;
+ if(__improbable(owner != thread))
+ lck_mtx_owner_check_panic(lock);
+#endif
+
+ /* check if it is held as a spinlock */
+ if (__improbable((state & LCK_MTX_MLOCKED_MSK) == 0))
+ goto unlock;
+
+ lck_mtx_interlock_lock_clear_flags(lock, LCK_MTX_MLOCKED_MSK, &state);
+
+unlock:
+ /* preemption disabled, interlock held and mutex not held */
+
+ /* clear owner */
+ ordered_store_mtx_owner(lock, 0);
+ /* keep original state in prev for later evaluation */
+ prev = state;
+
+ if (__improbable(state & LCK_MTX_WAITERS_MSK)) {
+#if MACH_LDEBUG
+ if (thread)
+ thread->mutex_count--;
+#endif
+ return lck_mtx_unlock_wakeup_tail(lock, state, indirect);
}
- if (__improbable(fake_lck.lck_mtx_promoted)) {
- thread_t thread = current_thread();
+ /* release interlock, promotion and clear spin flag */
+ state &= (~(LCK_MTX_ILOCKED_MSK | LCK_MTX_SPIN_MSK));
+ ordered_store_mtx_state_release(lock, state); /* since I own the interlock, I don't need an atomic update */
+#if MACH_LDEBUG
+ /* perform lock statistics after drop to prevent delay */
+ if (thread)
+ thread->mutex_count--; /* lock statistic */
+#endif /* MACH_LDEBUG */
- KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_DEMOTE_CODE) | DBG_FUNC_NONE,
- thread_tid(thread), thread->promotions, thread->sched_flags & TH_SFLAG_PROMOTED, 0, 0);
+ /* re-enable preemption */
+ lck_mtx_unlock_finish_inline(lock, FALSE);
- if (thread->promotions > 0) {
- spl_t s = splsched();
+ return;
+}
- thread_lock(thread);
+#define LCK_MTX_LCK_WAIT_CODE 0x20
+#define LCK_MTX_LCK_WAKEUP_CODE 0x21
+#define LCK_MTX_LCK_SPIN_CODE 0x22
+#define LCK_MTX_LCK_ACQUIRE_CODE 0x23
+#define LCK_MTX_LCK_DEMOTE_CODE 0x24
- if (--thread->promotions == 0 && (thread->sched_flags & TH_SFLAG_PROMOTED)) {
+/*
+ * Routine: lck_mtx_unlock_wakeup_tail
+ *
+ * Invoked on unlock when there is
+ * contention, i.e. the assembly routine sees
+ * that mutex->lck_mtx_waiters != 0
+ *
+ * neither the mutex nor the interlock is held
+ *
+ * Note that this routine might not be called if there are pending
+ * waiters which have previously been woken up, and they didn't
+ * end up boosting the old owner.
+ *
+ * assembly routine previously did the following to mutex:
+ * (after saving the state in prior_lock_state)
+ * decremented lck_mtx_waiters if nonzero
+ *
+ * This function needs to be called as a tail call
+ * to optimize the compiled code.
+ */
+__attribute__((noinline))
+static void
+lck_mtx_unlock_wakeup_tail (
+ lck_mtx_t *mutex,
+ uint32_t state,
+ boolean_t indirect)
+{
+ struct turnstile *ts;
- thread->sched_flags &= ~TH_SFLAG_PROMOTED;
+ __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(mutex);
+ kern_return_t did_wake;
- if (thread->sched_flags & TH_SFLAG_RW_PROMOTED) {
- /* Thread still has a RW lock promotion */
- } else if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
- KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_DEMOTE) | DBG_FUNC_NONE,
- thread->sched_pri, DEPRESSPRI, 0, trace_lck, 0);
+ KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_START,
+ trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);
- set_sched_pri(thread, DEPRESSPRI);
- }
- else {
- if (thread->base_pri < thread->sched_pri) {
- KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_DEMOTE) | DBG_FUNC_NONE,
- thread->sched_pri, thread->base_pri, 0, trace_lck, 0);
+ ts = turnstile_prepare((uintptr_t)mutex, NULL, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
- thread_recompute_sched_pri(thread, FALSE);
- }
- }
- }
- thread_unlock(thread);
- splx(s);
- }
+ if (mutex->lck_mtx_waiters > 1) {
+		/* WAITQ_PROMOTE_ON_WAKE will call turnstile_update_inheritor on the woken-up thread */
+ did_wake = waitq_wakeup64_one(&ts->ts_waitq, CAST_EVENT64_T(LCK_MTX_EVENT(mutex)), THREAD_AWAKENED, WAITQ_PROMOTE_ON_WAKE);
+ } else {
+ did_wake = waitq_wakeup64_one(&ts->ts_waitq, CAST_EVENT64_T(LCK_MTX_EVENT(mutex)), THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
+ turnstile_update_inheritor(ts, NULL, TURNSTILE_IMMEDIATE_UPDATE);
}
+ assert(did_wake == KERN_SUCCESS);
+
+ turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
+ turnstile_complete((uintptr_t)mutex, NULL, NULL, TURNSTILE_KERNEL_MUTEX);
+
+ state -= LCK_MTX_WAITER;
+ state &= (~(LCK_MTX_SPIN_MSK | LCK_MTX_ILOCKED_MSK));
+ ordered_store_mtx_state_release(mutex, state);
+
+ assert(current_thread()->turnstile != NULL);
+
+ turnstile_cleanup();
+
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_END,
- trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);
-}
+ trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);
+ lck_mtx_unlock_finish_inline(mutex, indirect);
+}
/*
* Routine: lck_mtx_lock_acquire_x86
*
* Invoked on acquiring the mutex when there is
* contention (i.e. the assembly routine sees that
- * that mutex->lck_mtx_waiters != 0 or
- * thread->was_promoted_on_wakeup != 0)...
+ * mutex->lck_mtx_waiters != 0
*
* mutex is owned... interlock is held... preemption is disabled
*/
-void
-lck_mtx_lock_acquire_x86(
- lck_mtx_t *mutex)
+__attribute__((always_inline))
+static void
+lck_mtx_lock_acquire_inline(
+ lck_mtx_t *mutex,
+ struct turnstile *ts)
{
- __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(mutex);
- thread_t thread;
- integer_t priority;
- spl_t s;
+ __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(mutex);
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_ACQUIRE_CODE) | DBG_FUNC_START,
- trace_lck, thread->was_promoted_on_wakeup, mutex->lck_mtx_waiters, mutex->lck_mtx_pri, 0);
+ trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);
- if (mutex->lck_mtx_waiters)
- priority = mutex->lck_mtx_pri;
- else
- priority = 0;
+ thread_t thread = (thread_t)mutex->lck_mtx_owner; /* faster than current_thread() */
+ assert(thread->waiting_for_mutex == NULL);
- thread = (thread_t)mutex->lck_mtx_owner; /* faster then current_thread() */
+ if (mutex->lck_mtx_waiters > 0) {
+ if (ts == NULL) {
+ ts = turnstile_prepare((uintptr_t)mutex, NULL, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
+ }
- if (thread->sched_pri < priority || thread->was_promoted_on_wakeup) {
+ turnstile_update_inheritor(ts, thread, (TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD));
+ turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
+ }
- KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_PROMOTE) | DBG_FUNC_NONE,
- thread->sched_pri, priority, thread->was_promoted_on_wakeup, trace_lck, 0);
+ if (ts != NULL) {
+ turnstile_complete((uintptr_t)mutex, NULL, NULL, TURNSTILE_KERNEL_MUTEX);
+ }
- s = splsched();
- thread_lock(thread);
+ assert(current_thread()->turnstile != NULL);
- if (thread->sched_pri < priority) {
- /* Do not promote past promotion ceiling */
- assert(priority <= MAXPRI_PROMOTE);
- set_sched_pri(thread, priority);
- }
- if (mutex->lck_mtx_promoted == 0) {
- mutex->lck_mtx_promoted = 1;
-
- thread->promotions++;
- thread->sched_flags |= TH_SFLAG_PROMOTED;
- }
- thread->was_promoted_on_wakeup = 0;
-
- thread_unlock(thread);
- splx(s);
- }
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_ACQUIRE_CODE) | DBG_FUNC_END,
trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);
}
-
-static int
-lck_mtx_interlock_try_lock(lck_mtx_t *mutex, boolean_t *istate)
+void
+lck_mtx_lock_acquire_x86(
+ lck_mtx_t *mutex)
{
- int retval;
+ return lck_mtx_lock_acquire_inline(mutex, NULL);
+}
- *istate = ml_set_interrupts_enabled(FALSE);
- retval = lck_mtx_ilk_try_lock(mutex);
+/*
+ * Tail call helpers for lock functions that perform
+ * lck_mtx_lock_acquire followed by the caller's finish routine, to optimize
+ * the caller's compiled code.
+ */
- if (retval == 0)
- ml_set_interrupts_enabled(*istate);
+__attribute__((noinline))
+static void
+lck_mtx_lock_acquire_tail(
+ lck_mtx_t *mutex,
+ boolean_t indirect,
+ struct turnstile *ts)
+{
+ lck_mtx_lock_acquire_inline(mutex, ts);
+ lck_mtx_lock_finish_inline_with_cleanup(mutex, ordered_load_mtx_state(mutex), indirect);
+}
- return retval;
+__attribute__((noinline))
+static boolean_t
+lck_mtx_try_lock_acquire_tail(
+ lck_mtx_t *mutex)
+{
+ lck_mtx_lock_acquire_inline(mutex, NULL);
+ lck_mtx_try_lock_finish_inline(mutex, ordered_load_mtx_state(mutex));
+
+ return TRUE;
}
+__attribute__((noinline))
static void
-lck_mtx_interlock_unlock(lck_mtx_t *mutex, boolean_t istate)
-{
- lck_mtx_ilk_unlock(mutex);
- ml_set_interrupts_enabled(istate);
+lck_mtx_convert_spin_acquire_tail(
+ lck_mtx_t *mutex)
+{
+ lck_mtx_lock_acquire_inline(mutex, NULL);
+ lck_mtx_convert_spin_finish_inline(mutex, ordered_load_mtx_state(mutex));
}
-
-/*
- * Routine: lck_mtx_lock_spinwait_x86
- *
- * Invoked trying to acquire a mutex when there is contention but
- * the holder is running on another processor. We spin for up to a maximum
- * time waiting for the lock to be released.
- *
- * Called with the interlock unlocked.
- * returns 0 if mutex acquired
- * returns 1 if we spun
- * returns 2 if we didn't spin due to the holder not running
- */
-int
-lck_mtx_lock_spinwait_x86(
- lck_mtx_t *mutex)
+boolean_t
+lck_mtx_ilk_unlock(
+ lck_mtx_t *mutex)
{
- __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(mutex);
- thread_t holder;
- uint64_t overall_deadline;
- uint64_t check_owner_deadline;
- uint64_t cur_time;
- int retval = 1;
- int loopcount = 0;
+ lck_mtx_ilk_unlock_inline(mutex, ordered_load_mtx_state(mutex));
+ return TRUE;
+}
- KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_START,
- trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner), mutex->lck_mtx_waiters, 0, 0);
+static inline void
+lck_mtx_interlock_lock_set_and_clear_flags(
+ lck_mtx_t *mutex,
+ uint32_t xor_flags,
+ uint32_t and_flags,
+ uint32_t *new_state)
+{
+ uint32_t state, prev;
+ state = *new_state;
+
+ for ( ; ; ) {
+ /* have to wait for interlock to clear */
+ while (__improbable(state & (LCK_MTX_ILOCKED_MSK | xor_flags))) {
+ cpu_pause();
+ state = ordered_load_mtx_state(mutex);
+ }
+ prev = state; /* prev contains snapshot for exchange */
+ state |= LCK_MTX_ILOCKED_MSK | xor_flags; /* pick up interlock */
+ state &= ~and_flags; /* clear flags */
- cur_time = mach_absolute_time();
- overall_deadline = cur_time + MutexSpin;
+ disable_preemption();
+ if (os_atomic_cmpxchg(&mutex->lck_mtx_state, prev, state, acquire))
+ break;
+ enable_preemption();
+ cpu_pause();
+ state = ordered_load_mtx_state(mutex);
+ }
+ *new_state = state;
+ return;
+}
+
+static inline void
+lck_mtx_interlock_lock_clear_flags(
+ lck_mtx_t *mutex,
+ uint32_t and_flags,
+ uint32_t *new_state)
+{
+ return lck_mtx_interlock_lock_set_and_clear_flags(mutex, 0, and_flags, new_state);
+}
+
+static inline void
+lck_mtx_interlock_lock(
+ lck_mtx_t *mutex,
+ uint32_t *new_state)
+{
+ return lck_mtx_interlock_lock_set_and_clear_flags(mutex, 0, 0, new_state);
+}
+
+static inline int
+lck_mtx_interlock_try_lock_set_flags(
+ lck_mtx_t *mutex,
+ uint32_t or_flags,
+ uint32_t *new_state)
+{
+ uint32_t state, prev;
+ state = *new_state;
+
+ /* have to wait for interlock to clear */
+ if (state & (LCK_MTX_ILOCKED_MSK | or_flags)) {
+ return 0;
+ }
+ prev = state; /* prev contains snapshot for exchange */
+ state |= LCK_MTX_ILOCKED_MSK | or_flags; /* pick up interlock */
+ disable_preemption();
+ if (os_atomic_cmpxchg(&mutex->lck_mtx_state, prev, state, acquire)) {
+ *new_state = state;
+ return 1;
+ }
+
+ enable_preemption();
+ return 0;
+}
+
+static inline int
+lck_mtx_interlock_try_lock(
+ lck_mtx_t *mutex,
+ uint32_t *new_state)
+{
+ return lck_mtx_interlock_try_lock_set_flags(mutex, 0, new_state);
+}
+
+static inline int
+lck_mtx_interlock_try_lock_disable_interrupts(
+ lck_mtx_t *mutex,
+ boolean_t *istate)
+{
+ uint32_t state;
+
+ *istate = ml_set_interrupts_enabled(FALSE);
+ state = ordered_load_mtx_state(mutex);
+
+ if (lck_mtx_interlock_try_lock(mutex, &state)) {
+ return 1;
+ } else {
+ ml_set_interrupts_enabled(*istate);
+ return 0;
+ }
+}
+
+static inline void
+lck_mtx_interlock_unlock_enable_interrupts(
+ lck_mtx_t *mutex,
+ boolean_t istate)
+{
+ lck_mtx_ilk_unlock(mutex);
+ ml_set_interrupts_enabled(istate);
+}
+
+__attribute__((noinline))
+static void
+lck_mtx_lock_contended(
+ lck_mtx_t *lock,
+ boolean_t indirect,
+ boolean_t *first_miss)
+{
+ lck_mtx_spinwait_ret_type_t ret;
+ uint32_t state;
+ thread_t thread;
+ struct turnstile *ts = NULL;
+
+try_again:
+
+ if (indirect) {
+ lck_grp_mtx_update_miss((struct _lck_mtx_ext_*)lock, first_miss);
+ }
+
+ ret = lck_mtx_lock_spinwait_x86(lock);
+ state = ordered_load_mtx_state(lock);
+ switch (ret) {
+ case LCK_MTX_SPINWAIT_NO_SPIN:
+ /*
+ * owner not on core, lck_mtx_lock_spinwait_x86 didn't even
+ * try to spin.
+ */
+ if (indirect) {
+ lck_grp_mtx_update_direct_wait((struct _lck_mtx_ext_*)lock);
+ }
+
+ /* just fall through case LCK_MTX_SPINWAIT_SPUN */
+ case LCK_MTX_SPINWAIT_SPUN:
+ /*
+ * mutex not acquired but lck_mtx_lock_spinwait_x86 tried to spin
+ * interlock not held
+ */
+ lck_mtx_interlock_lock(lock, &state);
+ assert(state & LCK_MTX_ILOCKED_MSK);
+
+ if (state & LCK_MTX_MLOCKED_MSK) {
+ if (indirect) {
+ lck_grp_mtx_update_wait((struct _lck_mtx_ext_*)lock, first_miss);
+ }
+ lck_mtx_lock_wait_x86(lock, &ts);
+ /*
+ * interlock is not held here.
+ */
+ goto try_again;
+ } else {
+
+ /* grab the mutex */
+ state |= LCK_MTX_MLOCKED_MSK;
+ ordered_store_mtx_state_release(lock, state);
+ thread = current_thread();
+ ordered_store_mtx_owner(lock, (uintptr_t)thread);
+#if MACH_LDEBUG
+ if (thread) {
+ thread->mutex_count++;
+ }
+#endif /* MACH_LDEBUG */
+ }
+
+ break;
+ case LCK_MTX_SPINWAIT_ACQUIRED:
+ /*
+ * mutex has been acquired by lck_mtx_lock_spinwait_x86
+ * interlock is held and preemption disabled
+ * owner is set and mutex marked as locked
+ * statistics updated too
+ */
+ break;
+ default:
+ panic("lck_mtx_lock_spinwait_x86 returned %d for mutex %p\n", ret, lock);
+ }
+
+ /*
+ * interlock is already acquired here
+ */
+
+ /* mutex has been acquired */
+ thread = (thread_t)lock->lck_mtx_owner;
+ if (state & LCK_MTX_WAITERS_MSK) {
+ /*
+ * lck_mtx_lock_acquire_tail will call
+ * turnstile_complete.
+ */
+ return lck_mtx_lock_acquire_tail(lock, indirect, ts);
+ }
+
+ if (ts != NULL) {
+ turnstile_complete((uintptr_t)lock, NULL, NULL, TURNSTILE_KERNEL_MUTEX);
+ }
+
+ assert(current_thread()->turnstile != NULL);
+
+ /* release the interlock */
+ lck_mtx_lock_finish_inline_with_cleanup(lock, ordered_load_mtx_state(lock), indirect);
+}
+
+/*
+ * Helper noinline functions for calling
+ * panic to optimize compiled code.
+ */
+
+__attribute__((noinline)) __abortlike
+static void
+lck_mtx_destroyed(
+ lck_mtx_t *lock)
+{
+ panic("trying to interlock destroyed mutex (%p)", lock);
+}
+
+__attribute__((noinline))
+static boolean_t
+lck_mtx_try_destroyed(
+ lck_mtx_t *lock)
+{
+ panic("trying to interlock destroyed mutex (%p)", lock);
+ return FALSE;
+}
+
+__attribute__((always_inline))
+static boolean_t
+lck_mtx_lock_wait_interlock_to_clear(
+ lck_mtx_t *lock,
+ uint32_t* new_state)
+{
+ uint32_t state;
+
+ for ( ; ; ) {
+ cpu_pause();
+ state = ordered_load_mtx_state(lock);
+ if (!(state & (LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK))) {
+ *new_state = state;
+ return TRUE;
+ }
+ if (state & LCK_MTX_MLOCKED_MSK) {
+ /* if it is held as mutex, just fail */
+ return FALSE;
+ }
+ }
+}
+
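+/*
+ * try_lock variant of the above: spins only while the interlock is
+ * held, and fails immediately if the lock is observed held as a
+ * mutex or as a spinlock.
+ */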
+__attribute__((always_inline))
+static boolean_t
+lck_mtx_try_lock_wait_interlock_to_clear(
+ lck_mtx_t *lock,
+ uint32_t* new_state)
+{
+ uint32_t state;
+
+ for ( ; ; ) {
+ cpu_pause();
+ state = ordered_load_mtx_state(lock);
+ if (state & (LCK_MTX_MLOCKED_MSK | LCK_MTX_SPIN_MSK)) {
+ /* if it is held as mutex or spin, just fail */
+ return FALSE;
+ }
+ if (!(state & LCK_MTX_ILOCKED_MSK)) {
+ *new_state = state;
+ return TRUE;
+ }
+ }
+}
+
+/*
+ * Routine: lck_mtx_lock_slow
+ *
+ * Locks a mutex for the current thread.
+ * If the lock is contended this function might
+ * sleep.
+ *
+ * Called with interlock not held.
+ */
+__attribute__((noinline))
+void
+lck_mtx_lock_slow(
+ lck_mtx_t *lock)
+{
+ boolean_t indirect = FALSE;
+ uint32_t state;
+ int first_miss = 0;
+
+ state = ordered_load_mtx_state(lock);
+
+ /* is the interlock or mutex held */
+ if (__improbable(state & ((LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK)))) {
+ /*
+ * Note: both LCK_MTX_TAG_DESTROYED and LCK_MTX_TAG_INDIRECT
+ * have LCK_MTX_ILOCKED_MSK and LCK_MTX_MLOCKED_MSK
+ * set in state (state == lck_mtx_tag)
+ */
+
+
+ /* is the mutex already held and not indirect */
+		if (__improbable(!(state & LCK_MTX_ILOCKED_MSK))) {
+ /* no, must have been the mutex */
+ return lck_mtx_lock_contended(lock, indirect, &first_miss);
+ }
+
+ /* check to see if it is marked destroyed */
+ if (__improbable(state == LCK_MTX_TAG_DESTROYED)) {
+ lck_mtx_destroyed(lock);
+ }
+
+ /* Is this an indirect mutex? */
+ if (__improbable(state == LCK_MTX_TAG_INDIRECT)) {
+ indirect = get_indirect_mutex(&lock, &state);
+
+ first_miss = 0;
+ lck_grp_mtx_update_held((struct _lck_mtx_ext_*)lock);
+
+ if (state & LCK_MTX_SPIN_MSK) {
+				/* LCK_MTX_SPIN_MSK was set, so LCK_MTX_ILOCKED_MSK must also be present */
+ assert(state & LCK_MTX_ILOCKED_MSK);
+ lck_grp_mtx_update_miss((struct _lck_mtx_ext_*)lock, &first_miss);
+ }
+ }
+
+ if (!lck_mtx_lock_wait_interlock_to_clear(lock, &state)) {
+ return lck_mtx_lock_contended(lock, indirect, &first_miss);
+ }
+ }
+
+ /* no - can't be INDIRECT, DESTROYED or locked */
+ while (__improbable(!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_MLOCKED_MSK, &state))) {
+ if (!lck_mtx_lock_wait_interlock_to_clear(lock, &state)) {
+ return lck_mtx_lock_contended(lock, indirect, &first_miss);
+ }
+ }
+
+ /* lock and interlock acquired */
+
+ thread_t thread = current_thread();
+ /* record owner of mutex */
+ ordered_store_mtx_owner(lock, (uintptr_t)thread);
+
+#if MACH_LDEBUG
+ if (thread) {
+ thread->mutex_count++; /* lock statistic */
+ }
+#endif
+ /*
+ * Check if there are waiters to
+ * inherit their priority.
+ */
+ if (__improbable(state & LCK_MTX_WAITERS_MSK)) {
+ return lck_mtx_lock_acquire_tail(lock, indirect, NULL);
+ }
+
+ /* release the interlock */
+ lck_mtx_lock_finish_inline(lock, ordered_load_mtx_state(lock), indirect);
+
+ return;
+}
+
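+/*
+ * Routine: lck_mtx_try_lock_slow
+ *
+ * Tries to lock a mutex for the current thread
+ * without blocking.
+ *
+ * Returns TRUE if the lock was acquired, FALSE otherwise.
+ *
+ * Called with interlock not held.
+ */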
+__attribute__((noinline))
+boolean_t
+lck_mtx_try_lock_slow(
+ lck_mtx_t *lock)
+{
+ boolean_t indirect = FALSE;
+ uint32_t state;
+ int first_miss = 0;
+
+ state = ordered_load_mtx_state(lock);
+
+ /* is the interlock or mutex held */
+ if (__improbable(state & ((LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK)))) {
+ /*
+ * Note: both LCK_MTX_TAG_DESTROYED and LCK_MTX_TAG_INDIRECT
+ * have LCK_MTX_ILOCKED_MSK and LCK_MTX_MLOCKED_MSK
+ * set in state (state == lck_mtx_tag)
+ */
+
+ /* is the mutex already held and not indirect */
+		if (__improbable(!(state & LCK_MTX_ILOCKED_MSK))) {
+ return FALSE;
+ }
+
+ /* check to see if it is marked destroyed */
+ if (__improbable(state == LCK_MTX_TAG_DESTROYED)) {
+ lck_mtx_try_destroyed(lock);
+ }
+
+ /* Is this an indirect mutex? */
+ if (__improbable(state == LCK_MTX_TAG_INDIRECT)) {
+ indirect = get_indirect_mutex(&lock, &state);
+
+ first_miss = 0;
+ lck_grp_mtx_update_held((struct _lck_mtx_ext_*)lock);
+ }
+
+ if (!lck_mtx_try_lock_wait_interlock_to_clear(lock, &state)) {
+ if (indirect)
+ lck_grp_mtx_update_miss((struct _lck_mtx_ext_*)lock, &first_miss);
+ return FALSE;
+ }
+ }
+
+ /* no - can't be INDIRECT, DESTROYED or locked */
+ while (__improbable(!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_MLOCKED_MSK, &state))) {
+ if (!lck_mtx_try_lock_wait_interlock_to_clear(lock, &state)) {
+ if (indirect)
+ lck_grp_mtx_update_miss((struct _lck_mtx_ext_*)lock, &first_miss);
+ return FALSE;
+ }
+ }
+
+ /* lock and interlock acquired */
+
+ thread_t thread = current_thread();
+ /* record owner of mutex */
+ ordered_store_mtx_owner(lock, (uintptr_t)thread);
+
+#if MACH_LDEBUG
+ if (thread) {
+ thread->mutex_count++; /* lock statistic */
+ }
+#endif
+ /*
+ * Check if there are waiters to
+ * inherit their priority.
+ */
+ if (__improbable(state & LCK_MTX_WAITERS_MSK)) {
+ return lck_mtx_try_lock_acquire_tail(lock);
+ }
+
+ /* release the interlock */
+ lck_mtx_try_lock_finish_inline(lock, ordered_load_mtx_state(lock));
+
+ return TRUE;
+
+}
+
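+/*
+ * Routine: lck_mtx_lock_spin_slow
+ *
+ * Locks a mutex for the current thread, in spin mode
+ * when possible: on the uncontended path it returns with
+ * the lock held as a spinlock, the interlock held and
+ * preemption disabled.
+ * If the lock is already held as a full mutex this
+ * function falls back to lck_mtx_lock_contended and
+ * might sleep.
+ *
+ * Called with interlock not held.
+ */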
+__attribute__((noinline))
+void
+lck_mtx_lock_spin_slow(
+ lck_mtx_t *lock)
+{
+ boolean_t indirect = FALSE;
+ uint32_t state;
+ int first_miss = 0;
+
+ state = ordered_load_mtx_state(lock);
+
+ /* is the interlock or mutex held */
+ if (__improbable(state & ((LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK)))) {
+ /*
+ * Note: both LCK_MTX_TAG_DESTROYED and LCK_MTX_TAG_INDIRECT
+ * have LCK_MTX_ILOCKED_MSK and LCK_MTX_MLOCKED_MSK
+ * set in state (state == lck_mtx_tag)
+ */
+
+
+ /* is the mutex already held and not indirect */
+		if (__improbable(!(state & LCK_MTX_ILOCKED_MSK))) {
+ /* no, must have been the mutex */
+ return lck_mtx_lock_contended(lock, indirect, &first_miss);
+ }
+
+ /* check to see if it is marked destroyed */
+ if (__improbable(state == LCK_MTX_TAG_DESTROYED)) {
+ lck_mtx_destroyed(lock);
+ }
+
+ /* Is this an indirect mutex? */
+ if (__improbable(state == LCK_MTX_TAG_INDIRECT)) {
+ indirect = get_indirect_mutex(&lock, &state);
+
+ first_miss = 0;
+ lck_grp_mtx_update_held((struct _lck_mtx_ext_*)lock);
+
+ if (state & LCK_MTX_SPIN_MSK) {
+				/* LCK_MTX_SPIN_MSK was set, so LCK_MTX_ILOCKED_MSK must also be present */
+ assert(state & LCK_MTX_ILOCKED_MSK);
+ lck_grp_mtx_update_miss((struct _lck_mtx_ext_*)lock, &first_miss);
+ }
+ }
+
+ if (!lck_mtx_lock_wait_interlock_to_clear(lock, &state)) {
+ return lck_mtx_lock_contended(lock, indirect, &first_miss);
+ }
+ }
+
+ /* no - can't be INDIRECT, DESTROYED or locked */
+	while (__improbable(!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_SPIN_MSK, &state))) {
+ if (!lck_mtx_lock_wait_interlock_to_clear(lock, &state)) {
+ return lck_mtx_lock_contended(lock, indirect, &first_miss);
+ }
+ }
+
+ /* lock as spinlock and interlock acquired */
+
+ thread_t thread = current_thread();
+ /* record owner of mutex */
+ ordered_store_mtx_owner(lock, (uintptr_t)thread);
+
+#if MACH_LDEBUG
+ if (thread) {
+ thread->mutex_count++; /* lock statistic */
+ }
+#endif
+
+#if CONFIG_DTRACE
+ LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN_ACQUIRE, lock, 0);
+#endif
+ /* return with the interlock held and preemption disabled */
+ return;
+}
+
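+/*
+ * Routine: lck_mtx_try_lock_spin_slow
+ *
+ * Tries to lock a mutex in spin mode for the current
+ * thread without blocking.
+ *
+ * Returns TRUE if the lock was acquired, with the
+ * interlock held and preemption disabled; FALSE otherwise.
+ *
+ * Called with interlock not held.
+ */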
+__attribute__((noinline))
+boolean_t
+lck_mtx_try_lock_spin_slow(
+ lck_mtx_t *lock)
+{
+ boolean_t indirect = FALSE;
+ uint32_t state;
+ int first_miss = 0;
+
+ state = ordered_load_mtx_state(lock);
+
+ /* is the interlock or mutex held */
+ if (__improbable(state & ((LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK)))) {
+ /*
+ * Note: both LCK_MTX_TAG_DESTROYED and LCK_MTX_TAG_INDIRECT
+ * have LCK_MTX_ILOCKED_MSK and LCK_MTX_MLOCKED_MSK
+ * set in state (state == lck_mtx_tag)
+ */
+
+ /* is the mutex already held and not indirect */
+		if (__improbable(!(state & LCK_MTX_ILOCKED_MSK))) {
+ return FALSE;
+ }
+
+ /* check to see if it is marked destroyed */
+ if (__improbable(state == LCK_MTX_TAG_DESTROYED)) {
+ lck_mtx_try_destroyed(lock);
+ }
+
+ /* Is this an indirect mutex? */
+ if (__improbable(state == LCK_MTX_TAG_INDIRECT)) {
+ indirect = get_indirect_mutex(&lock, &state);
+
+ first_miss = 0;
+ lck_grp_mtx_update_held((struct _lck_mtx_ext_*)lock);
+ }
+
+ if (!lck_mtx_try_lock_wait_interlock_to_clear(lock, &state)) {
+ if (indirect)
+ lck_grp_mtx_update_miss((struct _lck_mtx_ext_*)lock, &first_miss);
+ return FALSE;
+ }
+ }
+
+ /* no - can't be INDIRECT, DESTROYED or locked */
+ while (__improbable(!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_SPIN_MSK, &state))) {
+ if (!lck_mtx_try_lock_wait_interlock_to_clear(lock, &state)) {
+ if (indirect)
+ lck_grp_mtx_update_miss((struct _lck_mtx_ext_*)lock, &first_miss);
+ return FALSE;
+ }
+ }
+
+ /* lock and interlock acquired */
+
+ thread_t thread = current_thread();
+ /* record owner of mutex */
+ ordered_store_mtx_owner(lock, (uintptr_t)thread);
+
+#if MACH_LDEBUG
+ if (thread) {
+ thread->mutex_count++; /* lock statistic */
+ }
+#endif
+
+#if CONFIG_DTRACE
+ LOCKSTAT_RECORD(LS_LCK_MTX_TRY_SPIN_LOCK_ACQUIRE, lock, 0);
+#endif
+ return TRUE;
+
+}
+
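+/*
+ * Routine: lck_mtx_convert_spin
+ *
+ * Converts a mutex held in spin mode by the current thread
+ * into a held full mutex; if there are waiters,
+ * lck_mtx_convert_spin_acquire_tail handles them.
+ * A no-op if the lock is already held as a full mutex.
+ *
+ * Illustrative caller-side sketch only (not from this file):
+ *
+ *	lck_mtx_lock_spin(lck);
+ *	... short critical section ...
+ *	lck_mtx_convert_spin(lck);	// holder may now block while holding lck
+ *	... longer critical section ...
+ *	lck_mtx_unlock(lck);
+ */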
+__attribute__((noinline))
+void
+lck_mtx_convert_spin(
+ lck_mtx_t *lock)
+{
+ uint32_t state;
+
+ state = ordered_load_mtx_state(lock);
+
+ /* Is this an indirect mutex? */
+ if (__improbable(state == LCK_MTX_TAG_INDIRECT)) {
+ /* If so, take indirection */
+ get_indirect_mutex(&lock, &state);
+ }
+
+	assertf((thread_t)lock->lck_mtx_owner == current_thread(), "lock %p not owned by thread %p (current owner %p)", lock, current_thread(), (thread_t)lock->lck_mtx_owner);
+
+ if (__improbable(state & LCK_MTX_MLOCKED_MSK)) {
+ /* already owned as a mutex, just return */
+ return;
+ }
+
+ assert(get_preemption_level() > 0);
+ assert(state & LCK_MTX_ILOCKED_MSK);
+ assert(state & LCK_MTX_SPIN_MSK);
+
+ /*
+ * Check if there are waiters to
+ * inherit their priority.
+ */
+ if (__improbable(state & LCK_MTX_WAITERS_MSK)) {
+ return lck_mtx_convert_spin_acquire_tail(lock);
+ }
+
+ lck_mtx_convert_spin_finish_inline(lock, ordered_load_mtx_state(lock));
+
+ return;
+}
+
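+/*
+ * Attempts to acquire the interlock and the mutex in a single
+ * atomic state update.  On success the owner is recorded and TRUE
+ * is returned with the interlock still held; on failure FALSE is
+ * returned without spinning.
+ */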
+static inline boolean_t
+lck_mtx_lock_grab_mutex(
+ lck_mtx_t *lock)
+{
+ uint32_t state;
+
+ state = ordered_load_mtx_state(lock);
+
+ if (!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_MLOCKED_MSK, &state)) {
+ return FALSE;
+ }
+
+ /* lock and interlock acquired */
+
+ thread_t thread = current_thread();
+ /* record owner of mutex */
+ ordered_store_mtx_owner(lock, (uintptr_t)thread);
+
+#if MACH_LDEBUG
+ if (thread) {
+ thread->mutex_count++; /* lock statistic */
+ }
+#endif
+ return TRUE;
+}
+
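+/*
+ * Routine: lck_mtx_assert
+ *
+ * LCK_MTX_ASSERT_OWNED panics unless the mutex is held by the
+ * current thread; LCK_MTX_ASSERT_NOTOWNED panics if it is.
+ */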
+__attribute__((noinline))
+void
+lck_mtx_assert(
+ lck_mtx_t *lock,
+ unsigned int type)
+{
+ thread_t thread, owner;
+ uint32_t state;
+
+ thread = current_thread();
+ state = ordered_load_mtx_state(lock);
+
+ if (state == LCK_MTX_TAG_INDIRECT) {
+ get_indirect_mutex(&lock, &state);
+ }
+
+ owner = (thread_t)lock->lck_mtx_owner;
+
+ if (type == LCK_MTX_ASSERT_OWNED) {
+ if (owner != thread || !(state & (LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK)))
+ panic("mutex (%p) not owned\n", lock);
+ } else {
+ assert (type == LCK_MTX_ASSERT_NOTOWNED);
+ if (owner == thread)
+ panic("mutex (%p) owned\n", lock);
+ }
+}
+
+/*
+ * Routine: lck_mtx_lock_spinwait_x86
+ *
+ * Invoked trying to acquire a mutex when there is contention but
+ * the holder is running on another processor. We spin for up to a maximum
+ * time waiting for the lock to be released.
+ *
+ * Called with the interlock unlocked.
+ * returns LCK_MTX_SPINWAIT_ACQUIRED if mutex acquired
+ * returns LCK_MTX_SPINWAIT_SPUN if we spun
+ * returns LCK_MTX_SPINWAIT_NO_SPIN if we didn't spin due to the holder not running
+ */
+__attribute__((noinline))
+lck_mtx_spinwait_ret_type_t
+lck_mtx_lock_spinwait_x86(
+ lck_mtx_t *mutex)
+{
+ __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(mutex);
+ thread_t holder;
+ uint64_t overall_deadline;
+ uint64_t check_owner_deadline;
+ uint64_t cur_time;
+ lck_mtx_spinwait_ret_type_t retval = LCK_MTX_SPINWAIT_SPUN;
+ int loopcount = 0;
+
+ KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_START,
+ trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner), mutex->lck_mtx_waiters, 0, 0);
+
+ cur_time = mach_absolute_time();
+ overall_deadline = cur_time + MutexSpin;
check_owner_deadline = cur_time;
	/*
	 * Spin attempting to grab the mutex, periodically checking
	 * whether the holder is still running on another processor,
	 * until the spin deadline expires.
	 */
do {
if (__probable(lck_mtx_lock_grab_mutex(mutex))) {
- retval = 0;
+ retval = LCK_MTX_SPINWAIT_ACQUIRED;
break;
}
cur_time = mach_absolute_time();
if (cur_time >= check_owner_deadline && mutex->lck_mtx_owner) {
boolean_t istate;
- if (lck_mtx_interlock_try_lock(mutex, &istate)) {
+			/*
+			 * We will repeatedly peek at the state of the lock while spinning,
+			 * and we will acquire the interlock to do so.
+			 * The thread that will unlock the mutex will also need to acquire
+			 * the interlock, and we want to avoid slowing it down.
+			 * To avoid taking an interrupt while holding the interlock,
+			 * which would lengthen the time we hold it, we try to acquire
+			 * the interlock with interrupts disabled.
+			 * This is safe because it is a try-lock: if we can't acquire the
+			 * interlock we re-enable interrupts and fail, so it is fine to
+			 * call this even if the interlock is already held.
+			 */
+ if (lck_mtx_interlock_try_lock_disable_interrupts(mutex, &istate)) {
if ((holder = (thread_t) mutex->lck_mtx_owner) != NULL) {
if ( !(holder->machine.specFlags & OnProc) ||
(holder->state & TH_IDLE)) {
- lck_mtx_interlock_unlock(mutex, istate);
+ lck_mtx_interlock_unlock_enable_interrupts(mutex, istate);
if (loopcount == 0)
- retval = 2;
+ retval = LCK_MTX_SPINWAIT_NO_SPIN;
break;
}
}
- lck_mtx_interlock_unlock(mutex, istate);
+ lck_mtx_interlock_unlock_enable_interrupts(mutex, istate);
check_owner_deadline = cur_time + (MutexSpin / 4);
}
* Invoked in order to wait on contention.
*
* Called with the interlock locked and
- * preemption disabled...
+ * preemption disabled...
* returns it unlocked and with preemption enabled
+ *
+ * lck_mtx_waiters is 1:1 with a wakeup needing to occur.
+ * A runnable waiter can exist between wait and acquire
+ * without a waiters count being set.
+ * This allows us to never make a spurious wakeup call.
+ *
+ * Priority:
+ * This avoids taking the thread lock if the owning thread is the same priority.
+ * This optimizes the case of same-priority threads contending on a lock.
+ * However, that allows the owning thread to drop in priority while holding the lock,
+ * because no state visible to the priority-change path indicates that the
+ * targeted thread holds a contended mutex.
+ *
+ * One possible solution: priority changes could look for some atomic tag
+ * on the thread saying 'holding contended lock', and then set up a promotion.
+ * A story is still needed for dropping that promotion: the last contended
+ * unlock would have to notice that this has happened.
*/
+__attribute__((noinline))
void
lck_mtx_lock_wait_x86 (
- lck_mtx_t *mutex)
+ lck_mtx_t *mutex,
+ struct turnstile **ts)
{
- __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(mutex);
- thread_t self = current_thread();
- thread_t holder;
- integer_t priority;
- spl_t s;
+ thread_t self = current_thread();
+
#if CONFIG_DTRACE
- uint64_t sleep_start = 0;
+ uint64_t sleep_start = 0;
if (lockstat_probemap[LS_LCK_MTX_LOCK_BLOCK] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_BLOCK]) {
sleep_start = mach_absolute_time();
}
#endif
- KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START,
- trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner), mutex->lck_mtx_waiters, mutex->lck_mtx_pri, 0);
-
- priority = self->sched_pri;
+ __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(mutex);
- if (priority < self->base_pri)
- priority = self->base_pri;
- if (priority < BASEPRI_DEFAULT)
- priority = BASEPRI_DEFAULT;
-
- /* Do not promote past promotion ceiling */
- priority = MIN(priority, MAXPRI_PROMOTE);
+ KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START,
+ trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner),
+ mutex->lck_mtx_waiters, 0, 0);
- if (mutex->lck_mtx_waiters == 0 || priority > mutex->lck_mtx_pri)
- mutex->lck_mtx_pri = priority;
+ assert(self->waiting_for_mutex == NULL);
+ self->waiting_for_mutex = mutex;
mutex->lck_mtx_waiters++;
- if ( (holder = (thread_t)mutex->lck_mtx_owner) &&
- holder->sched_pri < mutex->lck_mtx_pri ) {
- s = splsched();
- thread_lock(holder);
+ thread_t holder = (thread_t)mutex->lck_mtx_owner;
+ assert(holder != NULL);
- /* holder priority may have been bumped by another thread
- * before thread_lock was taken
- */
- if (holder->sched_pri < mutex->lck_mtx_pri) {
- KERNEL_DEBUG_CONSTANT(
- MACHDBG_CODE(DBG_MACH_SCHED, MACH_PROMOTE) | DBG_FUNC_NONE,
- holder->sched_pri, priority, thread_tid(holder), trace_lck, 0);
- /* Assert that we're not altering the priority of a
- * thread above the MAXPRI_PROMOTE band
- */
- assert(holder->sched_pri < MAXPRI_PROMOTE);
- set_sched_pri(holder, priority);
-
- if (mutex->lck_mtx_promoted == 0) {
- holder->promotions++;
- holder->sched_flags |= TH_SFLAG_PROMOTED;
-
- mutex->lck_mtx_promoted = 1;
- }
- }
- thread_unlock(holder);
- splx(s);
+ /*
+	 * lck_mtx_lock_wait_x86 might be called in a loop. Call turnstile_prepare just once
+	 * and reuse the same turnstile while looping; the matching turnstile_complete will
+	 * be called by lck_mtx_lock_contended when the lock is finally acquired.
+ */
+ if (*ts == NULL) {
+ *ts = turnstile_prepare((uintptr_t)mutex, NULL, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
}
- assert_wait(LCK_MTX_EVENT(mutex), THREAD_UNINT);
+
+ struct turnstile *turnstile = *ts;
+ thread_set_pending_block_hint(self, kThreadWaitKernelMutex);
+ turnstile_update_inheritor(turnstile, holder, (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));
+
+ waitq_assert_wait64(&turnstile->ts_waitq, CAST_EVENT64_T(LCK_MTX_EVENT(mutex)), THREAD_UNINT | THREAD_WAIT_NOREPORT_USER, TIMEOUT_WAIT_FOREVER);
lck_mtx_ilk_unlock(mutex);
+ turnstile_update_inheritor_complete(turnstile, TURNSTILE_INTERLOCK_NOT_HELD);
+
thread_block(THREAD_CONTINUE_NULL);
+ self->waiting_for_mutex = NULL;
+
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_END,
- trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner), mutex->lck_mtx_waiters, mutex->lck_mtx_pri, 0);
+ trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner),
+ mutex->lck_mtx_waiters, 0, 0);
#if CONFIG_DTRACE
/*
return FALSE;
}
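+/*
+ * kdp (debugger) wait-info callback for kernel mutexes: reports the
+ * (unslid) mutex a blocked thread is waiting on and the owning
+ * thread's id.
+ */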
+void
+kdp_lck_mtx_find_owner(__unused struct waitq * waitq, event64_t event, thread_waitinfo_t * waitinfo)
+{
+ lck_mtx_t * mutex = LCK_EVENT_TO_MUTEX(event);
+ waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(mutex);
+ thread_t holder = (thread_t)mutex->lck_mtx_owner;
+ waitinfo->owner = thread_tid(holder);
+}
+
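+/*
+ * kdp wait-info callback for rw locks: recovers the lock from the
+ * wait event based on the blocking type.  rw locks do not track an
+ * owner, so 0 is reported.
+ */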
+void
+kdp_rwlck_find_owner(__unused struct waitq * waitq, event64_t event, thread_waitinfo_t * waitinfo)
+{
+ lck_rw_t *rwlck = NULL;
+	switch (waitinfo->wait_type) {
+ case kThreadWaitKernelRWLockRead:
+ rwlck = READ_EVENT_TO_RWLOCK(event);
+ break;
+ case kThreadWaitKernelRWLockWrite:
+ case kThreadWaitKernelRWLockUpgrade:
+ rwlck = WRITE_EVENT_TO_RWLOCK(event);
+ break;
+ default:
+ panic("%s was called with an invalid blocking type", __FUNCTION__);
+ break;
+ }
+ waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(rwlck);
+ waitinfo->owner = 0;
+}