/*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
* Locking primitives implementation
*/
-#include <mach_kdb.h>
#include <mach_ldebug.h>
-#include <kern/lock.h>
#include <kern/locks.h>
#include <kern/kalloc.h>
#include <kern/misc_protos.h>
#include <kern/debug.h>
#include <string.h>
-#if MACH_KDB
-#include <ddb/db_command.h>
-#include <ddb/db_output.h>
-#include <ddb/db_sym.h>
-#include <ddb/db_print.h>
-#endif /* MACH_KDB */
#include <i386/machine_routines.h> /* machine_timeout_suspended() */
+#include <machine/atomic.h>
#include <machine/machine_cpu.h>
#include <i386/mp.h>
#if CONFIG_DTRACE
#define NEED_DTRACE_DEFS
#include <../bsd/sys/lockstat.h>
+
+#define DTRACE_RW_SHARED 0x0 //reader
+#define DTRACE_RW_EXCL 0x1 //writer
+#define DTRACE_NO_FLAG 0x0 //not applicable
+
#endif
#define LCK_RW_LCK_EXCLUSIVE_CODE 0x100
unsigned int LcksOpts=0;
-/* Forwards */
-
-#if MACH_KDB
-void db_print_simple_lock(
- simple_lock_t addr);
-#endif /* MACH_KDB */
+#if DEVELOPMENT || DEBUG
+unsigned int LckDisablePreemptCheck = 0;
+#endif
+/* Forwards */
#if USLOCK_DEBUG
/*
decl_simple_lock_data(extern , panic_lock)
#endif /* USLOCK_DEBUG */
+extern unsigned int not_in_kdp;
/*
* We often want to know the addresses of the callers
#endif /* lint */
#endif /* USLOCK_DEBUG */
+// Enforce program order of loads and stores. Each macro expands to a relaxed C11 atomic access of *target; _Generic picks the uint32_t or uintptr_t variant from the pointer type of 'target'.
+#define ordered_load(target) _Generic( (target),\
+ uint32_t* : __c11_atomic_load((_Atomic uint32_t* )(target), memory_order_relaxed), \
+ uintptr_t*: __c11_atomic_load((_Atomic uintptr_t*)(target), memory_order_relaxed) )
+#define ordered_store(target, value) _Generic( (target),\
+ uint32_t* : __c11_atomic_store((_Atomic uint32_t* )(target), (value), memory_order_relaxed), \
+ uintptr_t*: __c11_atomic_store((_Atomic uintptr_t*)(target), (value), memory_order_relaxed) )
+
+/*
+ * atomic exchange API is a low level abstraction of the operations
+ * to atomically read, modify, and write a pointer. This abstraction works
+ * for both Intel and ARMv8.1 compare and exchange atomic instructions as
+ * well as the ARM exclusive instructions.
+ *
+ * atomic_exchange_begin() - begin exchange and retrieve current value
+ * atomic_exchange_complete() - conclude an exchange
+ * atomic_exchange_abort() - cancel an exchange started with atomic_exchange_begin()
+ *
+ * atomic_exchange_begin32() is the 32-bit "begin" step: it performs a
+ * relaxed atomic load of *target, stores the loaded value in *previous
+ * and also returns it, so the caller can derive a new value from the
+ * return value while keeping *previous as the value that was observed.
+ * The 'ord' argument is accepted but ignored by this implementation.
+ */
+static uint32_t
+atomic_exchange_begin32(uint32_t *target, uint32_t *previous, enum memory_order ord)
+{
+ uint32_t val;
+
+ (void)ord; // Memory order not used
+ val = __c11_atomic_load((_Atomic uint32_t *)target, memory_order_relaxed);
+ *previous = val; /* remember the observed value for the caller */
+ return val;
+}
+/*
+ * atomic_exchange_complete32: conclude a 32-bit exchange.
+ *
+ * Performs a strong compare-and-exchange on *target: 'newval' is stored
+ * with memory order 'ord' only if *target still equals 'previous'; the
+ * failure ordering is relaxed. Returns the result of the
+ * compare-and-exchange, i.e. non-zero only when the store took place.
+ * 'previous' is passed by value, so the caller's copy is not updated
+ * when the exchange fails.
+ */
+static boolean_t
+atomic_exchange_complete32(uint32_t *target, uint32_t previous, uint32_t newval, enum memory_order ord)
+{
+ return __c11_atomic_compare_exchange_strong((_Atomic uint32_t *)target, &previous, newval, ord, memory_order_relaxed);
+}
+/*
+ * atomic_exchange_abort: cancel an exchange that was begun but will not
+ * be completed. The body is empty in this implementation, so the call
+ * has no effect here.
+ */
+static void
+atomic_exchange_abort(void) { }
+/*
+ * atomic_test_and_set32: atomically OR 'set_mask' into *target, but only
+ * if none of the bits in 'test_mask' are currently set.
+ *
+ * Returns TRUE once the compare-and-exchange that sets the bits succeeds.
+ * Returns FALSE as soon as a value with any 'test_mask' bit set is
+ * observed; in that case 'wait' only selects what is done before
+ * returning (cpu_pause() when TRUE, atomic_exchange_abort() when FALSE),
+ * the function does not retry on the caller's behalf.
+ * The loop repeats only when the compare-and-exchange itself fails.
+ */
+static boolean_t
+atomic_test_and_set32(uint32_t *target, uint32_t test_mask, uint32_t set_mask, enum memory_order ord, boolean_t wait)
+{
+ uint32_t value, prev;
+
+ for ( ; ; ) {
+ value = atomic_exchange_begin32(target, &prev, ord);
+ if (value & test_mask) {
+ if (wait)
+ cpu_pause();
+ else
+ atomic_exchange_abort();
+ return FALSE; /* a tested bit is set: report failure to the caller */
+ }
+ value |= set_mask;
+ if (atomic_exchange_complete32(target, prev, value, ord))
+ return TRUE; /* bits were set by this call */
+ }
+}
/*
* Portable lock package implementation of usimple_locks.
#endif /* USLOCK_DEBUG */
-extern int lck_rw_grab_want(lck_rw_t *lck);
-extern int lck_rw_grab_shared(lck_rw_t *lck);
-extern int lck_rw_held_read_or_upgrade(lck_rw_t *lck);
-
-
/*
* Forward definitions
*/
-void lck_rw_lock_shared_gen(
- lck_rw_t *lck);
-
-void lck_rw_lock_exclusive_gen(
- lck_rw_t *lck);
-
-boolean_t lck_rw_lock_shared_to_exclusive_success(
- lck_rw_t *lck);
-
-boolean_t lck_rw_lock_shared_to_exclusive_failure(
- lck_rw_t *lck,
- int prior_lock_state);
-
-void lck_rw_lock_exclusive_to_shared_gen(
- lck_rw_t *lck,
- int prior_lock_state);
-
-lck_rw_type_t lck_rw_done_gen(
- lck_rw_t *lck,
- int prior_lock_state);
+static void lck_rw_lock_shared_gen(lck_rw_t *lck);
+static void lck_rw_lock_exclusive_gen(lck_rw_t *lck);
+static boolean_t lck_rw_lock_shared_to_exclusive_success(lck_rw_t *lck);
+static boolean_t lck_rw_lock_shared_to_exclusive_failure(lck_rw_t *lck, uint32_t prior_lock_state);
+static void lck_rw_lock_exclusive_to_shared_gen(lck_rw_t *lck, uint32_t prior_lock_state);
+static lck_rw_type_t lck_rw_done_gen(lck_rw_t *lck, uint32_t prior_lock_state);
+void lck_rw_clear_promotions_x86(thread_t thread);
+static boolean_t lck_rw_held_read_or_upgrade(lck_rw_t *lock);
+static boolean_t lck_rw_grab_want(lck_rw_t *lock);
+static boolean_t lck_rw_grab_shared(lck_rw_t *lock);
/*
* Routine: lck_spin_alloc_init
lck_spin_try_lock(
lck_spin_t *lck)
{
- return((boolean_t)usimple_lock_try((usimple_lock_t) lck));
+ boolean_t lrval = (boolean_t)usimple_lock_try((usimple_lock_t) lck);
+#if DEVELOPMENT || DEBUG
+ if (lrval) {
+ pltrace(FALSE);
+ }
+#endif
+ return(lrval);
+}
+
+/*
+ * Routine: lck_spin_assert
+ *
+ * Panic unless the ownership of the spin lock matches 'type'.
+ * The interlock word is read once and interpreted as the holder thread.
+ *	LCK_ASSERT_OWNED    - the holder must be the current thread
+ *	LCK_ASSERT_NOTOWNED - the lock must have no holder
+ * Any other value of 'type' panics before the lock is examined.
+ */
+void
+lck_spin_assert(lck_spin_t *lock, unsigned int type)
+{
+	uintptr_t word;
+	thread_t owner, self;
+
+	if (__improbable(type != LCK_ASSERT_OWNED && type != LCK_ASSERT_NOTOWNED)) {
+		panic("lck_spin_assert(): invalid arg (%u)", type);
+	}
+
+	word = lock->interlock;
+	owner = (thread_t)word;
+	self = current_thread();
+
+	if (type == LCK_ASSERT_OWNED) {
+		if (__improbable(owner == THREAD_NULL)) {
+			panic("Lock not owned %p = %lx", lock, word);
+		}
+		if (__improbable(owner != self)) {
+			panic("Lock not owned by current thread %p = %lx", lock, word);
+		}
+		return;
+	}
+
+	if (type != LCK_ASSERT_NOTOWNED) {
+		return;
+	}
+
+	/* LCK_ASSERT_NOTOWNED: any holder at all is a failure */
+	if (__improbable(owner != THREAD_NULL)) {
+		if (owner == self) {
+			panic("Lock owned by current thread %p = %lx", lock, word);
+		} else {
+			panic("Lock %p owned by thread %p", lock, owner);
+		}
+	}
+}
+
+/*
+ * Routine: kdp_lck_spin_is_acquired
+ * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
+ * Panics when called while not_in_kdp is set.
+ * Returns: TRUE if the lock's interlock word is non-zero, FALSE otherwise.
+ */
+boolean_t
+kdp_lck_spin_is_acquired(lck_spin_t *lck) {
+	boolean_t held;
+
+	if (not_in_kdp) {
+		panic("panic: spinlock acquired check done outside of kernel debugger");
+	}
+
+	if (lck->interlock != 0)
+		held = TRUE;
+	else
+		held = FALSE;
+	return held;
}
/*
volatile uint32_t spinlock_owner_cpu = ~0;
volatile usimple_lock_t spinlock_timed_out;
-static uint32_t spinlock_timeout_NMI(uintptr_t thread_addr) {
- uint64_t deadline;
+uint32_t spinlock_timeout_NMI(uintptr_t thread_addr) {
uint32_t i;
for (i = 0; i < real_ncpus; i++) {
- if ((uintptr_t)cpu_data_ptr[i]->cpu_active_thread == thread_addr) {
+ if ((cpu_data_ptr[i] != NULL) && ((uintptr_t)cpu_data_ptr[i]->cpu_active_thread == thread_addr)) {
spinlock_owner_cpu = i;
- if ((uint32_t) cpu_number() == i)
- break;
- cpu_datap(i)->cpu_NMI_acknowledged = FALSE;
- cpu_NMI_interrupt(i);
- deadline = mach_absolute_time() + (LockTimeOut * 2);
- while (mach_absolute_time() < deadline && cpu_datap(i)->cpu_NMI_acknowledged == FALSE)
- cpu_pause();
+ if ((uint32_t) cpu_number() != i) {
+ /* Cause NMI and panic on the owner's cpu */
+ NMIPI_panic(cpu_to_cpumask(i), SPINLOCK_TIMEOUT);
+ }
break;
}
}
uintptr_t lowner = (uintptr_t)l->interlock.lock_data;
spinlock_timed_out = l;
lock_cpu = spinlock_timeout_NMI(lowner);
- panic("Spinlock acquisition timed out: lock=%p, lock owner thread=0x%lx, current_thread: %p, lock owner active on CPU 0x%x, current owner: 0x%lx", l, lowner, current_thread(), lock_cpu, (uintptr_t)l->interlock.lock_data);
+ panic("Spinlock acquisition timed out: lock=%p, lock owner thread=0x%lx, current_thread: %p, lock owner active on CPU 0x%x, current owner: 0x%lx, time: %llu",
+ l, lowner, current_thread(), lock_cpu, (uintptr_t)l->interlock.lock_data, mach_absolute_time());
}
}
+#if DEVELOPMENT || DEBUG
+ pltrace(FALSE);
+#endif
+
USLDBG(usld_lock_post(l, pc));
#else
simple_lock((simple_lock_t)l);
#endif
+#if CONFIG_DTRACE
+ LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE, l, 0);
+#endif
}
OBTAIN_PC(pc);
USLDBG(usld_unlock(l, pc));
+#if DEVELOPMENT || DEBUG
+ pltrace(TRUE);
+#endif
hw_lock_unlock(&l->interlock);
#else
simple_unlock_rwmb((simple_lock_t)l);
OBTAIN_PC(pc);
USLDBG(usld_lock_try_pre(l, pc));
if ((success = hw_lock_try(&l->interlock))) {
- USLDBG(usld_lock_try_post(l, pc));
+#if DEVELOPMENT || DEBUG
+ pltrace(FALSE);
+#endif
+ USLDBG(usld_lock_try_post(l, pc));
}
return success;
#else
#endif
}
+/*
+ * Acquire a usimple_lock by repeatedly trying it.
+ *
+ * The interrupt-enable state is sampled once on entry. After every
+ * failed attempt, pending TLB flushes are handled if interrupts were
+ * disabled at entry, and the CPU is paused before the next attempt.
+ */
+void
+usimple_lock_try_lock_loop(usimple_lock_t l)
+{
+	boolean_t intr_enabled = ml_get_interrupts_enabled();
+
+	for ( ; ; ) {
+		if (simple_lock_try((l)))
+			break;
+		if (!intr_enabled)
+			handle_pending_TLB_flushes();
+		cpu_pause();
+	}
+}
+
#if USLOCK_DEBUG
/*
* States of a usimple_lock. The default when initializing
usimple_lock_t l,
pc_t pc)
{
- register int mycpu;
+ int mycpu;
char caller[] = "successful usimple_lock";
usimple_lock_t l,
pc_t pc)
{
- register int mycpu;
+ int mycpu;
char caller[] = "usimple_unlock";
usimple_lock_t l,
pc_t pc)
{
- register int mycpu;
+ int mycpu;
char caller[] = "successful usimple_lock_try";
if (!usld_lock_common_checks(l, caller))
#endif /* USLOCK_DEBUG */
-/*
- * Routine: lock_alloc
- * Function:
- * Allocate a lock for external users who cannot
- * hard-code the structure definition into their
- * objects.
- * For now just use kalloc, but a zone is probably
- * warranted.
- */
-lock_t *
-lock_alloc(
- boolean_t can_sleep,
- unsigned short tag,
- unsigned short tag1)
-{
- lock_t *l;
-
- if ((l = (lock_t *)kalloc(sizeof(lock_t))) != 0)
- lock_init(l, can_sleep, tag, tag1);
- return(l);
-}
-
-/*
- * Routine: lock_free
- * Function:
- * Free a lock allocated for external users.
- * For now just use kfree, but a zone is probably
- * warranted.
- */
-void
-lock_free(
- lock_t *l)
-{
- kfree(l, sizeof(lock_t));
-}
-
-
-/*
- * Routine: lock_init
- * Function:
- * Initialize a lock; required before use.
- * Note that clients declare the "struct lock"
- * variables and then initialize them, rather
- * than getting a new one from this module.
- */
-void
-lock_init(
- lock_t *l,
- boolean_t can_sleep,
- __unused unsigned short tag,
- __unused unsigned short tag1)
-{
- hw_lock_byte_init(&l->lck_rw_interlock);
- l->lck_rw_want_write = FALSE;
- l->lck_rw_want_upgrade = FALSE;
- l->lck_rw_shared_count = 0;
- l->lck_rw_can_sleep = can_sleep;
- l->lck_rw_tag = tag;
- l->lck_rw_priv_excl = 1;
- l->lck_r_waiting = l->lck_w_waiting = 0;
-}
-
-
-/*
- * Sleep locks. These use the same data structure and algorithm
- * as the spin locks, but the process sleeps while it is waiting
- * for the lock. These work on uniprocessor systems.
- */
-
-#define DECREMENTER_TIMEOUT 1000000
-
-void
-lock_write(
- register lock_t * l)
-{
- lck_rw_lock_exclusive(l);
-}
-
-void
-lock_done(
- register lock_t * l)
-{
- (void) lck_rw_done(l);
-}
-
-void
-lock_read(
- register lock_t * l)
-{
- lck_rw_lock_shared(l);
-}
-
-
-/*
- * Routine: lock_read_to_write
- * Function:
- * Improves a read-only lock to one with
- * write permission. If another reader has
- * already requested an upgrade to a write lock,
- * no lock is held upon return.
- *
- * Returns FALSE if the upgrade *failed*.
- */
-
-boolean_t
-lock_read_to_write(
- register lock_t * l)
-{
- return lck_rw_lock_shared_to_exclusive(l);
-}
-
-void
-lock_write_to_read(
- register lock_t * l)
-{
- lck_rw_lock_exclusive_to_shared(l);
-}
-
-
-
/*
* Routine: lck_rw_alloc_init
*/
{
if (lck->lck_rw_tag == LCK_RW_TAG_DESTROYED)
return;
+#if MACH_LDEBUG
+ lck_rw_assert(lck, LCK_RW_ASSERT_NOTHELD);
+#endif
lck->lck_rw_tag = LCK_RW_TAG_DESTROYED;
lck_grp_lckcnt_decr(grp, LCK_TYPE_RW);
lck_grp_deallocate(grp);
#define DECREMENTER_TIMEOUT 1000000
-#define RW_LOCK_READER_EVENT(x) \
- ((event_t) (((unsigned char*) (x)) + (offsetof(lck_rw_t, lck_rw_tag))))
-
-#define RW_LOCK_WRITER_EVENT(x) \
- ((event_t) (((unsigned char*) (x)) + (offsetof(lck_rw_t, lck_rw_pad8))))
-
/*
* We disable interrupts while holding the RW interlock to prevent an
* interrupt from exacerbating hold time.
* Hence, local helper functions lck_interlock_lock()/lck_interlock_unlock().
*/
-static boolean_t
+static inline boolean_t
lck_interlock_lock(lck_rw_t *lck)
{
boolean_t istate;
istate = ml_set_interrupts_enabled(FALSE);
hw_lock_byte_lock(&lck->lck_rw_interlock);
-
return istate;
}
-static void
+static inline void
lck_interlock_unlock(lck_rw_t *lck, boolean_t istate)
{
hw_lock_byte_unlock(&lck->lck_rw_interlock);
cpu_pause();
}
+/*
+ * Returns TRUE when the lock word has a non-zero reader count, or has
+ * LCK_RW_INTERLOCK or LCK_RW_WANT_UPGRADE set; FALSE otherwise.
+ */
+static inline boolean_t
+lck_rw_held_read_or_upgrade(lck_rw_t *lock)
+{
+	uint32_t busy_bits;
+
+	busy_bits = ordered_load(&lock->data) & (LCK_RW_SHARED_MASK | LCK_RW_INTERLOCK | LCK_RW_WANT_UPGRADE);
+	return busy_bits ? TRUE : FALSE;
+}
/*
* compute the deadline to spin against when
}
+/*
+ * Busy-wait until LCK_RW_INTERLOCK is no longer set in the lock word,
+ * pausing the CPU between polls.
+ */
+
+static inline void
+lck_rw_interlock_spin(lck_rw_t *lock)
+{
+	for ( ; ; ) {
+		if ((ordered_load(&lock->data) & LCK_RW_INTERLOCK) == 0)
+			return;
+		cpu_pause();
+	}
+}
+/*
+ * lck_rw_grab_want: make one attempt to set LCK_RW_WANT_WRITE.
+ *
+ * Spins until a lock word without LCK_RW_INTERLOCK is observed.
+ * Returns FALSE if LCK_RW_WANT_WRITE is already set in that word, or if
+ * the compare-and-exchange that sets it fails; returns TRUE when this
+ * call set the bit. The exchange is not retried here.
+ */
+static boolean_t
+lck_rw_grab_want(lck_rw_t *lock)
+{
+ uint32_t data, prev;
+
+ for ( ; ; ) {
+ data = atomic_exchange_begin32(&lock->data, &prev, memory_order_relaxed);
+ if ((data & LCK_RW_INTERLOCK) == 0)
+ break;
+ atomic_exchange_abort();
+ lck_rw_interlock_spin(lock); /* wait for the interlock bit to clear, then re-read */
+ }
+ if (data & LCK_RW_WANT_WRITE) {
+ atomic_exchange_abort();
+ return FALSE; /* the bit is already set */
+ }
+ data |= LCK_RW_WANT_WRITE;
+ return atomic_exchange_complete32(&lock->data, prev, data, memory_order_relaxed);
+}
+/*
+ * lck_rw_grab_shared: make one attempt to add a reader reference.
+ *
+ * Spins until a lock word without LCK_RW_INTERLOCK is observed.
+ * If LCK_RW_WANT_WRITE or LCK_RW_WANT_UPGRADE is set, the attempt is
+ * refused (FALSE) when the reader count is zero or LCK_RW_PRIV_EXCL is
+ * set. Otherwise the reader count is incremented with a single
+ * compare-and-exchange whose result is returned; it is not retried here.
+ */
+static boolean_t
+lck_rw_grab_shared(lck_rw_t *lock)
+{
+ uint32_t data, prev;
+
+ for ( ; ; ) {
+ data = atomic_exchange_begin32(&lock->data, &prev, memory_order_acquire_smp);
+ if ((data & LCK_RW_INTERLOCK) == 0)
+ break;
+ atomic_exchange_abort();
+ lck_rw_interlock_spin(lock); /* wait for the interlock bit to clear, then re-read */
+ }
+ if (data & (LCK_RW_WANT_WRITE | LCK_RW_WANT_UPGRADE)) {
+ if (((data & LCK_RW_SHARED_MASK) == 0) || (data & LCK_RW_PRIV_EXCL)) {
+ atomic_exchange_abort();
+ return FALSE;
+ }
+ }
+ data += LCK_RW_SHARED_READER; /* one more reader */
+ return atomic_exchange_complete32(&lock->data, prev, data, memory_order_acquire_smp);
+}
+
/*
* Routine: lck_rw_lock_exclusive
*/
-void
+static void
lck_rw_lock_exclusive_gen(
lck_rw_t *lck)
{
+ __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
uint64_t deadline = 0;
int slept = 0;
int gotlock = 0;
deadline = lck_rw_deadline_for_spin(lck);
- KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0);
+ KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
while (((gotlock = lck_rw_grab_want(lck)) == 0) && mach_absolute_time() < deadline)
lck_rw_lock_pause(istate);
- KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_END, (int)lck, 0, 0, gotlock, 0);
+ KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_END, trace_lck, 0, 0, gotlock, 0);
if (gotlock)
break;
if (lck->lck_rw_want_write) {
- KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0);
+ KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
lck->lck_w_waiting = TRUE;
+ thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite);
res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT);
lck_interlock_unlock(lck, istate);
res = thread_block(THREAD_CONTINUE_NULL);
slept++;
}
- KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_END, (int)lck, res, slept, 0, 0);
+ KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_END, trace_lck, res, slept, 0, 0);
} else {
lck->lck_rw_want_write = TRUE;
lck_interlock_unlock(lck, istate);
deadline = lck_rw_deadline_for_spin(lck);
- KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0);
+ KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
while ((lockheld = lck_rw_held_read_or_upgrade(lck)) && mach_absolute_time() < deadline)
lck_rw_lock_pause(istate);
- KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_END, (int)lck, 0, 0, lockheld, 0);
+ KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_END, trace_lck, 0, 0, lockheld, 0);
if ( !lockheld)
break;
istate = lck_interlock_lock(lck);
if (lck->lck_rw_shared_count != 0 || lck->lck_rw_want_upgrade) {
- KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0);
+ KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
lck->lck_w_waiting = TRUE;
+ thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite);
res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT);
lck_interlock_unlock(lck, istate);
res = thread_block(THREAD_CONTINUE_NULL);
slept++;
}
- KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_END, (int)lck, res, slept, 0, 0);
+ KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_END, trace_lck, res, slept, 0, 0);
} else {
lck_interlock_unlock(lck, istate);
/*
#endif
}
+/*
+ * Routine: lck_rw_done
+ *
+ * Release one hold on the lock, in whichever mode it is held, by
+ * rewriting the lock word with a compare-and-exchange that is retried
+ * until it succeeds:
+ *  - reader count non-zero: drop one reader reference;
+ *  - reader count zero: clear LCK_RW_WANT_UPGRADE if it is set, otherwise
+ *    clear LCK_RW_WANT_EXCL if LCK_RW_WANT_WRITE is set, otherwise panic.
+ * When the reader count drops to zero, or an exclusive hold is released,
+ * the waiter bits are cleared as well (check_waiters below); a release
+ * that leaves readers behind does not touch them.
+ * The lock word as it was before the update is then handed to
+ * lck_rw_done_gen(), whose result is returned.
+ */
+
+lck_rw_type_t lck_rw_done(lck_rw_t *lock)
+{
+ uint32_t data, prev;
+
+ for ( ; ; ) {
+ data = atomic_exchange_begin32(&lock->data, &prev, memory_order_release_smp);
+ if (data & LCK_RW_INTERLOCK) { /* wait for interlock to clear */
+ atomic_exchange_abort();
+ lck_rw_interlock_spin(lock);
+ continue;
+ }
+ if (data & LCK_RW_SHARED_MASK) {
+ data -= LCK_RW_SHARED_READER;
+ if ((data & LCK_RW_SHARED_MASK) == 0) /* if reader count has now gone to 0, check for waiters */
+ goto check_waiters;
+ } else { /* if reader count == 0, must be exclusive lock */
+ if (data & LCK_RW_WANT_UPGRADE) {
+ data &= ~(LCK_RW_WANT_UPGRADE);
+ } else {
+ if (data & LCK_RW_WANT_WRITE)
+ data &= ~(LCK_RW_WANT_EXCL);
+ else /* lock is not 'owned', panic */
+ panic("Releasing non-exclusive RW lock without a reader refcount!");
+ }
+check_waiters: /* also entered by goto from the last-reader case above */
+ if (prev & LCK_RW_W_WAITING) {
+ data &= ~(LCK_RW_W_WAITING);
+ if ((prev & LCK_RW_PRIV_EXCL) == 0) /* reader-waiting bit is cleared too only without PRIV_EXCL */
+ data &= ~(LCK_RW_R_WAITING);
+ } else
+ data &= ~(LCK_RW_R_WAITING);
+ }
+ if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_release_smp))
+ break;
+ cpu_pause(); /* lock word changed underneath us: retry */
+ }
+ return lck_rw_done_gen(lock, prev);
+}
/*
* Routine: lck_rw_done_gen
*
- * called from the assembly language wrapper...
+ * called from lck_rw_done()
* prior_lock_state is the value in the 1st
* word of the lock at the time of a successful
* atomic compare and exchange with the new value...
* this by examining the state of the lock before
* we changed it
*/
-lck_rw_type_t
+static lck_rw_type_t
lck_rw_done_gen(
lck_rw_t *lck,
- int prior_lock_state)
+ uint32_t prior_lock_state)
{
lck_rw_t *fake_lck;
lck_rw_type_t lock_type;
+ thread_t thread;
+ uint32_t rwlock_count;
/*
* prior_lock state is a snapshot of the 1st word of the
else
lock_type = LCK_RW_TYPE_EXCLUSIVE;
+ /* Check if dropping the lock means that we need to unpromote */
+ thread = current_thread();
+ rwlock_count = thread->rwlock_count--;
+#if MACH_LDEBUG
+ if (rwlock_count == 0) {
+ panic("rw lock count underflow for thread %p", thread);
+ }
+#endif
+ if ((rwlock_count == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
+ /* sched_flags checked without lock, but will be rechecked while clearing */
+ lck_rw_clear_promotion(thread);
+ }
+
#if CONFIG_DTRACE
LOCKSTAT_RECORD(LS_LCK_RW_DONE_RELEASE, lck, lock_type == LCK_RW_TYPE_SHARED ? 0 : 1);
#endif
{
lck_rw_type_t ret;
+ assertf(lck->lck_rw_shared_count > 0, "lck %p has shared_count=0x%x", lck, lck->lck_rw_shared_count);
ret = lck_rw_done(lck);
if (ret != LCK_RW_TYPE_SHARED)
- panic("lck_rw_unlock(): lock held in mode: %d\n", ret);
+ panic("lck_rw_unlock_shared(): lock %p held in mode: %d\n", lck, ret);
}
panic("lck_rw_lock(): Invalid RW lock type: %x\n", lck_rw_type);
}
+/*
+ * Routine: lck_rw_lock_shared
+ *
+ * Acquire the lock in shared mode.
+ * The current thread's rwlock_count is incremented before the attempt.
+ * Fast path: while none of LCK_RW_WANT_EXCL, LCK_RW_WANT_UPGRADE or
+ * LCK_RW_INTERLOCK is set, add one reader with a compare-and-exchange,
+ * retrying if the exchange fails. If any of those bits is observed,
+ * the acquisition is handed to lck_rw_lock_shared_gen() instead.
+ */
+void
+lck_rw_lock_shared(lck_rw_t *lock)
+{
+ uint32_t data, prev;
+
+ current_thread()->rwlock_count++;
+ for ( ; ; ) {
+ data = atomic_exchange_begin32(&lock->data, &prev, memory_order_acquire_smp);
+ if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK)) {
+ atomic_exchange_abort();
+ lck_rw_lock_shared_gen(lock); /* slow path */
+ break;
+ }
+ data += LCK_RW_SHARED_READER;
+ if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_acquire_smp))
+ break;
+ cpu_pause(); /* exchange failed: retry with a fresh value */
+ }
+#if CONFIG_DTRACE
+ LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED);
+#endif /* CONFIG_DTRACE */
+ return;
+}
/*
* Routine: lck_rw_lock_shared_gen
* is held exclusively... this is where we spin/block
* until we can acquire the lock in the shared mode
*/
-void
+static void
lck_rw_lock_shared_gen(
lck_rw_t *lck)
{
+ __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
uint64_t deadline = 0;
int gotlock = 0;
int slept = 0;
wait_result_t res = 0;
boolean_t istate = -1;
-
+
#if CONFIG_DTRACE
uint64_t wait_interval = 0;
int readers_at_sleep = 0;
deadline = lck_rw_deadline_for_spin(lck);
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_START,
- (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, 0, 0);
+ trace_lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, 0, 0);
while (((gotlock = lck_rw_grab_shared(lck)) == 0) && mach_absolute_time() < deadline)
lck_rw_lock_pause(istate);
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_END,
- (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, gotlock, 0);
+ trace_lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, gotlock, 0);
if (gotlock)
break;
((lck->lck_rw_shared_count == 0) || lck->lck_rw_priv_excl)) {
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_START,
- (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, 0, 0);
+ trace_lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, 0, 0);
lck->lck_r_waiting = TRUE;
+ thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockRead);
res = assert_wait(RW_LOCK_READER_EVENT(lck), THREAD_UNINT);
lck_interlock_unlock(lck, istate);
slept++;
}
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_END,
- (int)lck, res, slept, 0, 0);
+ trace_lck, res, slept, 0, 0);
} else {
lck->lck_rw_shared_count++;
lck_interlock_unlock(lck, istate);
}
+/*
+ * Routine: lck_rw_lock_exclusive
+ *
+ * Acquire the lock in exclusive mode.
+ * The current thread's rwlock_count is incremented before the attempt.
+ * Fast path: atomic_test_and_set32() sets LCK_RW_WANT_EXCL provided the
+ * reader count is zero and none of LCK_RW_WANT_EXCL, LCK_RW_WANT_UPGRADE
+ * or LCK_RW_INTERLOCK is set. If that attempt reports failure, the
+ * acquisition is handed to lck_rw_lock_exclusive_gen().
+ */
+
+void
+lck_rw_lock_exclusive(lck_rw_t *lock)
+{
+ current_thread()->rwlock_count++;
+ if (atomic_test_and_set32(&lock->data,
+ (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK),
+ LCK_RW_WANT_EXCL, memory_order_acquire_smp, FALSE)) {
+#if CONFIG_DTRACE
+ LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
+#endif /* CONFIG_DTRACE */
+ } else
+ lck_rw_lock_exclusive_gen(lock); /* slow path */
+}
+
+
+/*
+ * Routine: lck_rw_lock_shared_to_exclusive
+ *
+ * Try to upgrade a shared hold to an exclusive one.
+ * In both outcomes the caller's reader reference is dropped by the
+ * compare-and-exchange:
+ *  - LCK_RW_WANT_UPGRADE already set: the reader reference is dropped
+ *    (clearing LCK_RW_W_WAITING if this was the last reader) and the
+ *    result of lck_rw_lock_shared_to_exclusive_failure() is returned.
+ *  - otherwise: LCK_RW_WANT_UPGRADE is set; if readers remain in the
+ *    value that was written, lck_rw_lock_shared_to_exclusive_success()
+ *    is called, and TRUE is returned.
+ */
+
+boolean_t
+lck_rw_lock_shared_to_exclusive(lck_rw_t *lock)
+{
+ uint32_t data, prev;
+
+ for ( ; ; ) {
+ data = atomic_exchange_begin32(&lock->data, &prev, memory_order_acquire_smp);
+ if (data & LCK_RW_INTERLOCK) {
+ atomic_exchange_abort();
+ lck_rw_interlock_spin(lock);
+ continue;
+ }
+ if (data & LCK_RW_WANT_UPGRADE) {
+ data -= LCK_RW_SHARED_READER;
+ if ((data & LCK_RW_SHARED_MASK) == 0) /* we were the last reader */
+ data &= ~(LCK_RW_W_WAITING); /* so clear the wait indicator */
+ if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_acquire_smp))
+ return lck_rw_lock_shared_to_exclusive_failure(lock, prev);
+ } else {
+ data |= LCK_RW_WANT_UPGRADE; /* ask for WANT_UPGRADE */
+ data -= LCK_RW_SHARED_READER; /* and shed our read count */
+ if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_acquire_smp))
+ break;
+ }
+ cpu_pause(); /* exchange failed: retry with a fresh value */
+ }
+ /* we now own the WANT_UPGRADE */
+ if (data & LCK_RW_SHARED_MASK) /* check to see if all of the readers are drained */
+ lck_rw_lock_shared_to_exclusive_success(lock); /* if not, we need to go wait */
+#if CONFIG_DTRACE
+ LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lock, 0);
+#endif
+ return TRUE;
+}
+
+
/*
* Routine: lck_rw_lock_shared_to_exclusive_failure
* Function:
* if 'lck_rw_shared_count' == 0, its also already dropped 'lck_w_waiting'
* all we need to do here is determine if a wakeup is needed
*/
-boolean_t
+static boolean_t
lck_rw_lock_shared_to_exclusive_failure(
lck_rw_t *lck,
- int prior_lock_state)
+ uint32_t prior_lock_state)
{
lck_rw_t *fake_lck;
-
- /*
- * prior_lock state is a snapshot of the 1st word of the
- * lock in question... we'll fake up a pointer to it
- * and carefully not access anything beyond whats defined
- * in the first word of a lck_rw_t
- */
+ thread_t thread = current_thread();
+ uint32_t rwlock_count;
+
+ /* Check if dropping the lock means that we need to unpromote */
+ rwlock_count = thread->rwlock_count--;
+#if MACH_LDEBUG
+ if (rwlock_count == 0) {
+ panic("rw lock count underflow for thread %p", thread);
+ }
+#endif
fake_lck = (lck_rw_t *)&prior_lock_state;
if (fake_lck->lck_w_waiting && fake_lck->lck_rw_shared_count == 1) {
*/
thread_wakeup(RW_LOCK_WRITER_EVENT(lck));
}
+
+ if ((rwlock_count == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
+ /* sched_flags checked without lock, but will be rechecked while clearing */
+ lck_rw_clear_promotion(thread);
+ }
+
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_NONE,
- (int)lck, lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0);
+ VM_KERNEL_UNSLIDE_OR_PERM(lck), lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0);
return (FALSE);
}
* we just need to wait for the rest of the readers to drain
* and then we can return as the exclusive holder of this lock
*/
-boolean_t
+static boolean_t
lck_rw_lock_shared_to_exclusive_success(
lck_rw_t *lck)
{
+ __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
uint64_t deadline = 0;
int slept = 0;
int still_shared = 0;
deadline = lck_rw_deadline_for_spin(lck);
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_START,
- (int)lck, lck->lck_rw_shared_count, 0, 0, 0);
+ trace_lck, lck->lck_rw_shared_count, 0, 0, 0);
while ((still_shared = lck->lck_rw_shared_count) && mach_absolute_time() < deadline)
lck_rw_lock_pause(istate);
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_END,
- (int)lck, lck->lck_rw_shared_count, 0, 0, 0);
+ trace_lck, lck->lck_rw_shared_count, 0, 0, 0);
if ( !still_shared)
break;
if (lck->lck_rw_shared_count != 0) {
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_START,
- (int)lck, lck->lck_rw_shared_count, 0, 0, 0);
+ trace_lck, lck->lck_rw_shared_count, 0, 0, 0);
lck->lck_w_waiting = TRUE;
+ thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockUpgrade);
res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT);
lck_interlock_unlock(lck, istate);
slept++;
}
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_END,
- (int)lck, res, slept, 0, 0);
+ trace_lck, res, slept, 0, 0);
} else {
lck_interlock_unlock(lck, istate);
break;
return (TRUE);
}
+/*
+ * Routine: lck_rw_lock_exclusive_to_shared
+ *
+ * Downgrade an exclusive hold to a shared one.
+ * With a compare-and-exchange retried until it succeeds, the lock word
+ * gains one reader reference and loses LCK_RW_WANT_UPGRADE if that is
+ * set, otherwise LCK_RW_WANT_EXCL. LCK_RW_W_WAITING is cleared unless
+ * both LCK_RW_W_WAITING and LCK_RW_PRIV_EXCL were set beforehand.
+ * The lock word as it was before the update is then passed to
+ * lck_rw_lock_exclusive_to_shared_gen().
+ */
+
+void lck_rw_lock_exclusive_to_shared(lck_rw_t *lock)
+{
+ uint32_t data, prev;
+
+ for ( ; ; ) {
+ data = atomic_exchange_begin32(&lock->data, &prev, memory_order_release_smp);
+ if (data & LCK_RW_INTERLOCK) {
+ atomic_exchange_abort();
+ lck_rw_interlock_spin(lock); /* wait for interlock to clear */
+ continue;
+ }
+ data += LCK_RW_SHARED_READER;
+ if (data & LCK_RW_WANT_UPGRADE)
+ data &= ~(LCK_RW_WANT_UPGRADE);
+ else
+ data &= ~(LCK_RW_WANT_EXCL);
+ if (!((prev & LCK_RW_W_WAITING) && (prev & LCK_RW_PRIV_EXCL)))
+ data &= ~(LCK_RW_W_WAITING);
+ if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_release_smp))
+ break;
+ cpu_pause(); /* exchange failed: retry with a fresh value */
+ }
+ return lck_rw_lock_exclusive_to_shared_gen(lock, prev);
+}
+
/*
- * Routine: lck_rw_lock_exclusive_to_shared
+ * Routine: lck_rw_lock_exclusive_to_shared_gen
* Function:
* assembly fast path has already dropped
* our exclusive state and bumped lck_rw_shared_count
* all we need to do here is determine if anyone
* needs to be awakened.
*/
-void
+static void
lck_rw_lock_exclusive_to_shared_gen(
lck_rw_t *lck,
- int prior_lock_state)
+ uint32_t prior_lock_state)
{
- lck_rw_t *fake_lck;
+ __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
+ lck_rw_t *fake_lck;
- /*
- * prior_lock state is a snapshot of the 1st word of the
- * lock in question... we'll fake up a pointer to it
- * and carefully not access anything beyond whats defined
- * in the first word of a lck_rw_t
- */
fake_lck = (lck_rw_t *)&prior_lock_state;
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_START,
- (int)lck, fake_lck->lck_rw_want_write, fake_lck->lck_rw_want_upgrade, 0, 0);
+ trace_lck, fake_lck->lck_rw_want_write, fake_lck->lck_rw_want_upgrade, 0, 0);
/*
* don't wake up anyone waiting to take the lock exclusively
thread_wakeup(RW_LOCK_READER_EVENT(lck));
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_END,
- (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, lck->lck_rw_shared_count, 0);
+ trace_lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, lck->lck_rw_shared_count, 0);
#if CONFIG_DTRACE
LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_TO_SHARED_DOWNGRADE, lck, 0);
return(FALSE);
}
+/*
+ * Routine: lck_rw_try_lock_shared
+ *
+ * Attempt to take the lock in shared mode without blocking on a holder.
+ * Spins only while LCK_RW_INTERLOCK is set or the compare-and-exchange
+ * fails. Returns FALSE if LCK_RW_WANT_EXCL or LCK_RW_WANT_UPGRADE is
+ * observed. On success one reader reference has been added, the current
+ * thread's rwlock_count has been incremented, and TRUE is returned.
+ */
+
+boolean_t lck_rw_try_lock_shared(lck_rw_t *lock)
+{
+ uint32_t data, prev;
+
+ for ( ; ; ) {
+ data = atomic_exchange_begin32(&lock->data, &prev, memory_order_acquire_smp);
+ if (data & LCK_RW_INTERLOCK) {
+ atomic_exchange_abort();
+ lck_rw_interlock_spin(lock);
+ continue;
+ }
+ if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) {
+ atomic_exchange_abort();
+ return FALSE; /* lock is busy */
+ }
+ data += LCK_RW_SHARED_READER; /* Increment reader refcount */
+ if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_acquire_smp))
+ break;
+ cpu_pause(); /* exchange failed: retry with a fresh value */
+ }
+ current_thread()->rwlock_count++;
+ /* There is a 3 instr window where preemption may not notice rwlock_count after cmpxchg */
+#if CONFIG_DTRACE
+ LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED);
+#endif /* CONFIG_DTRACE */
+ return TRUE;
+}
+
+
+/*
+ * Routine: lck_rw_try_lock_exclusive
+ *
+ * Attempt to take the lock in exclusive mode without blocking on a
+ * holder. Spins only while LCK_RW_INTERLOCK is set or the
+ * compare-and-exchange fails. Returns FALSE if the reader count is
+ * non-zero or LCK_RW_WANT_EXCL / LCK_RW_WANT_UPGRADE is observed.
+ * On success LCK_RW_WANT_EXCL has been set, the current thread's
+ * rwlock_count has been incremented, and TRUE is returned.
+ */
+
+boolean_t lck_rw_try_lock_exclusive(lck_rw_t *lock)
+{
+ uint32_t data, prev;
+
+ for ( ; ; ) {
+ data = atomic_exchange_begin32(&lock->data, &prev, memory_order_acquire_smp);
+ if (data & LCK_RW_INTERLOCK) {
+ atomic_exchange_abort();
+ lck_rw_interlock_spin(lock);
+ continue;
+ }
+ if (data & (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) {
+ atomic_exchange_abort();
+ return FALSE; /* can't get it */
+ }
+ data |= LCK_RW_WANT_EXCL;
+ if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_acquire_smp))
+ break;
+ cpu_pause(); /* exchange failed: retry with a fresh value */
+ }
+
+ current_thread()->rwlock_count++;
+#if CONFIG_DTRACE
+ LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
+#endif /* CONFIG_DTRACE */
+ return TRUE;
+}
+
void
lck_rw_assert(
return;
}
break;
+ case LCK_RW_ASSERT_NOTHELD:
+ if (!(lck->lck_rw_want_write ||
+ lck->lck_rw_want_upgrade ||
+ lck->lck_rw_shared_count != 0)) {
+ return;
+ }
+ break;
default:
break;
}
- panic("rw lock (%p) not held (mode=%u), first word %08x\n", lck, type, *(uint32_t *)lck);
+ panic("rw lock (%p)%s held (mode=%u), first word %08x\n", lck, (type == LCK_RW_ASSERT_NOTHELD ? "" : " not"), type, *(uint32_t *)lck);
+}
+
+/*
+ * On return to userspace, this routine is called if the rwlock_count is
+ * somehow imbalanced. With MACH_LDEBUG the imbalance is fatal; otherwise
+ * the count is reset to zero and lck_rw_clear_promotion() is called.
+ */
+void
+lck_rw_clear_promotions_x86(thread_t thread)
+{
+#if !MACH_LDEBUG
+	/* Paper over the issue */
+	thread->rwlock_count = 0;
+	lck_rw_clear_promotion(thread);
+#else
+	/* It's fatal to leave a RW lock locked and return to userspace */
+	panic("%u rw lock(s) held on return to userspace for thread %p", thread->rwlock_count, thread);
+#endif
+}
+
+/*
+ * Briefly give up a lock held in shared mode.
+ *
+ * Asserts that the lock is held shared. If a writer or upgrader has
+ * announced itself on the lock, or 'force_yield' is set, the shared hold
+ * is dropped, mutex_pause(2) is called, and the lock is re-taken shared.
+ * Returns TRUE if the lock was dropped and re-taken, FALSE if it was
+ * left untouched.
+ */
+boolean_t
+lck_rw_lock_yield_shared(lck_rw_t *lck, boolean_t force_yield)
+{
+	lck_rw_assert(lck, LCK_RW_ASSERT_SHARED);
+
+	if (!(lck->lck_rw_want_write || lck->lck_rw_want_upgrade || force_yield)) {
+		return FALSE;
+	}
+
+	lck_rw_unlock_shared(lck);
+	mutex_pause(2);
+	lck_rw_lock_shared(lck);
+	return TRUE;
+}
+
+/*
+ * Routine: kdp_lck_rw_lock_is_acquired_exclusive
+ * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
+ * Panics when called while not_in_kdp is set.
+ * Returns: TRUE if want_upgrade or want_write is set and the reader
+ * count is zero, FALSE otherwise.
+ */
+boolean_t
+kdp_lck_rw_lock_is_acquired_exclusive(lck_rw_t *lck) {
+	if (not_in_kdp) {
+		panic("panic: rw lock exclusive check done outside of kernel debugger");
+	}
+
+	if (!(lck->lck_rw_want_upgrade || lck->lck_rw_want_write)) {
+		return FALSE;
+	}
+	return (lck->lck_rw_shared_count == 0) ? TRUE : FALSE;
}
+
#ifdef MUTEX_ZONE
extern zone_t lck_mtx_zone;
#endif
lck->lck_mtx_attr |= LCK_MTX_ATTR_STAT;
lck->lck_mtx.lck_mtx_is_ext = 1;
-#if defined(__x86_64__)
- lck->lck_mtx.lck_mtx_sw.lck_mtxd.lck_mtxd_pad32 = 0xFFFFFFFF;
-#endif
+ lck->lck_mtx.lck_mtx_pad32 = 0xFFFFFFFF;
}
/*
lck->lck_mtx_owner = 0;
lck->lck_mtx_state = 0;
}
-#if defined(__x86_64__)
- lck->lck_mtx_sw.lck_mtxd.lck_mtxd_pad32 = 0xFFFFFFFF;
-#endif
+ lck->lck_mtx_pad32 = 0xFFFFFFFF;
lck_grp_reference(grp);
lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
}
lck->lck_mtx_owner = 0;
lck->lck_mtx_state = 0;
}
-#if defined(__x86_64__)
- lck->lck_mtx_sw.lck_mtxd.lck_mtxd_pad32 = 0xFFFFFFFF;
-#endif
+ lck->lck_mtx_pad32 = 0xFFFFFFFF;
lck_grp_reference(grp);
lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
if (lck->lck_mtx_tag == LCK_MTX_TAG_DESTROYED)
return;
+#if MACH_LDEBUG
+ lck_mtx_assert(lck, LCK_MTX_ASSERT_NOTOWNED);
+#endif
lck_is_indirect = (lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT);
lck_mtx_lock_mark_destroyed(lck);
lck_mtx_t *mutex,
int prior_lock_state)
{
- lck_mtx_t fake_lck;
+ __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(mutex);
+ lck_mtx_t fake_lck;
/*
* prior_lock state is a snapshot of the 2nd word of the
fake_lck.lck_mtx_state = prior_lock_state;
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_START,
- mutex, fake_lck.lck_mtx_promoted, fake_lck.lck_mtx_waiters, fake_lck.lck_mtx_pri, 0);
+ trace_lck, fake_lck.lck_mtx_promoted, fake_lck.lck_mtx_waiters, fake_lck.lck_mtx_pri, 0);
if (__probable(fake_lck.lck_mtx_waiters)) {
-
if (fake_lck.lck_mtx_waiters > 1)
- thread_wakeup_one_with_pri((event_t)(((unsigned int*)mutex)+(sizeof(lck_mtx_t)-1)/sizeof(unsigned int)), fake_lck.lck_mtx_pri);
+ thread_wakeup_one_with_pri(LCK_MTX_EVENT(mutex), fake_lck.lck_mtx_pri);
else
- thread_wakeup_one((event_t)(((unsigned int*)mutex)+(sizeof(lck_mtx_t)-1)/sizeof(unsigned int)));
+ thread_wakeup_one(LCK_MTX_EVENT(mutex));
}
if (__improbable(fake_lck.lck_mtx_promoted)) {
thread->sched_flags &= ~TH_SFLAG_PROMOTED;
- if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
+ if (thread->sched_flags & TH_SFLAG_RW_PROMOTED) {
+ /* Thread still has a RW lock promotion */
+ } else if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_DEMOTE) | DBG_FUNC_NONE,
- thread->sched_pri, DEPRESSPRI, 0, mutex, 0);
+ thread->sched_pri, DEPRESSPRI, 0, trace_lck, 0);
set_sched_pri(thread, DEPRESSPRI);
}
else {
- if (thread->priority < thread->sched_pri) {
+ if (thread->base_pri < thread->sched_pri) {
KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_DEMOTE) | DBG_FUNC_NONE,
- thread->sched_pri, thread->priority, 0, mutex, 0);
+ thread->sched_pri, thread->base_pri, 0, trace_lck, 0);
- SCHED(compute_priority)(thread, FALSE);
+ thread_recompute_sched_pri(thread, FALSE);
}
}
}
}
}
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_END,
- mutex, 0, mutex->lck_mtx_waiters, 0, 0);
+ trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);
}
lck_mtx_lock_acquire_x86(
lck_mtx_t *mutex)
{
- thread_t thread;
- integer_t priority;
- spl_t s;
+ __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(mutex);
+ thread_t thread;
+ integer_t priority;
+ spl_t s;
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_ACQUIRE_CODE) | DBG_FUNC_START,
- mutex, thread->was_promoted_on_wakeup, mutex->lck_mtx_waiters, mutex->lck_mtx_pri, 0);
+ trace_lck, thread->was_promoted_on_wakeup, mutex->lck_mtx_waiters, mutex->lck_mtx_pri, 0);
if (mutex->lck_mtx_waiters)
priority = mutex->lck_mtx_pri;
if (thread->sched_pri < priority || thread->was_promoted_on_wakeup) {
KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_PROMOTE) | DBG_FUNC_NONE,
- thread->sched_pri, priority, thread->was_promoted_on_wakeup, mutex, 0);
+ thread->sched_pri, priority, thread->was_promoted_on_wakeup, trace_lck, 0);
s = splsched();
thread_lock(thread);
- if (thread->sched_pri < priority)
+ if (thread->sched_pri < priority) {
+ /* Do not promote past promotion ceiling */
+ assert(priority <= MAXPRI_PROMOTE);
set_sched_pri(thread, priority);
-
+ }
if (mutex->lck_mtx_promoted == 0) {
mutex->lck_mtx_promoted = 1;
splx(s);
}
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_ACQUIRE_CODE) | DBG_FUNC_END,
- mutex, 0, mutex->lck_mtx_waiters, 0, 0);
+ trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);
+}
+
+
+/*
+ * Routine: lck_mtx_interlock_try_lock
+ *
+ * Calls ml_set_interrupts_enabled(FALSE), saving its return value
+ * (presumably the previous interrupt state) in *istate, then makes a
+ * single attempt at the mutex interlock via lck_mtx_ilk_try_lock().
+ *
+ * Returns: the result of lck_mtx_ilk_try_lock(). When that is 0 the
+ * saved state is handed back to ml_set_interrupts_enabled()
+ * before returning. Otherwise interrupts are left as set
+ * here, and the caller later passes *istate to
+ * lck_mtx_interlock_unlock().
+ */
+static int
+lck_mtx_interlock_try_lock(lck_mtx_t *mutex, boolean_t *istate)
+{
+ int retval;
+
+ *istate = ml_set_interrupts_enabled(FALSE);
+ retval = lck_mtx_ilk_try_lock(mutex);
+
+ /* Attempt failed: undo the interrupt state change right away */
+ if (retval == 0)
+ ml_set_interrupts_enabled(*istate);
+
+ return retval;
}
+/*
+ * Routine: lck_mtx_interlock_unlock
+ *
+ * Counterpart to lck_mtx_interlock_try_lock(): releases the mutex
+ * interlock first, then passes istate (the value saved by the try-lock)
+ * to ml_set_interrupts_enabled().
+ */
+static void
+lck_mtx_interlock_unlock(lck_mtx_t *mutex, boolean_t istate)
+{
+ lck_mtx_ilk_unlock(mutex);
+ ml_set_interrupts_enabled(istate);
+}
/*
lck_mtx_lock_spinwait_x86(
lck_mtx_t *mutex)
{
+ __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(mutex);
thread_t holder;
- uint64_t deadline;
+ uint64_t overall_deadline;
+ uint64_t check_owner_deadline;
+ uint64_t cur_time;
int retval = 1;
int loopcount = 0;
-
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_START,
- mutex, mutex->lck_mtx_owner, mutex->lck_mtx_waiters, 0, 0);
+ trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner), mutex->lck_mtx_waiters, 0, 0);
- deadline = mach_absolute_time() + MutexSpin;
+ cur_time = mach_absolute_time();
+ overall_deadline = cur_time + MutexSpin;
+ check_owner_deadline = cur_time;
/*
* Spin while:
retval = 0;
break;
}
- if ((holder = (thread_t) mutex->lck_mtx_owner) != NULL) {
+ cur_time = mach_absolute_time();
- if ( !(holder->machine.specFlags & OnProc) ||
- (holder->state & TH_IDLE)) {
- if (loopcount == 0)
- retval = 2;
- break;
+ if (cur_time >= overall_deadline)
+ break;
+
+ if (cur_time >= check_owner_deadline && mutex->lck_mtx_owner) {
+ boolean_t istate;
+
+ if (lck_mtx_interlock_try_lock(mutex, &istate)) {
+
+ if ((holder = (thread_t) mutex->lck_mtx_owner) != NULL) {
+
+ if ( !(holder->machine.specFlags & OnProc) ||
+ (holder->state & TH_IDLE)) {
+
+ lck_mtx_interlock_unlock(mutex, istate);
+
+ if (loopcount == 0)
+ retval = 2;
+ break;
+ }
+ }
+ lck_mtx_interlock_unlock(mutex, istate);
+
+ check_owner_deadline = cur_time + (MutexSpin / 4);
}
}
cpu_pause();
loopcount++;
- } while (mach_absolute_time() < deadline);
-
+ } while (TRUE);
#if CONFIG_DTRACE
/*
- * We've already kept a count via deadline of how long we spun.
+ * We've already kept a count via overall_deadline of how long we spun.
* If dtrace is active, then we compute backwards to decide how
* long we spun.
*
*/
if (__probable(mutex->lck_mtx_is_ext == 0)) {
LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN, mutex,
- mach_absolute_time() - (deadline - MutexSpin));
+ mach_absolute_time() - (overall_deadline - MutexSpin));
} else {
LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_SPIN, mutex,
- mach_absolute_time() - (deadline - MutexSpin));
+ mach_absolute_time() - (overall_deadline - MutexSpin));
}
/* The lockstat acquire event is recorded by the assembly code beneath us. */
#endif
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_END,
- mutex, mutex->lck_mtx_owner, mutex->lck_mtx_waiters, retval, 0);
+ trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner), mutex->lck_mtx_waiters, retval, 0);
return retval;
}
lck_mtx_lock_wait_x86 (
lck_mtx_t *mutex)
{
+ __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(mutex);
thread_t self = current_thread();
thread_t holder;
integer_t priority;
}
#endif
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START,
- mutex, mutex->lck_mtx_owner, mutex->lck_mtx_waiters, mutex->lck_mtx_pri, 0);
+ trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner), mutex->lck_mtx_waiters, mutex->lck_mtx_pri, 0);
priority = self->sched_pri;
- if (priority < self->priority)
- priority = self->priority;
+ if (priority < self->base_pri)
+ priority = self->base_pri;
if (priority < BASEPRI_DEFAULT)
priority = BASEPRI_DEFAULT;
+ /* Do not promote past promotion ceiling */
+ priority = MIN(priority, MAXPRI_PROMOTE);
+
if (mutex->lck_mtx_waiters == 0 || priority > mutex->lck_mtx_pri)
mutex->lck_mtx_pri = priority;
mutex->lck_mtx_waiters++;
if ( (holder = (thread_t)mutex->lck_mtx_owner) &&
holder->sched_pri < mutex->lck_mtx_pri ) {
-
s = splsched();
thread_lock(holder);
+ /* holder priority may have been bumped by another thread
+ * before thread_lock was taken
+ */
if (holder->sched_pri < mutex->lck_mtx_pri) {
KERNEL_DEBUG_CONSTANT(
MACHDBG_CODE(DBG_MACH_SCHED, MACH_PROMOTE) | DBG_FUNC_NONE,
- holder->sched_pri, priority, thread_tid(holder), mutex, 0);
-
+ holder->sched_pri, priority, thread_tid(holder), trace_lck, 0);
+ /* Assert that we're not altering the priority of a
+ * thread above the MAXPRI_PROMOTE band
+ */
+ assert(holder->sched_pri < MAXPRI_PROMOTE);
set_sched_pri(holder, priority);
if (mutex->lck_mtx_promoted == 0) {
thread_unlock(holder);
splx(s);
}
- assert_wait((event_t)(((unsigned int*)mutex)+((sizeof(lck_mtx_t)-1)/sizeof(unsigned int))), THREAD_UNINT);
+ thread_set_pending_block_hint(self, kThreadWaitKernelMutex);
+ assert_wait(LCK_MTX_EVENT(mutex), THREAD_UNINT);
lck_mtx_ilk_unlock(mutex);
thread_block(THREAD_CONTINUE_NULL);
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_END,
- mutex, mutex->lck_mtx_owner, mutex->lck_mtx_waiters, mutex->lck_mtx_pri, 0);
+ trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner), mutex->lck_mtx_waiters, mutex->lck_mtx_pri, 0);
#if CONFIG_DTRACE
/*
#endif
}
-
-#if MACH_KDB
-
-void
-db_show_one_lock(
- lock_t *lock)
-{
- db_printf("Read_count = 0x%x, %swant_upgrade, %swant_write, ",
- lock->lck_rw_shared_count,
- lock->lck_rw_want_upgrade ? "" : "!",
- lock->lck_rw_want_write ? "" : "!");
- db_printf("%swaiting, %scan_sleep\n",
- (lock->lck_r_waiting || lock->lck_w_waiting) ? "" : "!",
- lock->lck_rw_can_sleep ? "" : "!");
- db_printf("Interlock:\n");
- db_show_one_simple_lock((db_expr_t) ((vm_offset_t)simple_lock_addr(lock->lck_rw_interlock)),
- TRUE, (db_expr_t)0, (char *)0);
-}
-
/*
- * Routines to print out simple_locks and mutexes in a nicely-formatted
- * fashion.
+ * Routine: kdp_lck_mtx_lock_spin_is_acquired
+ * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
+ * Panics if not_in_kdp is set.
+ * Returns: TRUE if lock is acquired, i.e. if either lck_mtx_ilocked
+ * or lck_mtx_mlocked is set; FALSE otherwise.
*/
-
-const char *simple_lock_labels = "ENTRY ILK THREAD DURATION CALLER";
-
-void
-db_show_one_simple_lock (
- db_expr_t addr,
- boolean_t have_addr,
- __unused db_expr_t count,
- __unused char * modif)
+boolean_t
+kdp_lck_mtx_lock_spin_is_acquired(lck_mtx_t *lck)
{
- simple_lock_t saddr = (simple_lock_t) ((vm_offset_t) addr);
+ if (not_in_kdp) {
+ panic("panic: kdp_lck_mtx_lock_spin_is_acquired called outside of kernel debugger");
+ }
- if (saddr == (simple_lock_t)0 || !have_addr) {
- db_error ("No simple_lock\n");
+ if (lck->lck_mtx_ilocked || lck->lck_mtx_mlocked) {
+ return TRUE;
}
-#if USLOCK_DEBUG
- else if (saddr->lock_type != USLOCK_TAG)
- db_error ("Not a simple_lock\n");
-#endif /* USLOCK_DEBUG */
- db_printf ("%s\n", simple_lock_labels);
- db_print_simple_lock (saddr);
+ return FALSE;
}
+/*
+ * Routine: kdp_lck_mtx_find_owner
+ *
+ * Fills in waitinfo for a wait on a mutex event: the event is mapped
+ * back to its mutex with LCK_EVENT_TO_MUTEX(), context is set to
+ * VM_KERNEL_UNSLIDE_OR_PERM() of the mutex address, and owner is set to
+ * the tid of the thread currently recorded in lck_mtx_owner.
+ * The owner field is read without taking the interlock; waitq is unused.
+ * NOTE(review): the kdp_ prefix suggests debugger-only use -- confirm.
+ */
void
-db_print_simple_lock (
- simple_lock_t addr)
+kdp_lck_mtx_find_owner(__unused struct waitq * waitq, event64_t event, thread_waitinfo_t * waitinfo)
{
-
- db_printf ("%08x %3d", addr, *hw_lock_addr(addr->interlock));
-#if USLOCK_DEBUG
- db_printf (" %08x", addr->debug.lock_thread);
- db_printf (" %08x ", addr->debug.duration[1]);
- db_printsym ((int)addr->debug.lock_pc, DB_STGY_ANY);
-#endif /* USLOCK_DEBUG */
- db_printf ("\n");
+ lck_mtx_t * mutex = LCK_EVENT_TO_MUTEX(event);
+ waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(mutex);
+ thread_t holder = (thread_t)mutex->lck_mtx_owner;
+ waitinfo->owner = thread_tid(holder);
}
-#endif /* MACH_KDB */
+/*
+ * Routine: kdp_rwlck_find_owner
+ *
+ * Fills in waitinfo for a wait on a rw lock event. The event is mapped
+ * back to the lock according to waitinfo->wait_type:
+ * READ_EVENT_TO_RWLOCK() for read waits, WRITE_EVENT_TO_RWLOCK() for
+ * write and upgrade waits; any other wait_type panics.
+ * context is set to VM_KERNEL_UNSLIDE_OR_PERM() of the lock address and
+ * owner is always reported as 0. waitq is unused.
+ */
+void
+kdp_rwlck_find_owner(__unused struct waitq * waitq, event64_t event, thread_waitinfo_t * waitinfo)
+{
+ lck_rw_t *rwlck = NULL;
+ switch(waitinfo->wait_type) {
+ case kThreadWaitKernelRWLockRead:
+ rwlck = READ_EVENT_TO_RWLOCK(event);
+ break;
+ case kThreadWaitKernelRWLockWrite:
+ case kThreadWaitKernelRWLockUpgrade:
+ /* write and upgrade waits share the same event-to-lock mapping */
+ rwlck = WRITE_EVENT_TO_RWLOCK(event);
+ break;
+ default:
+ panic("%s was called with an invalid blocking type", __FUNCTION__);
+ break;
+ }
+ waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(rwlck);
+ waitinfo->owner = 0;
+}