/*
- * Copyright (c) 2007-2017 Apple Inc. All rights reserved.
+ * Copyright (c) 2007-2018 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
* Locking primitives implementation
*/
-#define ATOMIC_PRIVATE 1
#define LOCK_PRIVATE 1
#include <mach_ldebug.h>
-#include <kern/kalloc.h>
+#include <kern/zalloc.h>
#include <kern/lock_stat.h>
#include <kern/locks.h>
#include <kern/misc_protos.h>
#include <kern/thread.h>
#include <kern/processor.h>
#include <kern/sched_prim.h>
-#include <kern/xpr.h>
#include <kern/debug.h>
#include <kern/kcdata.h>
#include <string.h>
+#include <arm/cpu_internal.h>
+#include <os/hash.h>
+#include <arm/cpu_data.h>
#include <arm/cpu_data_internal.h>
#include <arm/proc_reg.h>
// These are undesirable when in a panic or a debugger is running.
#define LOCK_CORRECTNESS_PANIC() (kernel_debugger_entry_count == 0)
-unsigned int LcksOpts = 0;
-
#define ADAPTIVE_SPIN_ENABLE 0x1
-#if __SMP__
int lck_mtx_adaptive_spin_mode = ADAPTIVE_SPIN_ENABLE;
-#else /* __SMP__ */
-int lck_mtx_adaptive_spin_mode = 0;
-#endif /* __SMP__ */
#define SPINWAIT_OWNER_CHECK_COUNT 4
typedef enum {
SPINWAIT_ACQUIRED, /* Got the lock. */
SPINWAIT_INTERLOCK, /* Got the interlock, no owner, but caller must finish acquiring the lock. */
- SPINWAIT_DID_SPIN, /* Got the interlock, spun, but failed to get the lock. */
+ SPINWAIT_DID_SPIN_HIGH_THR, /* Got the interlock, spun, but failed to get the lock: hit the high spin deadline. */
+ SPINWAIT_DID_SPIN_OWNER_NOT_CORE, /* Got the interlock, spun, but failed to get the lock: owner was not running on a core. */
+ SPINWAIT_DID_SPIN_NO_WINDOW_CONTENTION, /* Got the interlock, spun, but failed to get the lock: no contention seen during the learning window. */
+ SPINWAIT_DID_SPIN_SLIDING_THR, /* Got the interlock, spun, but failed to get the lock: hit the sliding spin deadline. */
SPINWAIT_DID_NOT_SPIN, /* Got the interlock, did not spin. */
} spinwait_result_t;
-#if CONFIG_DTRACE && __SMP__
+#if CONFIG_DTRACE
extern uint64_t dtrace_spin_threshold;
#endif
/* Forwards */
-
-#if USLOCK_DEBUG
-/*
- * Perform simple lock checks.
- */
-int uslock_check = 1;
-int max_lock_loops = 100000000;
-decl_simple_lock_data(extern, printf_lock)
-decl_simple_lock_data(extern, panic_lock)
-#endif /* USLOCK_DEBUG */
-
extern unsigned int not_in_kdp;
/*
* Portable lock package implementation of usimple_locks.
*/
-#if USLOCK_DEBUG
-#define USLDBG(stmt) stmt
-void usld_lock_init(usimple_lock_t, unsigned short);
-void usld_lock_pre(usimple_lock_t, pc_t);
-void usld_lock_post(usimple_lock_t, pc_t);
-void usld_unlock(usimple_lock_t, pc_t);
-void usld_lock_try_pre(usimple_lock_t, pc_t);
-void usld_lock_try_post(usimple_lock_t, pc_t);
-int usld_lock_common_checks(usimple_lock_t, const char *);
-#else /* USLOCK_DEBUG */
-#define USLDBG(stmt)
-#endif /* USLOCK_DEBUG */
-
/*
* Owner thread pointer when lock held in spin mode
*/
#define lck_rw_ilk_lock(lock) hw_lock_bit ((hw_lock_bit_t*)(&(lock)->lck_rw_tag), LCK_RW_INTERLOCK_BIT, LCK_GRP_NULL)
#define lck_rw_ilk_unlock(lock) hw_unlock_bit((hw_lock_bit_t*)(&(lock)->lck_rw_tag), LCK_RW_INTERLOCK_BIT)
-#define memory_barrier() __c11_atomic_thread_fence(memory_order_acq_rel_smp)
-#define load_memory_barrier() __c11_atomic_thread_fence(memory_order_acquire_smp)
-#define store_memory_barrier() __c11_atomic_thread_fence(memory_order_release_smp)
+#define load_memory_barrier() os_atomic_thread_fence(acquire)
// Enforce program order of loads and stores.
-#define ordered_load(target, type) \
- __c11_atomic_load((_Atomic type *)(target), memory_order_relaxed)
-#define ordered_store(target, type, value) \
- __c11_atomic_store((_Atomic type *)(target), value, memory_order_relaxed)
-
-#define ordered_load_mtx(lock) ordered_load(&(lock)->lck_mtx_data, uintptr_t)
-#define ordered_store_mtx(lock, value) ordered_store(&(lock)->lck_mtx_data, uintptr_t, (value))
-#define ordered_load_rw(lock) ordered_load(&(lock)->lck_rw_data, uint32_t)
-#define ordered_store_rw(lock, value) ordered_store(&(lock)->lck_rw_data, uint32_t, (value))
-#define ordered_load_rw_owner(lock) ordered_load(&(lock)->lck_rw_owner, thread_t)
-#define ordered_store_rw_owner(lock, value) ordered_store(&(lock)->lck_rw_owner, thread_t, (value))
-#define ordered_load_hw(lock) ordered_load(&(lock)->lock_data, uintptr_t)
-#define ordered_store_hw(lock, value) ordered_store(&(lock)->lock_data, uintptr_t, (value))
-#define ordered_load_bit(lock) ordered_load((lock), uint32_t)
-#define ordered_store_bit(lock, value) ordered_store((lock), uint32_t, (value))
+#define ordered_load(target) \
+ os_atomic_load(target, compiler_acq_rel)
+#define ordered_store(target, value) \
+ os_atomic_store(target, value, compiler_acq_rel)
+
+#define ordered_load_mtx(lock) ordered_load(&(lock)->lck_mtx_data)
+#define ordered_store_mtx(lock, value) ordered_store(&(lock)->lck_mtx_data, (value))
+#define ordered_load_rw(lock) ordered_load(&(lock)->lck_rw_data)
+#define ordered_store_rw(lock, value) ordered_store(&(lock)->lck_rw_data, (value))
+#define ordered_load_rw_owner(lock) ordered_load(&(lock)->lck_rw_owner)
+#define ordered_store_rw_owner(lock, value) ordered_store(&(lock)->lck_rw_owner, (value))
+#define ordered_load_hw(lock) ordered_load(&(lock)->lock_data)
+#define ordered_store_hw(lock, value) ordered_store(&(lock)->lock_data, (value))
+#define ordered_load_bit(lock) ordered_load((lock))
+#define ordered_store_bit(lock, value) ordered_store((lock), (value))
// Prevent the compiler from reordering memory operations around this
#define enable_interrupts() __asm__ volatile ("cpsie if" ::: "memory");
#endif
+ZONE_VIEW_DEFINE(ZV_LCK_SPIN, "lck_spin",
+ KHEAP_ID_DEFAULT, sizeof(lck_spin_t));
+
+ZONE_VIEW_DEFINE(ZV_LCK_MTX, "lck_mtx",
+ KHEAP_ID_DEFAULT, sizeof(lck_mtx_t));
+
+ZONE_VIEW_DEFINE(ZV_LCK_MTX_EXT, "lck_mtx_ext",
+ KHEAP_ID_DEFAULT, sizeof(lck_mtx_ext_t));
+
+ZONE_VIEW_DEFINE(ZV_LCK_RW, "lck_rw",
+ KHEAP_ID_DEFAULT, sizeof(lck_rw_t));
+
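+/*
+ * Usage note (a sketch grounded in the allocator changes below): these zone
+ * views give each lock type its own sized allocation bucket in the default
+ * kalloc heap, and are consumed by the lck_*_alloc_init()/lck_*_free()
+ * paths, which now call zalloc()/zfree() instead of kalloc()/kfree(), e.g.:
+ *
+ *     lck_spin_t *l = zalloc(ZV_LCK_SPIN);
+ *     lck_spin_init(l, grp, attr);
+ *     ...
+ *     lck_spin_destroy(l, grp);
+ *     zfree(ZV_LCK_SPIN, l);
+ */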
/*
* Forward declarations
*/
* atomic_exchange_complete() - conclude an exchange
* atomic_exchange_abort() - cancel an exchange started with atomic_exchange_begin()
*/
+__unused static uint32_t
+load_exclusive32(uint32_t *target, enum memory_order ord)
+{
+ uint32_t value;
+
+#if __arm__
+ if (_os_atomic_mo_has_release(ord)) {
+ // Pre-load release barrier
+ atomic_thread_fence(memory_order_release);
+ }
+ value = __builtin_arm_ldrex(target);
+#else
+ if (_os_atomic_mo_has_acquire(ord)) {
+ value = __builtin_arm_ldaex(target); // ldaxr
+ } else {
+ value = __builtin_arm_ldrex(target); // ldxr
+ }
+#endif // __arm__
+ return value;
+}
+
+__unused static boolean_t
+store_exclusive32(uint32_t *target, uint32_t value, enum memory_order ord)
+{
+ boolean_t err;
+
+#if __arm__
+ err = __builtin_arm_strex(value, target);
+ if (_os_atomic_mo_has_acquire(ord)) {
+ // Post-store acquire barrier
+ atomic_thread_fence(memory_order_acquire);
+ }
+#else
+ if (_os_atomic_mo_has_release(ord)) {
+ err = __builtin_arm_stlex(value, target); // stlxr
+ } else {
+ err = __builtin_arm_strex(value, target); // stxr
+ }
+#endif // __arm__
+ return !err;
+}
+
static uint32_t
atomic_exchange_begin32(uint32_t *target, uint32_t *previous, enum memory_order ord)
{
uint32_t val;
+#if __ARM_ATOMICS_8_1
+ ord = memory_order_relaxed;
+#endif
val = load_exclusive32(target, ord);
*previous = val;
return val;
static boolean_t
atomic_exchange_complete32(uint32_t *target, uint32_t previous, uint32_t newval, enum memory_order ord)
{
+#if __ARM_ATOMICS_8_1
+ return __c11_atomic_compare_exchange_strong((_Atomic uint32_t *)target, &previous, newval, ord, memory_order_relaxed);
+#else
(void)previous; // Previous not needed, monitor is held
return store_exclusive32(target, newval, ord);
+#endif
}
static void
atomic_exchange_abort(void)
{
- clear_exclusive();
+ os_atomic_clear_exclusive();
}
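+/*
+ * Illustrative sketch of how the exchange helpers above combine into an
+ * LL/SC retry loop; this mirrors the shape of the lck_rw_* paths below
+ * (the lock name and the new-value computation are placeholders):
+ *
+ *     uint32_t data, prev;
+ *     for (;;) {
+ *         data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
+ *         if (data & LCK_RW_INTERLOCK) {
+ *             atomic_exchange_abort();        // drop the exclusive monitor
+ *             lck_rw_interlock_spin(lock);    // wait for the interlock to clear
+ *             continue;
+ *         }
+ *         data += LCK_RW_SHARED_READER;       // compute the desired new value
+ *         if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
+ *             break;                          // store-exclusive succeeded
+ *         }
+ *         cpu_pause();                        // lost the reservation, retry
+ *     }
+ */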
static boolean_t
}
}
+inline boolean_t
+hw_atomic_test_and_set32(uint32_t *target, uint32_t test_mask, uint32_t set_mask, enum memory_order ord, boolean_t wait)
+{
+ return atomic_test_and_set32(target, test_mask, set_mask, ord, wait);
+}
+
+/*
+ * To help _disable_preemption() inline everywhere with LTO,
+ * we keep these nice non-inlineable functions, as the panic()
+ * codegen setup is quite large and for weird reasons causes a frame.
+ */
+__abortlike
+static void
+_disable_preemption_overflow(void)
+{
+ panic("Preemption count overflow");
+}
+
void
_disable_preemption(void)
{
- thread_t thread = current_thread();
- unsigned int count;
+ thread_t thread = current_thread();
+ unsigned int count = thread->machine.preemption_count;
- count = thread->machine.preemption_count + 1;
- ordered_store(&thread->machine.preemption_count, unsigned int, count);
+ if (__improbable(++count == 0)) {
+ _disable_preemption_overflow();
+ }
+
+ os_atomic_store(&thread->machine.preemption_count, count, compiler_acq_rel);
}
-void
-_enable_preemption(void)
+/*
+ * This function checks whether an AST_URGENT has been pended.
+ *
+ * It is called once preemption has been re-enabled, which means the thread
+ * may have been preempted right before this was called, so by the time this
+ * function actually performs the check we may have changed CPU.
+ *
+ * This race is however benign: the point of AST_URGENT is to trigger a context
+ * switch, so if one happened, there's nothing left to check for, and AST_URGENT
+ * was cleared in the process.
+ *
+ * It follows that this check cannot have false negatives, which allows us
+ * to avoid fiddling with interrupt state for the vast majority of cases
+ * when the check will actually be negative.
+ */
+static NOINLINE void
+kernel_preempt_check(thread_t thread)
{
- thread_t thread = current_thread();
- long state;
- unsigned int count;
+ cpu_data_t *cpu_data_ptr;
+ long state;
+
#if __arm__
#define INTERRUPT_MASK PSR_IRQF
#else // __arm__
#define INTERRUPT_MASK DAIF_IRQF
#endif // __arm__
- count = thread->machine.preemption_count;
- if (count == 0) {
- panic("Preemption count negative"); // Count will go negative when released
- }
- count--;
- if (count > 0) {
- goto update_count; // Preemption is still disabled, just update
- }
- state = get_interrupts(); // Get interrupt state
- if (state & INTERRUPT_MASK) {
- goto update_count; // Interrupts are already masked, can't take AST here
+ /*
+ * This check is racy and could load from another CPU's pending_ast mask,
+ * but as described above, this can't have false negatives.
+ */
+ cpu_data_ptr = os_atomic_load(&thread->machine.CpuDatap, compiler_acq_rel);
+ if (__probable((cpu_data_ptr->cpu_pending_ast & AST_URGENT) == 0)) {
+ return;
}
- disable_interrupts_noread(); // Disable interrupts
- ordered_store(&thread->machine.preemption_count, unsigned int, count);
- if (thread->machine.CpuDatap->cpu_pending_ast & AST_URGENT) {
+
+ /* If interrupts are masked, we can't take an AST here */
+ state = get_interrupts();
+ if ((state & INTERRUPT_MASK) == 0) {
+ disable_interrupts_noread(); // Disable interrupts
+
+ /*
+ * Reload cpu_data_ptr: a context switch would cause it to change.
+ * Now that interrupts are disabled, this will debounce false positives.
+ */
+ cpu_data_ptr = os_atomic_load(&thread->machine.CpuDatap, compiler_acq_rel);
+ if (cpu_data_ptr->cpu_pending_ast & AST_URGENT) {
#if __arm__
#if __ARM_USER_PROTECT__
- uintptr_t up = arm_user_protect_begin(thread);
+ uintptr_t up = arm_user_protect_begin(thread);
#endif // __ARM_USER_PROTECT__
- enable_fiq();
+ enable_fiq();
#endif // __arm__
- ast_taken_kernel(); // Handle urgent AST
+ ast_taken_kernel(); // Handle urgent AST
#if __arm__
#if __ARM_USER_PROTECT__
- arm_user_protect_end(thread, up, TRUE);
+ arm_user_protect_end(thread, up, TRUE);
#endif // __ARM_USER_PROTECT__
- enable_interrupts();
- return; // Return early on arm only due to FIQ enabling
+ enable_interrupts();
+ return; // Return early on arm only due to FIQ enabling
#endif // __arm__
- }
- restore_interrupts(state); // Enable interrupts
- return;
-
-update_count:
- ordered_store(&thread->machine.preemption_count, unsigned int, count);
- return;
-}
-
-int
-get_preemption_level(void)
-{
- return current_thread()->machine.preemption_count;
-}
-
-#if __SMP__
-static unsigned int
-hw_lock_bit_to_contended(hw_lock_bit_t *lock, uint32_t mask, uint32_t timeout LCK_GRP_ARG(lck_grp_t *grp));
-#endif
-
-static inline unsigned int
-hw_lock_bit_to_internal(hw_lock_bit_t *lock, unsigned int bit, uint32_t timeout LCK_GRP_ARG(lck_grp_t *grp))
-{
- unsigned int success = 0;
- uint32_t mask = (1 << bit);
-#if !__SMP__
- uint32_t state;
-#endif
-
-#if __SMP__
- if (__improbable(!atomic_test_and_set32(lock, mask, mask, memory_order_acquire, FALSE))) {
- success = hw_lock_bit_to_contended(lock, mask, timeout LCK_GRP_ARG(grp));
- } else {
- success = 1;
- }
-#else // __SMP__
- (void)timeout;
- state = ordered_load_bit(lock);
- if (!(mask & state)) {
- ordered_store_bit(lock, state | mask);
- success = 1;
- }
-#endif // __SMP__
-
- if (success) {
- lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
- }
-
- return success;
-}
-
-unsigned
-int
-(hw_lock_bit_to)(hw_lock_bit_t * lock, unsigned int bit, uint32_t timeout LCK_GRP_ARG(lck_grp_t *grp))
-{
- _disable_preemption();
- return hw_lock_bit_to_internal(lock, bit, timeout LCK_GRP_ARG(grp));
-}
-
-#if __SMP__
-static unsigned int NOINLINE
-hw_lock_bit_to_contended(hw_lock_bit_t *lock, uint32_t mask, uint32_t timeout LCK_GRP_ARG(lck_grp_t *grp))
-{
- uint64_t end = 0;
- int i;
-#if CONFIG_DTRACE || LOCK_STATS
- uint64_t begin = 0;
- boolean_t stat_enabled = lck_grp_spin_spin_enabled(lock LCK_GRP_ARG(grp));
-#endif /* CONFIG_DTRACE || LOCK_STATS */
-
-#if LOCK_STATS || CONFIG_DTRACE
- if (__improbable(stat_enabled)) {
- begin = mach_absolute_time();
- }
-#endif /* LOCK_STATS || CONFIG_DTRACE */
- for (;;) {
- for (i = 0; i < LOCK_SNOOP_SPINS; i++) {
- // Always load-exclusive before wfe
- // This grabs the monitor and wakes up on a release event
- if (atomic_test_and_set32(lock, mask, mask, memory_order_acquire, TRUE)) {
- goto end;
- }
- }
- if (end == 0) {
- end = ml_get_timebase() + timeout;
- } else if (ml_get_timebase() >= end) {
- break;
}
+ restore_interrupts(state); // Enable interrupts
}
- return 0;
-end:
-#if CONFIG_DTRACE || LOCK_STATS
- if (__improbable(stat_enabled)) {
- lck_grp_spin_update_spin(lock LCK_GRP_ARG(grp), mach_absolute_time() - begin);
- }
- lck_grp_spin_update_miss(lock LCK_GRP_ARG(grp));
-#endif /* CONFIG_DTRACE || LCK_GRP_STAT */
-
- return 1;
-}
-#endif // __SMP__
-
-void
-(hw_lock_bit)(hw_lock_bit_t * lock, unsigned int bit LCK_GRP_ARG(lck_grp_t *grp))
-{
- if (hw_lock_bit_to(lock, bit, LOCK_PANIC_TIMEOUT, LCK_GRP_PROBEARG(grp))) {
- return;
- }
-#if __SMP__
- panic("hw_lock_bit(): timed out (%p)", lock);
-#else
- panic("hw_lock_bit(): interlock held (%p)", lock);
-#endif
-}
-
-void
-(hw_lock_bit_nopreempt)(hw_lock_bit_t * lock, unsigned int bit LCK_GRP_ARG(lck_grp_t *grp))
-{
- if (__improbable(get_preemption_level() == 0)) {
- panic("Attempt to take no-preempt bitlock %p in preemptible context", lock);
- }
- if (hw_lock_bit_to_internal(lock, bit, LOCK_PANIC_TIMEOUT LCK_GRP_ARG(grp))) {
- return;
- }
-#if __SMP__
- panic("hw_lock_bit_nopreempt(): timed out (%p)", lock);
-#else
- panic("hw_lock_bit_nopreempt(): interlock held (%p)", lock);
-#endif
-}
-
-unsigned
-int
-(hw_lock_bit_try)(hw_lock_bit_t * lock, unsigned int bit LCK_GRP_ARG(lck_grp_t *grp))
-{
- uint32_t mask = (1 << bit);
-#if !__SMP__
- uint32_t state;
-#endif
- boolean_t success = FALSE;
-
- _disable_preemption();
-#if __SMP__
- // TODO: consider weak (non-looping) atomic test-and-set
- success = atomic_test_and_set32(lock, mask, mask, memory_order_acquire, FALSE);
-#else
- state = ordered_load_bit(lock);
- if (!(mask & state)) {
- ordered_store_bit(lock, state | mask);
- success = TRUE;
- }
-#endif // __SMP__
- if (!success) {
- _enable_preemption();
- }
-
- if (success) {
- lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
- }
-
- return success;
-}
-
-static inline void
-hw_unlock_bit_internal(hw_lock_bit_t *lock, unsigned int bit)
-{
- uint32_t mask = (1 << bit);
-#if !__SMP__
- uint32_t state;
-#endif
-
-#if __SMP__
- __c11_atomic_fetch_and((_Atomic uint32_t *)lock, ~mask, memory_order_release);
- set_event();
-#else // __SMP__
- state = ordered_load_bit(lock);
- ordered_store_bit(lock, state & ~mask);
-#endif // __SMP__
-#if CONFIG_DTRACE
- LOCKSTAT_RECORD(LS_LCK_SPIN_UNLOCK_RELEASE, lock, bit);
-#endif
}
/*
- * Routine: hw_unlock_bit
- *
- * Release spin-lock. The second parameter is the bit number to test and set.
- * Decrement the preemption level.
+ * To help _enable_preemption() inline everywhere with LTO,
+ * we keep these nice non-inlineable functions, as the panic()
+ * codegen setup is quite large and for weird reasons causes a frame.
*/
-void
-hw_unlock_bit(hw_lock_bit_t * lock, unsigned int bit)
+__abortlike
+static void
+_enable_preemption_underflow(void)
{
- hw_unlock_bit_internal(lock, bit);
- _enable_preemption();
+ panic("Preemption count underflow");
}
void
-hw_unlock_bit_nopreempt(hw_lock_bit_t * lock, unsigned int bit)
+_enable_preemption(void)
{
- if (__improbable(get_preemption_level() == 0)) {
- panic("Attempt to release no-preempt bitlock %p in preemptible context", lock);
- }
- hw_unlock_bit_internal(lock, bit);
-}
+ thread_t thread = current_thread();
+ unsigned int count = thread->machine.preemption_count;
-#if __SMP__
-static inline boolean_t
-interlock_try_disable_interrupts(
- lck_mtx_t *mutex,
- boolean_t *istate)
-{
- *istate = ml_set_interrupts_enabled(FALSE);
+ if (__improbable(count == 0)) {
+ _enable_preemption_underflow();
+ }
+ count -= 1;
- if (interlock_try(mutex)) {
- return 1;
- } else {
- ml_set_interrupts_enabled(*istate);
- return 0;
+ os_atomic_store(&thread->machine.preemption_count, count, compiler_acq_rel);
+ if (count == 0) {
+ kernel_preempt_check(thread);
}
+
+ os_compiler_barrier();
}
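+/*
+ * A small sketch of the intended nesting (illustrative only): the count is
+ * per-thread, and the urgent-AST check only runs when the outermost
+ * _enable_preemption() drops the count back to zero.
+ *
+ *     _disable_preemption();      // count 0 -> 1
+ *     _disable_preemption();      // count 1 -> 2
+ *     _enable_preemption();       // count 2 -> 1, no AST check
+ *     _enable_preemption();       // count 1 -> 0, kernel_preempt_check()
+ */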
-static inline void
-interlock_unlock_enable_interrupts(
- lck_mtx_t *mutex,
- boolean_t istate)
+int
+get_preemption_level(void)
{
- interlock_unlock(mutex);
- ml_set_interrupts_enabled(istate);
+ return current_thread()->machine.preemption_count;
}
-#endif /* __SMP__ */
/*
* Routine: lck_spin_alloc_init
lck_grp_t * grp,
lck_attr_t * attr)
{
- lck_spin_t *lck;
-
- if ((lck = (lck_spin_t *) kalloc(sizeof(lck_spin_t))) != 0) {
- lck_spin_init(lck, grp, attr);
- }
+ lck_spin_t *lck;
+ lck = zalloc(ZV_LCK_SPIN);
+ lck_spin_init(lck, grp, attr);
return lck;
}
lck_grp_t * grp)
{
lck_spin_destroy(lck, grp);
- kfree(lck, sizeof(lck_spin_t));
+ zfree(ZV_LCK_SPIN, lck);
}
/*
lck_grp_t * grp,
__unused lck_attr_t * attr)
{
- hw_lock_init(&lck->hwlock);
lck->type = LCK_SPIN_TYPE;
- lck_grp_reference(grp);
- lck_grp_lckcnt_incr(grp, LCK_TYPE_SPIN);
- store_memory_barrier();
+ hw_lock_init(&lck->hwlock);
+ if (grp) {
+ lck_grp_reference(grp);
+ lck_grp_lckcnt_incr(grp, LCK_TYPE_SPIN);
+ }
}
/*
* arm_usimple_lock is a lck_spin_t without a group or attributes
*/
-void inline
+MARK_AS_HIBERNATE_TEXT void inline
arm_usimple_lock_init(simple_lock_t lck, __unused unsigned short initial_value)
{
lck->type = LCK_SPIN_TYPE;
hw_lock_init(&lck->hwlock);
- store_memory_barrier();
}
return;
}
lck->lck_spin_data = LCK_SPIN_TAG_DESTROYED;
- lck_grp_lckcnt_decr(grp, LCK_TYPE_SPIN);
- lck_grp_deallocate(grp);
+ if (grp) {
+ lck_grp_lckcnt_decr(grp, LCK_TYPE_SPIN);
+ lck_grp_deallocate(grp);
+ }
}
/*
usimple_lock_t l,
unsigned short tag)
{
-#ifndef MACHINE_SIMPLE_LOCK
- USLDBG(usld_lock_init(l, tag));
- hw_lock_init(&l->lck_spin_data);
-#else
simple_lock_init((simple_lock_t) l, tag);
-#endif
}
usimple_lock_t l
LCK_GRP_ARG(lck_grp_t *grp))
{
-#ifndef MACHINE_SIMPLE_LOCK
- pc_t pc;
-
- OBTAIN_PC(pc, l);
- USLDBG(usld_lock_pre(l, pc));
-
- if (!hw_lock_to(&l->lck_spin_data, LockTimeOut, LCK_GRP_ARG(grp))) { /* Try to get the lock
- * with a timeout */
- panic("simple lock deadlock detection - l=%p, cpu=%d, ret=%p", &l, cpu_number(), pc);
- }
-
- USLDBG(usld_lock_post(l, pc));
-#else
simple_lock((simple_lock_t) l, LCK_GRP_PROBEARG(grp));
-#endif
}
(usimple_unlock)(
usimple_lock_t l)
{
-#ifndef MACHINE_SIMPLE_LOCK
- pc_t pc;
-
- OBTAIN_PC(pc, l);
- USLDBG(usld_unlock(l, pc));
- sync();
- hw_lock_unlock(&l->lck_spin_data);
-#else
simple_unlock((simple_lock_t)l);
-#endif
}
usimple_lock_t l
LCK_GRP_ARG(lck_grp_t *grp))
{
-#ifndef MACHINE_SIMPLE_LOCK
- pc_t pc;
- unsigned int success;
-
- OBTAIN_PC(pc, l);
- USLDBG(usld_lock_try_pre(l, pc));
- if ((success = hw_lock_try(&l->lck_spin_data LCK_GRP_ARG(grp)))) {
- USLDBG(usld_lock_try_post(l, pc));
- }
- return success;
-#else
return simple_lock_try((simple_lock_t) l, grp);
-#endif
-}
-
-#if USLOCK_DEBUG
-/*
- * States of a usimple_lock. The default when initializing
- * a usimple_lock is setting it up for debug checking.
- */
-#define USLOCK_CHECKED 0x0001 /* lock is being checked */
-#define USLOCK_TAKEN 0x0002 /* lock has been taken */
-#define USLOCK_INIT 0xBAA0 /* lock has been initialized */
-#define USLOCK_INITIALIZED (USLOCK_INIT|USLOCK_CHECKED)
-#define USLOCK_CHECKING(l) (uslock_check && \
- ((l)->debug.state & USLOCK_CHECKED))
-
-/*
- * Trace activities of a particularly interesting lock.
- */
-void usl_trace(usimple_lock_t, int, pc_t, const char *);
-
-
-/*
- * Initialize the debugging information contained
- * in a usimple_lock.
- */
-void
-usld_lock_init(
- usimple_lock_t l,
- __unused unsigned short tag)
-{
- if (l == USIMPLE_LOCK_NULL) {
- panic("lock initialization: null lock pointer");
- }
- l->lock_type = USLOCK_TAG;
- l->debug.state = uslock_check ? USLOCK_INITIALIZED : 0;
- l->debug.lock_cpu = l->debug.unlock_cpu = 0;
- l->debug.lock_pc = l->debug.unlock_pc = INVALID_PC;
- l->debug.lock_thread = l->debug.unlock_thread = INVALID_THREAD;
- l->debug.duration[0] = l->debug.duration[1] = 0;
- l->debug.unlock_cpu = l->debug.unlock_cpu = 0;
- l->debug.unlock_pc = l->debug.unlock_pc = INVALID_PC;
- l->debug.unlock_thread = l->debug.unlock_thread = INVALID_THREAD;
-}
-
-
-/*
- * These checks apply to all usimple_locks, not just
- * those with USLOCK_CHECKED turned on.
- */
-int
-usld_lock_common_checks(
- usimple_lock_t l,
- const char *caller)
-{
- if (l == USIMPLE_LOCK_NULL) {
- panic("%s: null lock pointer", caller);
- }
- if (l->lock_type != USLOCK_TAG) {
- panic("%s: 0x%x is not a usimple lock", caller, (integer_t) l);
- }
- if (!(l->debug.state & USLOCK_INIT)) {
- panic("%s: 0x%x is not an initialized lock",
- caller, (integer_t) l);
- }
- return USLOCK_CHECKING(l);
-}
-
-
-/*
- * Debug checks on a usimple_lock just before attempting
- * to acquire it.
- */
-/* ARGSUSED */
-void
-usld_lock_pre(
- usimple_lock_t l,
- pc_t pc)
-{
- const char *caller = "usimple_lock";
-
-
- if (!usld_lock_common_checks(l, caller)) {
- return;
- }
-
- /*
- * Note that we have a weird case where we are getting a lock when we are]
- * in the process of putting the system to sleep. We are running with no
- * current threads, therefore we can't tell if we are trying to retake a lock
- * we have or someone on the other processor has it. Therefore we just
- * ignore this test if the locking thread is 0.
- */
-
- if ((l->debug.state & USLOCK_TAKEN) && l->debug.lock_thread &&
- l->debug.lock_thread == (void *) current_thread()) {
- printf("%s: lock 0x%x already locked (at %p) by",
- caller, (integer_t) l, l->debug.lock_pc);
- printf(" current thread %p (new attempt at pc %p)\n",
- l->debug.lock_thread, pc);
- panic("%s", caller);
- }
- mp_disable_preemption();
- usl_trace(l, cpu_number(), pc, caller);
- mp_enable_preemption();
-}
-
-
-/*
- * Debug checks on a usimple_lock just after acquiring it.
- *
- * Pre-emption has been disabled at this point,
- * so we are safe in using cpu_number.
- */
-void
-usld_lock_post(
- usimple_lock_t l,
- pc_t pc)
-{
- int mycpu;
- const char *caller = "successful usimple_lock";
-
-
- if (!usld_lock_common_checks(l, caller)) {
- return;
- }
-
- if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED)) {
- panic("%s: lock 0x%x became uninitialized",
- caller, (integer_t) l);
- }
- if ((l->debug.state & USLOCK_TAKEN)) {
- panic("%s: lock 0x%x became TAKEN by someone else",
- caller, (integer_t) l);
- }
-
- mycpu = cpu_number();
- l->debug.lock_thread = (void *) current_thread();
- l->debug.state |= USLOCK_TAKEN;
- l->debug.lock_pc = pc;
- l->debug.lock_cpu = mycpu;
-
- usl_trace(l, mycpu, pc, caller);
-}
-
-
-/*
- * Debug checks on a usimple_lock just before
- * releasing it. Note that the caller has not
- * yet released the hardware lock.
- *
- * Preemption is still disabled, so there's
- * no problem using cpu_number.
- */
-void
-usld_unlock(
- usimple_lock_t l,
- pc_t pc)
-{
- int mycpu;
- const char *caller = "usimple_unlock";
-
-
- if (!usld_lock_common_checks(l, caller)) {
- return;
- }
-
- mycpu = cpu_number();
-
- if (!(l->debug.state & USLOCK_TAKEN)) {
- panic("%s: lock 0x%x hasn't been taken",
- caller, (integer_t) l);
- }
- if (l->debug.lock_thread != (void *) current_thread()) {
- panic("%s: unlocking lock 0x%x, owned by thread %p",
- caller, (integer_t) l, l->debug.lock_thread);
- }
- if (l->debug.lock_cpu != mycpu) {
- printf("%s: unlocking lock 0x%x on cpu 0x%x",
- caller, (integer_t) l, mycpu);
- printf(" (acquired on cpu 0x%x)\n", l->debug.lock_cpu);
- panic("%s", caller);
- }
- usl_trace(l, mycpu, pc, caller);
-
- l->debug.unlock_thread = l->debug.lock_thread;
- l->debug.lock_thread = INVALID_PC;
- l->debug.state &= ~USLOCK_TAKEN;
- l->debug.unlock_pc = pc;
- l->debug.unlock_cpu = mycpu;
-}
-
-
-/*
- * Debug checks on a usimple_lock just before
- * attempting to acquire it.
- *
- * Preemption isn't guaranteed to be disabled.
- */
-void
-usld_lock_try_pre(
- usimple_lock_t l,
- pc_t pc)
-{
- const char *caller = "usimple_lock_try";
-
- if (!usld_lock_common_checks(l, caller)) {
- return;
- }
- mp_disable_preemption();
- usl_trace(l, cpu_number(), pc, caller);
- mp_enable_preemption();
}
-
-/*
- * Debug checks on a usimple_lock just after
- * successfully attempting to acquire it.
- *
- * Preemption has been disabled by the
- * lock acquisition attempt, so it's safe
- * to use cpu_number.
- */
-void
-usld_lock_try_post(
- usimple_lock_t l,
- pc_t pc)
-{
- int mycpu;
- const char *caller = "successful usimple_lock_try";
-
- if (!usld_lock_common_checks(l, caller)) {
- return;
- }
-
- if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED)) {
- panic("%s: lock 0x%x became uninitialized",
- caller, (integer_t) l);
- }
- if ((l->debug.state & USLOCK_TAKEN)) {
- panic("%s: lock 0x%x became TAKEN by someone else",
- caller, (integer_t) l);
- }
-
- mycpu = cpu_number();
- l->debug.lock_thread = (void *) current_thread();
- l->debug.state |= USLOCK_TAKEN;
- l->debug.lock_pc = pc;
- l->debug.lock_cpu = mycpu;
-
- usl_trace(l, mycpu, pc, caller);
-}
-
-
-/*
- * For very special cases, set traced_lock to point to a
- * specific lock of interest. The result is a series of
- * XPRs showing lock operations on that lock. The lock_seq
- * value is used to show the order of those operations.
- */
-usimple_lock_t traced_lock;
-unsigned int lock_seq;
-
-void
-usl_trace(
- usimple_lock_t l,
- int mycpu,
- pc_t pc,
- const char *op_name)
-{
- if (traced_lock == l) {
- XPR(XPR_SLOCK,
- "seq %d, cpu %d, %s @ %x\n",
- (integer_t) lock_seq, (integer_t) mycpu,
- (integer_t) op_name, (integer_t) pc, 0);
- lock_seq++;
- }
-}
-
-
-#endif /* USLOCK_DEBUG */
-
/*
* The C portion of the shared/exclusive locks package.
*/
* compute the deadline to spin against when
* waiting for a change of state on a lck_rw_t
*/
-#if __SMP__
static inline uint64_t
lck_rw_deadline_for_spin(lck_rw_t *lck)
{
return mach_absolute_time() + (100000LL * 1000000000LL);
}
}
-#endif // __SMP__
static boolean_t
lck_rw_drain_status(lck_rw_t *lock, uint32_t status_mask, boolean_t wait __unused)
{
-#if __SMP__
uint64_t deadline = 0;
uint32_t data;
if (wait) {
wait_for_event();
} else {
- clear_exclusive();
+ os_atomic_clear_exclusive();
}
if (!wait || (mach_absolute_time() >= deadline)) {
return FALSE;
}
}
- clear_exclusive();
+ os_atomic_clear_exclusive();
return TRUE;
-#else
- uint32_t data;
-
- data = ordered_load_rw(lock);
- if ((data & status_mask) == 0) {
- return TRUE;
- } else {
- return FALSE;
- }
-#endif // __SMP__
}
/*
static inline void
lck_rw_interlock_spin(lck_rw_t *lock)
{
-#if __SMP__
uint32_t data;
for (;;) {
if (data & LCK_RW_INTERLOCK) {
wait_for_event();
} else {
- clear_exclusive();
+ os_atomic_clear_exclusive();
return;
}
}
-#else
- panic("lck_rw_interlock_spin(): Interlock locked %p %x", lock, lock->lck_rw_data);
-#endif
}
/*
uint32_t data, prev;
boolean_t do_exch;
-#if __SMP__
if (wait) {
deadline = lck_rw_deadline_for_spin(lock);
}
-#else
- wait = FALSE; // Don't spin on UP systems
-#endif
for (;;) {
data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
lck_grp_t *grp,
lck_attr_t *attr)
{
- lck_rw_t *lck;
-
- if ((lck = (lck_rw_t *)kalloc(sizeof(lck_rw_t))) != 0) {
- lck_rw_init(lck, grp, attr);
- }
+ lck_rw_t *lck;
+ lck = zalloc_flags(ZV_LCK_RW, Z_WAITOK | Z_ZERO);
+ lck_rw_init(lck, grp, attr);
return lck;
}
lck_grp_t *grp)
{
lck_rw_destroy(lck, grp);
- kfree(lck, sizeof(lck_rw_t));
+ zfree(ZV_LCK_RW, lck);
}
/*
}
}
+#define LCK_RW_LOCK_EXCLUSIVE_TAS(lck) (atomic_test_and_set32(&(lck)->lck_rw_data, \
+ (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK), \
+ LCK_RW_WANT_EXCL, memory_order_acquire_smp, FALSE))
+
+/*
+ * Routine: lck_rw_lock_exclusive_check_contended
+ */
+bool
+lck_rw_lock_exclusive_check_contended(lck_rw_t *lock)
+{
+ thread_t thread = current_thread();
+ bool contended = false;
+
+ if (lock->lck_rw_can_sleep) {
+ thread->rwlock_count++;
+ } else if (get_preemption_level() == 0) {
+ panic("Taking non-sleepable RW lock with preemption enabled");
+ }
+ if (LCK_RW_LOCK_EXCLUSIVE_TAS(lock)) {
+#if CONFIG_DTRACE
+ LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
+#endif /* CONFIG_DTRACE */
+ } else {
+ contended = true;
+ lck_rw_lock_exclusive_gen(lock);
+ }
+#if MACH_ASSERT
+ thread_t owner = ordered_load_rw_owner(lock);
+ assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
+#endif
+ ordered_store_rw_owner(lock, thread);
+ return contended;
+}
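+/*
+ * Hypothetical caller sketch: the return value only reports whether the
+ * exclusive acquisition had to take the slow (contended) path, so a
+ * subsystem could, for instance, keep its own contention counter
+ * (`stats` is a placeholder structure, not part of this file):
+ *
+ *     if (lck_rw_lock_exclusive_check_contended(&stats->lock)) {
+ *         stats->contended_acquires++;
+ *     }
+ *     ...
+ *     lck_rw_done(&stats->lock);
+ */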
+
/*
* Routine: lck_rw_lock_exclusive
*/
{
thread_t thread = current_thread();
- thread->rwlock_count++;
- if (atomic_test_and_set32(&lock->lck_rw_data,
- (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK),
- LCK_RW_WANT_EXCL, memory_order_acquire_smp, FALSE)) {
+ if (lock->lck_rw_can_sleep) {
+ thread->rwlock_count++;
+ } else if (get_preemption_level() == 0) {
+ panic("Taking non-sleepable RW lock with preemption enabled");
+ }
+ if (LCK_RW_LOCK_EXCLUSIVE_TAS(lock)) {
#if CONFIG_DTRACE
LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
#endif /* CONFIG_DTRACE */
{
uint32_t data, prev;
- current_thread()->rwlock_count++;
+ if (lock->lck_rw_can_sleep) {
+ current_thread()->rwlock_count++;
+ } else if (get_preemption_level() == 0) {
+ panic("Taking non-sleepable RW lock with preemption enabled");
+ }
for (;;) {
data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK)) {
/*
* Routine: lck_rw_lock_shared_to_exclusive
+ *
+ * False is returned upon failure; in that case the shared lock is dropped.
*/
boolean_t
lck_rw_lock_shared_to_exclusive(lck_rw_t *lock)
uint32_t rwlock_count;
/* Check if dropping the lock means that we need to unpromote */
- rwlock_count = thread->rwlock_count--;
+ if (lck->lck_rw_can_sleep) {
+ rwlock_count = thread->rwlock_count--;
+ } else {
+ rwlock_count = UINT32_MAX;
+ }
#if MACH_LDEBUG
if (rwlock_count == 0) {
panic("rw lock count underflow for thread %p", thread);
for (;;) {
data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_release_smp);
if (data & LCK_RW_INTERLOCK) {
-#if __SMP__
atomic_exchange_abort();
lck_rw_interlock_spin(lock); /* wait for interlock to clear */
continue;
-#else
- panic("lck_rw_lock_exclusive_to_shared(): Interlock locked (%p): %x", lock, data);
-#endif // __SMP__
}
data += LCK_RW_SHARED_READER;
if (data & LCK_RW_WANT_UPGRADE) {
for (;;) {
data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
if (data & LCK_RW_INTERLOCK) {
-#if __SMP__
atomic_exchange_abort();
lck_rw_interlock_spin(lock);
continue;
-#else
- panic("lck_rw_try_lock_shared(): Interlock locked (%p): %x", lock, data);
-#endif
}
if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) {
atomic_exchange_abort();
thread_t owner = ordered_load_rw_owner(lock);
assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
#endif
- current_thread()->rwlock_count++;
+
+ if (lock->lck_rw_can_sleep) {
+ current_thread()->rwlock_count++;
+ } else if (get_preemption_level() == 0) {
+ panic("Taking non-sleepable RW lock with preemption enabled");
+ }
+
#if CONFIG_DTRACE
LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED);
#endif /* CONFIG_DTRACE */
for (;;) {
data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
if (data & LCK_RW_INTERLOCK) {
-#if __SMP__
atomic_exchange_abort();
lck_rw_interlock_spin(lock);
continue;
-#else
- panic("lck_rw_try_lock_exclusive(): Interlock locked (%p): %x", lock, data);
-#endif
}
if (data & (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) {
atomic_exchange_abort();
cpu_pause();
}
thread = current_thread();
- thread->rwlock_count++;
+ if (lock->lck_rw_can_sleep) {
+ thread->rwlock_count++;
+ } else if (get_preemption_level() == 0) {
+ panic("Taking non-sleepable RW lock with preemption enabled");
+ }
#if MACH_ASSERT
thread_t owner = ordered_load_rw_owner(lock);
assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
for (;;) {
data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_release_smp);
if (data & LCK_RW_INTERLOCK) { /* wait for interlock to clear */
-#if __SMP__
atomic_exchange_abort();
lck_rw_interlock_spin(lock);
continue;
-#else
- panic("lck_rw_done(): Interlock locked (%p): %x", lock, data);
-#endif // __SMP__
}
if (data & LCK_RW_SHARED_MASK) { /* lock is held shared */
assertf(lock->lck_rw_owner == THREAD_NULL, "state=0x%x, owner=%p", lock->lck_rw_data, lock->lck_rw_owner);
/* Check if dropping the lock means that we need to unpromote */
thread = current_thread();
- rwlock_count = thread->rwlock_count--;
+ if (fake_lck.can_sleep) {
+ rwlock_count = thread->rwlock_count--;
+ } else {
+ rwlock_count = UINT32_MAX;
+ }
#if MACH_LDEBUG
if (rwlock_count == 0) {
panic("rw lock count underflow for thread %p", thread);
#endif /* CONFIG_DTRACE */
}
-
+/*
+ * Required to verify thread ownership for exclusive locks by virtue of PPL
+ * usage
+ */
void
lck_rw_assert(
lck_rw_t *lck,
{
lck_mtx_t *lck;
- if ((lck = (lck_mtx_t *) kalloc(sizeof(lck_mtx_t))) != 0) {
- lck_mtx_init(lck, grp, attr);
- }
-
+ lck = zalloc(ZV_LCK_MTX);
+ lck_mtx_init(lck, grp, attr);
return lck;
}
lck_grp_t * grp)
{
lck_mtx_destroy(lck, grp);
- kfree(lck, sizeof(lck_mtx_t));
+ zfree(ZV_LCK_MTX, lck);
}
/*
#ifdef BER_XXX
if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
- if ((lck_ext = (lck_mtx_ext_t *) kalloc(sizeof(lck_mtx_ext_t))) != 0) {
- lck_mtx_ext_init(lck_ext, grp, lck_attr);
- lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
- lck->lck_mtx_ptr = lck_ext;
- lck->lck_mtx_type = LCK_MTX_TYPE;
- }
+ lck_ext = zalloc(ZV_LCK_MTX_EXT);
+ lck_mtx_ext_init(lck_ext, grp, lck_attr);
+ lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
+ lck->lck_mtx_ptr = lck_ext;
+ lck->lck_mtx_type = LCK_MTX_TYPE;
} else
#endif
{
lck->lck_mtx_ptr = NULL; // Clear any padding in the union fields below
lck->lck_mtx_waiters = 0;
- lck->lck_mtx_pri = 0;
lck->lck_mtx_type = LCK_MTX_TYPE;
ordered_store_mtx(lck, 0);
}
lck->lck_mtx_type = LCK_MTX_TYPE;
} else {
lck->lck_mtx_waiters = 0;
- lck->lck_mtx_pri = 0;
lck->lck_mtx_type = LCK_MTX_TYPE;
ordered_store_mtx(lck, 0);
}
lck_mtx_check_preemption(lck_mtx_t *lock)
{
#if DEVELOPMENT || DEBUG
+ if (current_cpu_datap()->cpu_hibernate) {
+ return;
+ }
+
int pl = get_preemption_level();
if (pl != 0) {
lck_mtx_verify(lock);
lck_mtx_check_preemption(lock);
thread = current_thread();
- if (atomic_compare_exchange(&lock->lck_mtx_data, 0, LCK_MTX_THREAD_TO_STATE(thread),
- memory_order_acquire_smp, FALSE)) {
+ if (os_atomic_cmpxchg(&lock->lck_mtx_data,
+ 0, LCK_MTX_THREAD_TO_STATE(thread), acquire)) {
#if CONFIG_DTRACE
LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, lock, 0);
#endif /* CONFIG_DTRACE */
uintptr_t state;
int waiters = 0;
spinwait_result_t sw_res;
+ struct turnstile *ts = NULL;
/* Loop waiting until I see that the mutex is unowned */
for (;;) {
switch (sw_res) {
case SPINWAIT_ACQUIRED:
+ if (ts != NULL) {
+ interlock_lock(lock);
+ turnstile_complete((uintptr_t)lock, NULL, NULL, TURNSTILE_KERNEL_MUTEX);
+ interlock_unlock(lock);
+ }
goto done;
case SPINWAIT_INTERLOCK:
goto set_owner;
break;
}
ordered_store_mtx(lock, (state | LCK_ILOCK | ARM_LCK_WAITERS)); // Set waiters bit and wait
- lck_mtx_lock_wait(lock, holding_thread);
+ lck_mtx_lock_wait(lock, holding_thread, &ts);
/* returns interlock unlocked */
}
if (state & ARM_LCK_WAITERS) {
/* Skip lck_mtx_lock_acquire if there are no waiters. */
- waiters = lck_mtx_lock_acquire(lock);
+ waiters = lck_mtx_lock_acquire(lock, ts);
+ /*
+ * lck_mtx_lock_acquire will call
+ * turnstile_complete
+ */
+ } else {
+ if (ts != NULL) {
+ turnstile_complete((uintptr_t)lock, NULL, NULL, TURNSTILE_KERNEL_MUTEX);
+ }
}
state = LCK_MTX_THREAD_TO_STATE(thread);
if (waiters != 0) {
state |= ARM_LCK_WAITERS;
}
-#if __SMP__
state |= LCK_ILOCK; // Preserve interlock
ordered_store_mtx(lock, state); // Set ownership
interlock_unlock(lock); // Release interlock, enable preemption
-#else
- ordered_store_mtx(lock, state); // Set ownership
- enable_preemption();
-#endif
done:
load_memory_barrier();
+ assert(thread->turnstile != NULL);
+
+ if (ts != NULL) {
+ turnstile_cleanup();
+ }
+
#if CONFIG_DTRACE
LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, lock, 0);
#endif /* CONFIG_DTRACE */
lck_mtx_lock_contended_spinwait_arm(lck_mtx_t *lock, thread_t thread, boolean_t interlocked)
{
int has_interlock = (int)interlocked;
-#if __SMP__
__kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock);
- thread_t holder;
- uint64_t overall_deadline;
- uint64_t check_owner_deadline;
- uint64_t cur_time;
- spinwait_result_t retval = SPINWAIT_DID_SPIN;
- int loopcount = 0;
- uintptr_t state;
- boolean_t istate;
+ thread_t owner, prev_owner;
+ uint64_t window_deadline, sliding_deadline, high_deadline;
+ uint64_t start_time, cur_time, avg_hold_time, bias, delta;
+ int loopcount = 0;
+ uint i, prev_owner_cpu;
+ int total_hold_time_samples, window_hold_time_samples, unfairness;
+ bool owner_on_core, adjust;
+ uintptr_t state, new_state, waiters;
+ spinwait_result_t retval = SPINWAIT_DID_SPIN_HIGH_THR;
if (__improbable(!(lck_mtx_adaptive_spin_mode & ADAPTIVE_SPIN_ENABLE))) {
if (!has_interlock) {
return SPINWAIT_DID_NOT_SPIN;
}
state = ordered_load_mtx(lock);

KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_START,
trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(LCK_MTX_STATE_TO_THREAD(state)), lock->lck_mtx_waiters, 0, 0);
- cur_time = mach_absolute_time();
- overall_deadline = cur_time + MutexSpin;
- check_owner_deadline = cur_time;
-
- if (has_interlock) {
- istate = ml_get_interrupts_enabled();
+ start_time = mach_absolute_time();
+ /*
+ * window_deadline represents the "learning" phase.
+ * The thread collects statistics about the lock until
+ * window_deadline and then decides whether to spin more
+ * or block, according to the concurrency behavior
+ * observed.
+ *
+ * Every thread can spin at least low_MutexSpin.
+ */
+ window_deadline = start_time + low_MutexSpin;
+ /*
+ * Sliding_deadline is the adjusted spin deadline
+ * computed after the "learning" phase.
+ */
+ sliding_deadline = window_deadline;
+ /*
+ * High_deadline is a hard deadline. No thread
+ * can spin past this deadline.
+ */
+ if (high_MutexSpin >= 0) {
+ high_deadline = start_time + high_MutexSpin;
+ } else {
+ high_deadline = start_time + low_MutexSpin * real_ncpus;
}
+ /*
+ * We do not yet know which cpu the owner is running on.
+ * Initialize prev_owner_cpu with the next cpu.
+ */
+ prev_owner_cpu = (cpu_number() + 1) % real_ncpus;
+ total_hold_time_samples = 0;
+ window_hold_time_samples = 0;
+ avg_hold_time = 0;
+ adjust = TRUE;
+ bias = (os_hash_kernel_pointer(lock) + cpu_number()) % real_ncpus;
+
/* Snoop the lock state */
state = ordered_load_mtx(lock);
+ owner = LCK_MTX_STATE_TO_THREAD(state);
+ prev_owner = owner;
+
+ if (has_interlock) {
+ if (owner == NULL) {
+ retval = SPINWAIT_INTERLOCK;
+ goto done_spinning;
+ } else {
+ /*
+ * We are holding the interlock, so
+ * we can safely dereference owner.
+ */
+ if (!machine_thread_on_core(owner) || (owner->state & TH_IDLE)) {
+ retval = SPINWAIT_DID_NOT_SPIN;
+ goto done_spinning;
+ }
+ }
+ interlock_unlock(lock);
+ has_interlock = 0;
+ }
/*
* Spin while:
* - mutex is locked, and
* - it's locked as a spin lock, and
* - owner is running on another processor, and
- * - owner (processor) is not idling, and
* - we haven't spun for long enough.
*/
do {
- if (!(state & LCK_ILOCK) || has_interlock) {
- if (!has_interlock) {
- has_interlock = interlock_try_disable_interrupts(lock, &istate);
+ /*
+ * Try to acquire the lock.
+ */
+ owner = LCK_MTX_STATE_TO_THREAD(state);
+ if (owner == NULL) {
+ waiters = state & ARM_LCK_WAITERS;
+ if (waiters) {
+ /*
+ * preserve the waiter bit
+ * and try to acquire the interlock.
+ * Note: we will successfully acquire
+ * the interlock only if we can also
+ * acquire the lock.
+ */
+ new_state = ARM_LCK_WAITERS | LCK_ILOCK;
+ has_interlock = 1;
+ retval = SPINWAIT_INTERLOCK;
+ disable_preemption();
+ } else {
+ new_state = LCK_MTX_THREAD_TO_STATE(thread);
+ retval = SPINWAIT_ACQUIRED;
}
- if (has_interlock) {
- state = ordered_load_mtx(lock);
- holder = LCK_MTX_STATE_TO_THREAD(state);
+ /*
+ * The cmpxchg will succeed only if the lock
+ * is not owned (doesn't have an owner set)
+ * and it is not interlocked.
+ * It will not fail if there are waiters.
+ */
+ if (os_atomic_cmpxchgv(&lock->lck_mtx_data,
+ waiters, new_state, &state, acquire)) {
+ goto done_spinning;
+ } else {
+ if (waiters) {
+ has_interlock = 0;
+ enable_preemption();
+ }
+ }
+ }
- if (holder == NULL) {
- retval = SPINWAIT_INTERLOCK;
+ cur_time = mach_absolute_time();
- if (istate) {
- ml_set_interrupts_enabled(istate);
- }
+ /*
+ * Never spin past high_deadline.
+ */
+ if (cur_time >= high_deadline) {
+ retval = SPINWAIT_DID_SPIN_HIGH_THR;
+ break;
+ }
- break;
- }
+ /*
+ * Check if owner is on core. If not block.
+ */
+ owner = LCK_MTX_STATE_TO_THREAD(state);
+ if (owner) {
+ i = prev_owner_cpu;
+ owner_on_core = FALSE;
- if (!(holder->machine.machine_thread_flags & MACHINE_THREAD_FLAGS_ON_CPU) ||
- (holder->state & TH_IDLE)) {
- if (loopcount == 0) {
- retval = SPINWAIT_DID_NOT_SPIN;
- }
+ disable_preemption();
+ state = ordered_load_mtx(lock);
+ owner = LCK_MTX_STATE_TO_THREAD(state);
- if (istate) {
- ml_set_interrupts_enabled(istate);
+ /*
+ * For scalability we want to check if the owner is on core
+ * without locking the mutex interlock.
+ * If we do not lock the mutex interlock, the owner that we see might be
+ * invalid, so we cannot dereference it. Therefore we cannot check
+ * any field of the thread to tell us if it is on core.
+ * Check whether the thread running on any other cpu matches the owner.
+ */
+ if (owner) {
+ do {
+ cpu_data_t *cpu_data_ptr = CpuDataEntries[i].cpu_data_vaddr;
+ if ((cpu_data_ptr != NULL) && (cpu_data_ptr->cpu_active_thread == owner)) {
+ owner_on_core = TRUE;
+ break;
}
-
- break;
+ if (++i >= real_ncpus) {
+ i = 0;
+ }
+ } while (i != prev_owner_cpu);
+ enable_preemption();
+
+ if (owner_on_core) {
+ prev_owner_cpu = i;
+ } else {
+ prev_owner = owner;
+ state = ordered_load_mtx(lock);
+ owner = LCK_MTX_STATE_TO_THREAD(state);
+ if (owner == prev_owner) {
+ /*
+ * Owner is not on core.
+ * Stop spinning.
+ */
+ if (loopcount == 0) {
+ retval = SPINWAIT_DID_NOT_SPIN;
+ } else {
+ retval = SPINWAIT_DID_SPIN_OWNER_NOT_CORE;
+ }
+ break;
+ }
+ /*
+ * Fall through if the owner changed while we were scanning.
+ * The new owner could potentially be on core, so loop
+ * again.
+ */
}
-
- interlock_unlock_enable_interrupts(lock, istate);
- has_interlock = 0;
+ } else {
+ enable_preemption();
}
}
- cur_time = mach_absolute_time();
-
- if (cur_time >= overall_deadline) {
- break;
+ /*
+ * Save how many times we see the owner changing.
+ * We can roughly estimate the mutex hold
+ * time and the fairness with that.
+ */
+ if (owner != prev_owner) {
+ prev_owner = owner;
+ total_hold_time_samples++;
+ window_hold_time_samples++;
}
- check_owner_deadline = cur_time + (MutexSpin / SPINWAIT_OWNER_CHECK_COUNT);
+ /*
+ * Learning window expired.
+ * Try to adjust the sliding_deadline.
+ */
+ if (cur_time >= window_deadline) {
+ /*
+ * If there was not contention during the window
+ * stop spinning.
+ */
+ if (window_hold_time_samples < 1) {
+ retval = SPINWAIT_DID_SPIN_NO_WINDOW_CONTENTION;
+ break;
+ }
+
+ if (adjust) {
+ /*
+ * For a fair lock, we'd wait for at most (NCPU-1) periods,
+ * but the lock is unfair, so let's try to estimate by how much.
+ */
+ unfairness = total_hold_time_samples / real_ncpus;
+
+ if (unfairness == 0) {
+ /*
+ * We observed the owner changing `total_hold_time_samples` times which
+ * let us estimate the average hold time of this mutex for the duration
+ * of the spin time.
+ * avg_hold_time = (cur_time - start_time) / total_hold_time_samples;
+ *
+ * In this case spin at max avg_hold_time * (real_ncpus - 1)
+ */
+ delta = cur_time - start_time;
+ sliding_deadline = start_time + (delta * (real_ncpus - 1)) / total_hold_time_samples;
+ } else {
+ /*
+ * In this case at least one of the other cpus was able to get the lock twice
+ * while I was spinning.
+ * We could spin longer but it won't necessarily help if the system is unfair.
+ * Try to randomize the wait to reduce contention.
+ *
+ * We compute how much time we could potentially spin
+ * and distribute it over the cpus.
+ *
+ * bias is an integer between 0 and real_ncpus - 1.
+ * distributed_increment = ((high_deadline - cur_time) / real_ncpus) * bias
+ */
+ delta = high_deadline - cur_time;
+ sliding_deadline = cur_time + ((delta * bias) / real_ncpus);
+ adjust = FALSE;
+ }
+ }
- if (cur_time < check_owner_deadline) {
- machine_delay_until(check_owner_deadline - cur_time, check_owner_deadline);
+ window_deadline += low_MutexSpin;
+ window_hold_time_samples = 0;
}
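+ /*
+ * Worked example with illustrative numbers (assuming real_ncpus = 4,
+ * start_time = 0, cur_time = 100):
+ * - total_hold_time_samples = 2 gives unfairness = 2 / 4 = 0, so
+ *   sliding_deadline = 0 + (100 * 3) / 2 = 150, i.e. roughly
+ *   avg_hold_time * (real_ncpus - 1) of spinning in total.
+ * - total_hold_time_samples = 8 gives unfairness = 8 / 4 = 2, so with
+ *   high_deadline = 400 and bias = 1 the thread picks
+ *   sliding_deadline = 100 + (300 * 1) / 4 = 175 and stops adjusting
+ *   (adjust = FALSE).
+ */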
- /* Snoop the lock state */
- state = ordered_load_mtx(lock);
+ /*
+ * Stop spinning if we are past
+ * the adjusted deadline.
+ */
+ if (cur_time >= sliding_deadline) {
+ retval = SPINWAIT_DID_SPIN_SLIDING_THR;
+ break;
+ }
- if (state == 0) {
- /* Try to grab the lock. */
- if (os_atomic_cmpxchg(&lock->lck_mtx_data,
- 0, LCK_MTX_THREAD_TO_STATE(thread), acquire)) {
- retval = SPINWAIT_ACQUIRED;
- break;
- }
+ /*
+ * We want to arm the monitor for wfe,
+ * so load the lock exclusively.
+ *
+ * NOTE:
+ * we rely on the fact that wfe will
+ * eventually return even if the cache line
+ * is not modified. This way we will keep
+ * looping and checking whether the deadlines have expired.
+ */
+ state = os_atomic_load_exclusive(&lock->lck_mtx_data, relaxed);
+ owner = LCK_MTX_STATE_TO_THREAD(state);
+ if (owner != NULL) {
+ wait_for_event();
+ state = ordered_load_mtx(lock);
+ } else {
+ atomic_exchange_abort();
}
loopcount++;
} while (TRUE);
+done_spinning:
#if CONFIG_DTRACE
/*
- * We've already kept a count via overall_deadline of how long we spun.
- * If dtrace is active, then we compute backwards to decide how
- * long we spun.
- *
* Note that we record a different probe id depending on whether
* this is a direct or indirect mutex. This allows us to
* penalize only lock groups that have debug/stats enabled
*/
if (__probable(lock->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)) {
LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN, lock,
- mach_absolute_time() - (overall_deadline - MutexSpin));
+ mach_absolute_time() - start_time);
} else {
LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_SPIN, lock,
- mach_absolute_time() - (overall_deadline - MutexSpin));
+ mach_absolute_time() - start_time);
}
/* The lockstat acquire event is recorded by the caller. */
#endif
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_END,
trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(LCK_MTX_STATE_TO_THREAD(state)), lock->lck_mtx_waiters, retval, 0);
-#else /* __SMP__ */
- /* Spinwaiting is not useful on UP systems. */
-#pragma unused(lock, thread)
- int retval = SPINWAIT_DID_NOT_SPIN;
-#endif /* __SMP__ */
if ((!has_interlock) && (retval != SPINWAIT_ACQUIRED)) {
/* We must own either the lock or the interlock on return. */
interlock_lock(lock);
return retval;
}
+
/*
* Common code for mutex locking as spinlock
*/
thread_t thread = current_thread();
lck_mtx_verify(lock);
- if (atomic_compare_exchange(&lock->lck_mtx_data, 0, LCK_MTX_THREAD_TO_STATE(thread),
- memory_order_acquire_smp, FALSE)) {
+ if (os_atomic_cmpxchg(&lock->lck_mtx_data,
+ 0, LCK_MTX_THREAD_TO_STATE(thread), acquire)) {
#if CONFIG_DTRACE
LOCKSTAT_RECORD(LS_LCK_MTX_TRY_LOCK_ACQUIRE, lock, 0);
#endif /* CONFIG_DTRACE */
uintptr_t state;
int waiters;
-#if __SMP__
interlock_lock(lock);
state = ordered_load_mtx(lock);
holding_thread = LCK_MTX_STATE_TO_THREAD(state);
interlock_unlock(lock);
return FALSE;
}
-#else
- disable_preemption_for_thread(thread);
- state = ordered_load_mtx(lock);
- if (state & LCK_ILOCK) {
- panic("Unexpected interlock set (%p)", lock);
- }
- holding_thread = LCK_MTX_STATE_TO_THREAD(state);
- if (holding_thread) {
- enable_preemption();
- return FALSE;
- }
- state |= LCK_ILOCK;
- ordered_store_mtx(lock, state);
-#endif // __SMP__
- waiters = lck_mtx_lock_acquire(lock);
+ waiters = lck_mtx_lock_acquire(lock, NULL);
state = LCK_MTX_THREAD_TO_STATE(thread);
if (waiters != 0) {
state |= ARM_LCK_WAITERS;
}
-#if __SMP__
state |= LCK_ILOCK; // Preserve interlock
ordered_store_mtx(lock, state); // Set ownership
interlock_unlock(lock); // Release interlock, enable preemption
-#else
- ordered_store_mtx(lock, state); // Set ownership
- enable_preemption();
-#endif
load_memory_barrier();
+
+ turnstile_cleanup();
+
return TRUE;
}
goto slow_case;
}
// Locked as a mutex
- if (atomic_compare_exchange(&lock->lck_mtx_data, LCK_MTX_THREAD_TO_STATE(thread), 0,
- memory_order_release_smp, FALSE)) {
+ if (os_atomic_cmpxchg(&lock->lck_mtx_data,
+ LCK_MTX_THREAD_TO_STATE(thread), 0, release)) {
#if CONFIG_DTRACE
LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, lock, 0);
#endif /* CONFIG_DTRACE */
lck_mtx_unlock_contended(lck_mtx_t *lock, thread_t thread, boolean_t ilk_held)
{
uintptr_t state;
+ boolean_t cleanup = FALSE;
if (ilk_held) {
state = ordered_load_mtx(lock);
} else {
-#if __SMP__
interlock_lock(lock);
state = ordered_load_mtx(lock);
if (thread != LCK_MTX_STATE_TO_THREAD(state)) {
panic("lck_mtx_unlock(): Attempt to release lock not owned by thread (%p)", lock);
}
-#else
- disable_preemption_for_thread(thread);
- state = ordered_load_mtx(lock);
- if (state & LCK_ILOCK) {
- panic("lck_mtx_unlock(): Unexpected interlock set (%p)", lock);
- }
- if (thread != LCK_MTX_STATE_TO_THREAD(state)) {
- panic("lck_mtx_unlock(): Attempt to release lock not owned by thread (%p)", lock);
- }
- state |= LCK_ILOCK;
- ordered_store_mtx(lock, state);
-#endif
if (state & ARM_LCK_WAITERS) {
- lck_mtx_unlock_wakeup(lock, thread);
- state = ordered_load_mtx(lock);
- } else {
- assertf(lock->lck_mtx_pri == 0, "pri=0x%x", lock->lck_mtx_pri);
+ if (lck_mtx_unlock_wakeup(lock, thread)) {
+ state = ARM_LCK_WAITERS;
+ } else {
+ state = 0;
+ }
+ cleanup = TRUE;
+ goto unlock;
}
}
state &= ARM_LCK_WAITERS; /* Clear state, retain waiters bit */
-#if __SMP__
+unlock:
state |= LCK_ILOCK;
ordered_store_mtx(lock, state);
interlock_unlock(lock);
-#else
- ordered_store_mtx(lock, state);
- enable_preemption();
-#endif
+ if (cleanup) {
+ /*
+ * Do not do any turnstile operations outside of this block.
+ * lock/unlock is called at an early stage of boot with a single thread,
+ * when turnstiles are not yet initialized.
+ * Even without contention we can come through the slow path
+ * if the mutex is acquired as a spin lock.
+ */
+ turnstile_cleanup();
+ }
#if CONFIG_DTRACE
LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, lock, 0);
}
state &= ~(LCK_MTX_THREAD_MASK); // Clear the spin tag
ordered_store_mtx(lock, state);
- waiters = lck_mtx_lock_acquire(lock); // Acquire to manage priority boosts
+ waiters = lck_mtx_lock_acquire(lock, NULL); // Acquire to manage priority boosts
state = LCK_MTX_THREAD_TO_STATE(thread);
if (waiters != 0) {
state |= ARM_LCK_WAITERS;
}
-#if __SMP__
state |= LCK_ILOCK;
ordered_store_mtx(lock, state); // Set ownership
interlock_unlock(lock); // Release interlock, enable preemption
-#else
- ordered_store_mtx(lock, state); // Set ownership
- enable_preemption();
-#endif
+ turnstile_cleanup();
}
if (holder != 0) {
if (holder == thread) {
panic("Lock owned by current thread %p = %lx", lock, state);
- } else {
- panic("Lock %p owned by thread %p", lock, holder);
}
}
- if (state & LCK_ILOCK) {
- panic("Lock bit set %p = %lx", lock, state);
- }
} else {
panic("lck_spin_assert(): invalid arg (%u)", type);
}