#include <mach_ldebug.h>
-#include <kern/kalloc.h>
+#include <kern/zalloc.h>
#include <kern/lock_stat.h>
#include <kern/locks.h>
#include <kern/misc_protos.h>
#include <kern/debug.h>
#include <kern/kcdata.h>
#include <string.h>
+#include <arm/cpu_internal.h>
+#include <os/hash.h>
+#include <arm/cpu_data.h>
#include <arm/cpu_data_internal.h>
#include <arm/proc_reg.h>
// These are undesirable during a panic or while a debugger is running.
#define LOCK_CORRECTNESS_PANIC() (kernel_debugger_entry_count == 0)
-unsigned int LcksOpts = 0;
-
#define ADAPTIVE_SPIN_ENABLE 0x1
-#if __SMP__
int lck_mtx_adaptive_spin_mode = ADAPTIVE_SPIN_ENABLE;
-#else /* __SMP__ */
-int lck_mtx_adaptive_spin_mode = 0;
-#endif /* __SMP__ */
#define SPINWAIT_OWNER_CHECK_COUNT 4
typedef enum {
SPINWAIT_ACQUIRED, /* Got the lock. */
SPINWAIT_INTERLOCK, /* Got the interlock, no owner, but caller must finish acquiring the lock. */
- SPINWAIT_DID_SPIN, /* Got the interlock, spun, but failed to get the lock. */
+	SPINWAIT_DID_SPIN_HIGH_THR,             /* Got the interlock, spun, but failed to get the lock: hit the high (hard) spin deadline. */
+	SPINWAIT_DID_SPIN_OWNER_NOT_CORE,       /* Got the interlock, spun, but failed to get the lock: owner is not running on a core. */
+	SPINWAIT_DID_SPIN_NO_WINDOW_CONTENTION, /* Got the interlock, spun, but failed to get the lock: no contention seen during the learning window. */
+	SPINWAIT_DID_SPIN_SLIDING_THR,          /* Got the interlock, spun, but failed to get the lock: hit the adjusted (sliding) spin deadline. */
SPINWAIT_DID_NOT_SPIN, /* Got the interlock, did not spin. */
} spinwait_result_t;
-#if CONFIG_DTRACE && __SMP__
+#if CONFIG_DTRACE
extern uint64_t dtrace_spin_threshold;
#endif
#define enable_interrupts() __asm__ volatile ("cpsie if" ::: "memory");
#endif
+ZONE_VIEW_DEFINE(ZV_LCK_SPIN, "lck_spin",
+ KHEAP_ID_DEFAULT, sizeof(lck_spin_t));
+
+ZONE_VIEW_DEFINE(ZV_LCK_MTX, "lck_mtx",
+ KHEAP_ID_DEFAULT, sizeof(lck_mtx_t));
+
+ZONE_VIEW_DEFINE(ZV_LCK_MTX_EXT, "lck_mtx_ext",
+ KHEAP_ID_DEFAULT, sizeof(lck_mtx_ext_t));
+
+ZONE_VIEW_DEFINE(ZV_LCK_RW, "lck_rw",
+ KHEAP_ID_DEFAULT, sizeof(lck_rw_t));
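+
+/*
+ * Allocation sketch (mirrors the alloc/free paths later in this diff):
+ * lock objects now come from per-type zone views over KHEAP_ID_DEFAULT
+ * instead of raw kalloc/kfree, e.g.:
+ *
+ *	lck_spin_t *lck = zalloc(ZV_LCK_SPIN);
+ *	...
+ *	zfree(ZV_LCK_SPIN, lck);
+ */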
+
/*
* Forward declarations
*/
uint32_t value;
#if __arm__
- if (memory_order_has_release(ord)) {
+ if (_os_atomic_mo_has_release(ord)) {
// Pre-load release barrier
atomic_thread_fence(memory_order_release);
}
value = __builtin_arm_ldrex(target);
#else
- if (memory_order_has_acquire(ord)) {
+ if (_os_atomic_mo_has_acquire(ord)) {
value = __builtin_arm_ldaex(target); // ldaxr
} else {
value = __builtin_arm_ldrex(target); // ldxr
#if __arm__
err = __builtin_arm_strex(value, target);
- if (memory_order_has_acquire(ord)) {
+ if (_os_atomic_mo_has_acquire(ord)) {
// Post-store acquire barrier
atomic_thread_fence(memory_order_acquire);
}
#else
- if (memory_order_has_release(ord)) {
+ if (_os_atomic_mo_has_release(ord)) {
err = __builtin_arm_stlex(value, target); // stlxr
} else {
err = __builtin_arm_strex(value, target); // stxr
return atomic_test_and_set32(target, test_mask, set_mask, ord, wait);
}
+/*
+ * To help _disable_preemption() inline everywhere with LTO,
+ * we keep this panic path in a separate, non-inlineable function:
+ * the panic() codegen setup is quite large and would otherwise force a frame.
+ */
+__abortlike
+static void
+_disable_preemption_overflow(void)
+{
+ panic("Preemption count overflow");
+}
+
void
_disable_preemption(void)
{
thread_t thread = current_thread();
unsigned int count = thread->machine.preemption_count;
- count += 1;
- if (__improbable(count == 0)) {
- panic("Preemption count overflow");
+ if (__improbable(++count == 0)) {
+ _disable_preemption_overflow();
}
os_atomic_store(&thread->machine.preemption_count, count, compiler_acq_rel);
}
}
+/*
+ * To help _enable_preemption() inline everywhere with LTO,
+ * we keep this panic path in a separate, non-inlineable function:
+ * the panic() codegen setup is quite large and would otherwise force a frame.
+ */
+__abortlike
+static void
+_enable_preemption_underflow(void)
+{
+ panic("Preemption count underflow");
+}
+
void
_enable_preemption(void)
{
unsigned int count = thread->machine.preemption_count;
if (__improbable(count == 0)) {
- panic("Preemption count underflow");
+ _enable_preemption_underflow();
}
count -= 1;
if (count == 0) {
kernel_preempt_check(thread);
}
+
+ os_compiler_barrier();
}
int
return current_thread()->machine.preemption_count;
}
-#if __SMP__
-static inline boolean_t
-interlock_try_disable_interrupts(
- lck_mtx_t *mutex,
- boolean_t *istate)
-{
- *istate = ml_set_interrupts_enabled(FALSE);
-
- if (interlock_try(mutex)) {
- return 1;
- } else {
- ml_set_interrupts_enabled(*istate);
- return 0;
- }
-}
-
-static inline void
-interlock_unlock_enable_interrupts(
- lck_mtx_t *mutex,
- boolean_t istate)
-{
- interlock_unlock(mutex);
- ml_set_interrupts_enabled(istate);
-}
-#endif /* __SMP__ */
-
/*
* Routine: lck_spin_alloc_init
*/
lck_grp_t * grp,
lck_attr_t * attr)
{
- lck_spin_t *lck;
-
- if ((lck = (lck_spin_t *) kalloc(sizeof(lck_spin_t))) != 0) {
- lck_spin_init(lck, grp, attr);
- }
+ lck_spin_t *lck;
+ lck = zalloc(ZV_LCK_SPIN);
+ lck_spin_init(lck, grp, attr);
return lck;
}
lck_grp_t * grp)
{
lck_spin_destroy(lck, grp);
- kfree(lck, sizeof(lck_spin_t));
+ zfree(ZV_LCK_SPIN, lck);
}
/*
/*
* arm_usimple_lock is a lck_spin_t without a group or attributes
*/
-void inline
+MARK_AS_HIBERNATE_TEXT void inline
arm_usimple_lock_init(simple_lock_t lck, __unused unsigned short initial_value)
{
lck->type = LCK_SPIN_TYPE;
* compute the deadline to spin against when
* waiting for a change of state on a lck_rw_t
*/
-#if __SMP__
static inline uint64_t
lck_rw_deadline_for_spin(lck_rw_t *lck)
{
return mach_absolute_time() + (100000LL * 1000000000LL);
}
}
-#endif // __SMP__
static boolean_t
lck_rw_drain_status(lck_rw_t *lock, uint32_t status_mask, boolean_t wait __unused)
{
-#if __SMP__
uint64_t deadline = 0;
uint32_t data;
}
os_atomic_clear_exclusive();
return TRUE;
-#else
- uint32_t data;
-
- data = ordered_load_rw(lock);
- if ((data & status_mask) == 0) {
- return TRUE;
- } else {
- return FALSE;
- }
-#endif // __SMP__
}
/*
static inline void
lck_rw_interlock_spin(lck_rw_t *lock)
{
-#if __SMP__
uint32_t data;
for (;;) {
return;
}
}
-#else
- panic("lck_rw_interlock_spin(): Interlock locked %p %x", lock, lock->lck_rw_data);
-#endif
}
/*
uint32_t data, prev;
boolean_t do_exch;
-#if __SMP__
if (wait) {
deadline = lck_rw_deadline_for_spin(lock);
}
-#else
- wait = FALSE; // Don't spin on UP systems
-#endif
for (;;) {
data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
lck_grp_t *grp,
lck_attr_t *attr)
{
- lck_rw_t *lck;
-
- if ((lck = (lck_rw_t *)kalloc(sizeof(lck_rw_t))) != 0) {
- lck_rw_init(lck, grp, attr);
- }
+ lck_rw_t *lck;
+ lck = zalloc_flags(ZV_LCK_RW, Z_WAITOK | Z_ZERO);
+ lck_rw_init(lck, grp, attr);
return lck;
}
lck_grp_t *grp)
{
lck_rw_destroy(lck, grp);
- kfree(lck, sizeof(lck_rw_t));
+ zfree(ZV_LCK_RW, lck);
}
/*
}
}
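+/*
+ * Fast path for exclusive acquisition: atomically set LCK_RW_WANT_EXCL, but
+ * only if no reader, writer, upgrade or interlock bit is currently set;
+ * evaluates to FALSE (leaving the lock word untouched) otherwise.
+ */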
+#define LCK_RW_LOCK_EXCLUSIVE_TAS(lck) (atomic_test_and_set32(&(lck)->lck_rw_data, \
+ (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK), \
+ LCK_RW_WANT_EXCL, memory_order_acquire_smp, FALSE))
+
+/*
+ * Routine: lck_rw_lock_exclusive_check_contended
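+ *
+ *	Like lck_rw_lock_exclusive(), but additionally reports whether the
+ *	contended (slow) path had to be taken, so callers can track contention.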
+ */
+bool
+lck_rw_lock_exclusive_check_contended(lck_rw_t *lock)
+{
+ thread_t thread = current_thread();
+ bool contended = false;
+
+ if (lock->lck_rw_can_sleep) {
+ thread->rwlock_count++;
+ } else if (get_preemption_level() == 0) {
+ panic("Taking non-sleepable RW lock with preemption enabled");
+ }
+ if (LCK_RW_LOCK_EXCLUSIVE_TAS(lock)) {
+#if CONFIG_DTRACE
+ LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
+#endif /* CONFIG_DTRACE */
+ } else {
+ contended = true;
+ lck_rw_lock_exclusive_gen(lock);
+ }
+#if MACH_ASSERT
+ thread_t owner = ordered_load_rw_owner(lock);
+ assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
+#endif
+ ordered_store_rw_owner(lock, thread);
+ return contended;
+}
+
/*
* Routine: lck_rw_lock_exclusive
*/
{
thread_t thread = current_thread();
- thread->rwlock_count++;
- if (atomic_test_and_set32(&lock->lck_rw_data,
- (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK),
- LCK_RW_WANT_EXCL, memory_order_acquire_smp, FALSE)) {
+ if (lock->lck_rw_can_sleep) {
+ thread->rwlock_count++;
+ } else if (get_preemption_level() == 0) {
+ panic("Taking non-sleepable RW lock with preemption enabled");
+ }
+ if (LCK_RW_LOCK_EXCLUSIVE_TAS(lock)) {
#if CONFIG_DTRACE
LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
#endif /* CONFIG_DTRACE */
{
uint32_t data, prev;
- current_thread()->rwlock_count++;
+ if (lock->lck_rw_can_sleep) {
+ current_thread()->rwlock_count++;
+ } else if (get_preemption_level() == 0) {
+ panic("Taking non-sleepable RW lock with preemption enabled");
+ }
for (;;) {
data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK)) {
uint32_t rwlock_count;
/* Check if dropping the lock means that we need to unpromote */
- rwlock_count = thread->rwlock_count--;
+ if (lck->lck_rw_can_sleep) {
+ rwlock_count = thread->rwlock_count--;
+ } else {
+ rwlock_count = UINT32_MAX;
+ }
#if MACH_LDEBUG
if (rwlock_count == 0) {
panic("rw lock count underflow for thread %p", thread);
for (;;) {
data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_release_smp);
if (data & LCK_RW_INTERLOCK) {
-#if __SMP__
atomic_exchange_abort();
lck_rw_interlock_spin(lock); /* wait for interlock to clear */
continue;
-#else
- panic("lck_rw_lock_exclusive_to_shared(): Interlock locked (%p): %x", lock, data);
-#endif // __SMP__
}
data += LCK_RW_SHARED_READER;
if (data & LCK_RW_WANT_UPGRADE) {
for (;;) {
data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
if (data & LCK_RW_INTERLOCK) {
-#if __SMP__
atomic_exchange_abort();
lck_rw_interlock_spin(lock);
continue;
-#else
- panic("lck_rw_try_lock_shared(): Interlock locked (%p): %x", lock, data);
-#endif
}
if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) {
atomic_exchange_abort();
thread_t owner = ordered_load_rw_owner(lock);
assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
#endif
- current_thread()->rwlock_count++;
+
+ if (lock->lck_rw_can_sleep) {
+ current_thread()->rwlock_count++;
+ } else if (get_preemption_level() == 0) {
+ panic("Taking non-sleepable RW lock with preemption enabled");
+ }
+
#if CONFIG_DTRACE
LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED);
#endif /* CONFIG_DTRACE */
for (;;) {
data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
if (data & LCK_RW_INTERLOCK) {
-#if __SMP__
atomic_exchange_abort();
lck_rw_interlock_spin(lock);
continue;
-#else
- panic("lck_rw_try_lock_exclusive(): Interlock locked (%p): %x", lock, data);
-#endif
}
if (data & (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) {
atomic_exchange_abort();
cpu_pause();
}
thread = current_thread();
- thread->rwlock_count++;
+ if (lock->lck_rw_can_sleep) {
+ thread->rwlock_count++;
+ } else if (get_preemption_level() == 0) {
+ panic("Taking non-sleepable RW lock with preemption enabled");
+ }
#if MACH_ASSERT
thread_t owner = ordered_load_rw_owner(lock);
assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
for (;;) {
data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_release_smp);
if (data & LCK_RW_INTERLOCK) { /* wait for interlock to clear */
-#if __SMP__
atomic_exchange_abort();
lck_rw_interlock_spin(lock);
continue;
-#else
- panic("lck_rw_done(): Interlock locked (%p): %x", lock, data);
-#endif // __SMP__
}
if (data & LCK_RW_SHARED_MASK) { /* lock is held shared */
assertf(lock->lck_rw_owner == THREAD_NULL, "state=0x%x, owner=%p", lock->lck_rw_data, lock->lck_rw_owner);
/* Check if dropping the lock means that we need to unpromote */
thread = current_thread();
- rwlock_count = thread->rwlock_count--;
+ if (fake_lck.can_sleep) {
+ rwlock_count = thread->rwlock_count--;
+ } else {
+ rwlock_count = UINT32_MAX;
+ }
#if MACH_LDEBUG
if (rwlock_count == 0) {
panic("rw lock count underflow for thread %p", thread);
#endif /* CONFIG_DTRACE */
}
-
+/*
+ * Required to verify thread ownership for exclusive locks, since the PPL
+ * relies on it.
+ */
void
lck_rw_assert(
lck_rw_t *lck,
{
lck_mtx_t *lck;
- if ((lck = (lck_mtx_t *) kalloc(sizeof(lck_mtx_t))) != 0) {
- lck_mtx_init(lck, grp, attr);
- }
-
+ lck = zalloc(ZV_LCK_MTX);
+ lck_mtx_init(lck, grp, attr);
return lck;
}
lck_grp_t * grp)
{
lck_mtx_destroy(lck, grp);
- kfree(lck, sizeof(lck_mtx_t));
+ zfree(ZV_LCK_MTX, lck);
}
/*
#ifdef BER_XXX
if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
- if ((lck_ext = (lck_mtx_ext_t *) kalloc(sizeof(lck_mtx_ext_t))) != 0) {
- lck_mtx_ext_init(lck_ext, grp, lck_attr);
- lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
- lck->lck_mtx_ptr = lck_ext;
- lck->lck_mtx_type = LCK_MTX_TYPE;
- }
+ lck_ext = zalloc(ZV_LCK_MTX_EXT);
+ lck_mtx_ext_init(lck_ext, grp, lck_attr);
+ lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
+ lck->lck_mtx_ptr = lck_ext;
+ lck->lck_mtx_type = LCK_MTX_TYPE;
} else
#endif
{
lck_mtx_check_preemption(lck_mtx_t *lock)
{
#if DEVELOPMENT || DEBUG
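+	/*
+	 * Presumably the hibernation save/restore path legitimately runs with
+	 * preemption disabled, so skip this debug-only check while hibernating.
+	 */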
+ if (current_cpu_datap()->cpu_hibernate) {
+ return;
+ }
+
int pl = get_preemption_level();
if (pl != 0) {
if (waiters != 0) {
state |= ARM_LCK_WAITERS;
}
-#if __SMP__
state |= LCK_ILOCK; // Preserve interlock
ordered_store_mtx(lock, state); // Set ownership
interlock_unlock(lock); // Release interlock, enable preemption
-#else
- ordered_store_mtx(lock, state); // Set ownership
- enable_preemption();
-#endif
done:
load_memory_barrier();
lck_mtx_lock_contended_spinwait_arm(lck_mtx_t *lock, thread_t thread, boolean_t interlocked)
{
int has_interlock = (int)interlocked;
-#if __SMP__
__kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock);
- thread_t holder;
- uint64_t overall_deadline;
- uint64_t check_owner_deadline;
- uint64_t cur_time;
- spinwait_result_t retval = SPINWAIT_DID_SPIN;
- int loopcount = 0;
- uintptr_t state;
- boolean_t istate;
+ thread_t owner, prev_owner;
+ uint64_t window_deadline, sliding_deadline, high_deadline;
+ uint64_t start_time, cur_time, avg_hold_time, bias, delta;
+ int loopcount = 0;
+ uint i, prev_owner_cpu;
+ int total_hold_time_samples, window_hold_time_samples, unfairness;
+ bool owner_on_core, adjust;
+ uintptr_t state, new_state, waiters;
+ spinwait_result_t retval = SPINWAIT_DID_SPIN_HIGH_THR;
if (__improbable(!(lck_mtx_adaptive_spin_mode & ADAPTIVE_SPIN_ENABLE))) {
if (!has_interlock) {
return SPINWAIT_DID_NOT_SPIN;
}
- state = ordered_load_mtx(lock);
-
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_START,
trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(LCK_MTX_STATE_TO_THREAD(state)), lock->lck_mtx_waiters, 0, 0);
- cur_time = mach_absolute_time();
- overall_deadline = cur_time + MutexSpin;
- check_owner_deadline = cur_time;
-
- if (has_interlock) {
- istate = ml_get_interrupts_enabled();
+ start_time = mach_absolute_time();
+ /*
+ * window_deadline represents the "learning" phase.
+	 * The thread collects statistics about the lock during
+	 * this window and then decides whether to keep spinning
+	 * or block, based on the concurrency behavior observed.
+	 *
+	 * Every thread can spin for at least low_MutexSpin.
+ */
+ window_deadline = start_time + low_MutexSpin;
+ /*
+ * Sliding_deadline is the adjusted spin deadline
+ * computed after the "learning" phase.
+ */
+ sliding_deadline = window_deadline;
+ /*
+	 * high_deadline is a hard deadline: no thread
+	 * can spin past it.
+ */
+ if (high_MutexSpin >= 0) {
+ high_deadline = start_time + high_MutexSpin;
+ } else {
+ high_deadline = start_time + low_MutexSpin * real_ncpus;
}
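+	/*
+	 * E.g. (illustrative): if high_MutexSpin is left negative, the hard cap
+	 * defaults to one low_MutexSpin learning window per cpu in the system.
+	 */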
+ /*
+	 * We do not yet know which cpu the owner is running on.
+	 * Initialize prev_owner_cpu with the next cpu.
+ */
+ prev_owner_cpu = (cpu_number() + 1) % real_ncpus;
+ total_hold_time_samples = 0;
+ window_hold_time_samples = 0;
+ avg_hold_time = 0;
+ adjust = TRUE;
+ bias = (os_hash_kernel_pointer(lock) + cpu_number()) % real_ncpus;
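+	/*
+	 * E.g. (illustrative, assuming a 4-cpu system): hashing the lock address
+	 * and adding the caller's cpu number yields a bias in [0, 3] that differs
+	 * across (lock, cpu) pairs and staggers the sliding deadlines computed below.
+	 */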
+
/* Snoop the lock state */
state = ordered_load_mtx(lock);
+ owner = LCK_MTX_STATE_TO_THREAD(state);
+ prev_owner = owner;
+
+ if (has_interlock) {
+ if (owner == NULL) {
+ retval = SPINWAIT_INTERLOCK;
+ goto done_spinning;
+ } else {
+ /*
+ * We are holding the interlock, so
+ * we can safely dereference owner.
+ */
+ if (!machine_thread_on_core(owner) || (owner->state & TH_IDLE)) {
+ retval = SPINWAIT_DID_NOT_SPIN;
+ goto done_spinning;
+ }
+ }
+ interlock_unlock(lock);
+ has_interlock = 0;
+ }
/*
* Spin while:
* - mutex is locked, and
* - it's locked as a spin lock, and
* - owner is running on another processor, and
- * - owner (processor) is not idling, and
* - we haven't spun for long enough.
*/
do {
- if (!(state & LCK_ILOCK) || has_interlock) {
- if (!has_interlock) {
- has_interlock = interlock_try_disable_interrupts(lock, &istate);
+ /*
+ * Try to acquire the lock.
+ */
+ owner = LCK_MTX_STATE_TO_THREAD(state);
+ if (owner == NULL) {
+ waiters = state & ARM_LCK_WAITERS;
+ if (waiters) {
+ /*
+ * preserve the waiter bit
+			 * and try to acquire the interlock.
+ * Note: we will successfully acquire
+ * the interlock only if we can also
+ * acquire the lock.
+ */
+ new_state = ARM_LCK_WAITERS | LCK_ILOCK;
+ has_interlock = 1;
+ retval = SPINWAIT_INTERLOCK;
+ disable_preemption();
+ } else {
+ new_state = LCK_MTX_THREAD_TO_STATE(thread);
+ retval = SPINWAIT_ACQUIRED;
}
- if (has_interlock) {
- state = ordered_load_mtx(lock);
- holder = LCK_MTX_STATE_TO_THREAD(state);
+ /*
+			 * The cmpxchg will succeed only if the lock
+ * is not owned (doesn't have an owner set)
+ * and it is not interlocked.
+ * It will not fail if there are waiters.
+ */
+ if (os_atomic_cmpxchgv(&lock->lck_mtx_data,
+ waiters, new_state, &state, acquire)) {
+ goto done_spinning;
+ } else {
+ if (waiters) {
+ has_interlock = 0;
+ enable_preemption();
+ }
+ }
+ }
- if (holder == NULL) {
- retval = SPINWAIT_INTERLOCK;
+ cur_time = mach_absolute_time();
- if (istate) {
- ml_set_interrupts_enabled(istate);
- }
+ /*
+ * Never spin past high_deadline.
+ */
+ if (cur_time >= high_deadline) {
+ retval = SPINWAIT_DID_SPIN_HIGH_THR;
+ break;
+ }
- break;
- }
+ /*
+		 * Check if the owner is on core. If not, block.
+ */
+ owner = LCK_MTX_STATE_TO_THREAD(state);
+ if (owner) {
+ i = prev_owner_cpu;
+ owner_on_core = FALSE;
- if (!(holder->machine.machine_thread_flags & MACHINE_THREAD_FLAGS_ON_CPU) ||
- (holder->state & TH_IDLE)) {
- if (loopcount == 0) {
- retval = SPINWAIT_DID_NOT_SPIN;
- }
+ disable_preemption();
+ state = ordered_load_mtx(lock);
+ owner = LCK_MTX_STATE_TO_THREAD(state);
- if (istate) {
- ml_set_interrupts_enabled(istate);
+ /*
+ * For scalability we want to check if the owner is on core
+ * without locking the mutex interlock.
+ * If we do not lock the mutex interlock, the owner that we see might be
+ * invalid, so we cannot dereference it. Therefore we cannot check
+ * any field of the thread to tell us if it is on core.
+			 * Instead, check whether the thread currently running on another cpu matches the owner.
+ */
+ if (owner) {
+ do {
+ cpu_data_t *cpu_data_ptr = CpuDataEntries[i].cpu_data_vaddr;
+ if ((cpu_data_ptr != NULL) && (cpu_data_ptr->cpu_active_thread == owner)) {
+ owner_on_core = TRUE;
+ break;
}
-
- break;
+ if (++i >= real_ncpus) {
+ i = 0;
+ }
+ } while (i != prev_owner_cpu);
+ enable_preemption();
+
+ if (owner_on_core) {
+ prev_owner_cpu = i;
+ } else {
+ prev_owner = owner;
+ state = ordered_load_mtx(lock);
+ owner = LCK_MTX_STATE_TO_THREAD(state);
+ if (owner == prev_owner) {
+ /*
+ * Owner is not on core.
+ * Stop spinning.
+ */
+ if (loopcount == 0) {
+ retval = SPINWAIT_DID_NOT_SPIN;
+ } else {
+ retval = SPINWAIT_DID_SPIN_OWNER_NOT_CORE;
+ }
+ break;
+ }
+ /*
+ * Fall through if the owner changed while we were scanning.
+ * The new owner could potentially be on core, so loop
+ * again.
+ */
}
-
- interlock_unlock_enable_interrupts(lock, istate);
- has_interlock = 0;
+ } else {
+ enable_preemption();
}
}
- cur_time = mach_absolute_time();
-
- if (cur_time >= overall_deadline) {
- break;
+ /*
+		 * Track how many times we see the owner change.
+		 * From that we can roughly estimate the mutex hold
+		 * time and the fairness.
+ */
+ if (owner != prev_owner) {
+ prev_owner = owner;
+ total_hold_time_samples++;
+ window_hold_time_samples++;
}
- check_owner_deadline = cur_time + (MutexSpin / SPINWAIT_OWNER_CHECK_COUNT);
+ /*
+ * Learning window expired.
+ * Try to adjust the sliding_deadline.
+ */
+ if (cur_time >= window_deadline) {
+ /*
+			 * If there was no contention during the window,
+			 * stop spinning.
+ */
+ if (window_hold_time_samples < 1) {
+ retval = SPINWAIT_DID_SPIN_NO_WINDOW_CONTENTION;
+ break;
+ }
- if (cur_time < check_owner_deadline) {
- machine_delay_until(check_owner_deadline - cur_time, check_owner_deadline);
+ if (adjust) {
+ /*
+ * For a fair lock, we'd wait for at most (NCPU-1) periods,
+ * but the lock is unfair, so let's try to estimate by how much.
+ */
+ unfairness = total_hold_time_samples / real_ncpus;
+
+ if (unfairness == 0) {
+ /*
+				 * We observed the owner changing `total_hold_time_samples` times,
+				 * which lets us estimate the average hold time of this mutex
+				 * over the time we have been spinning.
+ * avg_hold_time = (cur_time - start_time) / total_hold_time_samples;
+ *
+ * In this case spin at max avg_hold_time * (real_ncpus - 1)
+ */
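+				/*
+				 * Illustrative numbers (assumed, not tuned values):
+				 * with real_ncpus == 4, delta == 60us and
+				 * total_hold_time_samples == 2, the average hold time
+				 * is ~30us and sliding_deadline becomes start_time + 90us,
+				 * i.e. roughly (real_ncpus - 1) average hold times of spinning.
+				 */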
+ delta = cur_time - start_time;
+ sliding_deadline = start_time + (delta * (real_ncpus - 1)) / total_hold_time_samples;
+ } else {
+ /*
+ * In this case at least one of the other cpus was able to get the lock twice
+ * while I was spinning.
+ * We could spin longer but it won't necessarily help if the system is unfair.
+ * Try to randomize the wait to reduce contention.
+ *
+ * We compute how much time we could potentially spin
+ * and distribute it over the cpus.
+ *
+				 * bias is an integer between 0 and real_ncpus - 1.
+ * distributed_increment = ((high_deadline - cur_time) / real_ncpus) * bias
+ */
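+				/*
+				 * Illustrative numbers (assumed): with real_ncpus == 4 and
+				 * 80us left until high_deadline, contenders with bias 0..3
+				 * stop spinning at +0us, +20us, +40us and +60us respectively,
+				 * so they do not all stop spinning at the same instant.
+				 */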
+ delta = high_deadline - cur_time;
+ sliding_deadline = cur_time + ((delta * bias) / real_ncpus);
+ adjust = FALSE;
+ }
+ }
+
+ window_deadline += low_MutexSpin;
+ window_hold_time_samples = 0;
}
- /* Snoop the lock state */
- state = ordered_load_mtx(lock);
+ /*
+		 * Stop spinning if we are past
+		 * the adjusted deadline.
+ */
+ if (cur_time >= sliding_deadline) {
+ retval = SPINWAIT_DID_SPIN_SLIDING_THR;
+ break;
+ }
- if (state == 0) {
- /* Try to grab the lock. */
- if (os_atomic_cmpxchg(&lock->lck_mtx_data,
- 0, LCK_MTX_THREAD_TO_STATE(thread), acquire)) {
- retval = SPINWAIT_ACQUIRED;
- break;
- }
+ /*
+ * We want to arm the monitor for wfe,
+	 * so load the lock exclusively.
+ *
+ * NOTE:
+ * we rely on the fact that wfe will
+ * eventually return even if the cache line
+ * is not modified. This way we will keep
+ * looping and checking if the deadlines expired.
+ */
+ state = os_atomic_load_exclusive(&lock->lck_mtx_data, relaxed);
+ owner = LCK_MTX_STATE_TO_THREAD(state);
+ if (owner != NULL) {
+ wait_for_event();
+ state = ordered_load_mtx(lock);
+ } else {
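+			/* Lock appears free: clear the exclusive monitor armed above and retry the acquire immediately. */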
+ atomic_exchange_abort();
}
loopcount++;
} while (TRUE);
+done_spinning:
#if CONFIG_DTRACE
/*
- * We've already kept a count via overall_deadline of how long we spun.
- * If dtrace is active, then we compute backwards to decide how
- * long we spun.
- *
* Note that we record a different probe id depending on whether
* this is a direct or indirect mutex. This allows us to
* penalize only lock groups that have debug/stats enabled
*/
if (__probable(lock->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)) {
LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN, lock,
- mach_absolute_time() - (overall_deadline - MutexSpin));
+ mach_absolute_time() - start_time);
} else {
LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_SPIN, lock,
- mach_absolute_time() - (overall_deadline - MutexSpin));
+ mach_absolute_time() - start_time);
}
/* The lockstat acquire event is recorded by the caller. */
#endif
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_END,
trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(LCK_MTX_STATE_TO_THREAD(state)), lock->lck_mtx_waiters, retval, 0);
-#else /* __SMP__ */
- /* Spinwaiting is not useful on UP systems. */
-#pragma unused(lock, thread)
- int retval = SPINWAIT_DID_NOT_SPIN;
-#endif /* __SMP__ */
if ((!has_interlock) && (retval != SPINWAIT_ACQUIRED)) {
/* We must own either the lock or the interlock on return. */
interlock_lock(lock);
return retval;
}
+
/*
* Common code for mutex locking as spinlock
*/
uintptr_t state;
int waiters;
-#if __SMP__
interlock_lock(lock);
state = ordered_load_mtx(lock);
holding_thread = LCK_MTX_STATE_TO_THREAD(state);
interlock_unlock(lock);
return FALSE;
}
-#else
- disable_preemption_for_thread(thread);
- state = ordered_load_mtx(lock);
- if (state & LCK_ILOCK) {
- panic("Unexpected interlock set (%p)", lock);
- }
- holding_thread = LCK_MTX_STATE_TO_THREAD(state);
- if (holding_thread) {
- enable_preemption();
- return FALSE;
- }
- state |= LCK_ILOCK;
- ordered_store_mtx(lock, state);
-#endif // __SMP__
waiters = lck_mtx_lock_acquire(lock, NULL);
state = LCK_MTX_THREAD_TO_STATE(thread);
if (waiters != 0) {
state |= ARM_LCK_WAITERS;
}
-#if __SMP__
state |= LCK_ILOCK; // Preserve interlock
ordered_store_mtx(lock, state); // Set ownership
interlock_unlock(lock); // Release interlock, enable preemption
-#else
- ordered_store_mtx(lock, state); // Set ownership
- enable_preemption();
-#endif
load_memory_barrier();
turnstile_cleanup();
if (ilk_held) {
state = ordered_load_mtx(lock);
} else {
-#if __SMP__
interlock_lock(lock);
state = ordered_load_mtx(lock);
if (thread != LCK_MTX_STATE_TO_THREAD(state)) {
panic("lck_mtx_unlock(): Attempt to release lock not owned by thread (%p)", lock);
}
-#else
- disable_preemption_for_thread(thread);
- state = ordered_load_mtx(lock);
- if (state & LCK_ILOCK) {
- panic("lck_mtx_unlock(): Unexpected interlock set (%p)", lock);
- }
- if (thread != LCK_MTX_STATE_TO_THREAD(state)) {
- panic("lck_mtx_unlock(): Attempt to release lock not owned by thread (%p)", lock);
- }
- state |= LCK_ILOCK;
- ordered_store_mtx(lock, state);
-#endif
if (state & ARM_LCK_WAITERS) {
if (lck_mtx_unlock_wakeup(lock, thread)) {
state = ARM_LCK_WAITERS;
}
state &= ARM_LCK_WAITERS; /* Clear state, retain waiters bit */
unlock:
-#if __SMP__
state |= LCK_ILOCK;
ordered_store_mtx(lock, state);
interlock_unlock(lock);
-#else
- ordered_store_mtx(lock, state);
- enable_preemption();
-#endif
if (cleanup) {
/*
* Do not do any turnstile operations outside of this block.
if (waiters != 0) {
state |= ARM_LCK_WAITERS;
}
-#if __SMP__
state |= LCK_ILOCK;
ordered_store_mtx(lock, state); // Set ownership
interlock_unlock(lock); // Release interlock, enable preemption
-#else
- ordered_store_mtx(lock, state); // Set ownership
- enable_preemption();
-#endif
turnstile_cleanup();
}