/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
* any improvements or extensions that they make and grant Carnegie Mellon
* the rights to redistribute these changes.
*/
+
+#define ATOMIC_PRIVATE 1
+#define LOCK_PRIVATE 1
+
#include <mach_ldebug.h>
#include <debug.h>
#include <kern/processor.h>
#include <kern/sched_prim.h>
#include <kern/debug.h>
+#include <machine/atomic.h>
+#include <machine/machine_cpu.h>
#include <string.h>
#define LCK_MTX_LCK_WAIT_CODE 2
#define LCK_MTX_UNLCK_WAKEUP_CODE 3
+#if MACH_LDEBUG
+#define ALIGN_TEST(p,t) do{if((uintptr_t)p&(sizeof(t)-1)) __builtin_trap();}while(0)
+#else
+#define ALIGN_TEST(p,t) do{}while(0)
+#endif
+
+/* Silence the volatile to _Atomic cast warning */
+#define ATOMIC_CAST(t,p) ((_Atomic t*)(uintptr_t)(p))
+
+/* Enforce program order of loads and stores. */
+#define ordered_load(target, type) \
+ __c11_atomic_load((_Atomic type *)(target), memory_order_relaxed)
+#define ordered_store(target, type, value) \
+ __c11_atomic_store((_Atomic type *)(target), value, memory_order_relaxed)
+
+#define ordered_load_hw(lock) ordered_load(&(lock)->lock_data, uintptr_t)
+#define ordered_store_hw(lock, value) ordered_store(&(lock)->lock_data, uintptr_t, (value))
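+
+/*
+ * Usage sketch for the ordered_* wrappers (example_lock is hypothetical and
+ * assumes the hw_lock_data_t storage type from the machine locks header):
+ * both macros expand to relaxed C11 atomics, i.e. single untorn accesses to
+ * lock_data with no memory barrier attached.
+ *
+ *    hw_lock_data_t example_lock;
+ *
+ *    ordered_store_hw(&example_lock, 0);                  // atomic store of lock_data
+ *    uintptr_t word = ordered_load_hw(&example_lock);     // atomic load of lock_data
+ */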
+
+#define NOINLINE __attribute__((noinline))
+
+
static queue_head_t lck_grp_queue;
static unsigned int lck_grp_cnt;
lck_grp_t LockCompatGroup;
lck_attr_t LockDefaultLckAttr;
+#if CONFIG_DTRACE && __SMP__
+#if defined (__x86_64__)
+uint64_t dtrace_spin_threshold = 500; // 500ns
+#elif defined(__arm__) || defined(__arm64__)
+uint64_t dtrace_spin_threshold = LOCK_PANIC_TIMEOUT / 1000000; // 500ns
+#endif
+#endif
+
/*
* Routine: lck_mod_init
*/
if (!PE_parse_boot_argn("lcks", &LcksOpts, sizeof (LcksOpts)))
LcksOpts = 0;
+
+#if (DEVELOPMENT || DEBUG) && defined(__x86_64__)
+ if (!PE_parse_boot_argn("-disable_mtx_chk", &LckDisablePreemptCheck, sizeof (LckDisablePreemptCheck)))
+ LckDisablePreemptCheck = 0;
+#endif /* (DEVELOPMENT || DEBUG) && defined(__x86_64__) */
+
queue_init(&lck_grp_queue);
/*
lck_attr_setdefault(&LockDefaultLckAttr);
lck_mtx_init_ext(&lck_grp_lock, &lck_grp_lock_ext, &LockCompatGroup, &LockDefaultLckAttr);
-
}
/*
void
lck_grp_init(lck_grp_t * grp, const char * grp_name, lck_grp_attr_t * attr)
{
+ /* make sure locking infrastructure has been initialized */
+ assert(lck_grp_cnt > 0);
+
bzero((void *)grp, sizeof(lck_grp_t));
(void)strlcpy(grp->lck_grp_name, grp_name, LCK_GRP_MAX_NAME);
lck_type_t lck_type)
{
unsigned int *lckcnt;
+ int updated;
switch (lck_type) {
case LCK_TYPE_SPIN:
lckcnt = &grp->lck_grp_rwcnt;
break;
default:
- return panic("lck_grp_lckcnt_decr(): invalid lock type: %d\n", lck_type);
+ panic("lck_grp_lckcnt_decr(): invalid lock type: %d\n", lck_type);
+ return;
}
- (void)hw_atomic_sub(lckcnt, 1);
+ updated = (int)hw_atomic_sub(lckcnt, 1);
+ assert(updated >= 0);
}
/*
lck_attr_setdefault(
lck_attr_t *attr)
{
-#if __i386__ || __x86_64__
+#if __arm__ || __arm64__
+ /* <rdar://problem/4404579>: Using LCK_ATTR_DEBUG here causes panic at boot time for arm */
+ attr->lck_attr_val = LCK_ATTR_NONE;
+#elif __i386__ || __x86_64__
#if !DEBUG
if (LcksOpts & enaLkDeb)
attr->lck_attr_val = LCK_ATTR_DEBUG;
kfree(attr, sizeof(lck_attr_t));
}
+/*
+ * Routine: hw_lock_init
+ *
+ * Initialize a hardware lock.
+ */
+void
+hw_lock_init(hw_lock_t lock)
+{
+ ordered_store_hw(lock, 0);
+}
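+
+/*
+ * Initialization sketch (struct widget is hypothetical): hw_lock_init() only
+ * clears lock_data, so a lock embedded in zero-filled memory starts out
+ * unlocked, but an explicit init documents the intent.
+ *
+ *    struct widget {
+ *        hw_lock_data_t  w_lock;
+ *        int             w_count;
+ *    };
+ *
+ *    static void
+ *    widget_setup(struct widget *w)
+ *    {
+ *        hw_lock_init(&w->w_lock);
+ *        w->w_count = 0;
+ *    }
+ */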
+
+/*
+ * Routine: hw_lock_lock_contended
+ *
+ * Spin until lock is acquired or timeout expires.
+ * timeout is in mach_absolute_time ticks. Called with
+ * preemption disabled.
+ */
+
+#if __SMP__
+static unsigned int NOINLINE
+hw_lock_lock_contended(hw_lock_t lock, uintptr_t data, uint64_t timeout, boolean_t do_panic)
+{
+ uint64_t end = 0;
+ uintptr_t holder = lock->lock_data;
+ int i;
+
+ if (timeout == 0)
+ timeout = LOCK_PANIC_TIMEOUT;
+#if CONFIG_DTRACE
+ uint64_t begin;
+ boolean_t dtrace_enabled = lockstat_probemap[LS_LCK_SPIN_LOCK_SPIN] != 0;
+ if (__improbable(dtrace_enabled))
+ begin = mach_absolute_time();
+#endif
+ for ( ; ; ) {
+ for (i = 0; i < LOCK_SNOOP_SPINS; i++) {
+ cpu_pause();
+#if (!__ARM_ENABLE_WFE_) || (LOCK_PRETEST)
+ holder = ordered_load_hw(lock);
+ if (holder != 0)
+ continue;
+#endif
+ if (atomic_compare_exchange(&lock->lock_data, 0, data,
+ memory_order_acquire_smp, TRUE)) {
+#if CONFIG_DTRACE
+ if (__improbable(dtrace_enabled)) {
+ uint64_t spintime = mach_absolute_time() - begin;
+ if (spintime > dtrace_spin_threshold)
+ LOCKSTAT_RECORD2(LS_LCK_SPIN_LOCK_SPIN, lock, spintime, dtrace_spin_threshold);
+ }
+#endif
+ return 1;
+ }
+ }
+ if (end == 0) {
+ end = ml_get_timebase() + timeout;
+ }
+ else if (ml_get_timebase() >= end)
+ break;
+ }
+ if (do_panic) {
+ // Capture the actual time spent blocked, which may be higher than the timeout
+ // if a misbehaving interrupt stole this thread's CPU time.
+ panic("Spinlock timeout after %llu ticks, %p = %lx",
+ (ml_get_timebase() - end + timeout), lock, holder);
+ }
+ return 0;
+}
+#endif // __SMP__
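+
+/*
+ * Sketch of how the two entry points below use the contended path: a zero
+ * timeout selects the LOCK_PANIC_TIMEOUT default.
+ *
+ *    hw_lock_lock_contended(lock, state, 0, TRUE);                    // hw_lock_lock(): panic on expiry
+ *    success = hw_lock_lock_contended(lock, state, timeout, FALSE);   // hw_lock_to(): return 0 on expiry
+ */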
+
+/*
+ * Routine: hw_lock_lock
+ *
+ * Acquire lock, spinning until it becomes available,
+ * return with preemption disabled.
+ */
+void
+hw_lock_lock(hw_lock_t lock)
+{
+ thread_t thread;
+ uintptr_t state;
+
+ thread = current_thread();
+ disable_preemption_for_thread(thread);
+ state = LCK_MTX_THREAD_TO_STATE(thread) | PLATFORM_LCK_ILOCK;
+#if __SMP__
+
+#if LOCK_PRETEST
+ if (ordered_load_hw(lock))
+ goto contended;
+#endif // LOCK_PRETEST
+ if (atomic_compare_exchange(&lock->lock_data, 0, state,
+ memory_order_acquire_smp, TRUE)) {
+ goto end;
+ }
+#if LOCK_PRETEST
+contended:
+#endif // LOCK_PRETEST
+ hw_lock_lock_contended(lock, state, 0, TRUE);
+end:
+#else // __SMP__
+ if (lock->lock_data)
+ panic("Spinlock held %p", lock);
+ lock->lock_data = state;
+#endif // __SMP__
+#if CONFIG_DTRACE
+ LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE, lock, 0);
+#endif
+ return;
+}
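+
+/*
+ * Usage sketch (widget/w_count are hypothetical): hw_lock_lock() returns with
+ * preemption disabled, so the critical section must be short and must not
+ * block; hw_lock_unlock() drops the lock and re-enables preemption.
+ *
+ *    static void
+ *    widget_increment(struct widget *w)
+ *    {
+ *        hw_lock_lock(&w->w_lock);       // spins until acquired
+ *        w->w_count++;                   // short, non-blocking work only
+ *        hw_lock_unlock(&w->w_lock);
+ *    }
+ */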
+
+/*
+ * Routine: hw_lock_to
+ *
+ * Acquire lock, spinning until it becomes available or the timeout expires.
+ * Timeout is in mach_absolute_time ticks, return with
+ * preemption disabled.
+ */
+unsigned int
+hw_lock_to(hw_lock_t lock, uint64_t timeout)
+{
+ thread_t thread;
+ uintptr_t state;
+ unsigned int success = 0;
+
+ thread = current_thread();
+ disable_preemption_for_thread(thread);
+ state = LCK_MTX_THREAD_TO_STATE(thread) | PLATFORM_LCK_ILOCK;
+#if __SMP__
+
+#if LOCK_PRETEST
+ if (ordered_load_hw(lock))
+ goto contended;
+#endif // LOCK_PRETEST
+ if (atomic_compare_exchange(&lock->lock_data, 0, state,
+ memory_order_acquire_smp, TRUE)) {
+ success = 1;
+ goto end;
+ }
+#if LOCK_PRETEST
+contended:
+#endif // LOCK_PRETEST
+ success = hw_lock_lock_contended(lock, state, timeout, FALSE);
+end:
+#else // __SMP__
+ (void)timeout;
+ if (ordered_load_hw(lock) == 0) {
+ ordered_store_hw(lock, state);
+ success = 1;
+ }
+#endif // __SMP__
+#if CONFIG_DTRACE
+ if (success)
+ LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE, lock, 0);
+#endif
+ return success;
+}
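+
+/*
+ * Usage sketch (the widget and the 10us budget are hypothetical): callers
+ * working in nanoseconds convert to ticks first, e.g. with
+ * nanoseconds_to_absolutetime(); a zero return means the timeout expired and,
+ * as noted above, preemption is still disabled when the routine returns.
+ *
+ *    uint64_t ticks;
+ *
+ *    nanoseconds_to_absolutetime(10 * NSEC_PER_USEC, &ticks);
+ *    if (!hw_lock_to(&w->w_lock, ticks)) {
+ *        // Timed out: recover or panic; preemption is still disabled here.
+ *    }
+ */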
+
+/*
+ * Routine: hw_lock_try
+ *
+ * returns with preemption disabled on success.
+ */
+unsigned int
+hw_lock_try(hw_lock_t lock)
+{
+ thread_t thread = current_thread();
+ int success = 0;
+#if LOCK_TRY_DISABLE_INT
+ long intmask;
+
+ intmask = disable_interrupts();
+#else
+ disable_preemption_for_thread(thread);
+#endif // LOCK_TRY_DISABLE_INT
+
+#if __SMP__
+#if LOCK_PRETEST
+ if (ordered_load_hw(lock))
+ goto failed;
+#endif // LOCK_PRETEST
+ success = atomic_compare_exchange(&lock->lock_data, 0, LCK_MTX_THREAD_TO_STATE(thread) | PLATFORM_LCK_ILOCK,
+ memory_order_acquire_smp, FALSE);
+#else
+ if (lock->lock_data == 0) {
+ lock->lock_data = LCK_MTX_THREAD_TO_STATE(thread) | PLATFORM_LCK_ILOCK;
+ success = 1;
+ }
+#endif // __SMP__
+
+#if LOCK_TRY_DISABLE_INT
+ if (success)
+ disable_preemption_for_thread(thread);
+#if LOCK_PRETEST
+failed:
+#endif // LOCK_PRETEST
+ restore_interrupts(intmask);
+#else
+#if LOCK_PRETEST
+failed:
+#endif // LOCK_PRETEST
+ if (!success)
+ enable_preemption();
+#endif // LOCK_TRY_DISABLE_INT
+#if CONFIG_DTRACE
+ if (success)
+ LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE, lock, 0);
+#endif
+ return success;
+}
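+
+/*
+ * Usage sketch (widget is hypothetical): hw_lock_try() never spins; on
+ * success it returns nonzero with preemption disabled, on failure it returns
+ * zero with the preemption level unchanged, so only the success path unlocks.
+ *
+ *    if (hw_lock_try(&w->w_lock)) {
+ *        w->w_count++;
+ *        hw_lock_unlock(&w->w_lock);
+ *    } else {
+ *        // Lock busy: take a slower path that does not touch w_count.
+ *    }
+ */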
+
+/*
+ * Routine: hw_lock_unlock
+ *
+ * Unconditionally release lock, release preemption level.
+ */
+void
+hw_lock_unlock(hw_lock_t lock)
+{
+ __c11_atomic_store((_Atomic uintptr_t *)&lock->lock_data, 0, memory_order_release_smp);
+#if __arm__ || __arm64__
+ // ARM tests are only for open-source exclusion
+ set_event();
+#endif // __arm__ || __arm64__
+#if CONFIG_DTRACE
+ LOCKSTAT_RECORD(LS_LCK_SPIN_UNLOCK_RELEASE, lock, 0);
+#endif /* CONFIG_DTRACE */
+ enable_preemption();
+}
+
+/*
+ * Routine hw_lock_held, doesn't change preemption state.
+ * N.B. Racy, of course.
+ */
+unsigned int
+hw_lock_held(hw_lock_t lock)
+{
+ return (ordered_load_hw(lock) != 0);
+}
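+
+/*
+ * Usage sketch: because the check is racy, hw_lock_held() is only suitable
+ * for assertions and debugging, never for deciding whether to take the lock.
+ *
+ *    assert(hw_lock_held(&w->w_lock));   // sanity check: somebody holds w_lock here
+ */
+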
/*
* Routine: lck_spin_sleep
if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
if ((lck_sleep_action & LCK_SLEEP_SPIN))
lck_mtx_lock_spin(lck);
+ else if ((lck_sleep_action & LCK_SLEEP_SPIN_ALWAYS))
+ lck_mtx_lock_spin_always(lck);
else
lck_mtx_lock(lck);
}
priority = MIN(priority, MAXPRI_PROMOTE);
thread_lock(holder);
- if (mutex->lck_mtx_pri == 0)
+ if (mutex->lck_mtx_pri == 0) {
holder->promotions++;
- holder->sched_flags |= TH_SFLAG_PROMOTED;
+ holder->sched_flags |= TH_SFLAG_PROMOTED;
+ }
+
if (mutex->lck_mtx_pri < priority && holder->sched_pri < priority) {
KERNEL_DEBUG_CONSTANT(
MACHDBG_CODE(DBG_MACH_SCHED,MACH_PROMOTE) | DBG_FUNC_NONE,
mutex->lck_mtx_waiters++;
}
+ thread_set_pending_block_hint(self, kThreadWaitKernelMutex);
assert_wait(LCK_MTX_EVENT(mutex), THREAD_UNINT);
lck_mtx_ilk_unlock(mutex);
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_END, 0, 0, 0, 0, 0);
#if CONFIG_DTRACE
/*
- * Record the Dtrace lockstat probe for blocking, block time
+ * Record the DTrace lockstat probe for blocking, block time
* measured from when we were entered.
*/
if (sleep_start) {
/* Thread still has a mutex promotion */
} else if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_RW_DEMOTE) | DBG_FUNC_NONE,
- thread->sched_pri, DEPRESSPRI, 0, 0, 0);
-
+ (uintptr_t)thread_tid(thread), thread->sched_pri, DEPRESSPRI, 0, 0);
+
set_sched_pri(thread, DEPRESSPRI);
} else {
KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_RW_DEMOTE) | DBG_FUNC_NONE,
- thread->sched_pri, thread->base_pri, 0, 0, 0);
-
+ (uintptr_t)thread_tid(thread), thread->sched_pri, thread->base_pri, 0, 0);
+
thread_recompute_sched_pri(thread, FALSE);
}
}
splx(s);
}
+/*
+ * Callout from context switch if the thread goes
+ * off core with a positive rwlock_count
+ *
+ * Called at splsched with the thread locked
+ */
+void
+lck_rw_set_promotion_locked(thread_t thread)
+{
+ if (LcksOpts & disLkRWPrio)
+ return;
+
+ integer_t priority;
+
+ priority = thread->sched_pri;
+
+ if (priority < thread->base_pri)
+ priority = thread->base_pri;
+ if (priority < BASEPRI_BACKGROUND)
+ priority = BASEPRI_BACKGROUND;
+
+ if ((thread->sched_pri < priority) ||
+ !(thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
+ KERNEL_DEBUG_CONSTANT(
+ MACHDBG_CODE(DBG_MACH_SCHED, MACH_RW_PROMOTE) | DBG_FUNC_NONE,
+ (uintptr_t)thread_tid(thread), thread->sched_pri,
+ thread->base_pri, priority, 0);
+
+ thread->sched_flags |= TH_SFLAG_RW_PROMOTED;
+
+ if (thread->sched_pri < priority)
+ set_sched_pri(thread, priority);
+ }
+}
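+
+/*
+ * Equivalent sketch of the floor computation above: the promoted priority is
+ * the maximum of the thread's current and base priorities, clamped to at
+ * least BASEPRI_BACKGROUND.
+ *
+ *    priority = MAX(MAX(thread->sched_pri, thread->base_pri), BASEPRI_BACKGROUND);
+ */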
+
kern_return_t
host_lockgroup_info(
host_t host,
lockgroup_info_t *lockgroup_info;
vm_offset_t lockgroup_info_addr;
vm_size_t lockgroup_info_size;
+ vm_size_t lockgroup_info_vmsize;
lck_grp_t *lck_grp;
unsigned int i;
- vm_size_t used;
vm_map_copy_t copy;
kern_return_t kr;
lck_mtx_lock(&lck_grp_lock);
- lockgroup_info_size = round_page(lck_grp_cnt * sizeof *lockgroup_info);
+ lockgroup_info_size = lck_grp_cnt * sizeof(*lockgroup_info);
+ lockgroup_info_vmsize = round_page(lockgroup_info_size);
kr = kmem_alloc_pageable(ipc_kernel_map,
- &lockgroup_info_addr, lockgroup_info_size, VM_KERN_MEMORY_IPC);
+ &lockgroup_info_addr, lockgroup_info_vmsize, VM_KERN_MEMORY_IPC);
if (kr != KERN_SUCCESS) {
lck_mtx_unlock(&lck_grp_lock);
return(kr);
*lockgroup_infoCntp = lck_grp_cnt;
lck_mtx_unlock(&lck_grp_lock);
- used = (*lockgroup_infoCntp) * sizeof *lockgroup_info;
-
- if (used != lockgroup_info_size)
- bzero((char *) lockgroup_info, lockgroup_info_size - used);
+ if (lockgroup_info_size != lockgroup_info_vmsize)
+ bzero((char *)lockgroup_info, lockgroup_info_vmsize - lockgroup_info_size);
kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)lockgroup_info_addr,
	(vm_map_size_t)lockgroup_info_size, TRUE, &copy);
return(KERN_SUCCESS);
}
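+
+/*
+ * Sizing sketch (the byte counts are hypothetical): if the lock groups need
+ * 3000 bytes on a 4K-page system, lockgroup_info_size is 3000,
+ * lockgroup_info_vmsize rounds up to 4096, and the trailing
+ * 4096 - 3000 = 1096 bytes are zeroed before the buffer is handed to
+ * vm_map_copyin().
+ */
+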
+/*
+ * Atomic primitives, prototyped in kern/simple_lock.h
+ * Noret versions are more efficient on some architectures
+ */
+
+uint32_t
+hw_atomic_add(volatile uint32_t *dest, uint32_t delt)
+{
+ ALIGN_TEST(dest,uint32_t);
+ return __c11_atomic_fetch_add(ATOMIC_CAST(uint32_t,dest), delt, memory_order_relaxed) + delt;
+}
+
+uint32_t
+hw_atomic_sub(volatile uint32_t *dest, uint32_t delt)
+{
+ ALIGN_TEST(dest,uint32_t);
+ return __c11_atomic_fetch_sub(ATOMIC_CAST(uint32_t,dest), delt, memory_order_relaxed) - delt;
+}
+
+uint32_t
+hw_atomic_or(volatile uint32_t *dest, uint32_t mask)
+{
+ ALIGN_TEST(dest,uint32_t);
+ return __c11_atomic_fetch_or(ATOMIC_CAST(uint32_t,dest), mask, memory_order_relaxed) | mask;
+}
+
+void
+hw_atomic_or_noret(volatile uint32_t *dest, uint32_t mask)
+{
+ ALIGN_TEST(dest,uint32_t);
+ __c11_atomic_fetch_or(ATOMIC_CAST(uint32_t,dest), mask, memory_order_relaxed);
+}
+
+uint32_t
+hw_atomic_and(volatile uint32_t *dest, uint32_t mask)
+{
+ ALIGN_TEST(dest,uint32_t);
+ return __c11_atomic_fetch_and(ATOMIC_CAST(uint32_t,dest), mask, memory_order_relaxed) & mask;
+}
+
+void
+hw_atomic_and_noret(volatile uint32_t *dest, uint32_t mask)
+{
+ ALIGN_TEST(dest,uint32_t);
+ __c11_atomic_fetch_and(ATOMIC_CAST(uint32_t,dest), mask, memory_order_relaxed);
+}
+
+uint32_t
+hw_compare_and_store(uint32_t oldval, uint32_t newval, volatile uint32_t *dest)
+{
+ ALIGN_TEST(dest,uint32_t);
+ return __c11_atomic_compare_exchange_strong(ATOMIC_CAST(uint32_t,dest), &oldval, newval,
+ memory_order_acq_rel_smp, memory_order_relaxed);
+}
+
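+/*
+ * Usage sketch (refcount is hypothetical): unlike the bare C11 fetch_* ops,
+ * these wrappers return the *new* value, and hw_compare_and_store() returns
+ * nonzero only if the swap happened.
+ *
+ *    volatile uint32_t refcount = 2;
+ *
+ *    uint32_t remaining = hw_atomic_sub(&refcount, 1);   // remaining == 1 (new value)
+ *    if (hw_atomic_sub(&refcount, 1) == 0) {
+ *        // Last reference dropped.
+ *    }
+ *    if (hw_compare_and_store(0, 1, &refcount)) {        // 0 -> 1 only if still zero
+ *        // We won the race to resurrect the object.
+ *    }
+ */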