/*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
* Locking primitives implementation
*/
-#include <mach_kdb.h>
#include <mach_ldebug.h>
-#include <kern/lock.h>
#include <kern/locks.h>
#include <kern/kalloc.h>
#include <kern/misc_protos.h>
#include <kern/debug.h>
#include <string.h>
-#if MACH_KDB
-#include <ddb/db_command.h>
-#include <ddb/db_output.h>
-#include <ddb/db_sym.h>
-#include <ddb/db_print.h>
-#endif /* MACH_KDB */
#include <i386/machine_routines.h> /* machine_timeout_suspended() */
#include <machine/machine_cpu.h>
#include <i386/mp.h>
/* Forwards */
-#if MACH_KDB
-void db_print_simple_lock(
- simple_lock_t addr);
-#endif /* MACH_KDB */
-
-
#if USLOCK_DEBUG
/*
* Perform simple lock checks.
decl_simple_lock_data(extern , panic_lock)
#endif /* USLOCK_DEBUG */
+extern unsigned int not_in_kdp;
/*
* We often want to know the addresses of the callers
lck_rw_t *lck,
int prior_lock_state);
+void lck_rw_clear_promotions_x86(thread_t thread);
+
/*
* Routine: lck_spin_alloc_init
*/
return((boolean_t)usimple_lock_try((usimple_lock_t) lck));
}
+/*
+ * Routine: lck_spin_is_acquired
+ * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
+ * Returns: TRUE if lock is acquired.
+ */
+boolean_t
+lck_spin_is_acquired(lck_spin_t *lck) {
+ if (not_in_kdp) {
+ panic("panic: spinlock acquired check done outside of kernel debugger");
+ }
+ return (lck->interlock != 0) ? TRUE : FALSE;
+}
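+
+/*
+ * Illustrative sketch only (hypothetical caller, not part of this change):
+ * debugger-context code that must not spin can query the interlock state
+ * instead of trying to take the lock, e.g.
+ *
+ *	if (lck_spin_is_acquired(&target_lck))
+ *		kdb_printf("spinlock %p is held\n", &target_lck);
+ *
+ * target_lck is a placeholder name for whatever lock is being inspected.
+ */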
+
/*
* Initialize a usimple_lock.
*
volatile uint32_t spinlock_owner_cpu = ~0;
volatile usimple_lock_t spinlock_timed_out;
-static uint32_t spinlock_timeout_NMI(uintptr_t thread_addr) {
+uint32_t spinlock_timeout_NMI(uintptr_t thread_addr) {
uint64_t deadline;
uint32_t i;
if (uslock_acquired == FALSE) {
uint32_t lock_cpu;
+ uintptr_t lowner = (uintptr_t)l->interlock.lock_data;
spinlock_timed_out = l;
- lock_cpu = spinlock_timeout_NMI((uintptr_t)l->interlock.lock_data);
- panic("Spinlock acquisition timed out: lock=%p, lock owner thread=0x%lx, current_thread: %p, lock owner active on CPU 0x%x", l, (uintptr_t)l->interlock.lock_data, current_thread(), lock_cpu);
+ lock_cpu = spinlock_timeout_NMI(lowner);
+ panic("Spinlock acquisition timed out: lock=%p, lock owner thread=0x%lx, current_thread: %p, lock owner active on CPU 0x%x, current owner: 0x%lx", l, lowner, current_thread(), lock_cpu, (uintptr_t)l->interlock.lock_data);
}
}
USLDBG(usld_lock_post(l, pc));
if (l == USIMPLE_LOCK_NULL)
panic("%s: null lock pointer", caller);
if (l->lock_type != USLOCK_TAG)
- panic("%s: 0x%p is not a usimple lock", caller, l);
+ panic("%s: %p is not a usimple lock, 0x%x", caller, l, l->lock_type);
if (!(l->debug.state & USLOCK_INIT))
- panic("%s: %p is not an initialized lock",
- caller, l);
+ panic("%s: %p is not an initialized lock, 0x%x", caller, l, l->debug.state);
return USLOCK_CHECKING(l);
}
#endif /* USLOCK_DEBUG */
-/*
- * Routine: lock_alloc
- * Function:
- * Allocate a lock for external users who cannot
- * hard-code the structure definition into their
- * objects.
- * For now just use kalloc, but a zone is probably
- * warranted.
- */
-lock_t *
-lock_alloc(
- boolean_t can_sleep,
- unsigned short tag,
- unsigned short tag1)
-{
- lock_t *l;
-
- if ((l = (lock_t *)kalloc(sizeof(lock_t))) != 0)
- lock_init(l, can_sleep, tag, tag1);
- return(l);
-}
-
-/*
- * Routine: lock_free
- * Function:
- * Free a lock allocated for external users.
- * For now just use kfree, but a zone is probably
- * warranted.
- */
-void
-lock_free(
- lock_t *l)
-{
- kfree(l, sizeof(lock_t));
-}
-
-
-/*
- * Routine: lock_init
- * Function:
- * Initialize a lock; required before use.
- * Note that clients declare the "struct lock"
- * variables and then initialize them, rather
- * than getting a new one from this module.
- */
-void
-lock_init(
- lock_t *l,
- boolean_t can_sleep,
- __unused unsigned short tag,
- __unused unsigned short tag1)
-{
- hw_lock_byte_init(&l->lck_rw_interlock);
- l->lck_rw_want_write = FALSE;
- l->lck_rw_want_upgrade = FALSE;
- l->lck_rw_shared_count = 0;
- l->lck_rw_can_sleep = can_sleep;
- l->lck_rw_tag = tag;
- l->lck_rw_priv_excl = 1;
- l->lck_r_waiting = l->lck_w_waiting = 0;
-}
-
-
-/*
- * Sleep locks. These use the same data structure and algorithm
- * as the spin locks, but the process sleeps while it is waiting
- * for the lock. These work on uniprocessor systems.
- */
-
-#define DECREMENTER_TIMEOUT 1000000
-
-void
-lock_write(
- register lock_t * l)
-{
- lck_rw_lock_exclusive(l);
-}
-
-void
-lock_done(
- register lock_t * l)
-{
- (void) lck_rw_done(l);
-}
-
-void
-lock_read(
- register lock_t * l)
-{
- lck_rw_lock_shared(l);
-}
-
-
-/*
- * Routine: lock_read_to_write
- * Function:
- * Improves a read-only lock to one with
- * write permission. If another reader has
- * already requested an upgrade to a write lock,
- * no lock is held upon return.
- *
- * Returns FALSE if the upgrade *failed*.
- */
-
-boolean_t
-lock_read_to_write(
- register lock_t * l)
-{
- return lck_rw_lock_shared_to_exclusive(l);
-}
-
-void
-lock_write_to_read(
- register lock_t * l)
-{
- lck_rw_lock_exclusive_to_shared(l);
-}
-
-
-
/*
* Routine: lck_rw_alloc_init
*/
{
if (lck->lck_rw_tag == LCK_RW_TAG_DESTROYED)
return;
+#if MACH_LDEBUG
+ lck_rw_assert(lck, LCK_RW_ASSERT_NOTHELD);
+#endif
lck->lck_rw_tag = LCK_RW_TAG_DESTROYED;
lck_grp_lckcnt_decr(grp, LCK_TYPE_RW);
lck_grp_deallocate(grp);
{
lck_rw_t *fake_lck;
lck_rw_type_t lock_type;
+ thread_t thread;
+ uint32_t rwlock_count;
/*
* prior_lock state is a snapshot of the 1st word of the
else
lock_type = LCK_RW_TYPE_EXCLUSIVE;
+ /* Check if dropping the lock means that we need to unpromote */
+ thread = current_thread();
+ rwlock_count = thread->rwlock_count--;
+#if MACH_LDEBUG
+ if (rwlock_count == 0) {
+ panic("rw lock count underflow for thread %p", thread);
+ }
+#endif
+ if ((rwlock_count == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
+ /* sched_flags checked without lock, but will be rechecked while clearing */
+ lck_rw_clear_promotion(thread);
+ }
+
#if CONFIG_DTRACE
LOCKSTAT_RECORD(LS_LCK_RW_DONE_RELEASE, lck, lock_type == LCK_RW_TYPE_SHARED ? 0 : 1);
#endif
int prior_lock_state)
{
lck_rw_t *fake_lck;
+ thread_t thread = current_thread();
+ uint32_t rwlock_count;
+
+ /* Check if dropping the lock means that we need to unpromote */
+ rwlock_count = thread->rwlock_count--;
+#if MACH_LDEBUG
+ if (rwlock_count == 0) {
+ panic("rw lock count underflow for thread %p", thread);
+ }
+#endif
+ if ((rwlock_count == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
+ /* sched_flags checked without lock, but will be rechecked while clearing */
+ lck_rw_clear_promotion(thread);
+ }
/*
* prior_lock state is a snapshot of the 1st word of the
return;
}
break;
+ case LCK_RW_ASSERT_NOTHELD:
+ if (!(lck->lck_rw_want_write ||
+ lck->lck_rw_want_upgrade ||
+ lck->lck_rw_shared_count != 0)) {
+ return;
+ }
+ break;
default:
break;
}
- panic("rw lock (%p) not held (mode=%u), first word %08x\n", lck, type, *(uint32_t *)lck);
+ panic("rw lock (%p)%s held (mode=%u), first word %08x\n", lck, (type == LCK_RW_ASSERT_NOTHELD ? "" : " not"), type, *(uint32_t *)lck);
+}
+
+/* On return to userspace, this routine is called if the rwlock_count is somehow imbalanced */
+void
+lck_rw_clear_promotions_x86(thread_t thread)
+{
+#if MACH_LDEBUG
+ /* It's fatal to leave a RW lock locked and return to userspace */
+ panic("%u rw lock(s) held on return to userspace for thread %p", thread->rwlock_count, thread);
+#else
+ /* Paper over the issue */
+ thread->rwlock_count = 0;
+ lck_rw_clear_promotion(thread);
+#endif
}
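+
+/*
+ * Illustrative sketch only (hypothetical call site, not part of this change):
+ * the return-to-userspace path is expected to detect the imbalance and hand
+ * it to the routine above, along the lines of
+ *
+ *	if (__improbable(thread->rwlock_count != 0))
+ *		lck_rw_clear_promotions_x86(thread);
+ */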
+
#ifdef MUTEX_ZONE
extern zone_t lck_mtx_zone;
#endif
lck->lck_mtx_attr |= LCK_MTX_ATTR_STAT;
lck->lck_mtx.lck_mtx_is_ext = 1;
-#if defined(__x86_64__)
lck->lck_mtx.lck_mtx_sw.lck_mtxd.lck_mtxd_pad32 = 0xFFFFFFFF;
-#endif
}
/*
lck->lck_mtx_owner = 0;
lck->lck_mtx_state = 0;
}
-#if defined(__x86_64__)
lck->lck_mtx_sw.lck_mtxd.lck_mtxd_pad32 = 0xFFFFFFFF;
-#endif
lck_grp_reference(grp);
lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
}
lck->lck_mtx_owner = 0;
lck->lck_mtx_state = 0;
}
-#if defined(__x86_64__)
lck->lck_mtx_sw.lck_mtxd.lck_mtxd_pad32 = 0xFFFFFFFF;
-#endif
lck_grp_reference(grp);
lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
if (lck->lck_mtx_tag == LCK_MTX_TAG_DESTROYED)
return;
+#if MACH_LDEBUG
+ lck_mtx_assert(lck, LCK_MTX_ASSERT_NOTOWNED);
+#endif
lck_is_indirect = (lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT);
lck_mtx_lock_mark_destroyed(lck);
mutex, fake_lck.lck_mtx_promoted, fake_lck.lck_mtx_waiters, fake_lck.lck_mtx_pri, 0);
if (__probable(fake_lck.lck_mtx_waiters)) {
-
if (fake_lck.lck_mtx_waiters > 1)
thread_wakeup_one_with_pri((event_t)(((unsigned int*)mutex)+(sizeof(lck_mtx_t)-1)/sizeof(unsigned int)), fake_lck.lck_mtx_pri);
else
thread->sched_flags &= ~TH_SFLAG_PROMOTED;
- if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
+ if (thread->sched_flags & TH_SFLAG_RW_PROMOTED) {
+ /* Thread still has a RW lock promotion */
+ } else if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_DEMOTE) | DBG_FUNC_NONE,
thread->sched_pri, DEPRESSPRI, 0, mutex, 0);
s = splsched();
thread_lock(thread);
- if (thread->sched_pri < priority)
+ if (thread->sched_pri < priority) {
+ /* Do not promote past promotion ceiling */
+ assert(priority <= MAXPRI_PROMOTE);
set_sched_pri(thread, priority);
-
+ }
if (mutex->lck_mtx_promoted == 0) {
mutex->lck_mtx_promoted = 1;
if (priority < BASEPRI_DEFAULT)
priority = BASEPRI_DEFAULT;
+ /* Do not promote past promotion ceiling */
+ priority = MIN(priority, MAXPRI_PROMOTE);
+
if (mutex->lck_mtx_waiters == 0 || priority > mutex->lck_mtx_pri)
mutex->lck_mtx_pri = priority;
mutex->lck_mtx_waiters++;
if ( (holder = (thread_t)mutex->lck_mtx_owner) &&
holder->sched_pri < mutex->lck_mtx_pri ) {
-
s = splsched();
thread_lock(holder);
+ /* holder priority may have been bumped by another thread
+ * before thread_lock was taken
+ */
if (holder->sched_pri < mutex->lck_mtx_pri) {
KERNEL_DEBUG_CONSTANT(
MACHDBG_CODE(DBG_MACH_SCHED, MACH_PROMOTE) | DBG_FUNC_NONE,
holder->sched_pri, priority, thread_tid(holder), mutex, 0);
-
+ /* Assert that we're not altering the priority of a
+ * thread above the MAXPRI_PROMOTE band
+ */
+ assert(holder->sched_pri < MAXPRI_PROMOTE);
set_sched_pri(holder, priority);
if (mutex->lck_mtx_promoted == 0) {
}
#endif
}
-
-
-#if MACH_KDB
-
-void
-db_show_one_lock(
- lock_t *lock)
-{
- db_printf("Read_count = 0x%x, %swant_upgrade, %swant_write, ",
- lock->lck_rw_shared_count,
- lock->lck_rw_want_upgrade ? "" : "!",
- lock->lck_rw_want_write ? "" : "!");
- db_printf("%swaiting, %scan_sleep\n",
- (lock->lck_r_waiting || lock->lck_w_waiting) ? "" : "!",
- lock->lck_rw_can_sleep ? "" : "!");
- db_printf("Interlock:\n");
- db_show_one_simple_lock((db_expr_t) ((vm_offset_t)simple_lock_addr(lock->lck_rw_interlock)),
- TRUE, (db_expr_t)0, (char *)0);
-}
-
-/*
- * Routines to print out simple_locks and mutexes in a nicely-formatted
- * fashion.
- */
-
-const char *simple_lock_labels = "ENTRY ILK THREAD DURATION CALLER";
-
-void
-db_show_one_simple_lock (
- db_expr_t addr,
- boolean_t have_addr,
- __unused db_expr_t count,
- __unused char * modif)
-{
- simple_lock_t saddr = (simple_lock_t) ((vm_offset_t) addr);
-
- if (saddr == (simple_lock_t)0 || !have_addr) {
- db_error ("No simple_lock\n");
- }
-#if USLOCK_DEBUG
- else if (saddr->lock_type != USLOCK_TAG)
- db_error ("Not a simple_lock\n");
-#endif /* USLOCK_DEBUG */
-
- db_printf ("%s\n", simple_lock_labels);
- db_print_simple_lock (saddr);
-}
-
-void
-db_print_simple_lock (
- simple_lock_t addr)
-{
-
- db_printf ("%08x %3d", addr, *hw_lock_addr(addr->interlock));
-#if USLOCK_DEBUG
- db_printf (" %08x", addr->debug.lock_thread);
- db_printf (" %08x ", addr->debug.duration[1]);
- db_printsym ((int)addr->debug.lock_pc, DB_STGY_ANY);
-#endif /* USLOCK_DEBUG */
- db_printf ("\n");
-}
-
-#endif /* MACH_KDB */