diff --git a/osfmk/arm/locks_arm.c b/osfmk/arm/locks_arm.c
index b43f665db1d83b0de86aacc1309138557257efee..8246489dc7d529e63b5fe2f9ae39fd0aee5dd8ce 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007-2017 Apple Inc. All rights reserved.
+ * Copyright (c) 2007-2018 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
 /*
  * Mach Operating System Copyright (c) 1991,1990,1989,1988,1987 Carnegie
  * Mellon University All Rights Reserved.
- * 
+ *
  * Permission to use, copy, modify and distribute this software and its
  * documentation is hereby granted, provided that both the copyright notice
  * and this permission notice appear in all copies of the software,
  * derivative works or modified versions, and any portions thereof, and that
  * both notices appear in supporting documentation.
- * 
+ *
  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.
  * CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES
  * WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
+ *
  * Carnegie Mellon requests users of this software to return to
- * 
+ *
  * Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  * School of Computer Science Carnegie Mellon University Pittsburgh PA
  * 15213-3890
- * 
+ *
  * any improvements or extensions that they make and grant Carnegie Mellon the
  * rights to redistribute these changes.
  */
  *     Locking primitives implementation
  */
 
-#define ATOMIC_PRIVATE 1
 #define LOCK_PRIVATE 1
 
 #include <mach_ldebug.h>
 
-#include <kern/kalloc.h>
+#include <kern/zalloc.h>
+#include <kern/lock_stat.h>
 #include <kern/locks.h>
 #include <kern/misc_protos.h>
 #include <kern/thread.h>
 #include <kern/processor.h>
 #include <kern/sched_prim.h>
-#include <kern/xpr.h>
 #include <kern/debug.h>
 #include <kern/kcdata.h>
 #include <string.h>
+#include <arm/cpu_internal.h>
+#include <os/hash.h>
+#include <arm/cpu_data.h>
 
 #include <arm/cpu_data_internal.h>
 #include <arm/proc_reg.h>
 
 #include <sys/kdebug.h>
 
-/*
- * We need only enough declarations from the BSD-side to be able to
- * test if our probe is active, and to call __dtrace_probe().  Setting
- * NEED_DTRACE_DEFS gets a local copy of those definitions pulled in.
- */
-#if    CONFIG_DTRACE
-#define NEED_DTRACE_DEFS
-#include <../bsd/sys/lockstat.h>
-
-#define DTRACE_RW_SHARED       0x0     //reader
-#define DTRACE_RW_EXCL         0x1     //writer
-#define DTRACE_NO_FLAG         0x0     //not applicable
-
-#endif /* CONFIG_DTRACE */
+#if CONFIG_DTRACE
+#define DTRACE_RW_SHARED        0x0     //reader
+#define DTRACE_RW_EXCL          0x1     //writer
+#define DTRACE_NO_FLAG          0x0     //not applicable
+#endif  /* CONFIG_DTRACE */
 
-#define        LCK_RW_LCK_EXCLUSIVE_CODE       0x100
-#define        LCK_RW_LCK_EXCLUSIVE1_CODE      0x101
-#define        LCK_RW_LCK_SHARED_CODE          0x102
-#define        LCK_RW_LCK_SH_TO_EX_CODE        0x103
-#define        LCK_RW_LCK_SH_TO_EX1_CODE       0x104
-#define        LCK_RW_LCK_EX_TO_SH_CODE        0x105
+#define LCK_RW_LCK_EXCLUSIVE_CODE       0x100
+#define LCK_RW_LCK_EXCLUSIVE1_CODE      0x101
+#define LCK_RW_LCK_SHARED_CODE          0x102
+#define LCK_RW_LCK_SH_TO_EX_CODE        0x103
+#define LCK_RW_LCK_SH_TO_EX1_CODE       0x104
+#define LCK_RW_LCK_EX_TO_SH_CODE        0x105
 
 
-#define        ANY_LOCK_DEBUG  (USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG)
+#define ANY_LOCK_DEBUG  (USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG)
 
 // Panic in tests that check lock usage correctness
 // These are undesirable when in a panic or a debugger is running.
 #define LOCK_CORRECTNESS_PANIC() (kernel_debugger_entry_count == 0)
 
-unsigned int    LcksOpts = 0;
+#define ADAPTIVE_SPIN_ENABLE 0x1
+
+int lck_mtx_adaptive_spin_mode = ADAPTIVE_SPIN_ENABLE;
+
+#define SPINWAIT_OWNER_CHECK_COUNT 4
 
-#if CONFIG_DTRACE && __SMP__
+typedef enum {
+       SPINWAIT_ACQUIRED,     /* Got the lock. */
+       SPINWAIT_INTERLOCK,    /* Got the interlock, no owner, but caller must finish acquiring the lock. */
+       SPINWAIT_DID_SPIN_HIGH_THR, /* Got the interlock, spun, but failed to get the lock. */
+       SPINWAIT_DID_SPIN_OWNER_NOT_CORE, /* Got the interlock, spun, but failed to get the lock. */
+       SPINWAIT_DID_SPIN_NO_WINDOW_CONTENTION, /* Got the interlock, spun, but failed to get the lock. */
+       SPINWAIT_DID_SPIN_SLIDING_THR,/* Got the interlock, spun, but failed to get the lock. */
+       SPINWAIT_DID_NOT_SPIN, /* Got the interlock, did not spin. */
+} spinwait_result_t;
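The spinwait_result_t values classify how the adaptive-spin attempt on a contended mutex ended. A minimal sketch of how a caller might branch on the result (the function name below is hypothetical, not taken from this diff):

/* Illustrative sketch only -- not part of the xnu sources. */
static void
example_handle_spinwait(spinwait_result_t res)
{
        switch (res) {
        case SPINWAIT_ACQUIRED:
                /* The lock was acquired during the spin; nothing further to do. */
                break;
        case SPINWAIT_INTERLOCK:
                /* Interlock held, no owner: the caller finishes acquiring the lock. */
                break;
        default:
                /* Spun (or chose not to spin) and failed: fall back to the blocking path. */
                break;
        }
}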
+
+#if CONFIG_DTRACE
 extern uint64_t dtrace_spin_threshold;
 #endif
 
 /* Forwards */
 
-
-#if    USLOCK_DEBUG
-/*
- *     Perform simple lock checks.
- */
-int             uslock_check = 1;
-int             max_lock_loops = 100000000;
-decl_simple_lock_data(extern, printf_lock)
-decl_simple_lock_data(extern, panic_lock)
-#endif                         /* USLOCK_DEBUG */
-
 extern unsigned int not_in_kdp;
 
 /*
@@ -139,75 +135,60 @@ extern unsigned int not_in_kdp;
  *     is only used for debugging and statistics.
  */
 typedef void   *pc_t;
-#define        INVALID_PC      ((void *) VM_MAX_KERNEL_ADDRESS)
-#define        INVALID_THREAD  ((void *) VM_MAX_KERNEL_ADDRESS)
+#define INVALID_PC      ((void *) VM_MAX_KERNEL_ADDRESS)
+#define INVALID_THREAD  ((void *) VM_MAX_KERNEL_ADDRESS)
 
-#ifdef lint
+#ifdef  lint
 /*
  *     Eliminate lint complaints about unused local pc variables.
  */
-#define        OBTAIN_PC(pc,l) ++pc
-#else                          /* lint */
-#define        OBTAIN_PC(pc,l)
-#endif                         /* lint */
+#define OBTAIN_PC(pc, l) ++pc
+#else                           /* lint */
+#define OBTAIN_PC(pc, l)
+#endif                          /* lint */
 
 
 /*
  *     Portable lock package implementation of usimple_locks.
  */
 
-#if    USLOCK_DEBUG
-#define        USLDBG(stmt)    stmt
-       void            usld_lock_init(usimple_lock_t, unsigned short);
-       void            usld_lock_pre(usimple_lock_t, pc_t);
-       void            usld_lock_post(usimple_lock_t, pc_t);
-       void            usld_unlock(usimple_lock_t, pc_t);
-       void            usld_lock_try_pre(usimple_lock_t, pc_t);
-       void            usld_lock_try_post(usimple_lock_t, pc_t);
-       int             usld_lock_common_checks(usimple_lock_t, const char *);
-#else                          /* USLOCK_DEBUG */
-#define        USLDBG(stmt)
-#endif                         /* USLOCK_DEBUG */
-
 /*
  * Owner thread pointer when lock held in spin mode
  */
 #define LCK_MTX_SPIN_TAG  0xfffffff0
 
 
-#define interlock_lock(lock)   hw_lock_bit    ((hw_lock_bit_t*)(&(lock)->lck_mtx_data), LCK_ILOCK_BIT)
-#define interlock_try(lock)            hw_lock_bit_try((hw_lock_bit_t*)(&(lock)->lck_mtx_data), LCK_ILOCK_BIT)
-#define interlock_unlock(lock) hw_unlock_bit  ((hw_lock_bit_t*)(&(lock)->lck_mtx_data), LCK_ILOCK_BIT)
-#define lck_rw_ilk_lock(lock)  hw_lock_bit  ((hw_lock_bit_t*)(&(lock)->lck_rw_tag), LCK_RW_INTERLOCK_BIT)
-#define lck_rw_ilk_unlock(lock)        hw_unlock_bit((hw_lock_bit_t*)(&(lock)->lck_rw_tag), LCK_RW_INTERLOCK_BIT)
+#define interlock_lock(lock)    hw_lock_bit    ((hw_lock_bit_t*)(&(lock)->lck_mtx_data), LCK_ILOCK_BIT, LCK_GRP_NULL)
+#define interlock_try(lock)             hw_lock_bit_try((hw_lock_bit_t*)(&(lock)->lck_mtx_data), LCK_ILOCK_BIT, LCK_GRP_NULL)
+#define interlock_unlock(lock)  hw_unlock_bit  ((hw_lock_bit_t*)(&(lock)->lck_mtx_data), LCK_ILOCK_BIT)
+#define lck_rw_ilk_lock(lock)   hw_lock_bit  ((hw_lock_bit_t*)(&(lock)->lck_rw_tag), LCK_RW_INTERLOCK_BIT, LCK_GRP_NULL)
+#define lck_rw_ilk_unlock(lock) hw_unlock_bit((hw_lock_bit_t*)(&(lock)->lck_rw_tag), LCK_RW_INTERLOCK_BIT)
 
-#define memory_barrier()       __c11_atomic_thread_fence(memory_order_acq_rel_smp)
-#define load_memory_barrier()  __c11_atomic_thread_fence(memory_order_acquire_smp)
-#define store_memory_barrier() __c11_atomic_thread_fence(memory_order_release_smp)
+#define load_memory_barrier()   os_atomic_thread_fence(acquire)
 
 // Enforce program order of loads and stores.
-#define ordered_load(target, type) \
-               __c11_atomic_load((_Atomic type *)(target), memory_order_relaxed)
-#define ordered_store(target, type, value) \
-               __c11_atomic_store((_Atomic type *)(target), value, memory_order_relaxed)
-
-#define ordered_load_mtx(lock)                 ordered_load(&(lock)->lck_mtx_data, uintptr_t)
-#define ordered_store_mtx(lock, value) ordered_store(&(lock)->lck_mtx_data, uintptr_t, (value))
-#define ordered_load_rw(lock)                  ordered_load(&(lock)->lck_rw_data, uint32_t)
-#define ordered_store_rw(lock, value)  ordered_store(&(lock)->lck_rw_data, uint32_t, (value))
-#define ordered_load_rw_owner(lock)            ordered_load(&(lock)->lck_rw_owner, thread_t)
-#define ordered_store_rw_owner(lock, value)    ordered_store(&(lock)->lck_rw_owner, thread_t, (value))
-#define ordered_load_hw(lock)                  ordered_load(&(lock)->lock_data, uintptr_t)
-#define ordered_store_hw(lock, value)  ordered_store(&(lock)->lock_data, uintptr_t, (value))
-#define ordered_load_bit(lock)                 ordered_load((lock), uint32_t)
-#define ordered_store_bit(lock, value) ordered_store((lock), uint32_t, (value))
+#define ordered_load(target) \
+               os_atomic_load(target, compiler_acq_rel)
+#define ordered_store(target, value) \
+               os_atomic_store(target, value, compiler_acq_rel)
+
+#define ordered_load_mtx(lock)                  ordered_load(&(lock)->lck_mtx_data)
+#define ordered_store_mtx(lock, value)  ordered_store(&(lock)->lck_mtx_data, (value))
+#define ordered_load_rw(lock)                   ordered_load(&(lock)->lck_rw_data)
+#define ordered_store_rw(lock, value)   ordered_store(&(lock)->lck_rw_data, (value))
+#define ordered_load_rw_owner(lock)             ordered_load(&(lock)->lck_rw_owner)
+#define ordered_store_rw_owner(lock, value)     ordered_store(&(lock)->lck_rw_owner, (value))
+#define ordered_load_hw(lock)                   ordered_load(&(lock)->lock_data)
+#define ordered_store_hw(lock, value)   ordered_store(&(lock)->lock_data, (value))
+#define ordered_load_bit(lock)                  ordered_load((lock))
+#define ordered_store_bit(lock, value)  ordered_store((lock), (value))
 
 
 // Prevent the compiler from reordering memory operations around this
-#define compiler_memory_fence()        __asm__ volatile ("" ::: "memory")
+#define compiler_memory_fence() __asm__ volatile ("" ::: "memory")
 
-#define LOCK_PANIC_TIMEOUT     0xc00000
-#define NOINLINE               __attribute__((noinline))
+#define LOCK_PANIC_TIMEOUT      0xc00000
+#define NOINLINE                __attribute__((noinline))
 
 
 #if __arm__
@@ -218,10 +199,22 @@ typedef void   *pc_t;
 
 
 #if __arm__
-#define enable_fiq()           __asm__ volatile ("cpsie  f" ::: "memory");
-#define enable_interrupts()    __asm__ volatile ("cpsie if" ::: "memory");
+#define enable_fiq()            __asm__ volatile ("cpsie  f" ::: "memory");
+#define enable_interrupts()     __asm__ volatile ("cpsie if" ::: "memory");
 #endif
 
+ZONE_VIEW_DEFINE(ZV_LCK_SPIN, "lck_spin",
+    KHEAP_ID_DEFAULT, sizeof(lck_spin_t));
+
+ZONE_VIEW_DEFINE(ZV_LCK_MTX, "lck_mtx",
+    KHEAP_ID_DEFAULT, sizeof(lck_mtx_t));
+
+ZONE_VIEW_DEFINE(ZV_LCK_MTX_EXT, "lck_mtx_ext",
+    KHEAP_ID_DEFAULT, sizeof(lck_mtx_ext_t));
+
+ZONE_VIEW_DEFINE(ZV_LCK_RW, "lck_rw",
+    KHEAP_ID_DEFAULT, sizeof(lck_rw_t));
+
 /*
  * Forward declarations
  */
@@ -244,11 +237,56 @@ static boolean_t lck_rw_grab(lck_rw_t *lock, int mode, boolean_t wait);
  * atomic_exchange_complete() - conclude an exchange
  * atomic_exchange_abort() - cancel an exchange started with atomic_exchange_begin()
  */
+__unused static uint32_t
+load_exclusive32(uint32_t *target, enum memory_order ord)
+{
+       uint32_t        value;
+
+#if __arm__
+       if (_os_atomic_mo_has_release(ord)) {
+               // Pre-load release barrier
+               atomic_thread_fence(memory_order_release);
+       }
+       value = __builtin_arm_ldrex(target);
+#else
+       if (_os_atomic_mo_has_acquire(ord)) {
+               value = __builtin_arm_ldaex(target);    // ldaxr
+       } else {
+               value = __builtin_arm_ldrex(target);    // ldxr
+       }
+#endif  // __arm__
+       return value;
+}
+
+__unused static boolean_t
+store_exclusive32(uint32_t *target, uint32_t value, enum memory_order ord)
+{
+       boolean_t err;
+
+#if __arm__
+       err = __builtin_arm_strex(value, target);
+       if (_os_atomic_mo_has_acquire(ord)) {
+               // Post-store acquire barrier
+               atomic_thread_fence(memory_order_acquire);
+       }
+#else
+       if (_os_atomic_mo_has_release(ord)) {
+               err = __builtin_arm_stlex(value, target);       // stlxr
+       } else {
+               err = __builtin_arm_strex(value, target);       // stxr
+       }
+#endif  // __arm__
+       return !err;
+}
+
 static uint32_t
 atomic_exchange_begin32(uint32_t *target, uint32_t *previous, enum memory_order ord)
 {
-       uint32_t        val;
+       uint32_t        val;
 
+#if __ARM_ATOMICS_8_1
+       ord = memory_order_relaxed;
+#endif
        val = load_exclusive32(target, ord);
        *previous = val;
        return val;
@@ -257,278 +295,174 @@ atomic_exchange_begin32(uint32_t *target, uint32_t *previous, enum memory_order
 static boolean_t
 atomic_exchange_complete32(uint32_t *target, uint32_t previous, uint32_t newval, enum memory_order ord)
 {
-       (void)previous;         // Previous not needed, monitor is held
+#if __ARM_ATOMICS_8_1
+       return __c11_atomic_compare_exchange_strong((_Atomic uint32_t *)target, &previous, newval, ord, memory_order_relaxed);
+#else
+       (void)previous;         // Previous not needed, monitor is held
        return store_exclusive32(target, newval, ord);
+#endif
 }
 
 static void
 atomic_exchange_abort(void)
 {
-       clear_exclusive();
+       os_atomic_clear_exclusive();
 }
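Taken together, atomic_exchange_begin32(), atomic_exchange_complete32() and atomic_exchange_abort() wrap the exclusive-monitor instructions (LDREX/STREX, or LDAXR/STLXR on arm64) into a begin-modify-complete pattern. A minimal usage sketch, assuming a caller that wants to set a flag bit exactly once; atomic_test_and_set32() just below is the real in-tree user of this pattern:

/* Illustrative sketch only -- not part of the xnu sources. */
static boolean_t
example_set_flag_once(uint32_t *word, uint32_t flag)
{
        uint32_t        data, prev;

        for (;;) {
                data = atomic_exchange_begin32(word, &prev, memory_order_acquire);
                if (data & flag) {
                        atomic_exchange_abort();        /* release the exclusive monitor */
                        return FALSE;                   /* someone else already set it */
                }
                if (atomic_exchange_complete32(word, prev, data | flag, memory_order_acquire)) {
                        return TRUE;                    /* store-exclusive succeeded */
                }
                /* Reservation lost between begin and complete: retry. */
        }
}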
 
 static boolean_t
 atomic_test_and_set32(uint32_t *target, uint32_t test_mask, uint32_t set_mask, enum memory_order ord, boolean_t wait)
 {
-       uint32_t                value, prev;
+       uint32_t                value, prev;
 
-       for ( ; ; ) {
+       for (;;) {
                value = atomic_exchange_begin32(target, &prev, ord);
                if (value & test_mask) {
-                       if (wait)
-                               wait_for_event();       // Wait with monitor held
-                       else
-                               atomic_exchange_abort();        // Clear exclusive monitor
+                       if (wait) {
+                               wait_for_event();       // Wait with monitor held
+                       } else {
+                               atomic_exchange_abort();        // Clear exclusive monitor
+                       }
                        return FALSE;
                }
                value |= set_mask;
-               if (atomic_exchange_complete32(target, prev, value, ord))
+               if (atomic_exchange_complete32(target, prev, value, ord)) {
                        return TRUE;
+               }
        }
 }
 
-void _disable_preemption(void)
+inline boolean_t
+hw_atomic_test_and_set32(uint32_t *target, uint32_t test_mask, uint32_t set_mask, enum memory_order ord, boolean_t wait)
 {
-       thread_t        thread = current_thread();
-       unsigned int    count;
-
-       count = thread->machine.preemption_count + 1;
-       ordered_store(&thread->machine.preemption_count, unsigned int, count);
+       return atomic_test_and_set32(target, test_mask, set_mask, ord, wait);
 }
 
-void _enable_preemption(void)
+/*
+ * To help _disable_preemption() inline everywhere with LTO,
+ * we keep these nice non inlineable functions as the panic()
+ * codegen setup is quite large and for weird reasons causes a frame.
+ */
+__abortlike
+static void
+_disable_preemption_overflow(void)
 {
-       thread_t        thread = current_thread();
-       long            state;
-       unsigned int    count;
-#if __arm__
-#define INTERRUPT_MASK PSR_IRQF
-#else  // __arm__
-#define INTERRUPT_MASK DAIF_IRQF
-#endif // __arm__
-
-       count = thread->machine.preemption_count;
-       if (count == 0)
-               panic("Preemption count negative");     // Count will go negative when released
-       count--;
-       if (count > 0)
-               goto update_count;                      // Preemption is still disabled, just update
-       state = get_interrupts();                       // Get interrupt state
-       if (state & INTERRUPT_MASK)
-               goto update_count;                      // Interrupts are already masked, can't take AST here
-
-       disable_interrupts_noread();                    // Disable interrupts
-       ordered_store(&thread->machine.preemption_count, unsigned int, count);
-       if (thread->machine.CpuDatap->cpu_pending_ast & AST_URGENT) {
-#if __arm__
-#if __ARM_USER_PROTECT__
-        uintptr_t up = arm_user_protect_begin(thread);
-#endif // __ARM_USER_PROTECT__
-               enable_fiq();
-#endif // __arm__
-               ast_taken_kernel();                     // Handle urgent AST
-#if __arm__
-#if __ARM_USER_PROTECT__
-               arm_user_protect_end(thread, up, TRUE);
-#endif // __ARM_USER_PROTECT__
-               enable_interrupts();
-               return;                                 // Return early on arm only due to FIQ enabling
-#endif // __arm__
-       }
-       restore_interrupts(state);                      // Enable interrupts
-       return;
-
-update_count:
-       ordered_store(&thread->machine.preemption_count, unsigned int, count);
-       return;
+       panic("Preemption count overflow");
 }
 
-int get_preemption_level(void)
+void
+_disable_preemption(void)
 {
-       return current_thread()->machine.preemption_count;
-}
+       thread_t     thread = current_thread();
+       unsigned int count  = thread->machine.preemption_count;
 
-/* Forward declarations for unexported functions that are used externally */
-void hw_lock_bit(hw_lock_bit_t *lock, unsigned int bit);
-void hw_unlock_bit(hw_lock_bit_t *lock, unsigned int bit);
+       if (__improbable(++count == 0)) {
+               _disable_preemption_overflow();
+       }
 
-#if    __SMP__
-static unsigned int
-hw_lock_bit_to_contended(hw_lock_bit_t *lock, uint32_t mask, uint32_t timeout);
-#endif
+       os_atomic_store(&thread->machine.preemption_count, count, compiler_acq_rel);
+}
 
-static inline unsigned int
-hw_lock_bit_to_internal(hw_lock_bit_t *lock, unsigned int bit, uint32_t timeout)
+/*
+ * This function checks whether an AST_URGENT has been pended.
+ *
+ * It is called once the preemption has been reenabled, which means the thread
+ * may have been preempted right before this was called, and when this function
+ * actually performs the check, we've changed CPU.
+ *
+ * This race is however benign: the point of AST_URGENT is to trigger a context
+ * switch, so if one happened, there's nothing left to check for, and AST_URGENT
+ * was cleared in the process.
+ *
+ * It follows that this check cannot have false negatives, which allows us
+ * to avoid fiddling with interrupt state for the vast majority of cases
+ * when the check will actually be negative.
+ */
+static NOINLINE void
+kernel_preempt_check(thread_t thread)
 {
-       unsigned int success = 0;
-       uint32_t        mask = (1 << bit);
-#if    !__SMP__
-       uint32_t        state;
-#endif
-
-#if    __SMP__
-       if (__improbable(!atomic_test_and_set32(lock, mask, mask, memory_order_acquire, FALSE)))
-               success = hw_lock_bit_to_contended(lock, mask, timeout);
-       else
-               success = 1;
-#else  // __SMP__
-       (void)timeout;
-       state = ordered_load_bit(lock);
-       if (!(mask & state)) {
-               ordered_store_bit(lock, state | mask);
-               success = 1;
-       }
-#endif // __SMP__
+       cpu_data_t *cpu_data_ptr;
+       long        state;
 
-#if CONFIG_DTRACE
-       if (success)
-               LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE, lock, bit);
-#endif
+#if __arm__
+#define INTERRUPT_MASK PSR_IRQF
+#else   // __arm__
+#define INTERRUPT_MASK DAIF_IRQF
+#endif  // __arm__
 
-       return success;
-}
+       /*
+        * This check is racy and could load from another CPU's pending_ast mask,
+        * but as described above, this can't have false negatives.
+        */
+       cpu_data_ptr = os_atomic_load(&thread->machine.CpuDatap, compiler_acq_rel);
+       if (__probable((cpu_data_ptr->cpu_pending_ast & AST_URGENT) == 0)) {
+               return;
+       }
 
-unsigned int
-hw_lock_bit_to(hw_lock_bit_t *lock, unsigned int bit, uint32_t timeout)
-{
-       _disable_preemption();
-       return hw_lock_bit_to_internal(lock, bit, timeout);
-}
+       /* If interrupts are masked, we can't take an AST here */
+       state = get_interrupts();
+       if ((state & INTERRUPT_MASK) == 0) {
+               disable_interrupts_noread();                    // Disable interrupts
 
-#if    __SMP__
-static unsigned int NOINLINE
-hw_lock_bit_to_contended(hw_lock_bit_t *lock, uint32_t mask, uint32_t timeout)
-{
-       uint64_t        end = 0;
-       int             i;
-#if CONFIG_DTRACE
-       uint64_t begin;
-       boolean_t dtrace_enabled = lockstat_probemap[LS_LCK_SPIN_LOCK_SPIN] != 0;
-       if (__improbable(dtrace_enabled))
-               begin = mach_absolute_time();
-#endif
-       for ( ; ; ) {   
-               for (i = 0; i < LOCK_SNOOP_SPINS; i++) {
-                       // Always load-exclusive before wfe
-                       // This grabs the monitor and wakes up on a release event
-                       if (atomic_test_and_set32(lock, mask, mask, memory_order_acquire, TRUE)) {
-                               goto end;
-                       }
+               /*
+                * Reload cpu_data_ptr: a context switch would cause it to change.
+                * Now that interrupts are disabled, this will debounce false positives.
+                */
+               cpu_data_ptr = os_atomic_load(&thread->machine.CpuDatap, compiler_acq_rel);
+               if (thread->machine.CpuDatap->cpu_pending_ast & AST_URGENT) {
+#if __arm__
+#if __ARM_USER_PROTECT__
+                       uintptr_t up = arm_user_protect_begin(thread);
+#endif  // __ARM_USER_PROTECT__
+                       enable_fiq();
+#endif  // __arm__
+                       ast_taken_kernel();                 // Handle urgent AST
+#if __arm__
+#if __ARM_USER_PROTECT__
+                       arm_user_protect_end(thread, up, TRUE);
+#endif  // __ARM_USER_PROTECT__
+                       enable_interrupts();
+                       return;                             // Return early on arm only due to FIQ enabling
+#endif  // __arm__
                }
-               if (end == 0)
-                       end = ml_get_timebase() + timeout;
-               else if (ml_get_timebase() >= end)
-                       break;
+               restore_interrupts(state);              // Enable interrupts
        }
-       return 0;
-end:
-#if CONFIG_DTRACE
-       if (__improbable(dtrace_enabled)) {
-               uint64_t spintime = mach_absolute_time() - begin;
-               if (spintime > dtrace_spin_threshold)
-                       LOCKSTAT_RECORD2(LS_LCK_SPIN_LOCK_SPIN, lock, spintime, mask);
-       }
-#endif
-       return 1;
 }
-#endif // __SMP__
 
-void
-hw_lock_bit(hw_lock_bit_t *lock, unsigned int bit)
+/*
+ * To help _enable_preemption() inline everywhere with LTO,
+ * we keep these nice non inlineable functions as the panic()
+ * codegen setup is quite large and for weird reasons causes a frame.
+ */
+__abortlike
+static void
+_enable_preemption_underflow(void)
 {
-       if (hw_lock_bit_to(lock, bit, LOCK_PANIC_TIMEOUT))
-               return;
-#if    __SMP__
-       panic("hw_lock_bit(): timed out (%p)", lock);
-#else
-       panic("hw_lock_bit(): interlock held (%p)", lock);
-#endif
+       panic("Preemption count underflow");
 }
 
 void
-hw_lock_bit_nopreempt(hw_lock_bit_t *lock, unsigned int bit)
+_enable_preemption(void)
 {
-       if (__improbable(get_preemption_level() == 0))
-               panic("Attempt to take no-preempt bitlock %p in preemptible context", lock);
-       if (hw_lock_bit_to_internal(lock, bit, LOCK_PANIC_TIMEOUT))
-               return;
-#if    __SMP__
-       panic("hw_lock_bit_nopreempt(): timed out (%p)", lock);
-#else
-       panic("hw_lock_bit_nopreempt(): interlock held (%p)", lock);
-#endif
-}
-
-unsigned int
-hw_lock_bit_try(hw_lock_bit_t *lock, unsigned int bit)
-{
-       uint32_t        mask = (1 << bit);
-#if    !__SMP__
-       uint32_t        state;
-#endif
-       boolean_t       success = FALSE;
+       thread_t     thread = current_thread();
+       unsigned int count  = thread->machine.preemption_count;
 
-       _disable_preemption();
-#if    __SMP__
-       // TODO: consider weak (non-looping) atomic test-and-set
-       success = atomic_test_and_set32(lock, mask, mask, memory_order_acquire, FALSE);
-#else
-       state = ordered_load_bit(lock);
-       if (!(mask & state)) {
-               ordered_store_bit(lock, state | mask);
-               success = TRUE;
+       if (__improbable(count == 0)) {
+               _enable_preemption_underflow();
        }
-#endif // __SMP__
-       if (!success)
-               _enable_preemption();
-
-#if CONFIG_DTRACE
-       if (success)
-               LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE, lock, bit);
-#endif
-
-       return success;
-}
+       count -= 1;
 
-static inline void
-hw_unlock_bit_internal(hw_lock_bit_t *lock, unsigned int bit)
-{
-       uint32_t        mask = (1 << bit);
-#if    !__SMP__
-       uint32_t        state;
-#endif
-
-#if    __SMP__
-       __c11_atomic_fetch_and((_Atomic uint32_t *)lock, ~mask, memory_order_release);
-       set_event();
-#else  // __SMP__
-       state = ordered_load_bit(lock);
-       ordered_store_bit(lock, state & ~mask);
-#endif // __SMP__
-#if CONFIG_DTRACE
-       LOCKSTAT_RECORD(LS_LCK_SPIN_UNLOCK_RELEASE, lock, bit);
-#endif
-}
+       os_atomic_store(&thread->machine.preemption_count, count, compiler_acq_rel);
+       if (count == 0) {
+               kernel_preempt_check(thread);
+       }
 
-/*
- *     Routine:        hw_unlock_bit
- *
- *             Release spin-lock. The second parameter is the bit number to test and set.
- *             Decrement the preemption level.
- */
-void
-hw_unlock_bit(hw_lock_bit_t *lock, unsigned int bit)
-{
-       hw_unlock_bit_internal(lock, bit);
-       _enable_preemption();
+       os_compiler_barrier();
 }
 
-void
-hw_unlock_bit_nopreempt(hw_lock_bit_t *lock, unsigned int bit)
+int
+get_preemption_level(void)
 {
-       if (__improbable(get_preemption_level() == 0))
-               panic("Attempt to release no-preempt bitlock %p in preemptible context", lock);
-       hw_unlock_bit_internal(lock, bit);
+       return current_thread()->machine.preemption_count;
 }
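_disable_preemption() and _enable_preemption() nest through the per-thread preemption_count, and the urgent-AST check in kernel_preempt_check() only runs once the count returns to zero. A minimal usage sketch; the critical-section body is a placeholder:

/* Illustrative sketch only -- not part of the xnu sources. */
static void
example_pinned_section(void)
{
        _disable_preemption();          /* count 0 -> 1: thread stays on this CPU */
        /* ... touch per-CPU state that must not migrate ... */
        _enable_preemption();           /* count 1 -> 0: kernel_preempt_check() may take an urgent AST */
}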
 
 /*
@@ -536,15 +470,14 @@ hw_unlock_bit_nopreempt(hw_lock_bit_t *lock, unsigned int bit)
  */
 lck_spin_t     *
 lck_spin_alloc_init(
-               lck_grp_t * grp,
-               lck_attr_t * attr)
+       lck_grp_t * grp,
+       lck_attr_t * attr)
 {
-       lck_spin_t     *lck;
-
-       if ((lck = (lck_spin_t *) kalloc(sizeof(lck_spin_t))) != 0)
-               lck_spin_init(lck, grp, attr);
+       lck_spin_t *lck;
 
-       return (lck);
+       lck = zalloc(ZV_LCK_SPIN);
+       lck_spin_init(lck, grp, attr);
+       return lck;
 }
 
 /*
@@ -552,11 +485,11 @@ lck_spin_alloc_init(
  */
 void
 lck_spin_free(
-             lck_spin_t * lck,
-             lck_grp_t * grp)
+       lck_spin_t * lck,
+       lck_grp_t * grp)
 {
        lck_spin_destroy(lck, grp);
-       kfree((void *) lck, sizeof(lck_spin_t));
+       zfree(ZV_LCK_SPIN, lck);
 }
 
 /*
@@ -564,26 +497,26 @@ lck_spin_free(
  */
 void
 lck_spin_init(
-             lck_spin_t * lck,
-             lck_grp_t * grp,
-             __unused lck_attr_t * attr)
+       lck_spin_t * lck,
+       lck_grp_t * grp,
+       __unused lck_attr_t * attr)
 {
-       hw_lock_init(&lck->hwlock);
        lck->type = LCK_SPIN_TYPE;
-       lck_grp_reference(grp);
-       lck_grp_lckcnt_incr(grp, LCK_TYPE_SPIN);
-       store_memory_barrier();
+       hw_lock_init(&lck->hwlock);
+       if (grp) {
+               lck_grp_reference(grp);
+               lck_grp_lckcnt_incr(grp, LCK_TYPE_SPIN);
+       }
 }
 
 /*
  * arm_usimple_lock is a lck_spin_t without a group or attributes
  */
-void inline
+MARK_AS_HIBERNATE_TEXT void inline
 arm_usimple_lock_init(simple_lock_t lck, __unused unsigned short initial_value)
 {
        lck->type = LCK_SPIN_TYPE;
        hw_lock_init(&lck->hwlock);
-       store_memory_barrier();
 }
 
 
@@ -593,11 +526,24 @@ arm_usimple_lock_init(simple_lock_t lck, __unused unsigned short initial_value)
 void
 lck_spin_lock(lck_spin_t *lock)
 {
-#if    DEVELOPMENT || DEBUG
-       if (lock->type != LCK_SPIN_TYPE)
+#if     DEVELOPMENT || DEBUG
+       if (lock->type != LCK_SPIN_TYPE) {
+               panic("Invalid spinlock %p", lock);
+       }
+#endif  // DEVELOPMENT || DEBUG
+       hw_lock_lock(&lock->hwlock, LCK_GRP_NULL);
+}
+
+void
+lck_spin_lock_grp(lck_spin_t *lock, lck_grp_t *grp)
+{
+#pragma unused(grp)
+#if     DEVELOPMENT || DEBUG
+       if (lock->type != LCK_SPIN_TYPE) {
                panic("Invalid spinlock %p", lock);
-#endif // DEVELOPMENT || DEBUG
-       hw_lock_lock(&lock->hwlock);
+       }
+#endif  // DEVELOPMENT || DEBUG
+       hw_lock_lock(&lock->hwlock, grp);
 }
 
 /*
@@ -606,11 +552,24 @@ lck_spin_lock(lck_spin_t *lock)
 void
 lck_spin_lock_nopreempt(lck_spin_t *lock)
 {
-#if    DEVELOPMENT || DEBUG
-       if (lock->type != LCK_SPIN_TYPE)
+#if     DEVELOPMENT || DEBUG
+       if (lock->type != LCK_SPIN_TYPE) {
                panic("Invalid spinlock %p", lock);
-#endif // DEVELOPMENT || DEBUG
-       hw_lock_lock_nopreempt(&lock->hwlock);
+       }
+#endif  // DEVELOPMENT || DEBUG
+       hw_lock_lock_nopreempt(&lock->hwlock, LCK_GRP_NULL);
+}
+
+void
+lck_spin_lock_nopreempt_grp(lck_spin_t *lock, lck_grp_t *grp)
+{
+#pragma unused(grp)
+#if     DEVELOPMENT || DEBUG
+       if (lock->type != LCK_SPIN_TYPE) {
+               panic("Invalid spinlock %p", lock);
+       }
+#endif  // DEVELOPMENT || DEBUG
+       hw_lock_lock_nopreempt(&lock->hwlock, grp);
 }
 
 /*
@@ -619,7 +578,14 @@ lck_spin_lock_nopreempt(lck_spin_t *lock)
 int
 lck_spin_try_lock(lck_spin_t *lock)
 {
-       return hw_lock_try(&lock->hwlock);
+       return hw_lock_try(&lock->hwlock, LCK_GRP_NULL);
+}
+
+int
+lck_spin_try_lock_grp(lck_spin_t *lock, lck_grp_t *grp)
+{
+#pragma unused(grp)
+       return hw_lock_try(&lock->hwlock, grp);
 }
 
 /*
@@ -628,7 +594,14 @@ lck_spin_try_lock(lck_spin_t *lock)
 int
 lck_spin_try_lock_nopreempt(lck_spin_t *lock)
 {
-       return hw_lock_try_nopreempt(&lock->hwlock);
+       return hw_lock_try_nopreempt(&lock->hwlock, LCK_GRP_NULL);
+}
+
+int
+lck_spin_try_lock_nopreempt_grp(lck_spin_t *lock, lck_grp_t *grp)
+{
+#pragma unused(grp)
+       return hw_lock_try_nopreempt(&lock->hwlock, grp);
 }
 
 /*
@@ -637,12 +610,14 @@ lck_spin_try_lock_nopreempt(lck_spin_t *lock)
 void
 lck_spin_unlock(lck_spin_t *lock)
 {
-#if    DEVELOPMENT || DEBUG
-       if ((LCK_MTX_STATE_TO_THREAD(lock->lck_spin_data) != current_thread()) && LOCK_CORRECTNESS_PANIC())
+#if     DEVELOPMENT || DEBUG
+       if ((LCK_MTX_STATE_TO_THREAD(lock->lck_spin_data) != current_thread()) && LOCK_CORRECTNESS_PANIC()) {
                panic("Spinlock not owned by thread %p = %lx", lock, lock->lck_spin_data);
-       if (lock->type != LCK_SPIN_TYPE)
+       }
+       if (lock->type != LCK_SPIN_TYPE) {
                panic("Invalid spinlock type %p", lock);
-#endif // DEVELOPMENT || DEBUG
+       }
+#endif  // DEVELOPMENT || DEBUG
        hw_lock_unlock(&lock->hwlock);
 }
 
@@ -652,12 +627,14 @@ lck_spin_unlock(lck_spin_t *lock)
 void
 lck_spin_unlock_nopreempt(lck_spin_t *lock)
 {
-#if    DEVELOPMENT || DEBUG
-       if ((LCK_MTX_STATE_TO_THREAD(lock->lck_spin_data) != current_thread()) && LOCK_CORRECTNESS_PANIC())
+#if     DEVELOPMENT || DEBUG
+       if ((LCK_MTX_STATE_TO_THREAD(lock->lck_spin_data) != current_thread()) && LOCK_CORRECTNESS_PANIC()) {
                panic("Spinlock not owned by thread %p = %lx", lock, lock->lck_spin_data);
-       if (lock->type != LCK_SPIN_TYPE)
+       }
+       if (lock->type != LCK_SPIN_TYPE) {
                panic("Invalid spinlock type %p", lock);
-#endif // DEVELOPMENT || DEBUG
+       }
+#endif  // DEVELOPMENT || DEBUG
        hw_lock_unlock_nopreempt(&lock->hwlock);
 }
 
@@ -666,14 +643,17 @@ lck_spin_unlock_nopreempt(lck_spin_t *lock)
  */
 void
 lck_spin_destroy(
-                lck_spin_t * lck,
-                lck_grp_t * grp)
+       lck_spin_t * lck,
+       lck_grp_t * grp)
 {
-       if (lck->lck_spin_data == LCK_SPIN_TAG_DESTROYED)
+       if (lck->lck_spin_data == LCK_SPIN_TAG_DESTROYED) {
                return;
+       }
        lck->lck_spin_data = LCK_SPIN_TAG_DESTROYED;
-       lck_grp_lckcnt_decr(grp, LCK_TYPE_SPIN);
-       lck_grp_deallocate(grp);
+       if (grp) {
+               lck_grp_lckcnt_decr(grp, LCK_TYPE_SPIN);
+               lck_grp_deallocate(grp);
+       }
 }
 
 /*
@@ -681,7 +661,8 @@ lck_spin_destroy(
  * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
  */
 boolean_t
-kdp_lck_spin_is_acquired(lck_spin_t *lck) {
+kdp_lck_spin_is_acquired(lck_spin_t *lck)
+{
        if (not_in_kdp) {
                panic("panic: spinlock acquired check done outside of kernel debugger");
        }
@@ -695,15 +676,10 @@ kdp_lck_spin_is_acquired(lck_spin_t *lck) {
  */
 void
 usimple_lock_init(
-                 usimple_lock_t l,
-                 unsigned short tag)
+       usimple_lock_t l,
+       unsigned short tag)
 {
-#ifndef        MACHINE_SIMPLE_LOCK
-       USLDBG(usld_lock_init(l, tag));
-       hw_lock_init(&l->lck_spin_data);
-#else
        simple_lock_init((simple_lock_t) l, tag);
-#endif
 }
 
 
@@ -715,23 +691,11 @@ usimple_lock_init(
  *     maintaining preemption state.
  */
 void
-usimple_lock(
-            usimple_lock_t l)
+(usimple_lock)(
+       usimple_lock_t l
+       LCK_GRP_ARG(lck_grp_t *grp))
 {
-#ifndef        MACHINE_SIMPLE_LOCK
-       pc_t            pc;
-
-       OBTAIN_PC(pc, l);
-       USLDBG(usld_lock_pre(l, pc));
-
-       if (!hw_lock_to(&l->lck_spin_data, LockTimeOut))        /* Try to get the lock
-                                                        * with a timeout */
-               panic("simple lock deadlock detection - l=%p, cpu=%d, ret=%p", &l, cpu_number(), pc);
-
-       USLDBG(usld_lock_post(l, pc));
-#else
-       simple_lock((simple_lock_t) l);
-#endif
+       simple_lock((simple_lock_t) l, LCK_GRP_PROBEARG(grp));
 }
 
 
@@ -745,19 +709,10 @@ extern void     sync(void);
  *     maintaining preemption state.
  */
 void
-usimple_unlock(
-              usimple_lock_t l)
+(usimple_unlock)(
+       usimple_lock_t l)
 {
-#ifndef        MACHINE_SIMPLE_LOCK
-       pc_t            pc;
-
-       OBTAIN_PC(pc, l);
-       USLDBG(usld_unlock(l, pc));
-       sync();
-       hw_lock_unlock(&l->lck_spin_data);
-#else
-       simple_unlock((simple_lock_t) l);
-#endif
+       simple_unlock((simple_lock_t)l);
 }
 
 
@@ -773,288 +728,15 @@ usimple_unlock(
  *     behavior from the original assembly-language code, but
  *     doesn't it make sense to log misses?  XXX
  */
-unsigned int
-usimple_lock_try(
-                usimple_lock_t l)
-{
-#ifndef        MACHINE_SIMPLE_LOCK
-       pc_t            pc;
-       unsigned int    success;
-
-       OBTAIN_PC(pc, l);
-       USLDBG(usld_lock_try_pre(l, pc));
-       if ((success = hw_lock_try(&l->lck_spin_data))) {
-               USLDBG(usld_lock_try_post(l, pc));
-       }
-       return success;
-#else
-       return (simple_lock_try((simple_lock_t) l));
-#endif
-}
-
-#if    USLOCK_DEBUG
-/*
- *     States of a usimple_lock.  The default when initializing
- *     a usimple_lock is setting it up for debug checking.
- */
-#define        USLOCK_CHECKED          0x0001  /* lock is being checked */
-#define        USLOCK_TAKEN            0x0002  /* lock has been taken */
-#define        USLOCK_INIT             0xBAA0  /* lock has been initialized */
-#define        USLOCK_INITIALIZED      (USLOCK_INIT|USLOCK_CHECKED)
-#define        USLOCK_CHECKING(l)      (uslock_check &&                        \
-                                ((l)->debug.state & USLOCK_CHECKED))
-
-/*
- *     Trace activities of a particularly interesting lock.
- */
-void            usl_trace(usimple_lock_t, int, pc_t, const char *);
-
-
-/*
- *     Initialize the debugging information contained
- *     in a usimple_lock.
- */
-void
-usld_lock_init(
-              usimple_lock_t l,
-              __unused unsigned short tag)
-{
-       if (l == USIMPLE_LOCK_NULL)
-               panic("lock initialization:  null lock pointer");
-       l->lock_type = USLOCK_TAG;
-       l->debug.state = uslock_check ? USLOCK_INITIALIZED : 0;
-       l->debug.lock_cpu = l->debug.unlock_cpu = 0;
-       l->debug.lock_pc = l->debug.unlock_pc = INVALID_PC;
-       l->debug.lock_thread = l->debug.unlock_thread = INVALID_THREAD;
-       l->debug.duration[0] = l->debug.duration[1] = 0;
-       l->debug.unlock_cpu = l->debug.unlock_cpu = 0;
-       l->debug.unlock_pc = l->debug.unlock_pc = INVALID_PC;
-       l->debug.unlock_thread = l->debug.unlock_thread = INVALID_THREAD;
-}
-
-
-/*
- *     These checks apply to all usimple_locks, not just
- *     those with USLOCK_CHECKED turned on.
- */
+unsigned
 int
-usld_lock_common_checks(
-                       usimple_lock_t l,
-                       const char *caller)
-{
-       if (l == USIMPLE_LOCK_NULL)
-               panic("%s:  null lock pointer", caller);
-       if (l->lock_type != USLOCK_TAG)
-               panic("%s:  0x%x is not a usimple lock", caller, (integer_t) l);
-       if (!(l->debug.state & USLOCK_INIT))
-               panic("%s:  0x%x is not an initialized lock",
-                     caller, (integer_t) l);
-       return USLOCK_CHECKING(l);
-}
-
-
-/*
- *     Debug checks on a usimple_lock just before attempting
- *     to acquire it.
- */
-/* ARGSUSED */
-void
-usld_lock_pre(
-             usimple_lock_t l,
-             pc_t pc)
-{
-       const char     *caller = "usimple_lock";
-
-
-       if (!usld_lock_common_checks(l, caller))
-               return;
-
-       /*
-        *      Note that we have a weird case where we are getting a lock when we are]
-        *      in the process of putting the system to sleep. We are running with no
-        *      current threads, therefore we can't tell if we are trying to retake a lock
-        *      we have or someone on the other processor has it.  Therefore we just
-        *      ignore this test if the locking thread is 0.
-        */
-
-       if ((l->debug.state & USLOCK_TAKEN) && l->debug.lock_thread &&
-           l->debug.lock_thread == (void *) current_thread()) {
-               printf("%s:  lock 0x%x already locked (at %p) by",
-                      caller, (integer_t) l, l->debug.lock_pc);
-               printf(" current thread %p (new attempt at pc %p)\n",
-                      l->debug.lock_thread, pc);
-               panic("%s", caller);
-       }
-       mp_disable_preemption();
-       usl_trace(l, cpu_number(), pc, caller);
-       mp_enable_preemption();
-}
-
-
-/*
- *     Debug checks on a usimple_lock just after acquiring it.
- *
- *     Pre-emption has been disabled at this point,
- *     so we are safe in using cpu_number.
- */
-void
-usld_lock_post(
-              usimple_lock_t l,
-              pc_t pc)
-{
-       int             mycpu;
-       const char     *caller = "successful usimple_lock";
-
-
-       if (!usld_lock_common_checks(l, caller))
-               return;
-
-       if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED))
-               panic("%s:  lock 0x%x became uninitialized",
-                     caller, (integer_t) l);
-       if ((l->debug.state & USLOCK_TAKEN))
-               panic("%s:  lock 0x%x became TAKEN by someone else",
-                     caller, (integer_t) l);
-
-       mycpu = cpu_number();
-       l->debug.lock_thread = (void *) current_thread();
-       l->debug.state |= USLOCK_TAKEN;
-       l->debug.lock_pc = pc;
-       l->debug.lock_cpu = mycpu;
-
-       usl_trace(l, mycpu, pc, caller);
-}
-
-
-/*
- *     Debug checks on a usimple_lock just before
- *     releasing it.  Note that the caller has not
- *     yet released the hardware lock.
- *
- *     Preemption is still disabled, so there's
- *     no problem using cpu_number.
- */
-void
-usld_unlock(
-           usimple_lock_t l,
-           pc_t pc)
-{
-       int             mycpu;
-       const char     *caller = "usimple_unlock";
-
-
-       if (!usld_lock_common_checks(l, caller))
-               return;
-
-       mycpu = cpu_number();
-
-       if (!(l->debug.state & USLOCK_TAKEN))
-               panic("%s:  lock 0x%x hasn't been taken",
-                     caller, (integer_t) l);
-       if (l->debug.lock_thread != (void *) current_thread())
-               panic("%s:  unlocking lock 0x%x, owned by thread %p",
-                     caller, (integer_t) l, l->debug.lock_thread);
-       if (l->debug.lock_cpu != mycpu) {
-               printf("%s:  unlocking lock 0x%x on cpu 0x%x",
-                      caller, (integer_t) l, mycpu);
-               printf(" (acquired on cpu 0x%x)\n", l->debug.lock_cpu);
-               panic("%s", caller);
-       }
-       usl_trace(l, mycpu, pc, caller);
-
-       l->debug.unlock_thread = l->debug.lock_thread;
-       l->debug.lock_thread = INVALID_PC;
-       l->debug.state &= ~USLOCK_TAKEN;
-       l->debug.unlock_pc = pc;
-       l->debug.unlock_cpu = mycpu;
-}
-
-
-/*
- *     Debug checks on a usimple_lock just before
- *     attempting to acquire it.
- *
- *     Preemption isn't guaranteed to be disabled.
- */
-void
-usld_lock_try_pre(
-                 usimple_lock_t l,
-                 pc_t pc)
-{
-       const char     *caller = "usimple_lock_try";
-
-       if (!usld_lock_common_checks(l, caller))
-               return;
-       mp_disable_preemption();
-       usl_trace(l, cpu_number(), pc, caller);
-       mp_enable_preemption();
-}
-
-
-/*
- *     Debug checks on a usimple_lock just after
- *     successfully attempting to acquire it.
- *
- *     Preemption has been disabled by the
- *     lock acquisition attempt, so it's safe
- *     to use cpu_number.
- */
-void
-usld_lock_try_post(
-                  usimple_lock_t l,
-                  pc_t pc)
-{
-       int             mycpu;
-       const char     *caller = "successful usimple_lock_try";
-
-       if (!usld_lock_common_checks(l, caller))
-               return;
-
-       if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED))
-               panic("%s:  lock 0x%x became uninitialized",
-                     caller, (integer_t) l);
-       if ((l->debug.state & USLOCK_TAKEN))
-               panic("%s:  lock 0x%x became TAKEN by someone else",
-                     caller, (integer_t) l);
-
-       mycpu = cpu_number();
-       l->debug.lock_thread = (void *) current_thread();
-       l->debug.state |= USLOCK_TAKEN;
-       l->debug.lock_pc = pc;
-       l->debug.lock_cpu = mycpu;
-
-       usl_trace(l, mycpu, pc, caller);
-}
-
-
-/*
- *     For very special cases, set traced_lock to point to a
- *     specific lock of interest.  The result is a series of
- *     XPRs showing lock operations on that lock.  The lock_seq
- *     value is used to show the order of those operations.
- */
-usimple_lock_t  traced_lock;
-unsigned int    lock_seq;
-
-void
-usl_trace(
-         usimple_lock_t l,
-         int mycpu,
-         pc_t pc,
-         const char *op_name)
+(usimple_lock_try)(
+       usimple_lock_t l
+       LCK_GRP_ARG(lck_grp_t *grp))
 {
-       if (traced_lock == l) {
-               XPR(XPR_SLOCK,
-                   "seq %d, cpu %d, %s @ %x\n",
-                   (integer_t) lock_seq, (integer_t) mycpu,
-                   (integer_t) op_name, (integer_t) pc, 0);
-               lock_seq++;
-       }
+       return simple_lock_try((simple_lock_t) l, grp);
 }
 
-
-#endif                         /* USLOCK_DEBUG */
-
 /*
  * The C portion of the shared/exclusive locks package.
  */
@@ -1063,11 +745,10 @@ usl_trace(
  * compute the deadline to spin against when
  * waiting for a change of state on a lck_rw_t
  */
-#if    __SMP__
 static inline uint64_t
 lck_rw_deadline_for_spin(lck_rw_t *lck)
 {
-       lck_rw_word_t   word;
+       lck_rw_word_t   word;
 
        word.data = ordered_load_rw(lck);
        if (word.can_sleep) {
@@ -1082,46 +763,40 @@ lck_rw_deadline_for_spin(lck_rw_t *lck)
                         * to be at 0, we'll not bother spinning since the latency for this to happen is
                         * unpredictable...
                         */
-                       return (mach_absolute_time());
+                       return mach_absolute_time();
                }
-               return (mach_absolute_time() + MutexSpin);
-       } else
-               return (mach_absolute_time() + (100000LL * 1000000000LL));
+               return mach_absolute_time() + MutexSpin;
+       } else {
+               return mach_absolute_time() + (100000LL * 1000000000LL);
+       }
 }
-#endif // __SMP__
 
 static boolean_t
 lck_rw_drain_status(lck_rw_t *lock, uint32_t status_mask, boolean_t wait __unused)
 {
-#if    __SMP__
-       uint64_t        deadline = 0;
-       uint32_t        data;
+       uint64_t        deadline = 0;
+       uint32_t        data;
 
-       if (wait)
+       if (wait) {
                deadline = lck_rw_deadline_for_spin(lock);
+       }
 
-       for ( ; ; ) {
+       for (;;) {
                data = load_exclusive32(&lock->lck_rw_data, memory_order_acquire_smp);
-               if ((data & status_mask) == 0)
+               if ((data & status_mask) == 0) {
                        break;
-               if (wait)
+               }
+               if (wait) {
                        wait_for_event();
-               else
-                       clear_exclusive();
-               if (!wait || (mach_absolute_time() >= deadline))
+               } else {
+                       os_atomic_clear_exclusive();
+               }
+               if (!wait || (mach_absolute_time() >= deadline)) {
                        return FALSE;
+               }
        }
-       clear_exclusive();
+       os_atomic_clear_exclusive();
        return TRUE;
-#else
-       uint32_t        data;
-
-       data = ordered_load_rw(lock);
-       if ((data & status_mask) == 0)
-               return TRUE;
-       else
-               return FALSE;
-#endif // __SMP__
 }
 
 /*
@@ -1130,21 +805,17 @@ lck_rw_drain_status(lck_rw_t *lock, uint32_t status_mask, boolean_t wait __unuse
 static inline void
 lck_rw_interlock_spin(lck_rw_t *lock)
 {
-#if __SMP__
-       uint32_t        data;
+       uint32_t        data;
 
-       for ( ; ; ) {
+       for (;;) {
                data = load_exclusive32(&lock->lck_rw_data, memory_order_relaxed);
-               if (data & LCK_RW_INTERLOCK)
+               if (data & LCK_RW_INTERLOCK) {
                        wait_for_event();
-               else {
-                       clear_exclusive();
+               } else {
+                       os_atomic_clear_exclusive();
                        return;
                }
        }
-#else
-       panic("lck_rw_interlock_spin(): Interlock locked %p %x", lock, lock->lck_rw_data);
-#endif
 }
 
 /*
@@ -1155,9 +826,9 @@ lck_rw_interlock_spin(lck_rw_t *lock)
 static inline boolean_t
 lck_interlock_lock(lck_rw_t *lck)
 {
-       boolean_t       istate;
+       boolean_t       istate;
 
-       istate = ml_set_interrupts_enabled(FALSE);      
+       istate = ml_set_interrupts_enabled(FALSE);
        lck_rw_ilk_lock(lck);
        return istate;
 }
@@ -1170,24 +841,21 @@ lck_interlock_unlock(lck_rw_t *lck, boolean_t istate)
 }
 
 
-#define LCK_RW_GRAB_WANT       0
-#define LCK_RW_GRAB_SHARED     1
+#define LCK_RW_GRAB_WANT        0
+#define LCK_RW_GRAB_SHARED      1
 
 static boolean_t
 lck_rw_grab(lck_rw_t *lock, int mode, boolean_t wait)
 {
-       uint64_t        deadline = 0;
-       uint32_t        data, prev;
-       boolean_t       do_exch;
+       uint64_t        deadline = 0;
+       uint32_t        data, prev;
+       boolean_t       do_exch;
 
-#if __SMP__
-       if (wait)
+       if (wait) {
                deadline = lck_rw_deadline_for_spin(lock);
-#else
-       wait = FALSE;   // Don't spin on UP systems
-#endif
+       }
 
-       for ( ; ; ) {
+       for (;;) {
                data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
                if (data & LCK_RW_INTERLOCK) {
                        atomic_exchange_abort();
@@ -1200,23 +868,26 @@ lck_rw_grab(lck_rw_t *lock, int mode, boolean_t wait)
                                data |= LCK_RW_WANT_EXCL;
                                do_exch = TRUE;
                        }
-               } else {        // LCK_RW_GRAB_SHARED
+               } else {        // LCK_RW_GRAB_SHARED
                        if (((data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) == 0) ||
-                               (((data & LCK_RW_SHARED_MASK)) && ((data & LCK_RW_PRIV_EXCL) == 0))) {
+                           (((data & LCK_RW_SHARED_MASK)) && ((data & LCK_RW_PRIV_EXCL) == 0))) {
                                data += LCK_RW_SHARED_READER;
                                do_exch = TRUE;
                        }
                }
                if (do_exch) {
-                       if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp))
+                       if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
                                return TRUE;
+                       }
                } else {
-                       if (wait)                                               // Non-waiting
+                       if (wait) {                                             // Non-waiting
                                wait_for_event();
-                       else
+                       } else {
                                atomic_exchange_abort();
-                       if (!wait || (mach_absolute_time() >= deadline))
+                       }
+                       if (!wait || (mach_absolute_time() >= deadline)) {
                                return FALSE;
+                       }
                }
        }
 }
@@ -1227,14 +898,13 @@ lck_rw_grab(lck_rw_t *lock, int mode, boolean_t wait)
  */
 lck_rw_t *
 lck_rw_alloc_init(
-       lck_grp_t       *grp,
-       lck_attr_t      *attr)
+       lck_grp_t       *grp,
+       lck_attr_t      *attr)
 {
-       lck_rw_t        *lck;
-
-       if ((lck = (lck_rw_t *)kalloc(sizeof(lck_rw_t))) != 0)
-               lck_rw_init(lck, grp, attr);
+       lck_rw_t *lck;
 
+       lck = zalloc_flags(ZV_LCK_RW, Z_WAITOK | Z_ZERO);
+       lck_rw_init(lck, grp, attr);
        return lck;
 }
 
@@ -1243,11 +913,11 @@ lck_rw_alloc_init(
  */
 void
 lck_rw_free(
-       lck_rw_t        *lck,
-       lck_grp_t       *grp)
+       lck_rw_t        *lck,
+       lck_grp_t       *grp)
 {
        lck_rw_destroy(lck, grp);
-       kfree(lck, sizeof(lck_rw_t));
+       zfree(ZV_LCK_RW, lck);
 }
 
 /*
@@ -1255,16 +925,18 @@ lck_rw_free(
  */
 void
 lck_rw_init(
-       lck_rw_t        *lck,
-       lck_grp_t       *grp,
-       lck_attr_t      *attr)
+       lck_rw_t        *lck,
+       lck_grp_t       *grp,
+       lck_attr_t      *attr)
 {
-       if (attr == LCK_ATTR_NULL)
+       if (attr == LCK_ATTR_NULL) {
                attr = &LockDefaultLckAttr;
+       }
        memset(lck, 0, sizeof(lck_rw_t));
        lck->lck_rw_can_sleep = TRUE;
-       if ((attr->lck_attr_val & LCK_ATTR_RW_SHARED_PRIORITY) == 0)
+       if ((attr->lck_attr_val & LCK_ATTR_RW_SHARED_PRIORITY) == 0) {
                lck->lck_rw_priv_excl = TRUE;
+       }
 
        lck_grp_reference(grp);
        lck_grp_lckcnt_incr(grp, LCK_TYPE_RW);
@@ -1276,11 +948,12 @@ lck_rw_init(
  */
 void
 lck_rw_destroy(
-       lck_rw_t        *lck,
-       lck_grp_t       *grp)
+       lck_rw_t        *lck,
+       lck_grp_t       *grp)
 {
-       if (lck->lck_rw_tag == LCK_RW_TAG_DESTROYED)
+       if (lck->lck_rw_tag == LCK_RW_TAG_DESTROYED) {
                return;
+       }
 #if MACH_LDEBUG
        lck_rw_assert(lck, LCK_RW_ASSERT_NOTHELD);
 #endif
@@ -1295,15 +968,50 @@ lck_rw_destroy(
  */
 void
 lck_rw_lock(
-       lck_rw_t                *lck,
-       lck_rw_type_t   lck_rw_type)
+       lck_rw_t                *lck,
+       lck_rw_type_t   lck_rw_type)
 {
-       if (lck_rw_type == LCK_RW_TYPE_SHARED)
+       if (lck_rw_type == LCK_RW_TYPE_SHARED) {
                lck_rw_lock_shared(lck);
-       else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
+       } else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) {
                lck_rw_lock_exclusive(lck);
-       else
+       } else {
                panic("lck_rw_lock(): Invalid RW lock type: %x", lck_rw_type);
+       }
+}
+
+#define LCK_RW_LOCK_EXCLUSIVE_TAS(lck) (atomic_test_and_set32(&(lck)->lck_rw_data, \
+           (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK), \
+           LCK_RW_WANT_EXCL, memory_order_acquire_smp, FALSE))
+
+/*
+ *     Routine:        lck_rw_lock_exclusive_check_contended
+ */
+bool
+lck_rw_lock_exclusive_check_contended(lck_rw_t *lock)
+{
+       thread_t        thread = current_thread();
+       bool            contended  = false;
+
+       if (lock->lck_rw_can_sleep) {
+               thread->rwlock_count++;
+       } else if (get_preemption_level() == 0) {
+               panic("Taking non-sleepable RW lock with preemption enabled");
+       }
+       if (LCK_RW_LOCK_EXCLUSIVE_TAS(lock)) {
+#if     CONFIG_DTRACE
+               LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
+#endif  /* CONFIG_DTRACE */
+       } else {
+               contended = true;
+               lck_rw_lock_exclusive_gen(lock);
+       }
+#if MACH_ASSERT
+       thread_t owner = ordered_load_rw_owner(lock);
+       assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
+#endif
+       ordered_store_rw_owner(lock, thread);
+       return contended;
 }
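lck_rw_lock_exclusive_check_contended() acquires the lock exclusively just like lck_rw_lock_exclusive(), but also reports whether the fast-path test-and-set failed and the generic slow path had to run. A hedged sketch of a caller consuming that hint; the lock variable and counter are hypothetical:

/* Illustrative sketch only -- not part of the xnu sources. */
static uint64_t example_contention_count;

static void
example_update_under_lock(lck_rw_t *example_lock)
{
        if (lck_rw_lock_exclusive_check_contended(example_lock)) {
                example_contention_count++;     /* fast path failed; slow path was taken */
        }
        /* ... modify the state protected by example_lock ... */
        lck_rw_unlock_exclusive(example_lock);
}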
 
 /*
@@ -1312,17 +1020,20 @@ lck_rw_lock(
 void
 lck_rw_lock_exclusive(lck_rw_t *lock)
 {
-       thread_t        thread = current_thread();
+       thread_t        thread = current_thread();
 
-       thread->rwlock_count++;
-       if (atomic_test_and_set32(&lock->lck_rw_data,
-               (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK),
-               LCK_RW_WANT_EXCL, memory_order_acquire_smp, FALSE)) {
-#if    CONFIG_DTRACE
+       if (lock->lck_rw_can_sleep) {
+               thread->rwlock_count++;
+       } else if (get_preemption_level() == 0) {
+               panic("Taking non-sleepable RW lock with preemption enabled");
+       }
+       if (LCK_RW_LOCK_EXCLUSIVE_TAS(lock)) {
+#if     CONFIG_DTRACE
                LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
-#endif /* CONFIG_DTRACE */
-       } else
+#endif  /* CONFIG_DTRACE */
+       } else {
                lck_rw_lock_exclusive_gen(lock);
+       }
 #if MACH_ASSERT
        thread_t owner = ordered_load_rw_owner(lock);
        assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
@@ -1336,10 +1047,14 @@ lck_rw_lock_exclusive(lck_rw_t *lock)
 void
 lck_rw_lock_shared(lck_rw_t *lock)
 {
-       uint32_t        data, prev;
+       uint32_t        data, prev;
 
-       current_thread()->rwlock_count++;
-       for ( ; ; ) {
+       if (lock->lck_rw_can_sleep) {
+               current_thread()->rwlock_count++;
+       } else if (get_preemption_level() == 0) {
+               panic("Taking non-sleepable RW lock with preemption enabled");
+       }
+       for (;;) {
                data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
                if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK)) {
                        atomic_exchange_abort();
@@ -1347,29 +1062,32 @@ lck_rw_lock_shared(lck_rw_t *lock)
                        break;
                }
                data += LCK_RW_SHARED_READER;
-               if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp))
+               if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
                        break;
+               }
                cpu_pause();
        }
 #if MACH_ASSERT
        thread_t owner = ordered_load_rw_owner(lock);
        assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
 #endif
-#if    CONFIG_DTRACE
+#if     CONFIG_DTRACE
        LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED);
-#endif /* CONFIG_DTRACE */
+#endif  /* CONFIG_DTRACE */
        return;
 }
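The loop above is xnu's atomic_exchange_begin32 / atomic_exchange_abort / atomic_exchange_complete32 idiom: observe the word, abort to the slow path if a writer-side bit is visible, otherwise try to commit the incremented reader count and retry if the word changed underneath. The same shape in portable C11, as a hedged sketch with made-up constants rather than the kernel primitives:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define RW_SHARED_READER 0x00000001u    /* illustrative reader increment */
#define RW_WRITER_BITS   0x00070000u    /* illustrative WANT_EXCL|WANT_UPGRADE|INTERLOCK */

/* Returns true when the reader count was taken on the fast path. */
static bool
rw_lock_shared_fastpath(_Atomic uint32_t *word)
{
        uint32_t prev = atomic_load_explicit(word, memory_order_relaxed);

        for (;;) {
                if (prev & RW_WRITER_BITS) {
                        return false;           /* "abort": caller takes the slow path */
                }
                /* compare_exchange_weak reloads prev on failure, so simply retry */
                if (atomic_compare_exchange_weak_explicit(word, &prev,
                    prev + RW_SHARED_READER, memory_order_acquire, memory_order_relaxed)) {
                        return true;            /* "complete": reader count committed */
                }
        }
}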
 
 /*
  *     Routine:        lck_rw_lock_shared_to_exclusive
+ *
+ *     Returns FALSE on failure, in which case the shared lock is dropped.
  */
 boolean_t
 lck_rw_lock_shared_to_exclusive(lck_rw_t *lock)
 {
-       uint32_t        data, prev;
+       uint32_t        data, prev;
 
-       for ( ; ; ) {
+       for (;;) {
                data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
                if (data & LCK_RW_INTERLOCK) {
                        atomic_exchange_abort();
@@ -1378,29 +1096,33 @@ lck_rw_lock_shared_to_exclusive(lck_rw_t *lock)
                }
                if (data & LCK_RW_WANT_UPGRADE) {
                        data -= LCK_RW_SHARED_READER;
-                       if ((data & LCK_RW_SHARED_MASK) == 0)           /* we were the last reader */
-                               data &= ~(LCK_RW_W_WAITING);            /* so clear the wait indicator */
-                       if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp))
+                       if ((data & LCK_RW_SHARED_MASK) == 0) {         /* we were the last reader */
+                               data &= ~(LCK_RW_W_WAITING);            /* so clear the wait indicator */
+                       }
+                       if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
                                return lck_rw_lock_shared_to_exclusive_failure(lock, prev);
+                       }
                } else {
-                       data |= LCK_RW_WANT_UPGRADE;            /* ask for WANT_UPGRADE */
-                       data -= LCK_RW_SHARED_READER;           /* and shed our read count */
-                       if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp))
+                       data |= LCK_RW_WANT_UPGRADE;            /* ask for WANT_UPGRADE */
+                       data -= LCK_RW_SHARED_READER;           /* and shed our read count */
+                       if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
                                break;
+                       }
                }
                cpu_pause();
        }
-                                                                               /* we now own the WANT_UPGRADE */
-       if (data & LCK_RW_SHARED_MASK)          /* check to see if all of the readers are drained */
-               lck_rw_lock_shared_to_exclusive_success(lock);  /* if not, we need to go wait */
+       /* we now own the WANT_UPGRADE */
+       if (data & LCK_RW_SHARED_MASK) {        /* check to see if all of the readers are drained */
+               lck_rw_lock_shared_to_exclusive_success(lock);  /* if not, we need to go wait */
+       }
 #if MACH_ASSERT
        thread_t owner = ordered_load_rw_owner(lock);
        assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
 #endif
        ordered_store_rw_owner(lock, current_thread());
-#if    CONFIG_DTRACE
+#if     CONFIG_DTRACE
        LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lock, 0);
-#endif /* CONFIG_DTRACE */
+#endif  /* CONFIG_DTRACE */
        return TRUE;
 }
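The upgrade protocol above allows only one LCK_RW_WANT_UPGRADE holder at a time: a second upgrader must shed its read reference and report failure (the shared lock is then fully dropped), while the winner sheds its own read count and waits for the remaining readers to drain. A condensed, hedged sketch of that decision, reusing the illustrative constants from the earlier sketches and leaving out the waiter bookkeeping:

/* Assumes RW_SHARED_READER, RW_SHARED_MASK, RW_WANT_UPGRADE from the sketches above. */
static bool
rw_try_upgrade(_Atomic uint32_t *word)
{
        uint32_t prev = atomic_load_explicit(word, memory_order_relaxed);

        for (;;) {
                uint32_t next = prev - RW_SHARED_READER;        /* shed our read ref either way */

                if (prev & RW_WANT_UPGRADE) {
                        /* Someone else is already upgrading: drop the read lock and fail. */
                        if (atomic_compare_exchange_weak_explicit(word, &prev, next,
                            memory_order_acquire, memory_order_relaxed)) {
                                return false;   /* caller must reacquire from scratch */
                        }
                } else {
                        if (atomic_compare_exchange_weak_explicit(word, &prev,
                            next | RW_WANT_UPGRADE, memory_order_acquire, memory_order_relaxed)) {
                                break;          /* we own WANT_UPGRADE */
                        }
                }
        }
        /* Wait for the remaining readers to drain (the kernel spins, then blocks). */
        while (atomic_load_explicit(word, memory_order_acquire) & RW_SHARED_MASK) {
                ;
        }
        return true;
}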
 
@@ -1415,21 +1137,25 @@ lck_rw_lock_shared_to_exclusive(lck_rw_t *lock)
  */
 static boolean_t
 lck_rw_lock_shared_to_exclusive_failure(
-       lck_rw_t        *lck,
-       uint32_t        prior_lock_state)
+       lck_rw_t        *lck,
+       uint32_t        prior_lock_state)
 {
-       thread_t        thread = current_thread();
-       uint32_t        rwlock_count;
+       thread_t        thread = current_thread();
+       uint32_t        rwlock_count;
 
        /* Check if dropping the lock means that we need to unpromote */
-       rwlock_count = thread->rwlock_count--;
+       if (lck->lck_rw_can_sleep) {
+               rwlock_count = thread->rwlock_count--;
+       } else {
+               rwlock_count = UINT32_MAX;
+       }
 #if MACH_LDEBUG
        if (rwlock_count == 0) {
                panic("rw lock count underflow for thread %p", thread);
        }
 #endif
        if ((prior_lock_state & LCK_RW_W_WAITING) &&
-               ((prior_lock_state & LCK_RW_SHARED_MASK) == LCK_RW_SHARED_READER)) {
+           ((prior_lock_state & LCK_RW_SHARED_MASK) == LCK_RW_SHARED_READER)) {
                /*
                 *      Someone else has requested upgrade.
                 *      Since we've released the read lock, wake
@@ -1444,9 +1170,9 @@ lck_rw_lock_shared_to_exclusive_failure(
        }
 
        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_NONE,
-                    VM_KERNEL_UNSLIDE_OR_PERM(lck), lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0);
+           VM_KERNEL_UNSLIDE_OR_PERM(lck), lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0);
 
-       return (FALSE);
+       return FALSE;
 }
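The wakeup test above is the subtle part of the failure path: the thread waiting to upgrade is woken only when the departing reader was the last one, i.e. the snapshot taken before the decrement showed exactly one shared reference together with LCK_RW_W_WAITING. As a standalone predicate, using the illustrative constants from the earlier sketches plus a made-up writer-waiting bit:

#define RW_W_WAITING 0x00080000u        /* illustrative writer-waiting bit */

/* True when dropping our read ref should wake the thread waiting to upgrade. */
static bool
rw_upgrade_failure_should_wake(uint32_t prior_state)
{
        return (prior_state & RW_W_WAITING) &&
               ((prior_state & RW_SHARED_MASK) == RW_SHARED_READER);
}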
 
 /*
@@ -1459,26 +1185,25 @@ lck_rw_lock_shared_to_exclusive_failure(
  */
 static boolean_t
 lck_rw_lock_shared_to_exclusive_success(
-       lck_rw_t        *lock)
-{
-       __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock);
-       int                     slept = 0;
-       lck_rw_word_t           word;
-       wait_result_t           res;
-       boolean_t               istate;
-       boolean_t               not_shared;
-
-#if    CONFIG_DTRACE
-       uint64_t                wait_interval = 0;
-       int                     readers_at_sleep = 0;
-       boolean_t               dtrace_ls_initialized = FALSE;
-       boolean_t               dtrace_rwl_shared_to_excl_spin, dtrace_rwl_shared_to_excl_block, dtrace_ls_enabled = FALSE;
+       lck_rw_t        *lock)
+{
+       __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock);
+       int                     slept = 0;
+       lck_rw_word_t           word;
+       wait_result_t           res;
+       boolean_t               istate;
+       boolean_t               not_shared;
+
+#if     CONFIG_DTRACE
+       uint64_t                wait_interval = 0;
+       int                     readers_at_sleep = 0;
+       boolean_t               dtrace_ls_initialized = FALSE;
+       boolean_t               dtrace_rwl_shared_to_excl_spin, dtrace_rwl_shared_to_excl_block, dtrace_ls_enabled = FALSE;
 #endif
 
        while (!lck_rw_drain_status(lock, LCK_RW_SHARED_MASK, FALSE)) {
-
                word.data = ordered_load_rw(lock);
-#if    CONFIG_DTRACE
+#if     CONFIG_DTRACE
                if (dtrace_ls_initialized == FALSE) {
                        dtrace_ls_initialized = TRUE;
                        dtrace_rwl_shared_to_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN] != 0);
@@ -1496,15 +1221,16 @@ lck_rw_lock_shared_to_exclusive_success(
 #endif
 
                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_START,
-                            trace_lck, word.shared_count, 0, 0, 0);
+                   trace_lck, word.shared_count, 0, 0, 0);
 
                not_shared = lck_rw_drain_status(lock, LCK_RW_SHARED_MASK, TRUE);
 
                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_END,
-                            trace_lck, lock->lck_rw_shared_count, 0, 0, 0);
+                   trace_lck, lock->lck_rw_shared_count, 0, 0, 0);
 
-               if (not_shared)
+               if (not_shared) {
                        break;
+               }
 
                /*
                 * if we get here, the spin deadline in lck_rw_wait_on_status()
@@ -1512,20 +1238,19 @@ lck_rw_lock_shared_to_exclusive_success(
                 * check to see if we're allowed to do a thread_block
                 */
                if (word.can_sleep) {
-                       
                        istate = lck_interlock_lock(lock);
-                       
+
                        word.data = ordered_load_rw(lock);
                        if (word.shared_count != 0) {
                                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_START,
-                                            trace_lck, word.shared_count, 0, 0, 0);
+                                   trace_lck, word.shared_count, 0, 0, 0);
 
                                word.w_waiting = 1;
                                ordered_store_rw(lock, word.data);
 
                                thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockUpgrade);
                                res = assert_wait(LCK_RW_WRITER_EVENT(lock),
-                                               THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
+                                   THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
                                lck_interlock_unlock(lock, istate);
 
                                if (res == THREAD_WAITING) {
@@ -1533,29 +1258,29 @@ lck_rw_lock_shared_to_exclusive_success(
                                        slept++;
                                }
                                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_END,
-                                            trace_lck, res, slept, 0, 0);
+                                   trace_lck, res, slept, 0, 0);
                        } else {
                                lck_interlock_unlock(lock, istate);
                                break;
                        }
                }
        }
-#if    CONFIG_DTRACE
+#if     CONFIG_DTRACE
        /*
         * We infer whether we took the sleep/spin path above by checking readers_at_sleep.
         */
        if (dtrace_ls_enabled == TRUE) {
                if (slept == 0) {
-                       LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN, lock, mach_absolute_time() - wait_interval, 0);
+                       LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN, lock, mach_absolute_time() - wait_interval, 0);
                } else {
-                       LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK, lock,
+                       LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK, lock,
                            mach_absolute_time() - wait_interval, 1,
                            (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
                }
        }
        LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lock, 1);
 #endif
-       return (TRUE);
+       return TRUE;
 }
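The structure of the loop above recurs in every slow path in this file: spin on lck_rw_drain_status() until a deadline, and only then, if the lock is sleepable, set the waiting bit under the interlock and block via assert_wait()/thread_block(). A rough user-space analogue of the drain step, with a plain sleep standing in for the kernel wait event:

#include <stdatomic.h>
#include <stdint.h>
#include <time.h>

/* Spin until (word & mask) == 0; after each exhausted spin budget, back off. */
static void
rw_drain(_Atomic uint32_t *word, uint32_t mask, unsigned spin_budget)
{
        for (;;) {
                for (unsigned i = 0; i < spin_budget; i++) {
                        if ((atomic_load_explicit(word, memory_order_acquire) & mask) == 0) {
                                return;         /* drained while spinning */
                        }
                }
                /* Spin deadline expired: "block" (crude stand-in for the wait event). */
                struct timespec ts = { 0, 100 * 1000 };         /* 100 microseconds */
                nanosleep(&ts, NULL);
        }
}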
 
 
@@ -1563,32 +1288,32 @@ lck_rw_lock_shared_to_exclusive_success(
  *     Routine:        lck_rw_lock_exclusive_to_shared
  */
 
-void lck_rw_lock_exclusive_to_shared(lck_rw_t *lock)
+void
+lck_rw_lock_exclusive_to_shared(lck_rw_t *lock)
 {
-       uint32_t        data, prev;
+       uint32_t        data, prev;
 
        assertf(lock->lck_rw_owner == current_thread(), "state=0x%x, owner=%p", lock->lck_rw_data, lock->lck_rw_owner);
        ordered_store_rw_owner(lock, THREAD_NULL);
-       for ( ; ; ) {
+       for (;;) {
                data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_release_smp);
                if (data & LCK_RW_INTERLOCK) {
-#if __SMP__
                        atomic_exchange_abort();
-                       lck_rw_interlock_spin(lock);    /* wait for interlock to clear */
+                       lck_rw_interlock_spin(lock);    /* wait for interlock to clear */
                        continue;
-#else
-                       panic("lck_rw_lock_exclusive_to_shared(): Interlock locked (%p): %x", lock, data);
-#endif // __SMP__
                }
                data += LCK_RW_SHARED_READER;
-               if (data & LCK_RW_WANT_UPGRADE)
+               if (data & LCK_RW_WANT_UPGRADE) {
                        data &= ~(LCK_RW_WANT_UPGRADE);
-               else
+               } else {
                        data &= ~(LCK_RW_WANT_EXCL);
-               if (!((prev & LCK_RW_W_WAITING) && (prev & LCK_RW_PRIV_EXCL)))
+               }
+               if (!((prev & LCK_RW_W_WAITING) && (prev & LCK_RW_PRIV_EXCL))) {
                        data &= ~(LCK_RW_W_WAITING);
-               if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_release_smp))
+               }
+               if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_release_smp)) {
                        break;
+               }
                cpu_pause();
        }
        return lck_rw_lock_exclusive_to_shared_gen(lock, prev);
@@ -1596,7 +1321,7 @@ void lck_rw_lock_exclusive_to_shared(lck_rw_t *lock)
 
 /*
  *      Routine:        lck_rw_lock_exclusive_to_shared_gen
- *     Function:
+ *      Function:
  *             Fast path has already dropped
  *             our exclusive state and bumped lck_rw_shared_count
  *             all we need to do here is determine if anyone
@@ -1604,11 +1329,11 @@ void lck_rw_lock_exclusive_to_shared(lck_rw_t *lock)
  */
 static void
 lck_rw_lock_exclusive_to_shared_gen(
-       lck_rw_t        *lck,
-       uint32_t        prior_lock_state)
+       lck_rw_t        *lck,
+       uint32_t        prior_lock_state)
 {
-       __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
-       lck_rw_word_t   fake_lck;
+       __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
+       lck_rw_word_t   fake_lck;
 
        /*
         * prior_lock state is a snapshot of the 1st word of the
@@ -1619,7 +1344,7 @@ lck_rw_lock_exclusive_to_shared_gen(
        fake_lck.data = prior_lock_state;
 
        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_START,
-                            trace_lck, fake_lck->want_excl, fake_lck->want_upgrade, 0, 0);
+           trace_lck, fake_lck.want_excl, fake_lck.want_upgrade, 0, 0);
 
        /*
         * don't wake up anyone waiting to take the lock exclusively
@@ -1629,11 +1354,12 @@ lck_rw_lock_exclusive_to_shared_gen(
         * wake up any waiting readers if we don't have any writers waiting,
         * or the lock is NOT marked as rw_priv_excl (writers have privilege)
         */
-       if (!(fake_lck.priv_excl && fake_lck.w_waiting) && fake_lck.r_waiting)
+       if (!(fake_lck.priv_excl && fake_lck.w_waiting) && fake_lck.r_waiting) {
                thread_wakeup(LCK_RW_READER_EVENT(lck));
+       }
 
        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_END,
-                            trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, lck->lck_rw_shared_count, 0);
+           trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, lck->lck_rw_shared_count, 0);
 
 #if CONFIG_DTRACE
        LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_TO_SHARED_DOWNGRADE, lck, 0);
@@ -1646,15 +1372,16 @@ lck_rw_lock_exclusive_to_shared_gen(
  */
 boolean_t
 lck_rw_try_lock(
-       lck_rw_t                *lck,
-       lck_rw_type_t   lck_rw_type)
+       lck_rw_t                *lck,
+       lck_rw_type_t   lck_rw_type)
 {
-       if (lck_rw_type == LCK_RW_TYPE_SHARED)
+       if (lck_rw_type == LCK_RW_TYPE_SHARED) {
                return lck_rw_try_lock_shared(lck);
-       else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
+       } else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) {
                return lck_rw_try_lock_exclusive(lck);
-       else
+       } else {
                panic("lck_rw_try_lock(): Invalid rw lock type: %x", lck_rw_type);
+       }
        return FALSE;
 }
 
@@ -1662,38 +1389,42 @@ lck_rw_try_lock(
  *     Routine:        lck_rw_try_lock_shared
  */
 
-boolean_t lck_rw_try_lock_shared(lck_rw_t *lock)
+boolean_t
+lck_rw_try_lock_shared(lck_rw_t *lock)
 {
-       uint32_t        data, prev;
+       uint32_t        data, prev;
 
-       for ( ; ; ) {
+       for (;;) {
                data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
                if (data & LCK_RW_INTERLOCK) {
-#if __SMP__
                        atomic_exchange_abort();
                        lck_rw_interlock_spin(lock);
                        continue;
-#else
-                       panic("lck_rw_try_lock_shared(): Interlock locked (%p): %x", lock, data);
-#endif
                }
                if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) {
                        atomic_exchange_abort();
-                       return FALSE;                                           /* lock is busy */
+                       return FALSE;                                           /* lock is busy */
                }
-               data += LCK_RW_SHARED_READER;                   /* Increment reader refcount */
-               if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp))
+               data += LCK_RW_SHARED_READER;                   /* Increment reader refcount */
+               if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
                        break;
+               }
                cpu_pause();
        }
 #if MACH_ASSERT
        thread_t owner = ordered_load_rw_owner(lock);
        assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
 #endif
-       current_thread()->rwlock_count++;
-#if    CONFIG_DTRACE
+
+       if (lock->lck_rw_can_sleep) {
+               current_thread()->rwlock_count++;
+       } else if (get_preemption_level() == 0) {
+               panic("Taking non-sleepable RW lock with preemption enabled");
+       }
+
+#if     CONFIG_DTRACE
        LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED);
-#endif /* CONFIG_DTRACE */
+#endif  /* CONFIG_DTRACE */
        return TRUE;
 }
 
@@ -1702,41 +1433,43 @@ boolean_t lck_rw_try_lock_shared(lck_rw_t *lock)
  *     Routine:        lck_rw_try_lock_exclusive
  */
 
-boolean_t lck_rw_try_lock_exclusive(lck_rw_t *lock)
+boolean_t
+lck_rw_try_lock_exclusive(lck_rw_t *lock)
 {
-       uint32_t        data, prev;
-       thread_t        thread;
+       uint32_t        data, prev;
+       thread_t        thread;
 
-       for ( ; ; ) {
+       for (;;) {
                data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
                if (data & LCK_RW_INTERLOCK) {
-#if __SMP__
                        atomic_exchange_abort();
                        lck_rw_interlock_spin(lock);
                        continue;
-#else
-                       panic("lck_rw_try_lock_exclusive(): Interlock locked (%p): %x", lock, data);
-#endif
                }
                if (data & (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) {
                        atomic_exchange_abort();
                        return FALSE;
                }
                data |= LCK_RW_WANT_EXCL;
-               if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp))
+               if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
                        break;
+               }
                cpu_pause();
        }
        thread = current_thread();
-       thread->rwlock_count++;
+       if (lock->lck_rw_can_sleep) {
+               thread->rwlock_count++;
+       } else if (get_preemption_level() == 0) {
+               panic("Taking non-sleepable RW lock with preemption enabled");
+       }
 #if MACH_ASSERT
        thread_t owner = ordered_load_rw_owner(lock);
        assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
 #endif
        ordered_store_rw_owner(lock, thread);
-#if    CONFIG_DTRACE
+#if     CONFIG_DTRACE
        LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
-#endif /* CONFIG_DTRACE */
+#endif  /* CONFIG_DTRACE */
        return TRUE;
 }
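The try variants differ from the blocking paths only in what happens on conflict: any reader, writer, or upgrader already present makes them return FALSE immediately instead of entering a *_gen slow path (only a transient interlock is spun through). Reduced to a sketch with the illustrative constants used earlier:

/* Single-attempt exclusive acquire: never waits for a competing holder. */
static bool
rw_try_lock_exclusive_sketch(_Atomic uint32_t *word)
{
        uint32_t prev = atomic_load_explicit(word, memory_order_relaxed);

        for (;;) {
                if (prev & (RW_SHARED_MASK | RW_WANT_EXCL | RW_WANT_UPGRADE)) {
                        return false;           /* busy: caller simply gets FALSE */
                }
                if (atomic_compare_exchange_weak_explicit(word, &prev,
                    prev | RW_WANT_EXCL, memory_order_acquire, memory_order_relaxed)) {
                        return true;            /* owner field is recorded by the caller */
                }
        }
}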
 
@@ -1746,15 +1479,16 @@ boolean_t lck_rw_try_lock_exclusive(lck_rw_t *lock)
  */
 void
 lck_rw_unlock(
-       lck_rw_t                *lck,
-       lck_rw_type_t   lck_rw_type)
+       lck_rw_t                *lck,
+       lck_rw_type_t   lck_rw_type)
 {
-       if (lck_rw_type == LCK_RW_TYPE_SHARED)
+       if (lck_rw_type == LCK_RW_TYPE_SHARED) {
                lck_rw_unlock_shared(lck);
-       else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
+       } else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) {
                lck_rw_unlock_exclusive(lck);
-       else
+       } else {
                panic("lck_rw_unlock(): Invalid RW lock type: %d", lck_rw_type);
+       }
 }
 
 
@@ -1763,16 +1497,17 @@ lck_rw_unlock(
  */
 void
 lck_rw_unlock_shared(
-       lck_rw_t        *lck)
+       lck_rw_t        *lck)
 {
-       lck_rw_type_t   ret;
+       lck_rw_type_t   ret;
 
        assertf(lck->lck_rw_owner == THREAD_NULL, "state=0x%x, owner=%p", lck->lck_rw_data, lck->lck_rw_owner);
        assertf(lck->lck_rw_shared_count > 0, "shared_count=0x%x", lck->lck_rw_shared_count);
        ret = lck_rw_done(lck);
 
-       if (ret != LCK_RW_TYPE_SHARED)
+       if (ret != LCK_RW_TYPE_SHARED) {
                panic("lck_rw_unlock_shared(): lock %p held in mode: %d", lck, ret);
+       }
 }
 
 
@@ -1781,15 +1516,16 @@ lck_rw_unlock_shared(
  */
 void
 lck_rw_unlock_exclusive(
-       lck_rw_t        *lck)
+       lck_rw_t        *lck)
 {
-       lck_rw_type_t   ret;
+       lck_rw_type_t   ret;
 
        assertf(lck->lck_rw_owner == current_thread(), "state=0x%x, owner=%p", lck->lck_rw_data, lck->lck_rw_owner);
        ret = lck_rw_done(lck);
 
-       if (ret != LCK_RW_TYPE_EXCLUSIVE)
+       if (ret != LCK_RW_TYPE_EXCLUSIVE) {
                panic("lck_rw_unlock_exclusive(): lock %p held in mode: %d", lck, ret);
+       }
 }
 
 
@@ -1798,19 +1534,19 @@ lck_rw_unlock_exclusive(
  */
 static void
 lck_rw_lock_exclusive_gen(
-       lck_rw_t        *lock)
+       lck_rw_t        *lock)
 {
-       __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock);
-       lck_rw_word_t           word;
-       int                     slept = 0;
-       boolean_t               gotlock = 0;
-       boolean_t               not_shared_or_upgrade = 0;
-       wait_result_t           res = 0;
-       boolean_t               istate;
+       __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock);
+       lck_rw_word_t           word;
+       int                     slept = 0;
+       boolean_t               gotlock = 0;
+       boolean_t               not_shared_or_upgrade = 0;
+       wait_result_t           res = 0;
+       boolean_t               istate;
 
-#if    CONFIG_DTRACE
+#if     CONFIG_DTRACE
        boolean_t dtrace_ls_initialized = FALSE;
-       boolean_t dtrace_rwl_excl_spin, dtrace_rwl_excl_block, dtrace_ls_enabled= FALSE;
+       boolean_t dtrace_rwl_excl_spin, dtrace_rwl_excl_block, dtrace_ls_enabled = FALSE;
        uint64_t wait_interval = 0;
        int readers_at_sleep = 0;
 #endif
@@ -1819,8 +1555,7 @@ lck_rw_lock_exclusive_gen(
         *      Try to acquire the lck_rw_want_excl bit.
         */
        while (!lck_rw_grab(lock, LCK_RW_GRAB_WANT, FALSE)) {
-
-#if    CONFIG_DTRACE
+#if     CONFIG_DTRACE
                if (dtrace_ls_initialized == FALSE) {
                        dtrace_ls_initialized = TRUE;
                        dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
@@ -1843,8 +1578,9 @@ lck_rw_lock_exclusive_gen(
 
                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_END, trace_lck, 0, 0, gotlock, 0);
 
-               if (gotlock)
+               if (gotlock) {
                        break;
+               }
                /*
                 * if we get here, the deadline has expired w/o us
                 * being able to grab the lock exclusively
@@ -1852,12 +1588,10 @@ lck_rw_lock_exclusive_gen(
                 */
                word.data = ordered_load_rw(lock);
                if (word.can_sleep) {
-
                        istate = lck_interlock_lock(lock);
                        word.data = ordered_load_rw(lock);
 
                        if (word.want_excl) {
-
                                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
 
                                word.w_waiting = 1;
@@ -1865,7 +1599,7 @@ lck_rw_lock_exclusive_gen(
 
                                thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite);
                                res = assert_wait(LCK_RW_WRITER_EVENT(lock),
-                                               THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
+                                   THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
                                lck_interlock_unlock(lock, istate);
 
                                if (res == THREAD_WAITING) {
@@ -1885,8 +1619,7 @@ lck_rw_lock_exclusive_gen(
         * Wait for readers (and upgrades) to finish...
         */
        while (!lck_rw_drain_status(lock, LCK_RW_SHARED_MASK | LCK_RW_WANT_UPGRADE, FALSE)) {
-
-#if    CONFIG_DTRACE
+#if     CONFIG_DTRACE
                /*
                 * Either sleeping or spinning is happening, start
                 * a timing of our delay interval now.  If we set it
@@ -1915,8 +1648,9 @@ lck_rw_lock_exclusive_gen(
 
                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_END, trace_lck, 0, 0, not_shared_or_upgrade, 0);
 
-               if (not_shared_or_upgrade)
+               if (not_shared_or_upgrade) {
                        break;
+               }
                /*
                 * if we get here, the deadline has expired w/o us
                 * being able to grab the lock exclusively
@@ -1924,7 +1658,6 @@ lck_rw_lock_exclusive_gen(
                 */
                word.data = ordered_load_rw(lock);
                if (word.can_sleep) {
-
                        istate = lck_interlock_lock(lock);
                        word.data = ordered_load_rw(lock);
 
@@ -1936,7 +1669,7 @@ lck_rw_lock_exclusive_gen(
 
                                thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite);
                                res = assert_wait(LCK_RW_WRITER_EVENT(lock),
-                                               THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
+                                   THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
                                lck_interlock_unlock(lock, istate);
 
                                if (res == THREAD_WAITING) {
@@ -1956,7 +1689,7 @@ lck_rw_lock_exclusive_gen(
                }
        }
 
-#if    CONFIG_DTRACE
+#if     CONFIG_DTRACE
        /*
         * Decide what latencies we suffered that are Dtrace events.
         * If we have set wait_interval, then we either spun or slept.
@@ -1968,7 +1701,7 @@ lck_rw_lock_exclusive_gen(
         */
        if (dtrace_ls_enabled == TRUE) {
                if (slept == 0) {
-                       LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_EXCL_SPIN, lock,
+                       LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_SPIN, lock,
                            mach_absolute_time() - wait_interval, 1);
                } else {
                        /*
@@ -1977,48 +1710,47 @@ lck_rw_lock_exclusive_gen(
                         * Notice that above we recorded this before we dropped
                         * the interlock so the count is accurate.
                         */
-                       LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_EXCL_BLOCK, lock,
+                       LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_BLOCK, lock,
                            mach_absolute_time() - wait_interval, 1,
                            (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
                }
        }
        LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, 1);
-#endif /* CONFIG_DTRACE */
+#endif  /* CONFIG_DTRACE */
 }
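lck_rw_lock_exclusive_gen above is a two-phase acquisition: first win the LCK_RW_WANT_EXCL bit, which turns away new readers and competing writers, then wait for the existing readers and any in-flight upgrade to drain; the DTrace bookkeeping around it only records whether that waiting was pure spinning or included a block. The phase ordering, as a hedged sketch built on the helpers sketched earlier (not the kernel's lck_rw_grab/lck_rw_drain_status primitives):

/* Two-phase exclusive slow path: claim the want bit, then drain readers. */
static void
rw_lock_exclusive_slow(_Atomic uint32_t *word)
{
        uint32_t prev = atomic_load_explicit(word, memory_order_relaxed);

        /* Phase 1: become the unique pending writer. */
        for (;;) {
                if (prev & RW_WANT_EXCL) {
                        /* another writer is ahead of us; re-observe and keep trying */
                        prev = atomic_load_explicit(word, memory_order_relaxed);
                        continue;
                }
                if (atomic_compare_exchange_weak_explicit(word, &prev,
                    prev | RW_WANT_EXCL, memory_order_acquire, memory_order_relaxed)) {
                        break;
                }
        }
        /* Phase 2: wait until the reader count and any upgrade request are gone. */
        rw_drain(word, RW_SHARED_MASK | RW_WANT_UPGRADE, 1000);
}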
 
 /*
  *      Routine:        lck_rw_done
  */
 
-lck_rw_type_t lck_rw_done(lck_rw_t *lock)
+lck_rw_type_t
+lck_rw_done(lck_rw_t *lock)
 {
-       uint32_t        data, prev;
-       boolean_t       once = FALSE;
+       uint32_t        data, prev;
+       boolean_t       once = FALSE;
 
-       for ( ; ; ) {
+       for (;;) {
                data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_release_smp);
-               if (data & LCK_RW_INTERLOCK) {          /* wait for interlock to clear */
-#if __SMP__
+               if (data & LCK_RW_INTERLOCK) {          /* wait for interlock to clear */
                        atomic_exchange_abort();
                        lck_rw_interlock_spin(lock);
                        continue;
-#else
-                       panic("lck_rw_done(): Interlock locked (%p): %x", lock, data);
-#endif // __SMP__
                }
-               if (data & LCK_RW_SHARED_MASK) {        /* lock is held shared */
+               if (data & LCK_RW_SHARED_MASK) {        /* lock is held shared */
                        assertf(lock->lck_rw_owner == THREAD_NULL, "state=0x%x, owner=%p", lock->lck_rw_data, lock->lck_rw_owner);
                        data -= LCK_RW_SHARED_READER;
-                       if ((data & LCK_RW_SHARED_MASK) == 0)   /* if reader count has now gone to 0, check for waiters */
+                       if ((data & LCK_RW_SHARED_MASK) == 0) { /* if reader count has now gone to 0, check for waiters */
                                goto check_waiters;
-               } else {                                        /* if reader count == 0, must be exclusive lock */
+                       }
+               } else {                                        /* if reader count == 0, must be exclusive lock */
                        if (data & LCK_RW_WANT_UPGRADE) {
                                data &= ~(LCK_RW_WANT_UPGRADE);
                        } else {
-                               if (data & LCK_RW_WANT_EXCL)
+                               if (data & LCK_RW_WANT_EXCL) {
                                        data &= ~(LCK_RW_WANT_EXCL);
-                               else                                    /* lock is not 'owned', panic */
+                               } else {                                /* lock is not 'owned', panic */
                                        panic("Releasing non-exclusive RW lock without a reader refcount!");
+                               }
                        }
                        if (!once) {
                                // Only check for holder and clear it once
@@ -2036,13 +1768,16 @@ check_waiters:
                         */
                        if (prev & LCK_RW_W_WAITING) {
                                data &= ~(LCK_RW_W_WAITING);
-                               if ((prev & LCK_RW_PRIV_EXCL) == 0)
+                               if ((prev & LCK_RW_PRIV_EXCL) == 0) {
                                        data &= ~(LCK_RW_R_WAITING);
-                       } else
+                               }
+                       } else {
                                data &= ~(LCK_RW_R_WAITING);
+                       }
                }
-               if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_release_smp))
+               if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_release_smp)) {
                        break;
+               }
                cpu_pause();
        }
        return lck_rw_done_gen(lock, prev);
@@ -2053,13 +1788,13 @@ check_waiters:
  *
  *     called from the assembly language wrapper...
  *     prior_lock_state is the value in the 1st
- *     word of the lock at the time of a successful
+ *      word of the lock at the time of a successful
  *     atomic compare and exchange with the new value...
- *     it represents the state of the lock before we
+ *      it represents the state of the lock before we
  *     decremented the rw_shared_count or cleared either
- *     rw_want_upgrade or rw_want_write and
+ *      rw_want_upgrade or rw_want_write and
  *     the lck_x_waiting bits...  since the wrapper
- *     routine has already changed the state atomically, 
+ *      routine has already changed the state atomically,
  *     we just need to decide if we should
  *     wake up anyone and what value to return... we do
  *     this by examining the state of the lock before
@@ -2067,13 +1802,13 @@ check_waiters:
  */
 static lck_rw_type_t
 lck_rw_done_gen(
-       lck_rw_t        *lck,
-       uint32_t        prior_lock_state)
+       lck_rw_t        *lck,
+       uint32_t        prior_lock_state)
 {
-       lck_rw_word_t   fake_lck;
-       lck_rw_type_t   lock_type;
-       thread_t                thread;
-       uint32_t                rwlock_count;
+       lck_rw_word_t   fake_lck;
+       lck_rw_type_t   lock_type;
+       thread_t                thread;
+       uint32_t                rwlock_count;
 
        /*
         * prior_lock state is a snapshot of the 1st word of the
@@ -2084,23 +1819,31 @@ lck_rw_done_gen(
        fake_lck.data = prior_lock_state;
 
        if (fake_lck.shared_count <= 1) {
-               if (fake_lck.w_waiting)
+               if (fake_lck.w_waiting) {
                        thread_wakeup(LCK_RW_WRITER_EVENT(lck));
+               }
 
-               if (!(fake_lck.priv_excl && fake_lck.w_waiting) && fake_lck.r_waiting)
+               if (!(fake_lck.priv_excl && fake_lck.w_waiting) && fake_lck.r_waiting) {
                        thread_wakeup(LCK_RW_READER_EVENT(lck));
+               }
        }
-       if (fake_lck.shared_count)
+       if (fake_lck.shared_count) {
                lock_type = LCK_RW_TYPE_SHARED;
-       else
+       } else {
                lock_type = LCK_RW_TYPE_EXCLUSIVE;
+       }
 
        /* Check if dropping the lock means that we need to unpromote */
        thread = current_thread();
-       rwlock_count = thread->rwlock_count--;
+       if (fake_lck.can_sleep) {
+               rwlock_count = thread->rwlock_count--;
+       } else {
+               rwlock_count = UINT32_MAX;
+       }
 #if MACH_LDEBUG
-       if (rwlock_count == 0)
+       if (rwlock_count == 0) {
                panic("rw lock count underflow for thread %p", thread);
+       }
 #endif
        if ((rwlock_count == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
                /* sched_flags checked without lock, but will be rechecked while clearing */
@@ -2121,25 +1864,24 @@ lck_rw_done_gen(
  */
 static void
 lck_rw_lock_shared_gen(
-       lck_rw_t        *lck)
+       lck_rw_t        *lck)
 {
-       __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
-       lck_rw_word_t           word;
-       boolean_t               gotlock = 0;
-       int                     slept = 0;
-       wait_result_t           res = 0;
-       boolean_t               istate;
+       __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
+       lck_rw_word_t           word;
+       boolean_t               gotlock = 0;
+       int                     slept = 0;
+       wait_result_t           res = 0;
+       boolean_t               istate;
 
-#if    CONFIG_DTRACE
+#if     CONFIG_DTRACE
        uint64_t wait_interval = 0;
        int readers_at_sleep = 0;
        boolean_t dtrace_ls_initialized = FALSE;
        boolean_t dtrace_rwl_shared_spin, dtrace_rwl_shared_block, dtrace_ls_enabled = FALSE;
 #endif /* CONFIG_DTRACE */
 
-       while ( !lck_rw_grab(lck, LCK_RW_GRAB_SHARED, FALSE)) {
-
-#if    CONFIG_DTRACE
+       while (!lck_rw_grab(lck, LCK_RW_GRAB_SHARED, FALSE)) {
+#if     CONFIG_DTRACE
                if (dtrace_ls_initialized == FALSE) {
                        dtrace_ls_initialized = TRUE;
                        dtrace_rwl_shared_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_SPIN] != 0);
@@ -2157,37 +1899,36 @@ lck_rw_lock_shared_gen(
 #endif
 
                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_START,
-                            trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, 0, 0);
+                   trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, 0, 0);
 
                gotlock = lck_rw_grab(lck, LCK_RW_GRAB_SHARED, TRUE);
 
                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_END,
-                            trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, gotlock, 0);
+                   trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, gotlock, 0);
 
-               if (gotlock)
+               if (gotlock) {
                        break;
+               }
                /*
                 * if we get here, the deadline has expired w/o us
                 * being able to grab the lock for read
                 * check to see if we're allowed to do a thread_block
                 */
                if (lck->lck_rw_can_sleep) {
-
                        istate = lck_interlock_lock(lck);
 
                        word.data = ordered_load_rw(lck);
                        if ((word.want_excl || word.want_upgrade) &&
                            ((word.shared_count == 0) || word.priv_excl)) {
-
                                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_START,
-                                            trace_lck, word.want_excl, word.want_upgrade, 0, 0);
+                                   trace_lck, word.want_excl, word.want_upgrade, 0, 0);
 
                                word.r_waiting = 1;
                                ordered_store_rw(lck, word.data);
 
                                thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockRead);
                                res = assert_wait(LCK_RW_READER_EVENT(lck),
-                                               THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
+                                   THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
                                lck_interlock_unlock(lck, istate);
 
                                if (res == THREAD_WAITING) {
@@ -2195,7 +1936,7 @@ lck_rw_lock_shared_gen(
                                        slept++;
                                }
                                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_END,
-                                            trace_lck, res, slept, 0, 0);
+                                   trace_lck, res, slept, 0, 0);
                        } else {
                                word.shared_count++;
                                ordered_store_rw(lck, word.data);
@@ -2205,25 +1946,28 @@ lck_rw_lock_shared_gen(
                }
        }
 
-#if    CONFIG_DTRACE
+#if     CONFIG_DTRACE
        if (dtrace_ls_enabled == TRUE) {
                if (slept == 0) {
-                       LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_SPIN, lck, mach_absolute_time() - wait_interval, 0);
+                       LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_SPIN, lck, mach_absolute_time() - wait_interval, 0);
                } else {
-                       LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_BLOCK, lck,
+                       LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_BLOCK, lck,
                            mach_absolute_time() - wait_interval, 0,
                            (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
                }
        }
        LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lck, 0);
-#endif /* CONFIG_DTRACE */
+#endif  /* CONFIG_DTRACE */
 }
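The interesting policy decision in the reader slow path sits in the condition checked under the interlock above: a reader blocks only when a writer or upgrader is pending and either no readers currently hold the lock or the lock is marked priv_excl (writers have priority); in every other case the reader bumps shared_count itself and proceeds. Written out as a standalone predicate that mirrors the logic (not the kernel's actual field layout):

#include <stdbool.h>
#include <stdint.h>

struct rw_snapshot {
        uint32_t shared_count;
        bool     want_excl;
        bool     want_upgrade;
        bool     priv_excl;             /* writers have priority over new readers */
};

static bool
reader_must_wait(const struct rw_snapshot *s)
{
        return (s->want_excl || s->want_upgrade) &&
               (s->shared_count == 0 || s->priv_excl);
}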
 
-
+/*
+ * Required to verify thread ownership for exclusive locks by virtue of PPL
+ * usage
+ */
 void
 lck_rw_assert(
-       lck_rw_t                *lck,
-       unsigned int    type)
+       lck_rw_t                *lck,
+       unsigned int    type)
 {
        switch (type) {
        case LCK_RW_ASSERT_SHARED:
@@ -2234,22 +1978,23 @@ lck_rw_assert(
                break;
        case LCK_RW_ASSERT_EXCLUSIVE:
                if ((lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
-                       (lck->lck_rw_shared_count == 0) &&
+                   (lck->lck_rw_shared_count == 0) &&
                    (lck->lck_rw_owner == current_thread())) {
                        return;
                }
                break;
        case LCK_RW_ASSERT_HELD:
-               if (lck->lck_rw_shared_count != 0)
-                       return;         // Held shared
+               if (lck->lck_rw_shared_count != 0) {
+                       return;         // Held shared
+               }
                if ((lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
                    (lck->lck_rw_owner == current_thread())) {
-                       return;         // Held exclusive
+                       return;         // Held exclusive
                }
                break;
        case LCK_RW_ASSERT_NOTHELD:
                if ((lck->lck_rw_shared_count == 0) &&
-                  !(lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
+                   !(lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
                    (lck->lck_rw_owner == THREAD_NULL)) {
                        return;
                }
@@ -2266,7 +2011,8 @@ lck_rw_assert(
  * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
  */
 boolean_t
-kdp_lck_rw_lock_is_acquired_exclusive(lck_rw_t *lck) {
+kdp_lck_rw_lock_is_acquired_exclusive(lck_rw_t *lck)
+{
        if (not_in_kdp) {
                panic("panic: rw lock exclusive check done outside of kernel debugger");
        }
@@ -2282,26 +2028,25 @@ kdp_lck_rw_lock_is_acquired_exclusive(lck_rw_t *lck) {
  * Forward declaration
  */
 
-void 
+void
 lck_mtx_ext_init(
-                lck_mtx_ext_t * lck,
-                lck_grp_t * grp,
-                lck_attr_t * attr);
+       lck_mtx_ext_t * lck,
+       lck_grp_t * grp,
+       lck_attr_t * attr);
 
 /*
  *      Routine:        lck_mtx_alloc_init
  */
 lck_mtx_t      *
 lck_mtx_alloc_init(
-                  lck_grp_t * grp,
-                  lck_attr_t * attr)
+       lck_grp_t * grp,
+       lck_attr_t * attr)
 {
        lck_mtx_t      *lck;
 
-       if ((lck = (lck_mtx_t *) kalloc(sizeof(lck_mtx_t))) != 0)
-               lck_mtx_init(lck, grp, attr);
-
-       return (lck);
+       lck = zalloc(ZV_LCK_MTX);
+       lck_mtx_init(lck, grp, attr);
+       return lck;
 }
 
 /*
@@ -2309,11 +2054,11 @@ lck_mtx_alloc_init(
  */
 void
 lck_mtx_free(
-            lck_mtx_t * lck,
-            lck_grp_t * grp)
+       lck_mtx_t * lck,
+       lck_grp_t * grp)
 {
        lck_mtx_destroy(lck, grp);
-       kfree((void *) lck, sizeof(lck_mtx_t));
+       zfree(ZV_LCK_MTX, lck);
 }
 
 /*
@@ -2321,34 +2066,33 @@ lck_mtx_free(
  */
 void
 lck_mtx_init(
-            lck_mtx_t * lck,
-            lck_grp_t * grp,
-            lck_attr_t * attr)
+       lck_mtx_t * lck,
+       lck_grp_t * grp,
+       lck_attr_t * attr)
 {
-#ifdef BER_XXX
+#ifdef  BER_XXX
        lck_mtx_ext_t  *lck_ext;
 #endif
        lck_attr_t     *lck_attr;
 
-       if (attr != LCK_ATTR_NULL)
+       if (attr != LCK_ATTR_NULL) {
                lck_attr = attr;
-       else
+       } else {
                lck_attr = &LockDefaultLckAttr;
+       }
 
-#ifdef BER_XXX
+#ifdef  BER_XXX
        if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
-               if ((lck_ext = (lck_mtx_ext_t *) kalloc(sizeof(lck_mtx_ext_t))) != 0) {
-                       lck_mtx_ext_init(lck_ext, grp, lck_attr);
-                       lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
-                       lck->lck_mtx_ptr = lck_ext;
-                       lck->lck_mtx_type = LCK_MTX_TYPE;
-               }
+               lck_ext = zalloc(ZV_LCK_MTX_EXT);
+               lck_mtx_ext_init(lck_ext, grp, lck_attr);
+               lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
+               lck->lck_mtx_ptr = lck_ext;
+               lck->lck_mtx_type = LCK_MTX_TYPE;
        } else
 #endif
        {
-               lck->lck_mtx_ptr = NULL;                // Clear any padding in the union fields below
+               lck->lck_mtx_ptr = NULL;                // Clear any padding in the union fields below
                lck->lck_mtx_waiters = 0;
-               lck->lck_mtx_pri = 0;
                lck->lck_mtx_type = LCK_MTX_TYPE;
                ordered_store_mtx(lck, 0);
        }
@@ -2361,17 +2105,18 @@ lck_mtx_init(
  */
 void
 lck_mtx_init_ext(
-                lck_mtx_t * lck,
-                lck_mtx_ext_t * lck_ext,
-                lck_grp_t * grp,
-                lck_attr_t * attr)
+       lck_mtx_t * lck,
+       lck_mtx_ext_t * lck_ext,
+       lck_grp_t * grp,
+       lck_attr_t * attr)
 {
        lck_attr_t     *lck_attr;
 
-       if (attr != LCK_ATTR_NULL)
+       if (attr != LCK_ATTR_NULL) {
                lck_attr = attr;
-       else
+       } else {
                lck_attr = &LockDefaultLckAttr;
+       }
 
        if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
                lck_mtx_ext_init(lck_ext, grp, lck_attr);
@@ -2380,7 +2125,6 @@ lck_mtx_init_ext(
                lck->lck_mtx_type = LCK_MTX_TYPE;
        } else {
                lck->lck_mtx_waiters = 0;
-               lck->lck_mtx_pri = 0;
                lck->lck_mtx_type = LCK_MTX_TYPE;
                ordered_store_mtx(lck, 0);
        }
@@ -2393,9 +2137,9 @@ lck_mtx_init_ext(
  */
 void
 lck_mtx_ext_init(
-                lck_mtx_ext_t * lck,
-                lck_grp_t * grp,
-                lck_attr_t * attr)
+       lck_mtx_ext_t * lck,
+       lck_grp_t * grp,
+       lck_attr_t * attr)
 {
        bzero((void *) lck, sizeof(lck_mtx_ext_t));
 
@@ -2407,8 +2151,9 @@ lck_mtx_ext_init(
        }
        lck->lck_mtx_grp = grp;
 
-       if (grp->lck_grp_attr & LCK_GRP_ATTR_STAT)
+       if (grp->lck_grp_attr & LCK_GRP_ATTR_STAT) {
                lck->lck_mtx_attr |= LCK_MTX_ATTR_STAT;
+       }
 }
 
 /* The slow versions */
@@ -2416,6 +2161,9 @@ static void lck_mtx_lock_contended(lck_mtx_t *lock, thread_t thread, boolean_t i
 static boolean_t lck_mtx_try_lock_contended(lck_mtx_t *lock, thread_t thread);
 static void lck_mtx_unlock_contended(lck_mtx_t *lock, thread_t thread, boolean_t interlocked);
 
+/* The adaptive spin function */
+static spinwait_result_t lck_mtx_lock_contended_spinwait_arm(lck_mtx_t *lock, thread_t thread, boolean_t interlocked);
+
 /*
  *     Routine:        lck_mtx_verify
  *
@@ -2424,12 +2172,14 @@ static void lck_mtx_unlock_contended(lck_mtx_t *lock, thread_t thread, boolean_t
 static inline void
 lck_mtx_verify(lck_mtx_t *lock)
 {
-       if (lock->lck_mtx_type != LCK_MTX_TYPE)
+       if (lock->lck_mtx_type != LCK_MTX_TYPE) {
                panic("Invalid mutex %p", lock);
-#if    DEVELOPMENT || DEBUG
-       if (lock->lck_mtx_tag == LCK_MTX_TAG_DESTROYED)
+       }
+#if     DEVELOPMENT || DEBUG
+       if (lock->lck_mtx_tag == LCK_MTX_TAG_DESTROYED) {
                panic("Mutex destroyed %p", lock);
-#endif /* DEVELOPMENT || DEBUG */
+       }
+#endif  /* DEVELOPMENT || DEBUG */
 }
 
 /*
@@ -2441,11 +2191,16 @@ lck_mtx_verify(lck_mtx_t *lock)
 static inline void
 lck_mtx_check_preemption(lck_mtx_t *lock)
 {
-#if    DEVELOPMENT || DEBUG
+#if     DEVELOPMENT || DEBUG
+       if (current_cpu_datap()->cpu_hibernate) {
+               return;
+       }
+
        int pl = get_preemption_level();
 
-       if (pl != 0)
+       if (pl != 0) {
                panic("Attempt to take mutex with preemption disabled. Lock=%p, level=%d", lock, pl);
+       }
 #else
        (void)lock;
 #endif
@@ -2457,14 +2212,14 @@ lck_mtx_check_preemption(lck_mtx_t *lock)
 void
 lck_mtx_lock(lck_mtx_t *lock)
 {
-       thread_t        thread;
+       thread_t        thread;
 
        lck_mtx_verify(lock);
        lck_mtx_check_preemption(lock);
        thread = current_thread();
-       if (atomic_compare_exchange(&lock->lck_mtx_data, 0, LCK_MTX_THREAD_TO_STATE(thread),
-                                       memory_order_acquire_smp, FALSE)) {
-#if    CONFIG_DTRACE
+       if (os_atomic_cmpxchg(&lock->lck_mtx_data,
+           0, LCK_MTX_THREAD_TO_STATE(thread), acquire)) {
+#if     CONFIG_DTRACE
                LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, lock, 0);
 #endif /* CONFIG_DTRACE */
                return;
@@ -2473,79 +2228,453 @@ lck_mtx_lock(lck_mtx_t *lock)
 }
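The mutex fast path is a single compare-and-swap of the whole word from 0 to the owning thread pointer with acquire ordering; anything else falls through to lck_mtx_lock_contended(). A user-space analogue follows; the type, helper names, and the stubbed slow path are all hypothetical:

#include <stdatomic.h>
#include <stdint.h>

typedef struct { _Atomic uintptr_t owner_state; } toy_mtx_t;

/* Contended path, not shown: would spin and/or block until the owner clears. */
static void toy_mtx_lock_contended(toy_mtx_t *m, uintptr_t self);

static void
toy_mtx_lock(toy_mtx_t *m, uintptr_t self)      /* self: e.g. (uintptr_t)pthread_self() */
{
        uintptr_t expected = 0;

        /* Fast path: 0 -> owner, acquire ordering, analogous to the cmpxchg above. */
        if (atomic_compare_exchange_strong_explicit(&m->owner_state, &expected, self,
            memory_order_acquire, memory_order_relaxed)) {
                return;
        }
        toy_mtx_lock_contended(m, self);
}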
 
 /*
-       This is the slow version of mutex locking.
      This is the slow version of mutex locking.
  */
 static void NOINLINE
 lck_mtx_lock_contended(lck_mtx_t *lock, thread_t thread, boolean_t interlocked)
 {
-       thread_t        holding_thread;
-       uintptr_t       state;
-       int             waiters;
-
-       if (interlocked)
-               goto interlock_held;
-
-       /* TODO: short-duration spin for on-core contention <rdar://problem/10234625> */
+       thread_t                holding_thread;
+       uintptr_t               state;
+       int                     waiters = 0;
+       spinwait_result_t       sw_res;
+       struct turnstile        *ts = NULL;
 
        /* Loop waiting until I see that the mutex is unowned */
-       for ( ; ; ) {
-               interlock_lock(lock);
-interlock_held:
+       for (;;) {
+               sw_res = lck_mtx_lock_contended_spinwait_arm(lock, thread, interlocked);
+               interlocked = FALSE;
+
+               switch (sw_res) {
+               case SPINWAIT_ACQUIRED:
+                       if (ts != NULL) {
+                               interlock_lock(lock);
+                               turnstile_complete((uintptr_t)lock, NULL, NULL, TURNSTILE_KERNEL_MUTEX);
+                               interlock_unlock(lock);
+                       }
+                       goto done;
+               case SPINWAIT_INTERLOCK:
+                       goto set_owner;
+               default:
+                       break;
+               }
+
                state = ordered_load_mtx(lock);
                holding_thread = LCK_MTX_STATE_TO_THREAD(state);
-               if (holding_thread == NULL)
+               if (holding_thread == NULL) {
                        break;
+               }
                ordered_store_mtx(lock, (state | LCK_ILOCK | ARM_LCK_WAITERS)); // Set waiters bit and wait
-               lck_mtx_lock_wait(lock, holding_thread);
+               lck_mtx_lock_wait(lock, holding_thread, &ts);
                /* returns interlock unlocked */
        }
 
+set_owner:
        /* Hooray, I'm the new owner! */
-       waiters = lck_mtx_lock_acquire(lock);
+       state = ordered_load_mtx(lock);
+
+       if (state & ARM_LCK_WAITERS) {
+               /* Skip lck_mtx_lock_acquire if there are no waiters. */
+               waiters = lck_mtx_lock_acquire(lock, ts);
+               /*
+                * lck_mtx_lock_acquire will call
+                * turnstile_complete
+                */
+       } else {
+               if (ts != NULL) {
+                       turnstile_complete((uintptr_t)lock, NULL, NULL, TURNSTILE_KERNEL_MUTEX);
+               }
+       }
+
        state = LCK_MTX_THREAD_TO_STATE(thread);
-       if (waiters != 0)
+       if (waiters != 0) {
                state |= ARM_LCK_WAITERS;
-#if __SMP__
-       state |= LCK_ILOCK;                             // Preserve interlock
-       ordered_store_mtx(lock, state); // Set ownership
-       interlock_unlock(lock);                 // Release interlock, enable preemption
-#else
-       ordered_store_mtx(lock, state); // Set ownership
-       enable_preemption();
-#endif
+       }
+       state |= LCK_ILOCK;                             // Preserve interlock
+       ordered_store_mtx(lock, state); // Set ownership
+       interlock_unlock(lock);                 // Release interlock, enable preemption
+
+done:
        load_memory_barrier();
 
-#if    CONFIG_DTRACE
+       assert(thread->turnstile != NULL);
+
+       if (ts != NULL) {
+               turnstile_cleanup();
+       }
+
+#if CONFIG_DTRACE
        LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, lock, 0);
 #endif /* CONFIG_DTRACE */
 }
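The contended path above alternates between the adaptive spin routine that follows and blocking: SPINWAIT_ACQUIRED means the spinner took the lock outright, SPINWAIT_INTERLOCK means the lock is free and the interlock is already held, and anything else leads to setting the waiters bit and sleeping on the holder while a turnstile tracks waiters and pushes priority. A heavily simplified skeleton of that control flow, with the kernel primitives reduced to hypothetical stubs, memory ordering simplified, and the turnstile hand-off omitted:

#include <stdatomic.h>
#include <stdint.h>

typedef struct { _Atomic uintptr_t state; } toy_lck_t;
enum toy_spin_res { TOY_SPIN_ACQUIRED, TOY_SPIN_INTERLOCK, TOY_SPIN_GAVE_UP };

/* Hypothetical stand-ins for the kernel primitives used by this path. */
enum toy_spin_res toy_spinwait(toy_lck_t *l);           /* adaptive spin, see below */
void              toy_interlock_lock(toy_lck_t *l);
void              toy_interlock_unlock(toy_lck_t *l);
void              toy_wait_for_holder(toy_lck_t *l);    /* sets waiters bit, blocks,
                                                         * returns with interlock dropped */

static void
toy_lock_contended(toy_lck_t *l, uintptr_t self)
{
        for (;;) {
                switch (toy_spinwait(l)) {
                case TOY_SPIN_ACQUIRED:                 /* spinner grabbed the lock outright */
                        return;
                case TOY_SPIN_INTERLOCK:                /* lock free, interlock already held */
                        goto set_owner;
                default:
                        break;
                }
                toy_interlock_lock(l);
                if (atomic_load_explicit(&l->state, memory_order_relaxed) == 0) {
                        goto set_owner;                 /* holder vanished while we spun */
                }
                toy_wait_for_holder(l);                 /* kernel: lck_mtx_lock_wait() */
        }
set_owner:
        /* Record ourselves as owner (the kernel also preserves the waiters bit here). */
        atomic_store_explicit(&l->state, self, memory_order_relaxed);
        toy_interlock_unlock(l);
}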
 
+/*
+ * Routine: lck_mtx_lock_contended_spinwait_arm
+ *
+ * Invoked trying to acquire a mutex when there is contention but
+ * the holder is running on another processor. We spin for up to a maximum
+ * time waiting for the lock to be released.
+ */
+static spinwait_result_t
+lck_mtx_lock_contended_spinwait_arm(lck_mtx_t *lock, thread_t thread, boolean_t interlocked)
+{
+       int                     has_interlock = (int)interlocked;
+       __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock);
+       thread_t        owner, prev_owner;
+       uint64_t        window_deadline, sliding_deadline, high_deadline;
+       uint64_t        start_time, cur_time, avg_hold_time, bias, delta;
+       int             loopcount = 0;
+       uint            i, prev_owner_cpu;
+       int             total_hold_time_samples, window_hold_time_samples, unfairness;
+       bool            owner_on_core, adjust;
+       uintptr_t       state, new_state, waiters;
+       spinwait_result_t       retval = SPINWAIT_DID_SPIN_HIGH_THR;
+
+       if (__improbable(!(lck_mtx_adaptive_spin_mode & ADAPTIVE_SPIN_ENABLE))) {
+               if (!has_interlock) {
+                       interlock_lock(lock);
+               }
+
+               return SPINWAIT_DID_NOT_SPIN;
+       }
+
+       KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_START,
+           trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(LCK_MTX_STATE_TO_THREAD(state)), lock->lck_mtx_waiters, 0, 0);
+
+       start_time = mach_absolute_time();
+       /*
+        * window_deadline represents the "learning" phase.
+        * The thread collects statistics about the lock until
+        * window_deadline and then decides whether to keep spinning
+        * or to block, according to the concurrency behavior
+        * observed.
+        *
+        * Every thread can spin at least low_MutexSpin.
+        */
+       window_deadline = start_time + low_MutexSpin;
+       /*
+        * Sliding_deadline is the adjusted spin deadline
+        * computed after the "learning" phase.
+        */
+       sliding_deadline = window_deadline;
+       /*
+        * High_deadline is a hard deadline. No thread
+        * may spin beyond this deadline.
+        */
+       if (high_MutexSpin >= 0) {
+               high_deadline = start_time + high_MutexSpin;
+       } else {
+               high_deadline = start_time + low_MutexSpin * real_ncpus;
+       }
+
+       /*
+        * We do not yet know which cpu owns the lock, so
+        * initialize prev_owner_cpu with the next cpu.
+        */
+       prev_owner_cpu = (cpu_number() + 1) % real_ncpus;
+       total_hold_time_samples = 0;
+       window_hold_time_samples = 0;
+       avg_hold_time = 0;
+       adjust = TRUE;
+       bias = (os_hash_kernel_pointer(lock) + cpu_number()) % real_ncpus;
+
+       /* Snoop the lock state */
+       state = ordered_load_mtx(lock);
+       owner = LCK_MTX_STATE_TO_THREAD(state);
+       prev_owner = owner;
+
+       if (has_interlock) {
+               if (owner == NULL) {
+                       retval = SPINWAIT_INTERLOCK;
+                       goto done_spinning;
+               } else {
+                       /*
+                        * We are holding the interlock, so
+                        * we can safely dereference owner.
+                        */
+                       if (!machine_thread_on_core(owner) || (owner->state & TH_IDLE)) {
+                               retval = SPINWAIT_DID_NOT_SPIN;
+                               goto done_spinning;
+                       }
+               }
+               interlock_unlock(lock);
+               has_interlock = 0;
+       }
+
+       /*
+        * Spin while:
+        *   - mutex is locked, and
+        *   - it's locked as a spin lock, and
+        *   - owner is running on another processor, and
+        *   - we haven't spun for long enough.
+        */
+       do {
+               /*
+                * Try to acquire the lock.
+                */
+               owner = LCK_MTX_STATE_TO_THREAD(state);
+               if (owner == NULL) {
+                       waiters = state & ARM_LCK_WAITERS;
+                       if (waiters) {
+                               /*
+                                * Preserve the waiter bit
+                                * and try to acquire the interlock.
+                                * Note: we will successfully acquire
+                                * the interlock only if we can also
+                                * acquire the lock.
+                                */
+                               new_state = ARM_LCK_WAITERS | LCK_ILOCK;
+                               has_interlock = 1;
+                               retval = SPINWAIT_INTERLOCK;
+                               disable_preemption();
+                       } else {
+                               new_state = LCK_MTX_THREAD_TO_STATE(thread);
+                               retval = SPINWAIT_ACQUIRED;
+                       }
+
+                       /*
+                        * The cmpxchg will succeed only if the lock
+                        * is not owned (doesn't have an owner set)
+                        * and it is not interlocked.
+                        * It will not fail if there are waiters.
+                        */
+                       if (os_atomic_cmpxchgv(&lock->lck_mtx_data,
+                           waiters, new_state, &state, acquire)) {
+                               goto done_spinning;
+                       } else {
+                               if (waiters) {
+                                       has_interlock = 0;
+                                       enable_preemption();
+                               }
+                       }
+               }
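+               /*
+                * Worked example of the cmpxchg above: if the lock word is
+                * exactly ARM_LCK_WAITERS (waiters recorded, no owner, not
+                * interlocked), we compare against `waiters` == ARM_LCK_WAITERS
+                * and install ARM_LCK_WAITERS | LCK_ILOCK, taking the interlock
+                * while keeping the waiter bit set.  Any owner or interlock bit
+                * in the lock word makes the compare fail and we keep spinning.
+                */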
+
+               cur_time = mach_absolute_time();
+
+               /*
+                * Never spin past high_deadline.
+                */
+               if (cur_time >= high_deadline) {
+                       retval = SPINWAIT_DID_SPIN_HIGH_THR;
+                       break;
+               }
+
+               /*
+                * Check if the owner is on core.  If not, block.
+                */
+               owner = LCK_MTX_STATE_TO_THREAD(state);
+               if (owner) {
+                       i = prev_owner_cpu;
+                       owner_on_core = FALSE;
+
+                       disable_preemption();
+                       state = ordered_load_mtx(lock);
+                       owner = LCK_MTX_STATE_TO_THREAD(state);
+
+                       /*
+                        * For scalability we want to check whether the owner is
+                        * on core without taking the mutex interlock.  Without
+                        * the interlock, the owner we observe might be invalid,
+                        * so we cannot dereference it or inspect any of its
+                        * thread fields to tell whether it is on core.  Instead,
+                        * check whether the thread running on any other cpu
+                        * matches the owner pointer.
+                        */
+                       if (owner) {
+                               do {
+                                       cpu_data_t *cpu_data_ptr = CpuDataEntries[i].cpu_data_vaddr;
+                                       if ((cpu_data_ptr != NULL) && (cpu_data_ptr->cpu_active_thread == owner)) {
+                                               owner_on_core = TRUE;
+                                               break;
+                                       }
+                                       if (++i >= real_ncpus) {
+                                               i = 0;
+                                       }
+                               } while (i != prev_owner_cpu);
+                               enable_preemption();
+
+                               if (owner_on_core) {
+                                       prev_owner_cpu = i;
+                               } else {
+                                       prev_owner = owner;
+                                       state = ordered_load_mtx(lock);
+                                       owner = LCK_MTX_STATE_TO_THREAD(state);
+                                       if (owner == prev_owner) {
+                                               /*
+                                                * Owner is not on core.
+                                                * Stop spinning.
+                                                */
+                                               if (loopcount == 0) {
+                                                       retval = SPINWAIT_DID_NOT_SPIN;
+                                               } else {
+                                                       retval = SPINWAIT_DID_SPIN_OWNER_NOT_CORE;
+                                               }
+                                               break;
+                                       }
+                                       /*
+                                        * Fall through if the owner changed while we were scanning.
+                                        * The new owner could potentially be on core, so loop
+                                        * again.
+                                        */
+                               }
+                       } else {
+                               enable_preemption();
+                       }
+               }
+
+               /*
+                * Count how many times we see the owner change.
+                * From that we can roughly estimate the mutex hold
+                * time and the fairness of the lock.
+                */
+               if (owner != prev_owner) {
+                       prev_owner = owner;
+                       total_hold_time_samples++;
+                       window_hold_time_samples++;
+               }
+
+               /*
+                * Learning window expired.
+                * Try to adjust the sliding_deadline.
+                */
+               if (cur_time >= window_deadline) {
+                       /*
+                        * If there was no contention during the window,
+                        * stop spinning.
+                        */
+                       if (window_hold_time_samples < 1) {
+                               retval = SPINWAIT_DID_SPIN_NO_WINDOW_CONTENTION;
+                               break;
+                       }
+
+                       if (adjust) {
+                               /*
+                                * For a fair lock, we'd wait for at most (NCPU-1) periods,
+                                * but the lock is unfair, so let's try to estimate by how much.
+                                */
+                               unfairness = total_hold_time_samples / real_ncpus;
+
+                               if (unfairness == 0) {
+                                       /*
+                                        * We observed the owner changing `total_hold_time_samples` times,
+                                        * which lets us estimate the average hold time of this mutex for
+                                        * the duration of the spin time.
+                                        * avg_hold_time = (cur_time - start_time) / total_hold_time_samples;
+                                        *
+                                        * In this case spin for at most avg_hold_time * (real_ncpus - 1).
+                                        */
+                                       delta = cur_time - start_time;
+                                       sliding_deadline = start_time + (delta * (real_ncpus - 1)) / total_hold_time_samples;
+                               } else {
+                                       /*
+                                        * In this case at least one of the other cpus was able to get the
+                                        * lock twice while we were spinning.
+                                        * We could spin longer, but it won't necessarily help if the
+                                        * system is unfair.  Try to randomize the wait to reduce contention.
+                                        *
+                                        * We compute how much time we could potentially spin
+                                        * and distribute it over the cpus.
+                                        *
+                                        * bias is an integer in [0, real_ncpus).
+                                        * distributed_increment = ((high_deadline - cur_time) / real_ncpus) * bias
+                                        */
+                                       delta = high_deadline - cur_time;
+                                       sliding_deadline = cur_time + ((delta * bias) / real_ncpus);
+                                       adjust = FALSE;
+                               }
+                       }
+
+                       window_deadline += low_MutexSpin;
+                       window_hold_time_samples = 0;
+               }
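+               /*
+                * Worked example with hypothetical numbers: suppose real_ncpus == 4
+                * and, when the first window expires, (cur_time - start_time) covers
+                * 10us with total_hold_time_samples == 2.  Then unfairness == 0 and
+                * sliding_deadline = start_time + 10us * 3 / 2 = start_time + 15us,
+                * i.e. roughly avg_hold_time * (real_ncpus - 1) past start_time.
+                * If instead total_hold_time_samples were 8, unfairness == 2 and the
+                * deadline becomes cur_time + ((high_deadline - cur_time) * bias) / 4,
+                * giving this cpu its own slice of the remaining spin budget.
+                */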
+
+               /*
+                * Stop spinning if we are past
+                * the adjusted deadline.
+                */
+               if (cur_time >= sliding_deadline) {
+                       retval = SPINWAIT_DID_SPIN_SLIDING_THR;
+                       break;
+               }
+
+               /*
+                * We want to arm the monitor for wfe,
+                * so load the lock exclusively.
+                *
+                * NOTE:
+                * we rely on the fact that wfe will
+                * eventually return even if the cache line
+                * is not modified.  This way we will keep
+                * looping and checking whether the deadlines
+                * have expired.
+                */
+               state = os_atomic_load_exclusive(&lock->lck_mtx_data, relaxed);
+               owner = LCK_MTX_STATE_TO_THREAD(state);
+               if (owner != NULL) {
+                       wait_for_event();
+                       state = ordered_load_mtx(lock);
+               } else {
+                       atomic_exchange_abort();
+               }
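+               /*
+                * The exclusive load above arms this cpu's exclusive monitor,
+                * so a store to lck_mtx_data by the owner (e.g. on unlock)
+                * clears the monitor and generates a wake-up event for wfe.
+                * wfe may also wake for unrelated reasons (interrupts, events),
+                * which is why the deadlines are rechecked on every loop
+                * iteration rather than trusting the wake-up.
+                */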
+
+               loopcount++;
+       } while (TRUE);
+
+done_spinning:
+#if     CONFIG_DTRACE
+       /*
+        * Note that we record a different probe id depending on whether
+        * this is a direct or indirect mutex.  This lets us penalize only
+        * lock groups that have debug/stats enabled with the cost of
+        * dtrace processing, if desired.
+        */
+       if (__probable(lock->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)) {
+               LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN, lock,
+                   mach_absolute_time() - start_time);
+       } else {
+               LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_SPIN, lock,
+                   mach_absolute_time() - start_time);
+       }
+       /* The lockstat acquire event is recorded by the caller. */
+#endif
+
+       state = ordered_load_mtx(lock);
+
+       KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_END,
+           trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(LCK_MTX_STATE_TO_THREAD(state)), lock->lck_mtx_waiters, retval, 0);
+       if ((!has_interlock) && (retval != SPINWAIT_ACQUIRED)) {
+               /* We must own either the lock or the interlock on return. */
+               interlock_lock(lock);
+       }
+
+       return retval;
+}
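+/*
+ * Hypothetical caller sketch (illustration only, not the actual contended-path
+ * code): it shows the contract that, on return from the spin-wait helper above,
+ * the thread owns either the mutex (SPINWAIT_ACQUIRED) or the interlock
+ * (every other result), so the caller can safely record itself as a waiter.
+ *
+ *     switch (spinwait_result) {
+ *     case SPINWAIT_ACQUIRED:
+ *             return;                 // mutex owned, interlock not held
+ *     case SPINWAIT_INTERLOCK:
+ *     default:
+ *             // interlock held: set ARM_LCK_WAITERS, block, and retry
+ *             break;
+ *     }
+ */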
+
+
 /*
  *     Common code for mutex locking as spinlock
  */
 static inline void
 lck_mtx_lock_spin_internal(lck_mtx_t *lock, boolean_t allow_held_as_mutex)
 {
-       uintptr_t       state;
+       uintptr_t       state;
 
        interlock_lock(lock);
        state = ordered_load_mtx(lock);
        if (LCK_MTX_STATE_TO_THREAD(state)) {
-               if (allow_held_as_mutex)
+               if (allow_held_as_mutex) {
                        lck_mtx_lock_contended(lock, current_thread(), TRUE);
-               else
+               } else {
                        // "Always" variants can never block. If the lock is held and blocking is not allowed
                        // then someone is mixing always and non-always calls on the same lock, which is
                        // forbidden.
                        panic("Attempting to block on a lock taken as spin-always %p", lock);
+               }
                return;
        }
-       state &= ARM_LCK_WAITERS;                                               // Preserve waiters bit
-       state |= (LCK_MTX_SPIN_TAG | LCK_ILOCK);        // Add spin tag and maintain interlock
+       state &= ARM_LCK_WAITERS;                                               // Preserve waiters bit
+       state |= (LCK_MTX_SPIN_TAG | LCK_ILOCK);        // Add spin tag and maintain interlock
        ordered_store_mtx(lock, state);
        load_memory_barrier();
 
-#if    CONFIG_DTRACE
+#if     CONFIG_DTRACE
        LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN_ACQUIRE, lock, 0);
 #endif /* CONFIG_DTRACE */
 }
@@ -2575,12 +2704,12 @@ lck_mtx_lock_spin_always(lck_mtx_t *lock)
 boolean_t
 lck_mtx_try_lock(lck_mtx_t *lock)
 {
-       thread_t        thread = current_thread();
+       thread_t        thread = current_thread();
 
        lck_mtx_verify(lock);
-       if (atomic_compare_exchange(&lock->lck_mtx_data, 0, LCK_MTX_THREAD_TO_STATE(thread),
-                                       memory_order_acquire_smp, FALSE)) {
-#if    CONFIG_DTRACE
+       if (os_atomic_cmpxchg(&lock->lck_mtx_data,
+           0, LCK_MTX_THREAD_TO_STATE(thread), acquire)) {
+#if     CONFIG_DTRACE
                LOCKSTAT_RECORD(LS_LCK_MTX_TRY_LOCK_ACQUIRE, lock, 0);
 #endif /* CONFIG_DTRACE */
                return TRUE;
@@ -2591,11 +2720,10 @@ lck_mtx_try_lock(lck_mtx_t *lock)
 static boolean_t NOINLINE
 lck_mtx_try_lock_contended(lck_mtx_t *lock, thread_t thread)
 {
-       thread_t        holding_thread;
-       uintptr_t       state;
-       int             waiters;
+       thread_t        holding_thread;
+       uintptr_t       state;
+       int             waiters;
 
-#if    __SMP__
        interlock_lock(lock);
        state = ordered_load_mtx(lock);
        holding_thread = LCK_MTX_STATE_TO_THREAD(state);
@@ -2603,60 +2731,48 @@ lck_mtx_try_lock_contended(lck_mtx_t *lock, thread_t thread)
                interlock_unlock(lock);
                return FALSE;
        }
-#else
-       disable_preemption_for_thread(thread);
-       state = ordered_load_mtx(lock);
-       if (state & LCK_ILOCK)
-               panic("Unexpected interlock set (%p)", lock);
-       holding_thread = LCK_MTX_STATE_TO_THREAD(state);
-       if (holding_thread) {
-               enable_preemption();
-               return FALSE;
-       }
-       state |= LCK_ILOCK;
-       ordered_store_mtx(lock, state);
-#endif // __SMP__
-       waiters = lck_mtx_lock_acquire(lock);
+       waiters = lck_mtx_lock_acquire(lock, NULL);
        state = LCK_MTX_THREAD_TO_STATE(thread);
-       if (waiters != 0)
+       if (waiters != 0) {
                state |= ARM_LCK_WAITERS;
-#if __SMP__
-       state |= LCK_ILOCK;                             // Preserve interlock
-       ordered_store_mtx(lock, state); // Set ownership
-       interlock_unlock(lock);                 // Release interlock, enable preemption
-#else
-       ordered_store_mtx(lock, state); // Set ownership
-       enable_preemption();
-#endif
+       }
+       state |= LCK_ILOCK;                             // Preserve interlock
+       ordered_store_mtx(lock, state); // Set ownership
+       interlock_unlock(lock);                 // Release interlock, enable preemption
        load_memory_barrier();
+
+       turnstile_cleanup();
+
        return TRUE;
 }
 
 static inline boolean_t
 lck_mtx_try_lock_spin_internal(lck_mtx_t *lock, boolean_t allow_held_as_mutex)
 {
-       uintptr_t       state;
+       uintptr_t       state;
 
-       if (!interlock_try(lock))
+       if (!interlock_try(lock)) {
                return FALSE;
+       }
        state = ordered_load_mtx(lock);
-       if(LCK_MTX_STATE_TO_THREAD(state)) {
+       if (LCK_MTX_STATE_TO_THREAD(state)) {
                // Lock is held as mutex
-               if (allow_held_as_mutex)
+               if (allow_held_as_mutex) {
                        interlock_unlock(lock);
-               else
+               } else {
                        // "Always" variants can never block. If the lock is held as a normal mutex
                        // then someone is mixing always and non-always calls on the same lock, which is
                        // forbidden.
                        panic("Spin-mutex held as full mutex %p", lock);
+               }
                return FALSE;
        }
-       state &= ARM_LCK_WAITERS;                                               // Preserve waiters bit
-       state |= (LCK_MTX_SPIN_TAG | LCK_ILOCK);        // Add spin tag and maintain interlock
+       state &= ARM_LCK_WAITERS;                                               // Preserve waiters bit
+       state |= (LCK_MTX_SPIN_TAG | LCK_ILOCK);        // Add spin tag and maintain interlock
        ordered_store_mtx(lock, state);
        load_memory_barrier();
 
-#if    CONFIG_DTRACE
+#if     CONFIG_DTRACE
        LOCKSTAT_RECORD(LS_LCK_MTX_TRY_SPIN_LOCK_ACQUIRE, lock, 0);
 #endif /* CONFIG_DTRACE */
        return TRUE;
@@ -2688,22 +2804,23 @@ lck_mtx_try_lock_spin_always(lck_mtx_t *lock)
 void
 lck_mtx_unlock(lck_mtx_t *lock)
 {
-       thread_t        thread = current_thread();
-       uintptr_t       state;
-       boolean_t       ilk_held = FALSE;
+       thread_t        thread = current_thread();
+       uintptr_t       state;
+       boolean_t       ilk_held = FALSE;
 
        lck_mtx_verify(lock);
 
        state = ordered_load_mtx(lock);
        if (state & LCK_ILOCK) {
-               if(LCK_MTX_STATE_TO_THREAD(state) == (thread_t)LCK_MTX_SPIN_TAG)
-                       ilk_held = TRUE;        // Interlock is held by (presumably) this thread
+               if (LCK_MTX_STATE_TO_THREAD(state) == (thread_t)LCK_MTX_SPIN_TAG) {
+                       ilk_held = TRUE;        // Interlock is held by (presumably) this thread
+               }
                goto slow_case;
        }
        // Locked as a mutex
-       if (atomic_compare_exchange(&lock->lck_mtx_data, LCK_MTX_THREAD_TO_STATE(thread), 0,
-                                       memory_order_release_smp, FALSE)) {
-#if    CONFIG_DTRACE
+       if (os_atomic_cmpxchg(&lock->lck_mtx_data,
+           LCK_MTX_THREAD_TO_STATE(thread), 0, release)) {
+#if     CONFIG_DTRACE
                LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, lock, 0);
 #endif /* CONFIG_DTRACE */
                return;
@@ -2715,44 +2832,44 @@ slow_case:
 static void NOINLINE
 lck_mtx_unlock_contended(lck_mtx_t *lock, thread_t thread, boolean_t ilk_held)
 {
-       uintptr_t       state;
+       uintptr_t       state;
+       boolean_t               cleanup = FALSE;
 
        if (ilk_held) {
                state = ordered_load_mtx(lock);
        } else {
-#if    __SMP__
                interlock_lock(lock);
                state = ordered_load_mtx(lock);
-               if (thread != LCK_MTX_STATE_TO_THREAD(state))
-                       panic("lck_mtx_unlock(): Attempt to release lock not owned by thread (%p)", lock);
-#else
-               disable_preemption_for_thread(thread);
-               state = ordered_load_mtx(lock);
-               if (state & LCK_ILOCK)
-                       panic("lck_mtx_unlock(): Unexpected interlock set (%p)", lock);
-               if (thread != LCK_MTX_STATE_TO_THREAD(state))
+               if (thread != LCK_MTX_STATE_TO_THREAD(state)) {
                        panic("lck_mtx_unlock(): Attempt to release lock not owned by thread (%p)", lock);
-               state |= LCK_ILOCK;
-               ordered_store_mtx(lock, state);
-#endif
+               }
                if (state & ARM_LCK_WAITERS) {
-                       lck_mtx_unlock_wakeup(lock, thread);
-                       state = ordered_load_mtx(lock);
-               } else {
-            assertf(lock->lck_mtx_pri == 0, "pri=0x%x", lock->lck_mtx_pri);
+                       if (lck_mtx_unlock_wakeup(lock, thread)) {
+                               state = ARM_LCK_WAITERS;
+                       } else {
+                               state = 0;
+                       }
+                       cleanup = TRUE;
+                       goto unlock;
                }
        }
        state &= ARM_LCK_WAITERS;   /* Clear state, retain waiters bit */
-#if __SMP__
+unlock:
        state |= LCK_ILOCK;
        ordered_store_mtx(lock, state);
        interlock_unlock(lock);
-#else
-       ordered_store_mtx(lock, state);
-       enable_preemption();
-#endif
+       if (cleanup) {
+               /*
+                * Do not do any turnstile operations outside of this block.
+                * lock/unlock is called at an early stage of boot, with a single
+                * thread, when the turnstile subsystem is not yet initialized.
+                * Even without contention we can come through the slow path
+                * if the mutex is acquired as a spin lock.
+                */
+               turnstile_cleanup();
+       }
 
-#if    CONFIG_DTRACE
+#if     CONFIG_DTRACE
        LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, lock, 0);
 #endif /* CONFIG_DTRACE */
 }
@@ -2763,24 +2880,27 @@ lck_mtx_unlock_contended(lck_mtx_t *lock, thread_t thread, boolean_t ilk_held)
 void
 lck_mtx_assert(lck_mtx_t *lock, unsigned int type)
 {
-       thread_t        thread, holder;
-       uintptr_t       state;
+       thread_t        thread, holder;
+       uintptr_t       state;
 
        state = ordered_load_mtx(lock);
        holder = LCK_MTX_STATE_TO_THREAD(state);
        if (holder == (thread_t)LCK_MTX_SPIN_TAG) {
-                       // Lock is held in spin mode, owner is unknown.
-               return; // Punt
+               // Lock is held in spin mode, owner is unknown.
+               return; // Punt
        }
        thread = current_thread();
        if (type == LCK_MTX_ASSERT_OWNED) {
-               if (thread != holder)
+               if (thread != holder) {
                        panic("lck_mtx_assert(): mutex (%p) owned", lock);
+               }
        } else if (type == LCK_MTX_ASSERT_NOTOWNED) {
-               if (thread == holder)
+               if (thread == holder) {
                        panic("lck_mtx_assert(): mutex (%p) not owned", lock);
-       } else
+               }
+       } else {
                panic("lck_mtx_assert(): invalid arg (%u)", type);
+       }
 }
 
 /*
@@ -2801,29 +2921,28 @@ lck_mtx_ilk_unlock(lck_mtx_t *lock)
 void
 lck_mtx_convert_spin(lck_mtx_t *lock)
 {
-       thread_t        thread = current_thread();
-       uintptr_t       state;
-       int                     waiters;
+       thread_t        thread = current_thread();
+       uintptr_t       state;
+       int                     waiters;
 
        state = ordered_load_mtx(lock);
-       if (LCK_MTX_STATE_TO_THREAD(state) == thread)
-               return;         // Already owned as mutex, return
-       if ((state & LCK_ILOCK) == 0 || (LCK_MTX_STATE_TO_THREAD(state) != (thread_t)LCK_MTX_SPIN_TAG))
+       if (LCK_MTX_STATE_TO_THREAD(state) == thread) {
+               return;         // Already owned as mutex, return
+       }
+       if ((state & LCK_ILOCK) == 0 || (LCK_MTX_STATE_TO_THREAD(state) != (thread_t)LCK_MTX_SPIN_TAG)) {
                panic("lck_mtx_convert_spin: Not held as spinlock (%p)", lock);
-       state &= ~(LCK_MTX_THREAD_MASK);                // Clear the spin tag
+       }
+       state &= ~(LCK_MTX_THREAD_MASK);                // Clear the spin tag
        ordered_store_mtx(lock, state);
-       waiters = lck_mtx_lock_acquire(lock);   // Acquire to manage priority boosts
+       waiters = lck_mtx_lock_acquire(lock, NULL);   // Acquire to manage priority boosts
        state = LCK_MTX_THREAD_TO_STATE(thread);
-       if (waiters != 0)
+       if (waiters != 0) {
                state |= ARM_LCK_WAITERS;
-#if __SMP__
+       }
        state |= LCK_ILOCK;
-       ordered_store_mtx(lock, state);                 // Set ownership
-       interlock_unlock(lock);                                 // Release interlock, enable preemption
-#else
-       ordered_store_mtx(lock, state);                 // Set ownership
-       enable_preemption();
-#endif
+       ordered_store_mtx(lock, state);                 // Set ownership
+       interlock_unlock(lock);                                 // Release interlock, enable preemption
+       turnstile_cleanup();
 }
 
 
@@ -2832,13 +2951,15 @@ lck_mtx_convert_spin(lck_mtx_t *lock)
  */
 void
 lck_mtx_destroy(
-               lck_mtx_t * lck,
-               lck_grp_t * grp)
+       lck_mtx_t * lck,
+       lck_grp_t * grp)
 {
-       if (lck->lck_mtx_type != LCK_MTX_TYPE)
+       if (lck->lck_mtx_type != LCK_MTX_TYPE) {
                panic("Destroying invalid mutex %p", lck);
-       if (lck->lck_mtx_tag == LCK_MTX_TAG_DESTROYED)
+       }
+       if (lck->lck_mtx_tag == LCK_MTX_TAG_DESTROYED) {
                panic("Destroying previously destroyed lock %p", lck);
+       }
        lck_mtx_assert(lck, LCK_MTX_ASSERT_NOTOWNED);
        lck->lck_mtx_tag = LCK_MTX_TAG_DESTROYED;
        lck_grp_lckcnt_decr(grp, LCK_TYPE_MTX);
@@ -2852,39 +2973,41 @@ lck_mtx_destroy(
 void
 lck_spin_assert(lck_spin_t *lock, unsigned int type)
 {
-       thread_t        thread, holder;
-       uintptr_t       state;
+       thread_t        thread, holder;
+       uintptr_t       state;
 
-       if (lock->type != LCK_SPIN_TYPE)
+       if (lock->type != LCK_SPIN_TYPE) {
                panic("Invalid spinlock %p", lock);
+       }
 
        state = lock->lck_spin_data;
        holder = (thread_t)(state & ~LCK_ILOCK);
        thread = current_thread();
        if (type == LCK_ASSERT_OWNED) {
-               if (holder == 0)
+               if (holder == 0) {
                        panic("Lock not owned %p = %lx", lock, state);
-               if (holder != thread)
+               }
+               if (holder != thread) {
                        panic("Lock not owned by current thread %p = %lx", lock, state);
-               if ((state & LCK_ILOCK) == 0)
+               }
+               if ((state & LCK_ILOCK) == 0) {
                        panic("Lock bit not set %p = %lx", lock, state);
+               }
        } else if (type == LCK_ASSERT_NOTOWNED) {
                if (holder != 0) {
-                       if (holder == thread)
+                       if (holder == thread) {
                                panic("Lock owned by current thread %p = %lx", lock, state);
-                       else
-                               panic("Lock %p owned by thread %p", lock, holder);
+                       }
                }
-               if (state & LCK_ILOCK)
-                       panic("Lock bit set %p = %lx", lock, state);
-       } else
+       } else {
                panic("lck_spin_assert(): invalid arg (%u)", type);
+       }
 }
 
 boolean_t
 lck_rw_lock_yield_shared(lck_rw_t *lck, boolean_t force_yield)
 {
-       lck_rw_word_t   word;
+       lck_rw_word_t   word;
 
        lck_rw_assert(lck, LCK_RW_ASSERT_SHARED);
 
@@ -2906,16 +3029,18 @@ lck_rw_lock_yield_shared(lck_rw_t *lck, boolean_t force_yield)
 boolean_t
 kdp_lck_mtx_lock_spin_is_acquired(lck_mtx_t *lck)
 {
-       uintptr_t       state;
+       uintptr_t       state;
 
        if (not_in_kdp) {
                panic("panic: spinlock acquired check done outside of kernel debugger");
        }
        state = ordered_load_mtx(lck);
-       if (state == LCK_MTX_TAG_DESTROYED)
+       if (state == LCK_MTX_TAG_DESTROYED) {
                return FALSE;
-       if (LCK_MTX_STATE_TO_THREAD(state) || (state & LCK_ILOCK))
+       }
+       if (LCK_MTX_STATE_TO_THREAD(state) || (state & LCK_ILOCK)) {
                return TRUE;
+       }
        return FALSE;
 }
 
@@ -2938,18 +3063,18 @@ kdp_lck_mtx_find_owner(__unused struct waitq * waitq, event64_t event, thread_wa
 void
 kdp_rwlck_find_owner(__unused struct waitq * waitq, event64_t event, thread_waitinfo_t * waitinfo)
 {
-       lck_rw_t        *rwlck = NULL;
-       switch(waitinfo->wait_type) {
-               case kThreadWaitKernelRWLockRead:
-                       rwlck = READ_EVENT_TO_RWLOCK(event);
-                       break;
-               case kThreadWaitKernelRWLockWrite:
-               case kThreadWaitKernelRWLockUpgrade:
-                       rwlck = WRITE_EVENT_TO_RWLOCK(event);
-                       break;
-               default:
-                       panic("%s was called with an invalid blocking type", __FUNCTION__);
-                       break;
+       lck_rw_t        *rwlck = NULL;
+       switch (waitinfo->wait_type) {
+       case kThreadWaitKernelRWLockRead:
+               rwlck = READ_EVENT_TO_RWLOCK(event);
+               break;
+       case kThreadWaitKernelRWLockWrite:
+       case kThreadWaitKernelRWLockUpgrade:
+               rwlck = WRITE_EVENT_TO_RWLOCK(event);
+               break;
+       default:
+               panic("%s was called with an invalid blocking type", __FUNCTION__);
+               break;
        }
        waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(rwlck);
        waitinfo->owner = thread_tid(rwlck->lck_rw_owner);