git.saurik.com Git - apple/xnu.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* Copyright (c) 2007-2018 Apple Inc. All rights reserved.
	3	*
	4	* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
	5	*
	6	* This file contains Original Code and/or Modifications of Original Code
	7	* as defined in and that are subject to the Apple Public Source License
	8	* Version 2.0 (the 'License'). You may not use this file except in
	9	* compliance with the License. The rights granted to you under the License
	10	* may not be used to create, or enable the creation or redistribution of,
	11	* unlawful or unlicensed copies of an Apple operating system, or to
	12	* circumvent, violate, or enable the circumvention or violation of, any
	13	* terms of an Apple operating system software license agreement.
	14	*
	15	* Please obtain a copy of the License at
	16	* http://www.opensource.apple.com/apsl/ and read it before using this file.
	17	*
	18	* The Original Code and all software distributed under the License are
	19	* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
	20	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
	21	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
	22	* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
	23	* Please see the License for the specific language governing rights and
	24	* limitations under the License.
	25	*
	26	* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
	27	*/
	28	/*
	29	* @OSF_COPYRIGHT@
	30	*/
	31	/*
	32	* Mach Operating System Copyright (c) 1991,1990,1989,1988,1987 Carnegie
	33	* Mellon University All Rights Reserved.
	34	*
	35	* Permission to use, copy, modify and distribute this software and its
	36	* documentation is hereby granted, provided that both the copyright notice
	37	* and this permission notice appear in all copies of the software,
	38	* derivative works or modified versions, and any portions thereof, and that
	39	* both notices appear in supporting documentation.
	40	*
	41	* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.
	42	* CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES
	43	* WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
	44	*
	45	* Carnegie Mellon requests users of this software to return to
	46	*
	47	* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
	48	* School of Computer Science Carnegie Mellon University Pittsburgh PA
	49	* 15213-3890
	50	*
	51	* any improvements or extensions that they make and grant Carnegie Mellon the
	52	* rights to redistribute these changes.
	53	*/
	54	/*
	55	* File: kern/lock.c
	56	* Author: Avadis Tevanian, Jr., Michael Wayne Young
	57	* Date: 1985
	58	*
	59	* Locking primitives implementation
	60	*/
	61
	62	#define LOCK_PRIVATE 1
	63
	64	#include <mach_ldebug.h>
	65
	66	#include <kern/zalloc.h>
	67	#include <kern/lock_stat.h>
	68	#include <kern/locks.h>
	69	#include <kern/misc_protos.h>
	70	#include <kern/thread.h>
	71	#include <kern/processor.h>
	72	#include <kern/sched_prim.h>
	73	#include <kern/debug.h>
	74	#include <kern/kcdata.h>
	75	#include <string.h>
	76	#include <arm/cpu_internal.h>
	77	#include <os/hash.h>
	78	#include <arm/cpu_data.h>
	79
	80	#include <arm/cpu_data_internal.h>
	81	#include <arm/proc_reg.h>
	82	#include <arm/smp.h>
	83	#include <machine/atomic.h>
	84	#include <machine/machine_cpu.h>
	85
	86	#include <sys/kdebug.h>
	87
	88	#if CONFIG_DTRACE
	89	#define DTRACE_RW_SHARED 0x0 //reader
	90	#define DTRACE_RW_EXCL 0x1 //writer
	91	#define DTRACE_NO_FLAG 0x0 //not applicable
	92	#endif /* CONFIG_DTRACE */
	93
	94	#define LCK_RW_LCK_EXCLUSIVE_CODE 0x100
	95	#define LCK_RW_LCK_EXCLUSIVE1_CODE 0x101
	96	#define LCK_RW_LCK_SHARED_CODE 0x102
	97	#define LCK_RW_LCK_SH_TO_EX_CODE 0x103
	98	#define LCK_RW_LCK_SH_TO_EX1_CODE 0x104
	99	#define LCK_RW_LCK_EX_TO_SH_CODE 0x105
	100
	101
	/* Non-zero when any of the lock debugging build options is enabled. */
	#define ANY_LOCK_DEBUG  (USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG)
	103
	104	// Panic in tests that check lock usage correctness.
	105	// These panics are undesirable while already panicking or while a debugger is running.
	106	#define LOCK_CORRECTNESS_PANIC() (kernel_debugger_entry_count == 0)
	107
	108	#define ADAPTIVE_SPIN_ENABLE 0x1
	109
	110	int lck_mtx_adaptive_spin_mode = ADAPTIVE_SPIN_ENABLE;
	111
	112	#define SPINWAIT_OWNER_CHECK_COUNT 4
	113
	114	typedef enum {
	115	SPINWAIT_ACQUIRED, /* Got the lock. */
	116	SPINWAIT_INTERLOCK, /* Got the interlock, no owner, but caller must finish acquiring the lock. */
	117	SPINWAIT_DID_SPIN_HIGH_THR, /* Got the interlock, spun, but failed to get the lock. */
	118	SPINWAIT_DID_SPIN_OWNER_NOT_CORE, /* Got the interlock, spun, but failed to get the lock. */
	119	SPINWAIT_DID_SPIN_NO_WINDOW_CONTENTION, /* Got the interlock, spun, but failed to get the lock. */
	120	SPINWAIT_DID_SPIN_SLIDING_THR,/* Got the interlock, spun, but failed to get the lock. */
	121	SPINWAIT_DID_NOT_SPIN, /* Got the interlock, did not spin. */
	122	} spinwait_result_t;
	123
	124	#if CONFIG_DTRACE
	125	extern uint64_t dtrace_spin_threshold;
	126	#endif
	127
	128	/* Forwards */
	129
	130	extern unsigned int not_in_kdp;
	131
	132	/*
	133	* We often want to know the addresses of the callers
	134	* of the various lock routines. However, this information
	135	* is only used for debugging and statistics.
	136	*/
	137	typedef void *pc_t;
	138	#define INVALID_PC ((void *) VM_MAX_KERNEL_ADDRESS)
	139	#define INVALID_THREAD ((void *) VM_MAX_KERNEL_ADDRESS)
	140
	141	#ifdef lint
	142	/*
	143	* Eliminate lint complaints about unused local pc variables.
	144	*/
	145	#define OBTAIN_PC(pc, l) ++pc
	146	#else /* lint */
	147	#define OBTAIN_PC(pc, l)
	148	#endif /* lint */
	149
	150
	151	/*
	152	* Portable lock package implementation of usimple_locks.
	153	*/
	154
	155	/*
	156	* Owner thread pointer when lock held in spin mode
	157	*/
	158	#define LCK_MTX_SPIN_TAG 0xfffffff0
	159
	160
	161	#define interlock_lock(lock) hw_lock_bit ((hw_lock_bit_t*)(&(lock)->lck_mtx_data), LCK_ILOCK_BIT, LCK_GRP_NULL)
	162	#define interlock_try(lock) hw_lock_bit_try((hw_lock_bit_t*)(&(lock)->lck_mtx_data), LCK_ILOCK_BIT, LCK_GRP_NULL)
	163	#define interlock_unlock(lock) hw_unlock_bit ((hw_lock_bit_t*)(&(lock)->lck_mtx_data), LCK_ILOCK_BIT)
	164	#define lck_rw_ilk_lock(lock) hw_lock_bit ((hw_lock_bit_t*)(&(lock)->lck_rw_tag), LCK_RW_INTERLOCK_BIT, LCK_GRP_NULL)
	165	#define lck_rw_ilk_unlock(lock) hw_unlock_bit((hw_lock_bit_t*)(&(lock)->lck_rw_tag), LCK_RW_INTERLOCK_BIT)
	166
	167	#define load_memory_barrier() os_atomic_thread_fence(acquire)
	168
	169	// Enforce program order of loads and stores.
	170	#define ordered_load(target) \
	171	os_atomic_load(target, compiler_acq_rel)
	172	#define ordered_store(target, value) \
	173	os_atomic_store(target, value, compiler_acq_rel)
	174
	175	#define ordered_load_mtx(lock) ordered_load(&(lock)->lck_mtx_data)
	176	#define ordered_store_mtx(lock, value) ordered_store(&(lock)->lck_mtx_data, (value))
	177	#define ordered_load_rw(lock) ordered_load(&(lock)->lck_rw_data)
	178	#define ordered_store_rw(lock, value) ordered_store(&(lock)->lck_rw_data, (value))
	179	#define ordered_load_rw_owner(lock) ordered_load(&(lock)->lck_rw_owner)
	180	#define ordered_store_rw_owner(lock, value) ordered_store(&(lock)->lck_rw_owner, (value))
	181	#define ordered_load_hw(lock) ordered_load(&(lock)->lock_data)
	182	#define ordered_store_hw(lock, value) ordered_store(&(lock)->lock_data, (value))
	183	#define ordered_load_bit(lock) ordered_load((lock))
	184	#define ordered_store_bit(lock, value) ordered_store((lock), (value))
	185
	186
	187	// Prevent the compiler from reordering memory operations around this
	188	#define compiler_memory_fence() __asm__ volatile ("" ::: "memory")
	189
	190	#define LOCK_PANIC_TIMEOUT 0xc00000
	191	#define NOINLINE __attribute__((noinline))
	192
	193
	/*
	 * Test the interrupt-mask bits of a saved interrupt state word.
	 * The argument is parenthesized so that an expression argument is
	 * evaluated as a whole before being masked.
	 */
	#if __arm__
	#define interrupts_disabled(mask) ((mask) & PSR_INTMASK)
	#else
	#define interrupts_disabled(mask) ((mask) & DAIF_IRQF)
	#endif
	199
	200
	201	#if __arm__
	202	#define enable_fiq() __asm__ volatile ("cpsie f" ::: "memory");
	203	#define enable_interrupts() __asm__ volatile ("cpsie if" ::: "memory");
	204	#endif
	205
	206	ZONE_VIEW_DEFINE(ZV_LCK_SPIN, "lck_spin",
	207	KHEAP_ID_DEFAULT, sizeof(lck_spin_t));
	208
	209	ZONE_VIEW_DEFINE(ZV_LCK_MTX, "lck_mtx",
	210	KHEAP_ID_DEFAULT, sizeof(lck_mtx_t));
	211
	212	ZONE_VIEW_DEFINE(ZV_LCK_MTX_EXT, "lck_mtx_ext",
	213	KHEAP_ID_DEFAULT, sizeof(lck_mtx_ext_t));
	214
	215	ZONE_VIEW_DEFINE(ZV_LCK_RW, "lck_rw",
	216	KHEAP_ID_DEFAULT, sizeof(lck_rw_t));
	217
	218	/*
	219	* Forward declarations
	220	*/
	221
	222	static void lck_rw_lock_shared_gen(lck_rw_t *lck);
	223	static void lck_rw_lock_exclusive_gen(lck_rw_t *lck);
	224	static boolean_t lck_rw_lock_shared_to_exclusive_success(lck_rw_t *lck);
	225	static boolean_t lck_rw_lock_shared_to_exclusive_failure(lck_rw_t *lck, uint32_t prior_lock_state);
	226	static void lck_rw_lock_exclusive_to_shared_gen(lck_rw_t *lck, uint32_t prior_lock_state);
	227	static lck_rw_type_t lck_rw_done_gen(lck_rw_t *lck, uint32_t prior_lock_state);
	228	static boolean_t lck_rw_grab(lck_rw_t *lock, int mode, boolean_t wait);
	229
	230	/*
	231	* atomic exchange API is a low level abstraction of the operations
	232	* to atomically read, modify, and write a pointer. This abstraction works
	233	* for both Intel and ARMv8.1 compare and exchange atomic instructions as
	234	* well as the ARM exclusive instructions.
	235	*
	236	* atomic_exchange_begin() - begin exchange and retrieve current value
	237	* atomic_exchange_complete() - conclude an exchange
	238	* atomic_exchange_abort() - cancel an exchange started with atomic_exchange_begin()
	239	*/
	240	__unused static uint32_t
	241	load_exclusive32(uint32_t *target, enum memory_order ord)
	242	{
	243	uint32_t value;
	244
	245	#if __arm__
	246	if (_os_atomic_mo_has_release(ord)) {
	247	// Pre-load release barrier
	248	atomic_thread_fence(memory_order_release);
	249	}
	250	value = __builtin_arm_ldrex(target);
	251	#else
	252	if (_os_atomic_mo_has_acquire(ord)) {
	253	value = __builtin_arm_ldaex(target); // ldaxr
	254	} else {
	255	value = __builtin_arm_ldrex(target); // ldxr
	256	}
	257	#endif // __arm__
	258	return value;
	259	}
	260
	261	__unused static boolean_t
	262	store_exclusive32(uint32_t *target, uint32_t value, enum memory_order ord)
	263	{
	264	boolean_t err;
	265
	266	#if __arm__
	267	err = __builtin_arm_strex(value, target);
	268	if (_os_atomic_mo_has_acquire(ord)) {
	269	// Post-store acquire barrier
	270	atomic_thread_fence(memory_order_acquire);
	271	}
	272	#else
	273	if (_os_atomic_mo_has_release(ord)) {
	274	err = __builtin_arm_stlex(value, target); // stlxr
	275	} else {
	276	err = __builtin_arm_strex(value, target); // stxr
	277	}
	278	#endif // __arm__
	279	return !err;
	280	}
	281
	282	static uint32_t
	283	atomic_exchange_begin32(uint32_t target, uint32_t previous, enum memory_order ord)
	284	{
	285	uint32_t val;
	286
	287	#if __ARM_ATOMICS_8_1
	288	ord = memory_order_relaxed;
	289	#endif
	290	val = load_exclusive32(target, ord);
	291	*previous = val;
	292	return val;
	293	}
	294
	295	static boolean_t
	296	atomic_exchange_complete32(uint32_t *target, uint32_t previous, uint32_t newval, enum memory_order ord)
	297	{
	298	#if __ARM_ATOMICS_8_1
	299	return __c11_atomic_compare_exchange_strong((_Atomic uint32_t *)target, &previous, newval, ord, memory_order_relaxed);
	300	#else
	301	(void)previous; // Previous not needed, monitor is held
	302	return store_exclusive32(target, newval, ord);
	303	#endif
	304	}
	305
	/*
	 * Cancel an exchange started with atomic_exchange_begin32() by clearing
	 * the exclusive state.
	 */
	static void
	atomic_exchange_abort(void)
	{
		os_atomic_clear_exclusive();
	}
	311
	312	static boolean_t
	313	atomic_test_and_set32(uint32_t *target, uint32_t test_mask, uint32_t set_mask, enum memory_order ord, boolean_t wait)
	314	{
	315	uint32_t value, prev;
	316
	317	for (;;) {
	318	value = atomic_exchange_begin32(target, &prev, ord);
	319	if (value & test_mask) {
	320	if (wait) {
	321	wait_for_event(); // Wait with monitor held
	322	} else {
	323	atomic_exchange_abort(); // Clear exclusive monitor
	324	}
	325	return FALSE;
	326	}
	327	value \|= set_mask;
	328	if (atomic_exchange_complete32(target, prev, value, ord)) {
	329	return TRUE;
	330	}
	331	}
	332	}
	333
	334	inline boolean_t
	335	hw_atomic_test_and_set32(uint32_t *target, uint32_t test_mask, uint32_t set_mask, enum memory_order ord, boolean_t wait)
	336	{
	337	return atomic_test_and_set32(target, test_mask, set_mask, ord, wait);
	338	}
	339
	340	/*
	341	* To help _disable_preemption() inline everywhere with LTO,
	342	* we keep these nice non inlineable functions as the panic()
	343	* codegen setup is quite large and for weird reasons causes a frame.
	344	*/
	345	__abortlike
	346	static void
	347	_disable_preemption_overflow(void)
	348	{
	349	panic("Preemption count overflow");
	350	}
	351
	352	void
	353	_disable_preemption(void)
	354	{
	355	thread_t thread = current_thread();
	356	unsigned int count = thread->machine.preemption_count;
	357
	358	if (__improbable(++count == 0)) {
	359	_disable_preemption_overflow();
	360	}
	361
	362	os_atomic_store(&thread->machine.preemption_count, count, compiler_acq_rel);
	363	}
	364
	365	/*
	366	* This function checks whether an AST_URGENT has been pended.
	367	*
	368	* It is called once the preemption has been reenabled, which means the thread
	369	* may have been preempted right before this was called, and when this function
	370	* actually performs the check, we've changed CPU.
	371	*
	372	* This race is however benign: the point of AST_URGENT is to trigger a context
	373	* switch, so if one happened, there's nothing left to check for, and AST_URGENT
	374	* was cleared in the process.
	375	*
	376	* It follows that this check cannot have false negatives, which allows us
	377	* to avoid fiddling with interrupt state for the vast majority of cases
	378	* when the check will actually be negative.
	379	*/
	380	static NOINLINE void
	381	kernel_preempt_check(thread_t thread)
	382	{
	383	cpu_data_t *cpu_data_ptr;
	384	long state;
	385
	386	#if __arm__
	387	#define INTERRUPT_MASK PSR_IRQF
	388	#else // __arm__
	389	#define INTERRUPT_MASK DAIF_IRQF
	390	#endif // __arm__
	391
	392	/*
	393	* This check is racy and could load from another CPU's pending_ast mask,
	394	* but as described above, this can't have false negatives.
	395	*/
	396	cpu_data_ptr = os_atomic_load(&thread->machine.CpuDatap, compiler_acq_rel);
	397	if (__probable((cpu_data_ptr->cpu_pending_ast & AST_URGENT) == 0)) {
	398	return;
	399	}
	400
	401	/* If interrupts are masked, we can't take an AST here */
	402	state = get_interrupts();
	403	if ((state & INTERRUPT_MASK) == 0) {
	404	disable_interrupts_noread(); // Disable interrupts
	405
	406	/*
	407	* Reload cpu_data_ptr: a context switch would cause it to change.
	408	* Now that interrupts are disabled, this will debounce false positives.
	409	*/
	410	cpu_data_ptr = os_atomic_load(&thread->machine.CpuDatap, compiler_acq_rel);
	411	if (thread->machine.CpuDatap->cpu_pending_ast & AST_URGENT) {
	412	#if __arm__
	413	#if __ARM_USER_PROTECT__
	414	uintptr_t up = arm_user_protect_begin(thread);
	415	#endif // __ARM_USER_PROTECT__
	416	enable_fiq();
	417	#endif // __arm__
	418	ast_taken_kernel(); // Handle urgent AST
	419	#if __arm__
	420	#if __ARM_USER_PROTECT__
	421	arm_user_protect_end(thread, up, TRUE);
	422	#endif // __ARM_USER_PROTECT__
	423	enable_interrupts();
	424	return; // Return early on arm only due to FIQ enabling
	425	#endif // __arm__
	426	}
	427	restore_interrupts(state); // Enable interrupts
	428	}
	429	}
	430
	431	/*
	432	* To help _enable_preemption() inline everywhere with LTO,
	433	* we keep these nice non inlineable functions as the panic()
	434	* codegen setup is quite large and for weird reasons causes a frame.
	435	*/
	436	__abortlike
	437	static void
	438	_enable_preemption_underflow(void)
	439	{
	440	panic("Preemption count underflow");
	441	}
	442
	443	void
	444	_enable_preemption(void)
	445	{
	446	thread_t thread = current_thread();
	447	unsigned int count = thread->machine.preemption_count;
	448
	449	if (__improbable(count == 0)) {
	450	_enable_preemption_underflow();
	451	}
	452	count -= 1;
	453
	454	os_atomic_store(&thread->machine.preemption_count, count, compiler_acq_rel);
	455	if (count == 0) {
	456	kernel_preempt_check(thread);
	457	}
	458
	459	os_compiler_barrier();
	460	}
	461
	462	int
	463	get_preemption_level(void)
	464	{
	465	return current_thread()->machine.preemption_count;
	466	}
	467
	468	/*
	469	* Routine: lck_spin_alloc_init
	470	*/
	471	lck_spin_t *
	472	lck_spin_alloc_init(
	473	lck_grp_t * grp,
	474	lck_attr_t * attr)
	475	{
	476	lck_spin_t *lck;
	477
	478	lck = zalloc(ZV_LCK_SPIN);
	479	lck_spin_init(lck, grp, attr);
	480	return lck;
	481	}
	482
	483	/*
	484	* Routine: lck_spin_free
	485	*/
	486	void
	487	lck_spin_free(
	488	lck_spin_t * lck,
	489	lck_grp_t * grp)
	490	{
	491	lck_spin_destroy(lck, grp);
	492	zfree(ZV_LCK_SPIN, lck);
	493	}
	494
	495	/*
	496	* Routine: lck_spin_init
	497	*/
	498	void
	499	lck_spin_init(
	500	lck_spin_t * lck,
	501	lck_grp_t * grp,
	502	__unused lck_attr_t * attr)
	503	{
	504	lck->type = LCK_SPIN_TYPE;
	505	hw_lock_init(&lck->hwlock);
	506	if (grp) {
	507	lck_grp_reference(grp);
	508	lck_grp_lckcnt_incr(grp, LCK_TYPE_SPIN);
	509	}
	510	}
	511
	512	/*
	513	* arm_usimple_lock is a lck_spin_t without a group or attributes
	514	*/
	515	void inline
	516	arm_usimple_lock_init(simple_lock_t lck, __unused unsigned short initial_value)
	517	{
	518	lck->type = LCK_SPIN_TYPE;
	519	hw_lock_init(&lck->hwlock);
	520	}
	521
	522
	523	/*
	524	* Routine: lck_spin_lock
	525	*/
	526	void
	527	lck_spin_lock(lck_spin_t *lock)
	528	{
	529	#if DEVELOPMENT \|\| DEBUG
	530	if (lock->type != LCK_SPIN_TYPE) {
	531	panic("Invalid spinlock %p", lock);
	532	}
	533	#endif // DEVELOPMENT \|\| DEBUG
	534	hw_lock_lock(&lock->hwlock, LCK_GRP_NULL);
	535	}
	536
	537	void
	538	lck_spin_lock_grp(lck_spin_t lock, lck_grp_t grp)
	539	{
	540	#pragma unused(grp)
	541	#if DEVELOPMENT \|\| DEBUG
	542	if (lock->type != LCK_SPIN_TYPE) {
	543	panic("Invalid spinlock %p", lock);
	544	}
	545	#endif // DEVELOPMENT \|\| DEBUG
	546	hw_lock_lock(&lock->hwlock, grp);
	547	}
	548
	549	/*
	550	* Routine: lck_spin_lock_nopreempt
	551	*/
	552	void
	553	lck_spin_lock_nopreempt(lck_spin_t *lock)
	554	{
	555	#if DEVELOPMENT \|\| DEBUG
	556	if (lock->type != LCK_SPIN_TYPE) {
	557	panic("Invalid spinlock %p", lock);
	558	}
	559	#endif // DEVELOPMENT \|\| DEBUG
	560	hw_lock_lock_nopreempt(&lock->hwlock, LCK_GRP_NULL);
	561	}
	562
	563	void
	564	lck_spin_lock_nopreempt_grp(lck_spin_t lock, lck_grp_t grp)
	565	{
	566	#pragma unused(grp)
	567	#if DEVELOPMENT \|\| DEBUG
	568	if (lock->type != LCK_SPIN_TYPE) {
	569	panic("Invalid spinlock %p", lock);
	570	}
	571	#endif // DEVELOPMENT \|\| DEBUG
	572	hw_lock_lock_nopreempt(&lock->hwlock, grp);
	573	}
	574
	575	/*
	576	* Routine: lck_spin_try_lock
	577	*/
	578	int
	579	lck_spin_try_lock(lck_spin_t *lock)
	580	{
	581	return hw_lock_try(&lock->hwlock, LCK_GRP_NULL);
	582	}
	583
	584	int
	585	lck_spin_try_lock_grp(lck_spin_t lock, lck_grp_t grp)
	586	{
	587	#pragma unused(grp)
	588	return hw_lock_try(&lock->hwlock, grp);
	589	}
	590
	591	/*
	592	* Routine: lck_spin_try_lock_nopreempt
	593	*/
	594	int
	595	lck_spin_try_lock_nopreempt(lck_spin_t *lock)
	596	{
	597	return hw_lock_try_nopreempt(&lock->hwlock, LCK_GRP_NULL);
	598	}
	599
	600	int
	601	lck_spin_try_lock_nopreempt_grp(lck_spin_t lock, lck_grp_t grp)
	602	{
	603	#pragma unused(grp)
	604	return hw_lock_try_nopreempt(&lock->hwlock, grp);
	605	}
	606
	607	/*
	608	* Routine: lck_spin_unlock
	609	*/
	610	void
	611	lck_spin_unlock(lck_spin_t *lock)
	612	{
	613	#if DEVELOPMENT \|\| DEBUG
	614	if ((LCK_MTX_STATE_TO_THREAD(lock->lck_spin_data) != current_thread()) && LOCK_CORRECTNESS_PANIC()) {
	615	panic("Spinlock not owned by thread %p = %lx", lock, lock->lck_spin_data);
	616	}
	617	if (lock->type != LCK_SPIN_TYPE) {
	618	panic("Invalid spinlock type %p", lock);
	619	}
	620	#endif // DEVELOPMENT \|\| DEBUG
	621	hw_lock_unlock(&lock->hwlock);
	622	}
	623
	624	/*
	625	* Routine: lck_spin_unlock_nopreempt
	626	*/
	627	void
	628	lck_spin_unlock_nopreempt(lck_spin_t *lock)
	629	{
	630	#if DEVELOPMENT \|\| DEBUG
	631	if ((LCK_MTX_STATE_TO_THREAD(lock->lck_spin_data) != current_thread()) && LOCK_CORRECTNESS_PANIC()) {
	632	panic("Spinlock not owned by thread %p = %lx", lock, lock->lck_spin_data);
	633	}
	634	if (lock->type != LCK_SPIN_TYPE) {
	635	panic("Invalid spinlock type %p", lock);
	636	}
	637	#endif // DEVELOPMENT \|\| DEBUG
	638	hw_lock_unlock_nopreempt(&lock->hwlock);
	639	}
	640
	641	/*
	642	* Routine: lck_spin_destroy
	643	*/
	644	void
	645	lck_spin_destroy(
	646	lck_spin_t * lck,
	647	lck_grp_t * grp)
	648	{
	649	if (lck->lck_spin_data == LCK_SPIN_TAG_DESTROYED) {
	650	return;
	651	}
	652	lck->lck_spin_data = LCK_SPIN_TAG_DESTROYED;
	653	if (grp) {
	654	lck_grp_lckcnt_decr(grp, LCK_TYPE_SPIN);
	655	lck_grp_deallocate(grp);
	656	}
	657	}
	658
	659	/*
	660	* Routine: kdp_lck_spin_is_acquired
	661	* NOT SAFE: To be used only by kernel debugger to avoid deadlock.
	662	*/
	663	boolean_t
	664	kdp_lck_spin_is_acquired(lck_spin_t *lck)
	665	{
	666	if (not_in_kdp) {
	667	panic("panic: spinlock acquired check done outside of kernel debugger");
	668	}
	669	return ((lck->lck_spin_data & ~LCK_SPIN_TAG_DESTROYED) != 0) ? TRUE:FALSE;
	670	}
	671
	672	/*
	673	* Initialize a usimple_lock.
	674	*
	675	* No change in preemption state.
	676	*/
	677	void
	678	usimple_lock_init(
	679	usimple_lock_t l,
	680	unsigned short tag)
	681	{
	682	simple_lock_init((simple_lock_t) l, tag);
	683	}
	684
	685
	686	/*
	687	* Acquire a usimple_lock.
	688	*
	689	* Returns with preemption disabled. Note
	690	* that the hw_lock routines are responsible for
	691	* maintaining preemption state.
	692	*/
	693	void
	694	(usimple_lock)(
	695	usimple_lock_t l
	696	LCK_GRP_ARG(lck_grp_t *grp))
	697	{
	698	simple_lock((simple_lock_t) l, LCK_GRP_PROBEARG(grp));
	699	}
	700
	701
	702	extern void sync(void);
	703
	704	/*
	705	* Release a usimple_lock.
	706	*
	707	* Returns with preemption enabled. Note
	708	* that the hw_lock routines are responsible for
	709	* maintaining preemption state.
	710	*/
	711	void
	712	(usimple_unlock)(
	713	usimple_lock_t l)
	714	{
	715	simple_unlock((simple_lock_t)l);
	716	}
	717
	718
	719	/*
	720	* Conditionally acquire a usimple_lock.
	721	*
	722	* On success, returns with preemption disabled.
	723	* On failure, returns with preemption in the same state
	724	* as when first invoked. Note that the hw_lock routines
	725	* are responsible for maintaining preemption state.
	726	*
	727	* XXX No stats are gathered on a miss; I preserved this
	728	* behavior from the original assembly-language code, but
	729	* doesn't it make sense to log misses? XXX
	730	*/
	731	unsigned
	732	int
	733	(usimple_lock_try)(
	734	usimple_lock_t l
	735	LCK_GRP_ARG(lck_grp_t *grp))
	736	{
	737	return simple_lock_try((simple_lock_t) l, grp);
	738	}
	739
	740	/*
	741	* The C portion of the shared/exclusive locks package.
	742	*/
	743
	744	/*
	745	* compute the deadline to spin against when
	746	* waiting for a change of state on a lck_rw_t
	747	*/
	748	static inline uint64_t
	749	lck_rw_deadline_for_spin(lck_rw_t *lck)
	750	{
	751	lck_rw_word_t word;
	752
	753	word.data = ordered_load_rw(lck);
	754	if (word.can_sleep) {
	755	if (word.r_waiting \|\| word.w_waiting \|\| (word.shared_count > machine_info.max_cpus)) {
	756	/*
	757	* there are already threads waiting on this lock... this
	758	* implies that they have spun beyond their deadlines waiting for
	759	* the desired state to show up so we will not bother spinning at this time...
	760	* or
	761	* the current number of threads sharing this lock exceeds our capacity to run them
	762	* concurrently and since all states we're going to spin for require the rw_shared_count
	763	* to be at 0, we'll not bother spinning since the latency for this to happen is
	764	* unpredictable...
	765	*/
	766	return mach_absolute_time();
	767	}
	768	return mach_absolute_time() + MutexSpin;
	769	} else {
	770	return mach_absolute_time() + (100000LL * 1000000000LL);
	771	}
	772	}
	773
	774	static boolean_t
	775	lck_rw_drain_status(lck_rw_t *lock, uint32_t status_mask, boolean_t wait __unused)
	776	{
	777	uint64_t deadline = 0;
	778	uint32_t data;
	779
	780	if (wait) {
	781	deadline = lck_rw_deadline_for_spin(lock);
	782	}
	783
	784	for (;;) {
	785	data = load_exclusive32(&lock->lck_rw_data, memory_order_acquire_smp);
	786	if ((data & status_mask) == 0) {
	787	break;
	788	}
	789	if (wait) {
	790	wait_for_event();
	791	} else {
	792	os_atomic_clear_exclusive();
	793	}
	794	if (!wait \|\| (mach_absolute_time() >= deadline)) {
	795	return FALSE;
	796	}
	797	}
	798	os_atomic_clear_exclusive();
	799	return TRUE;
	800	}
	801
	802	/*
	803	* Spin while interlock is held.
	804	*/
	805	static inline void
	806	lck_rw_interlock_spin(lck_rw_t *lock)
	807	{
	808	uint32_t data;
	809
	810	for (;;) {
	811	data = load_exclusive32(&lock->lck_rw_data, memory_order_relaxed);
	812	if (data & LCK_RW_INTERLOCK) {
	813	wait_for_event();
	814	} else {
	815	os_atomic_clear_exclusive();
	816	return;
	817	}
	818	}
	819	}
	820
	821	/*
	822	* We disable interrupts while holding the RW interlock to prevent an
	823	* interrupt from exacerbating hold time.
	824	* Hence, local helper functions lck_interlock_lock()/lck_interlock_unlock().
	825	*/
	826	static inline boolean_t
	827	lck_interlock_lock(lck_rw_t *lck)
	828	{
	829	boolean_t istate;
	830
	831	istate = ml_set_interrupts_enabled(FALSE);
	832	lck_rw_ilk_lock(lck);
	833	return istate;
	834	}
	835
	836	static inline void
	837	lck_interlock_unlock(lck_rw_t *lck, boolean_t istate)
	838	{
	839	lck_rw_ilk_unlock(lck);
	840	ml_set_interrupts_enabled(istate);
	841	}
	842
	843
	844	#define LCK_RW_GRAB_WANT 0
	845	#define LCK_RW_GRAB_SHARED 1
	846
	847	static boolean_t
	848	lck_rw_grab(lck_rw_t *lock, int mode, boolean_t wait)
	849	{
	850	uint64_t deadline = 0;
	851	uint32_t data, prev;
	852	boolean_t do_exch;
	853
	854	if (wait) {
	855	deadline = lck_rw_deadline_for_spin(lock);
	856	}
	857
	858	for (;;) {
	859	data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
	860	if (data & LCK_RW_INTERLOCK) {
	861	atomic_exchange_abort();
	862	lck_rw_interlock_spin(lock);
	863	continue;
	864	}
	865	do_exch = FALSE;
	866	if (mode == LCK_RW_GRAB_WANT) {
	867	if ((data & LCK_RW_WANT_EXCL) == 0) {
	868	data \|= LCK_RW_WANT_EXCL;
	869	do_exch = TRUE;
	870	}
	871	} else { // LCK_RW_GRAB_SHARED
	872	if (((data & (LCK_RW_WANT_EXCL \| LCK_RW_WANT_UPGRADE)) == 0) \|\|
	873	(((data & LCK_RW_SHARED_MASK)) && ((data & LCK_RW_PRIV_EXCL) == 0))) {
	874	data += LCK_RW_SHARED_READER;
	875	do_exch = TRUE;
	876	}
	877	}
	878	if (do_exch) {
	879	if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
	880	return TRUE;
	881	}
	882	} else {
	883	if (wait) { // Non-waiting
	884	wait_for_event();
	885	} else {
	886	atomic_exchange_abort();
	887	}
	888	if (!wait \|\| (mach_absolute_time() >= deadline)) {
	889	return FALSE;
	890	}
	891	}
	892	}
	893	}
	894
	895
	896	/*
	897	* Routine: lck_rw_alloc_init
	898	*/
	899	lck_rw_t *
	900	lck_rw_alloc_init(
	901	lck_grp_t *grp,
	902	lck_attr_t *attr)
	903	{
	904	lck_rw_t *lck;
	905
	906	lck = zalloc_flags(ZV_LCK_RW, Z_WAITOK \| Z_ZERO);
	907	lck_rw_init(lck, grp, attr);
	908	return lck;
	909	}
	910
	911	/*
	912	* Routine: lck_rw_free
	913	*/
	914	void
	915	lck_rw_free(
	916	lck_rw_t *lck,
	917	lck_grp_t *grp)
	918	{
	919	lck_rw_destroy(lck, grp);
	920	zfree(ZV_LCK_RW, lck);
	921	}
	922
	923	/*
	924	* Routine: lck_rw_init
	925	*/
	926	void
	927	lck_rw_init(
	928	lck_rw_t *lck,
	929	lck_grp_t *grp,
	930	lck_attr_t *attr)
	931	{
	932	if (attr == LCK_ATTR_NULL) {
	933	attr = &LockDefaultLckAttr;
	934	}
	935	memset(lck, 0, sizeof(lck_rw_t));
	936	lck->lck_rw_can_sleep = TRUE;
	937	if ((attr->lck_attr_val & LCK_ATTR_RW_SHARED_PRIORITY) == 0) {
	938	lck->lck_rw_priv_excl = TRUE;
	939	}
	940
	941	lck_grp_reference(grp);
	942	lck_grp_lckcnt_incr(grp, LCK_TYPE_RW);
	943	}
	944
	945
	946	/*
	947	* Routine: lck_rw_destroy
	948	*/
	949	void
	950	lck_rw_destroy(
	951	lck_rw_t *lck,
	952	lck_grp_t *grp)
	953	{
	954	if (lck->lck_rw_tag == LCK_RW_TAG_DESTROYED) {
	955	return;
	956	}
	957	#if MACH_LDEBUG
	958	lck_rw_assert(lck, LCK_RW_ASSERT_NOTHELD);
	959	#endif
	960	lck->lck_rw_tag = LCK_RW_TAG_DESTROYED;
	961	lck_grp_lckcnt_decr(grp, LCK_TYPE_RW);
	962	lck_grp_deallocate(grp);
	963	return;
	964	}
	965
	966	/*
	967	* Routine: lck_rw_lock
	968	*/
	969	void
	970	lck_rw_lock(
	971	lck_rw_t *lck,
	972	lck_rw_type_t lck_rw_type)
	973	{
	974	if (lck_rw_type == LCK_RW_TYPE_SHARED) {
	975	lck_rw_lock_shared(lck);
	976	} else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) {
	977	lck_rw_lock_exclusive(lck);
	978	} else {
	979	panic("lck_rw_lock(): Invalid RW lock type: %x", lck_rw_type);
	980	}
	981	}
	982
	/*
	 * Single non-waiting attempt to set LCK_RW_WANT_EXCL, which succeeds only
	 * when there are no readers and none of WANT_EXCL, WANT_UPGRADE or the
	 * interlock bit is set.
	 */
	#define LCK_RW_LOCK_EXCLUSIVE_TAS(lck)  (atomic_test_and_set32(&(lck)->lck_rw_data, \
		                                (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK), \
		                                LCK_RW_WANT_EXCL, memory_order_acquire_smp, FALSE))
	986
	987	/*
	988	* Routine: lck_rw_lock_exclusive_check_contended
	989	*/
	990	bool
	991	lck_rw_lock_exclusive_check_contended(lck_rw_t *lock)
	992	{
	993	thread_t thread = current_thread();
	994	bool contended = false;
	995
	996	if (lock->lck_rw_can_sleep) {
	997	thread->rwlock_count++;
	998	} else if (get_preemption_level() == 0) {
	999	panic("Taking non-sleepable RW lock with preemption enabled");
	1000	}
	1001	if (LCK_RW_LOCK_EXCLUSIVE_TAS(lock)) {
	1002	#if CONFIG_DTRACE
	1003	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
	1004	#endif /* CONFIG_DTRACE */
	1005	} else {
	1006	contended = true;
	1007	lck_rw_lock_exclusive_gen(lock);
	1008	}
	1009	#if MACH_ASSERT
	1010	thread_t owner = ordered_load_rw_owner(lock);
	1011	assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
	1012	#endif
	1013	ordered_store_rw_owner(lock, thread);
	1014	return contended;
	1015	}
	1016
	1017	/*
	1018	* Routine: lck_rw_lock_exclusive
	1019	*/
	1020	void
	1021	lck_rw_lock_exclusive(lck_rw_t *lock)
	1022	{
	1023	thread_t thread = current_thread();
	1024
	1025	if (lock->lck_rw_can_sleep) {
	1026	thread->rwlock_count++;
	1027	} else if (get_preemption_level() == 0) {
	1028	panic("Taking non-sleepable RW lock with preemption enabled");
	1029	}
	1030	if (LCK_RW_LOCK_EXCLUSIVE_TAS(lock)) {
	1031	#if CONFIG_DTRACE
	1032	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
	1033	#endif /* CONFIG_DTRACE */
	1034	} else {
	1035	lck_rw_lock_exclusive_gen(lock);
	1036	}
	1037	#if MACH_ASSERT
	1038	thread_t owner = ordered_load_rw_owner(lock);
	1039	assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
	1040	#endif
	1041	ordered_store_rw_owner(lock, thread);
	1042	}
	1043
	1044	/*
	1045	* Routine: lck_rw_lock_shared
	1046	*/
	1047	void
	1048	lck_rw_lock_shared(lck_rw_t *lock)
	1049	{
	1050	uint32_t data, prev;
	1051
	1052	if (lock->lck_rw_can_sleep) {
	1053	current_thread()->rwlock_count++;
	1054	} else if (get_preemption_level() == 0) {
	1055	panic("Taking non-sleepable RW lock with preemption enabled");
	1056	}
	1057	for (;;) {
	1058	data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
	1059	if (data & (LCK_RW_WANT_EXCL \| LCK_RW_WANT_UPGRADE \| LCK_RW_INTERLOCK)) {
	1060	atomic_exchange_abort();
	1061	lck_rw_lock_shared_gen(lock);
	1062	break;
	1063	}
	1064	data += LCK_RW_SHARED_READER;
	1065	if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
	1066	break;
	1067	}
	1068	cpu_pause();
	1069	}
	1070	#if MACH_ASSERT
	1071	thread_t owner = ordered_load_rw_owner(lock);
	1072	assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
	1073	#endif
	1074	#if CONFIG_DTRACE
	1075	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED);
	1076	#endif /* CONFIG_DTRACE */
	1077	return;
	1078	}
	1079
	1080	/*
	1081	* Routine: lck_rw_lock_shared_to_exclusive
	1082	*
	1083	* False returned upon failure, in this case the shared lock is dropped.
	1084	*/
	1085	boolean_t
	1086	lck_rw_lock_shared_to_exclusive(lck_rw_t *lock)
	1087	{
	1088	uint32_t data, prev;
	1089
	1090	for (;;) {
	1091	data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
	1092	if (data & LCK_RW_INTERLOCK) {
	1093	atomic_exchange_abort();
	1094	lck_rw_interlock_spin(lock);
	1095	continue;
	1096	}
	1097	if (data & LCK_RW_WANT_UPGRADE) {
	1098	data -= LCK_RW_SHARED_READER;
	1099	if ((data & LCK_RW_SHARED_MASK) == 0) { /* we were the last reader */
	1100	data &= ~(LCK_RW_W_WAITING); /* so clear the wait indicator */
	1101	}
	1102	if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
	1103	return lck_rw_lock_shared_to_exclusive_failure(lock, prev);
	1104	}
	1105	} else {
	1106	data \|= LCK_RW_WANT_UPGRADE; /* ask for WANT_UPGRADE */
	1107	data -= LCK_RW_SHARED_READER; /* and shed our read count */
	1108	if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
	1109	break;
	1110	}
	1111	}
	1112	cpu_pause();
	1113	}
	1114	/* we now own the WANT_UPGRADE */
	1115	if (data & LCK_RW_SHARED_MASK) { /* check to see if all of the readers are drained */
	1116	lck_rw_lock_shared_to_exclusive_success(lock); /* if not, we need to go wait */
	1117	}
	1118	#if MACH_ASSERT
	1119	thread_t owner = ordered_load_rw_owner(lock);
	1120	assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
	1121	#endif
	1122	ordered_store_rw_owner(lock, current_thread());
	1123	#if CONFIG_DTRACE
	1124	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lock, 0);
	1125	#endif /* CONFIG_DTRACE */
	1126	return TRUE;
	1127	}
	1128
	1129
	1130	/*
	1131	* Routine: lck_rw_lock_shared_to_exclusive_failure
	1132	* Function:
	1133	* Fast path code has already dropped our read
	1134	* count and determined that someone else owns 'lck_rw_want_upgrade'
	1135	* if 'lck_rw_shared_count' == 0, its also already dropped 'lck_w_waiting'
	1136	* all we need to do here is determine if a wakeup is needed
	1137	*/
	1138	static boolean_t
	1139	lck_rw_lock_shared_to_exclusive_failure(
	1140	lck_rw_t *lck,
	1141	uint32_t prior_lock_state)
	1142	{
	1143	thread_t thread = current_thread();
	1144	uint32_t rwlock_count;
	1145
	1146	/* Check if dropping the lock means that we need to unpromote */
	1147	if (lck->lck_rw_can_sleep) {
	1148	rwlock_count = thread->rwlock_count--;
	1149	} else {
	1150	rwlock_count = UINT32_MAX;
	1151	}
	1152	#if MACH_LDEBUG
	1153	if (rwlock_count == 0) {
	1154	panic("rw lock count underflow for thread %p", thread);
	1155	}
	1156	#endif
	1157	if ((prior_lock_state & LCK_RW_W_WAITING) &&
	1158	((prior_lock_state & LCK_RW_SHARED_MASK) == LCK_RW_SHARED_READER)) {
	1159	/*
	1160	* Someone else has requested upgrade.
	1161	* Since we've released the read lock, wake
	1162	* him up if he's blocked waiting
	1163	*/
	1164	thread_wakeup(LCK_RW_WRITER_EVENT(lck));
	1165	}
	1166
	1167	if ((rwlock_count == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
	1168	/* sched_flags checked without lock, but will be rechecked while clearing */
	1169	lck_rw_clear_promotion(thread, unslide_for_kdebug(lck));
	1170	}
	1171
	1172	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) \| DBG_FUNC_NONE,
	1173	VM_KERNEL_UNSLIDE_OR_PERM(lck), lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0);
	1174
	1175	return FALSE;
	1176	}
	1177
	1178	/*
	1179	* Routine: lck_rw_lock_shared_to_exclusive_success
	1180	* Function:
	1181	* assembly fast path code has already dropped our read
	1182	* count and successfully acquired 'lck_rw_want_upgrade'
	1183	* we just need to wait for the rest of the readers to drain
	1184	* and then we can return as the exclusive holder of this lock
	1185	*/
	1186	static boolean_t
	1187	lck_rw_lock_shared_to_exclusive_success(
	1188	lck_rw_t *lock)
	1189	{
	1190	__kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock);
	1191	int slept = 0;
	1192	lck_rw_word_t word;
	1193	wait_result_t res;
	1194	boolean_t istate;
	1195	boolean_t not_shared;
	1196
	1197	#if CONFIG_DTRACE
	1198	uint64_t wait_interval = 0;
	1199	int readers_at_sleep = 0;
	1200	boolean_t dtrace_ls_initialized = FALSE;
	1201	boolean_t dtrace_rwl_shared_to_excl_spin, dtrace_rwl_shared_to_excl_block, dtrace_ls_enabled = FALSE;
	1202	#endif
	1203
	1204	while (!lck_rw_drain_status(lock, LCK_RW_SHARED_MASK, FALSE)) {
	1205	word.data = ordered_load_rw(lock);
	1206	#if CONFIG_DTRACE
	1207	if (dtrace_ls_initialized == FALSE) {
	1208	dtrace_ls_initialized = TRUE;
	1209	dtrace_rwl_shared_to_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN] != 0);
	1210	dtrace_rwl_shared_to_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK] != 0);
	1211	dtrace_ls_enabled = dtrace_rwl_shared_to_excl_spin \|\| dtrace_rwl_shared_to_excl_block;
	1212	if (dtrace_ls_enabled) {
	1213	/*
	1214	* Either sleeping or spinning is happening,
	1215	* start a timing of our delay interval now.
	1216	*/
	1217	readers_at_sleep = word.shared_count;
	1218	wait_interval = mach_absolute_time();
	1219	}
	1220	}
	1221	#endif
	1222
	1223	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) \| DBG_FUNC_START,
	1224	trace_lck, word.shared_count, 0, 0, 0);
	1225
	1226	not_shared = lck_rw_drain_status(lock, LCK_RW_SHARED_MASK, TRUE);
	1227
	1228	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) \| DBG_FUNC_END,
	1229	trace_lck, lock->lck_rw_shared_count, 0, 0, 0);
	1230
	1231	if (not_shared) {
	1232	break;
	1233	}
	1234
	1235	/*
	1236	* if we get here, the spin deadline in lck_rw_wait_on_status()
	1237	* has expired w/o the rw_shared_count having drained to 0
	1238	* check to see if we're allowed to do a thread_block
	1239	*/
	1240	if (word.can_sleep) {
	1241	istate = lck_interlock_lock(lock);
	1242
	1243	word.data = ordered_load_rw(lock);
	1244	if (word.shared_count != 0) {
	1245	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) \| DBG_FUNC_START,
	1246	trace_lck, word.shared_count, 0, 0, 0);
	1247
	1248	word.w_waiting = 1;
	1249	ordered_store_rw(lock, word.data);
	1250
	1251	thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockUpgrade);
	1252	res = assert_wait(LCK_RW_WRITER_EVENT(lock),
	1253	THREAD_UNINT \| THREAD_WAIT_NOREPORT_USER);
	1254	lck_interlock_unlock(lock, istate);
	1255
	1256	if (res == THREAD_WAITING) {
	1257	res = thread_block(THREAD_CONTINUE_NULL);
	1258	slept++;
	1259	}
	1260	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) \| DBG_FUNC_END,
	1261	trace_lck, res, slept, 0, 0);
	1262	} else {
	1263	lck_interlock_unlock(lock, istate);
	1264	break;
	1265	}
	1266	}
	1267	}
	1268	#if CONFIG_DTRACE
	1269	/*
	1270	* We infer whether we took the sleep/spin path above by checking readers_at_sleep.
	1271	*/
	1272	if (dtrace_ls_enabled == TRUE) {
	1273	if (slept == 0) {
	1274	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN, lock, mach_absolute_time() - wait_interval, 0);
	1275	} else {
	1276	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK, lock,
	1277	mach_absolute_time() - wait_interval, 1,
	1278	(readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
	1279	}
	1280	}
	1281	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lock, 1);
	1282	#endif
	1283	return TRUE;
	1284	}
	1285
	1286
	1287	/*
	1288	* Routine: lck_rw_lock_exclusive_to_shared
	1289	*/
	1290
	1291	void
	1292	lck_rw_lock_exclusive_to_shared(lck_rw_t *lock)
	1293	{
	1294	uint32_t data, prev;
	1295
	1296	assertf(lock->lck_rw_owner == current_thread(), "state=0x%x, owner=%p", lock->lck_rw_data, lock->lck_rw_owner);
	1297	ordered_store_rw_owner(lock, THREAD_NULL);
	1298	for (;;) {
	1299	data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_release_smp);
	1300	if (data & LCK_RW_INTERLOCK) {
	1301	atomic_exchange_abort();
	1302	lck_rw_interlock_spin(lock); /* wait for interlock to clear */
	1303	continue;
	1304	}
	1305	data += LCK_RW_SHARED_READER;
	1306	if (data & LCK_RW_WANT_UPGRADE) {
	1307	data &= ~(LCK_RW_WANT_UPGRADE);
	1308	} else {
	1309	data &= ~(LCK_RW_WANT_EXCL);
	1310	}
	1311	if (!((prev & LCK_RW_W_WAITING) && (prev & LCK_RW_PRIV_EXCL))) {
	1312	data &= ~(LCK_RW_W_WAITING);
	1313	}
	1314	if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_release_smp)) {
	1315	break;
	1316	}
	1317	cpu_pause();
	1318	}
	1319	return lck_rw_lock_exclusive_to_shared_gen(lock, prev);
	1320	}
	1321
	1322	/*
	1323	* Routine: lck_rw_lock_exclusive_to_shared_gen
	1324	* Function:
	1325	* Fast path has already dropped
	1326	* our exclusive state and bumped lck_rw_shared_count
	1327	* all we need to do here is determine if anyone
	1328	* needs to be awakened.
	1329	*/
	1330	static void
	1331	lck_rw_lock_exclusive_to_shared_gen(
	1332	lck_rw_t *lck,
	1333	uint32_t prior_lock_state)
	1334	{
	1335	__kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
	1336	lck_rw_word_t fake_lck;
	1337
	1338	/*
	1339	* prior_lock state is a snapshot of the 1st word of the
	1340	* lock in question... we'll fake up a pointer to it
	1341	* and carefully not access anything beyond whats defined
	1342	* in the first word of a lck_rw_t
	1343	*/
	1344	fake_lck.data = prior_lock_state;
	1345
	1346	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) \| DBG_FUNC_START,
	1347	trace_lck, fake_lck->want_excl, fake_lck->want_upgrade, 0, 0);
	1348
	1349	/*
	1350	* don't wake up anyone waiting to take the lock exclusively
	1351	* since we hold a read count... when the read count drops to 0,
	1352	* the writers will be woken.
	1353	*
	1354	* wake up any waiting readers if we don't have any writers waiting,
	1355	* or the lock is NOT marked as rw_priv_excl (writers have privilege)
	1356	*/
	1357	if (!(fake_lck.priv_excl && fake_lck.w_waiting) && fake_lck.r_waiting) {
	1358	thread_wakeup(LCK_RW_READER_EVENT(lck));
	1359	}
	1360
	1361	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) \| DBG_FUNC_END,
	1362	trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, lck->lck_rw_shared_count, 0);
	1363
	1364	#if CONFIG_DTRACE
	1365	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_TO_SHARED_DOWNGRADE, lck, 0);
	1366	#endif
	1367	}
	1368
	1369
	1370	/*
	1371	* Routine: lck_rw_try_lock
	1372	*/
	1373	boolean_t
	1374	lck_rw_try_lock(
	1375	lck_rw_t *lck,
	1376	lck_rw_type_t lck_rw_type)
	1377	{
	1378	if (lck_rw_type == LCK_RW_TYPE_SHARED) {
	1379	return lck_rw_try_lock_shared(lck);
	1380	} else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) {
	1381	return lck_rw_try_lock_exclusive(lck);
	1382	} else {
	1383	panic("lck_rw_try_lock(): Invalid rw lock type: %x", lck_rw_type);
	1384	}
	1385	return FALSE;
	1386	}
	1387
	1388	/*
	1389	* Routine: lck_rw_try_lock_shared
	1390	*/
	1391
	1392	boolean_t
	1393	lck_rw_try_lock_shared(lck_rw_t *lock)
	1394	{
	1395	uint32_t data, prev;
	1396
	1397	for (;;) {
	1398	data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
	1399	if (data & LCK_RW_INTERLOCK) {
	1400	atomic_exchange_abort();
	1401	lck_rw_interlock_spin(lock);
	1402	continue;
	1403	}
	1404	if (data & (LCK_RW_WANT_EXCL \| LCK_RW_WANT_UPGRADE)) {
	1405	atomic_exchange_abort();
	1406	return FALSE; /* lock is busy */
	1407	}
	1408	data += LCK_RW_SHARED_READER; /* Increment reader refcount */
	1409	if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
	1410	break;
	1411	}
	1412	cpu_pause();
	1413	}
	1414	#if MACH_ASSERT
	1415	thread_t owner = ordered_load_rw_owner(lock);
	1416	assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
	1417	#endif
	1418
	1419	if (lock->lck_rw_can_sleep) {
	1420	current_thread()->rwlock_count++;
	1421	} else if (get_preemption_level() == 0) {
	1422	panic("Taking non-sleepable RW lock with preemption enabled");
	1423	}
	1424
	1425	#if CONFIG_DTRACE
	1426	LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED);
	1427	#endif /* CONFIG_DTRACE */
	1428	return TRUE;
	1429	}
	1430
	1431
	1432	/*
	1433	* Routine: lck_rw_try_lock_exclusive
	1434	*/
	1435
	1436	boolean_t
	1437	lck_rw_try_lock_exclusive(lck_rw_t *lock)
	1438	{
	1439	uint32_t data, prev;
	1440	thread_t thread;
	1441
	1442	for (;;) {
	1443	data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
	1444	if (data & LCK_RW_INTERLOCK) {
	1445	atomic_exchange_abort();
	1446	lck_rw_interlock_spin(lock);
	1447	continue;
	1448	}
	1449	if (data & (LCK_RW_SHARED_MASK \| LCK_RW_WANT_EXCL \| LCK_RW_WANT_UPGRADE)) {
	1450	atomic_exchange_abort();
	1451	return FALSE;
	1452	}
	1453	data \|= LCK_RW_WANT_EXCL;
	1454	if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
	1455	break;
	1456	}
	1457	cpu_pause();
	1458	}
	1459	thread = current_thread();
	1460	if (lock->lck_rw_can_sleep) {
	1461	thread->rwlock_count++;
	1462	} else if (get_preemption_level() == 0) {
	1463	panic("Taking non-sleepable RW lock with preemption enabled");
	1464	}
	1465	#if MACH_ASSERT
	1466	thread_t owner = ordered_load_rw_owner(lock);
	1467	assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
	1468	#endif
	1469	ordered_store_rw_owner(lock, thread);
	1470	#if CONFIG_DTRACE
	1471	LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
	1472	#endif /* CONFIG_DTRACE */
	1473	return TRUE;
	1474	}
	1475
	1476
	1477	/*
	1478	* Routine: lck_rw_unlock
	1479	*/
	1480	void
	1481	lck_rw_unlock(
	1482	lck_rw_t *lck,
	1483	lck_rw_type_t lck_rw_type)
	1484	{
	1485	if (lck_rw_type == LCK_RW_TYPE_SHARED) {
	1486	lck_rw_unlock_shared(lck);
	1487	} else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) {
	1488	lck_rw_unlock_exclusive(lck);
	1489	} else {
	1490	panic("lck_rw_unlock(): Invalid RW lock type: %d", lck_rw_type);
	1491	}
	1492	}
	1493
	1494
	1495	/*
	1496	* Routine: lck_rw_unlock_shared
	1497	*/
	1498	void
	1499	lck_rw_unlock_shared(
	1500	lck_rw_t *lck)
	1501	{
	1502	lck_rw_type_t ret;
	1503
	1504	assertf(lck->lck_rw_owner == THREAD_NULL, "state=0x%x, owner=%p", lck->lck_rw_data, lck->lck_rw_owner);
	1505	assertf(lck->lck_rw_shared_count > 0, "shared_count=0x%x", lck->lck_rw_shared_count);
	1506	ret = lck_rw_done(lck);
	1507
	1508	if (ret != LCK_RW_TYPE_SHARED) {
	1509	panic("lck_rw_unlock_shared(): lock %p held in mode: %d", lck, ret);
	1510	}
	1511	}
	1512
	1513
	1514	/*
	1515	* Routine: lck_rw_unlock_exclusive
	1516	*/
	1517	void
	1518	lck_rw_unlock_exclusive(
	1519	lck_rw_t *lck)
	1520	{
	1521	lck_rw_type_t ret;
	1522
	1523	assertf(lck->lck_rw_owner == current_thread(), "state=0x%x, owner=%p", lck->lck_rw_data, lck->lck_rw_owner);
	1524	ret = lck_rw_done(lck);
	1525
	1526	if (ret != LCK_RW_TYPE_EXCLUSIVE) {
	1527	panic("lck_rw_unlock_exclusive(): lock %p held in mode: %d", lck, ret);
	1528	}
	1529	}
	1530
	1531
	1532	/*
	1533	* Routine: lck_rw_lock_exclusive_gen
	1534	*/
	1535	static void
	1536	lck_rw_lock_exclusive_gen(
	1537	lck_rw_t *lock)
	1538	{
	1539	__kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock);
	1540	lck_rw_word_t word;
	1541	int slept = 0;
	1542	boolean_t gotlock = 0;
	1543	boolean_t not_shared_or_upgrade = 0;
	1544	wait_result_t res = 0;
	1545	boolean_t istate;
	1546
	1547	#if CONFIG_DTRACE
	1548	boolean_t dtrace_ls_initialized = FALSE;
	1549	boolean_t dtrace_rwl_excl_spin, dtrace_rwl_excl_block, dtrace_ls_enabled = FALSE;
	1550	uint64_t wait_interval = 0;
	1551	int readers_at_sleep = 0;
	1552	#endif
	1553
	1554	/*
	1555	* Try to acquire the lck_rw_want_excl bit.
	1556	*/
	1557	while (!lck_rw_grab(lock, LCK_RW_GRAB_WANT, FALSE)) {
	1558	#if CONFIG_DTRACE
	1559	if (dtrace_ls_initialized == FALSE) {
	1560	dtrace_ls_initialized = TRUE;
	1561	dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
	1562	dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
	1563	dtrace_ls_enabled = dtrace_rwl_excl_spin \|\| dtrace_rwl_excl_block;
	1564	if (dtrace_ls_enabled) {
	1565	/*
	1566	* Either sleeping or spinning is happening,
	1567	* start a timing of our delay interval now.
	1568	*/
	1569	readers_at_sleep = lock->lck_rw_shared_count;
	1570	wait_interval = mach_absolute_time();
	1571	}
	1572	}
	1573	#endif
	1574
	1575	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) \| DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
	1576
	1577	gotlock = lck_rw_grab(lock, LCK_RW_GRAB_WANT, TRUE);
	1578
	1579	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) \| DBG_FUNC_END, trace_lck, 0, 0, gotlock, 0);
	1580
	1581	if (gotlock) {
	1582	break;
	1583	}
	1584	/*
	1585	* if we get here, the deadline has expired w/o us
	1586	* being able to grab the lock exclusively
	1587	* check to see if we're allowed to do a thread_block
	1588	*/
	1589	word.data = ordered_load_rw(lock);
	1590	if (word.can_sleep) {
	1591	istate = lck_interlock_lock(lock);
	1592	word.data = ordered_load_rw(lock);
	1593
	1594	if (word.want_excl) {
	1595	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) \| DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
	1596
	1597	word.w_waiting = 1;
	1598	ordered_store_rw(lock, word.data);
	1599
	1600	thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite);
	1601	res = assert_wait(LCK_RW_WRITER_EVENT(lock),
	1602	THREAD_UNINT \| THREAD_WAIT_NOREPORT_USER);
	1603	lck_interlock_unlock(lock, istate);
	1604
	1605	if (res == THREAD_WAITING) {
	1606	res = thread_block(THREAD_CONTINUE_NULL);
	1607	slept++;
	1608	}
	1609	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) \| DBG_FUNC_END, trace_lck, res, slept, 0, 0);
	1610	} else {
	1611	word.want_excl = 1;
	1612	ordered_store_rw(lock, word.data);
	1613	lck_interlock_unlock(lock, istate);
	1614	break;
	1615	}
	1616	}
	1617	}
	1618	/*
	1619	* Wait for readers (and upgrades) to finish...
	1620	*/
	1621	while (!lck_rw_drain_status(lock, LCK_RW_SHARED_MASK \| LCK_RW_WANT_UPGRADE, FALSE)) {
	1622	#if CONFIG_DTRACE
	1623	/*
	1624	* Either sleeping or spinning is happening, start
	1625	* a timing of our delay interval now. If we set it
	1626	* to -1 we don't have accurate data so we cannot later
	1627	* decide to record a dtrace spin or sleep event.
	1628	*/
	1629	if (dtrace_ls_initialized == FALSE) {
	1630	dtrace_ls_initialized = TRUE;
	1631	dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
	1632	dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
	1633	dtrace_ls_enabled = dtrace_rwl_excl_spin \|\| dtrace_rwl_excl_block;
	1634	if (dtrace_ls_enabled) {
	1635	/*
	1636	* Either sleeping or spinning is happening,
	1637	* start a timing of our delay interval now.
	1638	*/
	1639	readers_at_sleep = lock->lck_rw_shared_count;
	1640	wait_interval = mach_absolute_time();
	1641	}
	1642	}
	1643	#endif
	1644
	1645	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) \| DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
	1646
	1647	not_shared_or_upgrade = lck_rw_drain_status(lock, LCK_RW_SHARED_MASK \| LCK_RW_WANT_UPGRADE, TRUE);
	1648
	1649	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) \| DBG_FUNC_END, trace_lck, 0, 0, not_shared_or_upgrade, 0);
	1650
	1651	if (not_shared_or_upgrade) {
	1652	break;
	1653	}
	1654	/*
	1655	* if we get here, the deadline has expired w/o us
	1656	* being able to grab the lock exclusively
	1657	* check to see if we're allowed to do a thread_block
	1658	*/
	1659	word.data = ordered_load_rw(lock);
	1660	if (word.can_sleep) {
	1661	istate = lck_interlock_lock(lock);
	1662	word.data = ordered_load_rw(lock);
	1663
	1664	if (word.shared_count != 0 \|\| word.want_upgrade) {
	1665	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) \| DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
	1666
	1667	word.w_waiting = 1;
	1668	ordered_store_rw(lock, word.data);
	1669
	1670	thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite);
	1671	res = assert_wait(LCK_RW_WRITER_EVENT(lock),
	1672	THREAD_UNINT \| THREAD_WAIT_NOREPORT_USER);
	1673	lck_interlock_unlock(lock, istate);
	1674
	1675	if (res == THREAD_WAITING) {
	1676	res = thread_block(THREAD_CONTINUE_NULL);
	1677	slept++;
	1678	}
	1679	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) \| DBG_FUNC_END, trace_lck, res, slept, 0, 0);
	1680	} else {
	1681	lck_interlock_unlock(lock, istate);
	1682	/*
	1683	* must own the lock now, since we checked for
	1684	* readers or upgrade owner behind the interlock
	1685	* no need for a call to 'lck_rw_drain_status'
	1686	*/
	1687	break;
	1688	}
	1689	}
	1690	}
	1691
	1692	#if CONFIG_DTRACE
	1693	/*
	1694	* Decide what latencies we suffered that are Dtrace events.
	1695	* If we have set wait_interval, then we either spun or slept.
	1696	* At least we get out from under the interlock before we record
	1697	* which is the best we can do here to minimize the impact
	1698	* of the tracing.
	1699	* If we have set wait_interval to -1, then dtrace was not enabled when we
	1700	* started sleeping/spinning so we don't record this event.
	1701	*/
	1702	if (dtrace_ls_enabled == TRUE) {
	1703	if (slept == 0) {
	1704	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_SPIN, lock,
	1705	mach_absolute_time() - wait_interval, 1);
	1706	} else {
	1707	/*
	1708	* For the blocking case, we also record if when we blocked
	1709	* it was held for read or write, and how many readers.
	1710	* Notice that above we recorded this before we dropped
	1711	* the interlock so the count is accurate.
	1712	*/
	1713	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_BLOCK, lock,
	1714	mach_absolute_time() - wait_interval, 1,
	1715	(readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
	1716	}
	1717	}
	1718	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, 1);
	1719	#endif /* CONFIG_DTRACE */
	1720	}
	1721
	1722	/*
	1723	* Routine: lck_rw_done
	1724	*/
	1725
	1726	lck_rw_type_t
	1727	lck_rw_done(lck_rw_t *lock)
	1728	{
	1729	uint32_t data, prev;
	1730	boolean_t once = FALSE;
	1731
	1732	for (;;) {
	1733	data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_release_smp);
	1734	if (data & LCK_RW_INTERLOCK) { /* wait for interlock to clear */
	1735	atomic_exchange_abort();
	1736	lck_rw_interlock_spin(lock);
	1737	continue;
	1738	}
	1739	if (data & LCK_RW_SHARED_MASK) { /* lock is held shared */
	1740	assertf(lock->lck_rw_owner == THREAD_NULL, "state=0x%x, owner=%p", lock->lck_rw_data, lock->lck_rw_owner);
	1741	data -= LCK_RW_SHARED_READER;
	1742	if ((data & LCK_RW_SHARED_MASK) == 0) { /* if reader count has now gone to 0, check for waiters */
	1743	goto check_waiters;
	1744	}
	1745	} else { /* if reader count == 0, must be exclusive lock */
	1746	if (data & LCK_RW_WANT_UPGRADE) {
	1747	data &= ~(LCK_RW_WANT_UPGRADE);
	1748	} else {
	1749	if (data & LCK_RW_WANT_EXCL) {
	1750	data &= ~(LCK_RW_WANT_EXCL);
	1751	} else { /* lock is not 'owned', panic */
	1752	panic("Releasing non-exclusive RW lock without a reader refcount!");
	1753	}
	1754	}
	1755	if (!once) {
	1756	// Only check for holder and clear it once
	1757	assertf(lock->lck_rw_owner == current_thread(), "state=0x%x, owner=%p", lock->lck_rw_data, lock->lck_rw_owner);
	1758	ordered_store_rw_owner(lock, THREAD_NULL);
	1759	once = TRUE;
	1760	}
	1761	check_waiters:
	1762	/*
	1763	* test the original values to match what
	1764	* lck_rw_done_gen is going to do to determine
	1765	* which wakeups need to happen...
	1766	*
	1767	* if !(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting)
	1768	*/
	1769	if (prev & LCK_RW_W_WAITING) {
	1770	data &= ~(LCK_RW_W_WAITING);
	1771	if ((prev & LCK_RW_PRIV_EXCL) == 0) {
	1772	data &= ~(LCK_RW_R_WAITING);
	1773	}
	1774	} else {
	1775	data &= ~(LCK_RW_R_WAITING);
	1776	}
	1777	}
	1778	if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_release_smp)) {
	1779	break;
	1780	}
	1781	cpu_pause();
	1782	}
	1783	return lck_rw_done_gen(lock, prev);
	1784	}
	1785
	1786	/*
	1787	* Routine: lck_rw_done_gen
	1788	*
	1789	* called from the assembly language wrapper...
	1790	* prior_lock_state is the value in the 1st
	1791	* word of the lock at the time of a successful
	1792	* atomic compare and exchange with the new value...
	1793	* it represents the state of the lock before we
	1794	* decremented the rw_shared_count or cleared either
	1795	* rw_want_upgrade or rw_want_write and
	1796	* the lck_x_waiting bits... since the wrapper
	1797	* routine has already changed the state atomically,
	1798	* we just need to decide if we should
	1799	* wake up anyone and what value to return... we do
	1800	* this by examining the state of the lock before
	1801	* we changed it
	1802	*/
	1803	static lck_rw_type_t
	1804	lck_rw_done_gen(
	1805	lck_rw_t *lck,
	1806	uint32_t prior_lock_state)
	1807	{
	1808	lck_rw_word_t fake_lck;
	1809	lck_rw_type_t lock_type;
	1810	thread_t thread;
	1811	uint32_t rwlock_count;
	1812
	1813	/*
	1814	* prior_lock state is a snapshot of the 1st word of the
	1815	* lock in question... we'll fake up a pointer to it
	1816	* and carefully not access anything beyond whats defined
	1817	* in the first word of a lck_rw_t
	1818	*/
	1819	fake_lck.data = prior_lock_state;
	1820
	1821	if (fake_lck.shared_count <= 1) {
	1822	if (fake_lck.w_waiting) {
	1823	thread_wakeup(LCK_RW_WRITER_EVENT(lck));
	1824	}
	1825
	1826	if (!(fake_lck.priv_excl && fake_lck.w_waiting) && fake_lck.r_waiting) {
	1827	thread_wakeup(LCK_RW_READER_EVENT(lck));
	1828	}
	1829	}
	1830	if (fake_lck.shared_count) {
	1831	lock_type = LCK_RW_TYPE_SHARED;
	1832	} else {
	1833	lock_type = LCK_RW_TYPE_EXCLUSIVE;
	1834	}
	1835
	1836	/* Check if dropping the lock means that we need to unpromote */
	1837	thread = current_thread();
	1838	if (fake_lck.can_sleep) {
	1839	rwlock_count = thread->rwlock_count--;
	1840	} else {
	1841	rwlock_count = UINT32_MAX;
	1842	}
	1843	#if MACH_LDEBUG
	1844	if (rwlock_count == 0) {
	1845	panic("rw lock count underflow for thread %p", thread);
	1846	}
	1847	#endif
	1848	if ((rwlock_count == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
	1849	/* sched_flags checked without lock, but will be rechecked while clearing */
	1850	lck_rw_clear_promotion(thread, unslide_for_kdebug(lck));
	1851	}
	1852	#if CONFIG_DTRACE
	1853	LOCKSTAT_RECORD(LS_LCK_RW_DONE_RELEASE, lck, lock_type == LCK_RW_TYPE_SHARED ? 0 : 1);
	1854	#endif
	1855	return lock_type;
	1856	}
	1857
	1858	/*
	1859	* Routine: lck_rw_lock_shared_gen
	1860	* Function:
	1861	* Fast path code has determined that this lock
	1862	* is held exclusively... this is where we spin/block
	1863	* until we can acquire the lock in the shared mode
	1864	*/
	1865	static void
	1866	lck_rw_lock_shared_gen(
	1867	lck_rw_t *lck)
	1868	{
	1869	__kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
	1870	lck_rw_word_t word;
	1871	boolean_t gotlock = 0;
	1872	int slept = 0;
	1873	wait_result_t res = 0;
	1874	boolean_t istate;
	1875
	1876	#if CONFIG_DTRACE
	1877	uint64_t wait_interval = 0;
	1878	int readers_at_sleep = 0;
	1879	boolean_t dtrace_ls_initialized = FALSE;
	1880	boolean_t dtrace_rwl_shared_spin, dtrace_rwl_shared_block, dtrace_ls_enabled = FALSE;
	1881	#endif /* CONFIG_DTRACE */
	1882
	1883	while (!lck_rw_grab(lck, LCK_RW_GRAB_SHARED, FALSE)) {
	1884	#if CONFIG_DTRACE
	1885	if (dtrace_ls_initialized == FALSE) {
	1886	dtrace_ls_initialized = TRUE;
	1887	dtrace_rwl_shared_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_SPIN] != 0);
	1888	dtrace_rwl_shared_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_BLOCK] != 0);
	1889	dtrace_ls_enabled = dtrace_rwl_shared_spin \|\| dtrace_rwl_shared_block;
	1890	if (dtrace_ls_enabled) {
	1891	/*
	1892	* Either sleeping or spinning is happening,
	1893	* start a timing of our delay interval now.
	1894	*/
	1895	readers_at_sleep = lck->lck_rw_shared_count;
	1896	wait_interval = mach_absolute_time();
	1897	}
	1898	}
	1899	#endif
	1900
	1901	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) \| DBG_FUNC_START,
	1902	trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, 0, 0);
	1903
	1904	gotlock = lck_rw_grab(lck, LCK_RW_GRAB_SHARED, TRUE);
	1905
	1906	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) \| DBG_FUNC_END,
	1907	trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, gotlock, 0);
	1908
	1909	if (gotlock) {
	1910	break;
	1911	}
	1912	/*
	1913	* if we get here, the deadline has expired w/o us
	1914	* being able to grab the lock for read
	1915	* check to see if we're allowed to do a thread_block
	1916	*/
	1917	if (lck->lck_rw_can_sleep) {
	1918	istate = lck_interlock_lock(lck);
	1919
	1920	word.data = ordered_load_rw(lck);
	1921	if ((word.want_excl \|\| word.want_upgrade) &&
	1922	((word.shared_count == 0) \|\| word.priv_excl)) {
	1923	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) \| DBG_FUNC_START,
	1924	trace_lck, word.want_excl, word.want_upgrade, 0, 0);
	1925
	1926	word.r_waiting = 1;
	1927	ordered_store_rw(lck, word.data);
	1928
	1929	thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockRead);
	1930	res = assert_wait(LCK_RW_READER_EVENT(lck),
	1931	THREAD_UNINT \| THREAD_WAIT_NOREPORT_USER);
	1932	lck_interlock_unlock(lck, istate);
	1933
	1934	if (res == THREAD_WAITING) {
	1935	res = thread_block(THREAD_CONTINUE_NULL);
	1936	slept++;
	1937	}
	1938	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) \| DBG_FUNC_END,
	1939	trace_lck, res, slept, 0, 0);
	1940	} else {
	1941	word.shared_count++;
	1942	ordered_store_rw(lck, word.data);
	1943	lck_interlock_unlock(lck, istate);
	1944	break;
	1945	}
	1946	}
	1947	}
	1948
	1949	#if CONFIG_DTRACE
	1950	if (dtrace_ls_enabled == TRUE) {
	1951	if (slept == 0) {
	1952	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_SPIN, lck, mach_absolute_time() - wait_interval, 0);
	1953	} else {
	1954	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_BLOCK, lck,
	1955	mach_absolute_time() - wait_interval, 0,
	1956	(readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
	1957	}
	1958	}
	1959	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lck, 0);
	1960	#endif /* CONFIG_DTRACE */
	1961	}
	1962
	1963	/*
	1964	* Required to verify thread ownership for exclusive locks by virtue of PPL
	1965	* usage
	1966	*/
	1967	void
	1968	lck_rw_assert(
	1969	lck_rw_t *lck,
	1970	unsigned int type)
	1971	{
	1972	switch (type) {
	1973	case LCK_RW_ASSERT_SHARED:
	1974	if ((lck->lck_rw_shared_count != 0) &&
	1975	(lck->lck_rw_owner == THREAD_NULL)) {
	1976	return;
	1977	}
	1978	break;
	1979	case LCK_RW_ASSERT_EXCLUSIVE:
	1980	if ((lck->lck_rw_want_excl \|\| lck->lck_rw_want_upgrade) &&
	1981	(lck->lck_rw_shared_count == 0) &&
	1982	(lck->lck_rw_owner == current_thread())) {
	1983	return;
	1984	}
	1985	break;
	1986	case LCK_RW_ASSERT_HELD:
	1987	if (lck->lck_rw_shared_count != 0) {
	1988	return; // Held shared
	1989	}
	1990	if ((lck->lck_rw_want_excl \|\| lck->lck_rw_want_upgrade) &&
	1991	(lck->lck_rw_owner == current_thread())) {
	1992	return; // Held exclusive
	1993	}
	1994	break;
	1995	case LCK_RW_ASSERT_NOTHELD:
	1996	if ((lck->lck_rw_shared_count == 0) &&
	1997	!(lck->lck_rw_want_excl \|\| lck->lck_rw_want_upgrade) &&
	1998	(lck->lck_rw_owner == THREAD_NULL)) {
	1999	return;
	2000	}
	2001	break;
	2002	default:
	2003	break;
	2004	}
	2005	panic("rw lock (%p)%s held (mode=%u)", lck, (type == LCK_RW_ASSERT_NOTHELD ? "" : " not"), type);
	2006	}
	2007
	2008
	2009	/*
	2010	* Routine: kdp_lck_rw_lock_is_acquired_exclusive
	2011	* NOT SAFE: To be used only by kernel debugger to avoid deadlock.
	2012	*/
	2013	boolean_t
	2014	kdp_lck_rw_lock_is_acquired_exclusive(lck_rw_t *lck)
	2015	{
	2016	if (not_in_kdp) {
	2017	panic("panic: rw lock exclusive check done outside of kernel debugger");
	2018	}
	2019	return ((lck->lck_rw_want_upgrade \|\| lck->lck_rw_want_excl) && (lck->lck_rw_shared_count == 0)) ? TRUE : FALSE;
	2020	}
	2021
	2022	/*
	2023	* The C portion of the mutex package. These routines are only invoked
	2024	* if the optimized assembler routines can't do the work.
	2025	*/
	2026
	2027	/*
	2028	* Forward declaration
	2029	*/
	2030
	2031	void
	2032	lck_mtx_ext_init(
	2033	lck_mtx_ext_t * lck,
	2034	lck_grp_t * grp,
	2035	lck_attr_t * attr);
	2036
	2037	/*
	2038	* Routine: lck_mtx_alloc_init
	2039	*/
	2040	lck_mtx_t *
	2041	lck_mtx_alloc_init(
	2042	lck_grp_t * grp,
	2043	lck_attr_t * attr)
	2044	{
	2045	lck_mtx_t *lck;
	2046
	2047	lck = zalloc(ZV_LCK_MTX);
	2048	lck_mtx_init(lck, grp, attr);
	2049	return lck;
	2050	}
	2051
	2052	/*
	2053	* Routine: lck_mtx_free
	2054	*/
	2055	void
	2056	lck_mtx_free(
	2057	lck_mtx_t * lck,
	2058	lck_grp_t * grp)
	2059	{
	2060	lck_mtx_destroy(lck, grp);
	2061	zfree(ZV_LCK_MTX, lck);
	2062	}
	2063
	2064	/*
	2065	* Routine: lck_mtx_init
	2066	*/
	2067	void
	2068	lck_mtx_init(
	2069	lck_mtx_t * lck,
	2070	lck_grp_t * grp,
	2071	lck_attr_t * attr)
	2072	{
	2073	#ifdef BER_XXX
	2074	lck_mtx_ext_t *lck_ext;
	2075	#endif
	2076	lck_attr_t *lck_attr;
	2077
	2078	if (attr != LCK_ATTR_NULL) {
	2079	lck_attr = attr;
	2080	} else {
	2081	lck_attr = &LockDefaultLckAttr;
	2082	}
	2083
	2084	#ifdef BER_XXX
	2085	if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
	2086	lck_ext = zalloc(ZV_LCK_MTX_EXT);
	2087	lck_mtx_ext_init(lck_ext, grp, lck_attr);
	2088	lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
	2089	lck->lck_mtx_ptr = lck_ext;
	2090	lck->lck_mtx_type = LCK_MTX_TYPE;
	2091	} else
	2092	#endif
	2093	{
	2094	lck->lck_mtx_ptr = NULL; // Clear any padding in the union fields below
	2095	lck->lck_mtx_waiters = 0;
	2096	lck->lck_mtx_type = LCK_MTX_TYPE;
	2097	ordered_store_mtx(lck, 0);
	2098	}
	2099	lck_grp_reference(grp);
	2100	lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
	2101	}
	2102
	2103	/*
	2104	* Routine: lck_mtx_init_ext
	2105	*/
	2106	void
	2107	lck_mtx_init_ext(
	2108	lck_mtx_t * lck,
	2109	lck_mtx_ext_t * lck_ext,
	2110	lck_grp_t * grp,
	2111	lck_attr_t * attr)
	2112	{
	2113	lck_attr_t *lck_attr;
	2114
	2115	if (attr != LCK_ATTR_NULL) {
	2116	lck_attr = attr;
	2117	} else {
	2118	lck_attr = &LockDefaultLckAttr;
	2119	}
	2120
	2121	if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
	2122	lck_mtx_ext_init(lck_ext, grp, lck_attr);
	2123	lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
	2124	lck->lck_mtx_ptr = lck_ext;
	2125	lck->lck_mtx_type = LCK_MTX_TYPE;
	2126	} else {
	2127	lck->lck_mtx_waiters = 0;
	2128	lck->lck_mtx_type = LCK_MTX_TYPE;
	2129	ordered_store_mtx(lck, 0);
	2130	}
	2131	lck_grp_reference(grp);
	2132	lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
	2133	}
	2134
	2135	/*
	2136	* Routine: lck_mtx_ext_init
	2137	*/
	2138	void
	2139	lck_mtx_ext_init(
	2140	lck_mtx_ext_t * lck,
	2141	lck_grp_t * grp,
	2142	lck_attr_t * attr)
	2143	{
	2144	bzero((void *) lck, sizeof(lck_mtx_ext_t));
	2145
	2146	lck->lck_mtx.lck_mtx_type = LCK_MTX_TYPE;
	2147
	2148	if ((attr->lck_attr_val) & LCK_ATTR_DEBUG) {
	2149	lck->lck_mtx_deb.type = MUTEX_TAG;
	2150	lck->lck_mtx_attr \|= LCK_MTX_ATTR_DEBUG;
	2151	}
	2152	lck->lck_mtx_grp = grp;
	2153
	2154	if (grp->lck_grp_attr & LCK_GRP_ATTR_STAT) {
	2155	lck->lck_mtx_attr \|= LCK_MTX_ATTR_STAT;
	2156	}
	2157	}
	2158
	2159	/* The slow versions */
	2160	static void lck_mtx_lock_contended(lck_mtx_t *lock, thread_t thread, boolean_t interlocked);
	2161	static boolean_t lck_mtx_try_lock_contended(lck_mtx_t *lock, thread_t thread);
	2162	static void lck_mtx_unlock_contended(lck_mtx_t *lock, thread_t thread, boolean_t interlocked);
	2163
	2164	/* The adaptive spin function */
	2165	static spinwait_result_t lck_mtx_lock_contended_spinwait_arm(lck_mtx_t *lock, thread_t thread, boolean_t interlocked);
	2166
	2167	/*
	2168	* Routine: lck_mtx_verify
	2169	*
	2170	* Verify if a mutex is valid
	2171	*/
	2172	static inline void
	2173	lck_mtx_verify(lck_mtx_t *lock)
	2174	{
	2175	if (lock->lck_mtx_type != LCK_MTX_TYPE) {
	2176	panic("Invalid mutex %p", lock);
	2177	}
	2178	#if DEVELOPMENT \|\| DEBUG
	2179	if (lock->lck_mtx_tag == LCK_MTX_TAG_DESTROYED) {
	2180	panic("Mutex destroyed %p", lock);
	2181	}
	2182	#endif /* DEVELOPMENT \|\| DEBUG */
	2183	}
	2184
	2185	/*
	2186	* Routine: lck_mtx_check_preemption
	2187	*
	2188	* Verify preemption is enabled when attempting to acquire a mutex.
	2189	*/
	2190
	2191	static inline void
	2192	lck_mtx_check_preemption(lck_mtx_t *lock)
	2193	{
	2194	#if DEVELOPMENT \|\| DEBUG
	2195	if (current_cpu_datap()->cpu_hibernate) {
	2196	return;
	2197	}
	2198
	2199	int pl = get_preemption_level();
	2200
	2201	if (pl != 0) {
	2202	panic("Attempt to take mutex with preemption disabled. Lock=%p, level=%d", lock, pl);
	2203	}
	2204	#else
	2205	(void)lock;
	2206	#endif
	2207	}
	2208
	2209	/*
	2210	* Routine: lck_mtx_lock
	2211	*/
	2212	void
	2213	lck_mtx_lock(lck_mtx_t *lock)
	2214	{
	2215	thread_t thread;
	2216
	2217	lck_mtx_verify(lock);
	2218	lck_mtx_check_preemption(lock);
	2219	thread = current_thread();
	2220	if (os_atomic_cmpxchg(&lock->lck_mtx_data,
	2221	0, LCK_MTX_THREAD_TO_STATE(thread), acquire)) {
	2222	#if CONFIG_DTRACE
	2223	LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, lock, 0);
	2224	#endif /* CONFIG_DTRACE */
	2225	return;
	2226	}
	2227	lck_mtx_lock_contended(lock, thread, FALSE);
	2228	}
	2229
	2230	/*
	2231	* This is the slow version of mutex locking.
	2232	*/
	2233	static void NOINLINE
	2234	lck_mtx_lock_contended(lck_mtx_t *lock, thread_t thread, boolean_t interlocked)
	2235	{
	2236	thread_t holding_thread;
	2237	uintptr_t state;
	2238	int waiters = 0;
	2239	spinwait_result_t sw_res;
	2240	struct turnstile *ts = NULL;
	2241
	2242	/* Loop waiting until I see that the mutex is unowned */
	2243	for (;;) {
	2244	sw_res = lck_mtx_lock_contended_spinwait_arm(lock, thread, interlocked);
	2245	interlocked = FALSE;
	2246
	2247	switch (sw_res) {
	2248	case SPINWAIT_ACQUIRED:
	2249	if (ts != NULL) {
	2250	interlock_lock(lock);
	2251	turnstile_complete((uintptr_t)lock, NULL, NULL, TURNSTILE_KERNEL_MUTEX);
	2252	interlock_unlock(lock);
	2253	}
	2254	goto done;
	2255	case SPINWAIT_INTERLOCK:
	2256	goto set_owner;
	2257	default:
	2258	break;
	2259	}
	2260
	2261	state = ordered_load_mtx(lock);
	2262	holding_thread = LCK_MTX_STATE_TO_THREAD(state);
	2263	if (holding_thread == NULL) {
	2264	break;
	2265	}
	2266	ordered_store_mtx(lock, (state \| LCK_ILOCK \| ARM_LCK_WAITERS)); // Set waiters bit and wait
	2267	lck_mtx_lock_wait(lock, holding_thread, &ts);
	2268	/* returns interlock unlocked */
	2269	}
	2270
	2271	set_owner:
	2272	/* Hooray, I'm the new owner! */
	2273	state = ordered_load_mtx(lock);
	2274
	2275	if (state & ARM_LCK_WAITERS) {
	2276	/* Skip lck_mtx_lock_acquire if there are no waiters. */
	2277	waiters = lck_mtx_lock_acquire(lock, ts);
	2278	/*
	2279	* lck_mtx_lock_acquire will call
	2280	* turnstile_complete
	2281	*/
	2282	} else {
	2283	if (ts != NULL) {
	2284	turnstile_complete((uintptr_t)lock, NULL, NULL, TURNSTILE_KERNEL_MUTEX);
	2285	}
	2286	}
	2287
	2288	state = LCK_MTX_THREAD_TO_STATE(thread);
	2289	if (waiters != 0) {
	2290	state \|= ARM_LCK_WAITERS;
	2291	}
	2292	state \|= LCK_ILOCK; // Preserve interlock
	2293	ordered_store_mtx(lock, state); // Set ownership
	2294	interlock_unlock(lock); // Release interlock, enable preemption
	2295
	2296	done:
	2297	load_memory_barrier();
	2298
	2299	assert(thread->turnstile != NULL);
	2300
	2301	if (ts != NULL) {
	2302	turnstile_cleanup();
	2303	}
	2304
	2305	#if CONFIG_DTRACE
	2306	LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, lock, 0);
	2307	#endif /* CONFIG_DTRACE */
	2308	}
	2309
	2310	/*
	2311	* Routine: lck_mtx_lock_spinwait_arm
	2312	*
	2313	* Invoked trying to acquire a mutex when there is contention but
	2314	* the holder is running on another processor. We spin for up to a maximum
	2315	* time waiting for the lock to be released.
	2316	*/
	2317	static spinwait_result_t
	2318	lck_mtx_lock_contended_spinwait_arm(lck_mtx_t *lock, thread_t thread, boolean_t interlocked)
	2319	{
	2320	int has_interlock = (int)interlocked;
	2321	__kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock);
	2322	thread_t owner, prev_owner;
	2323	uint64_t window_deadline, sliding_deadline, high_deadline;
	2324	uint64_t start_time, cur_time, avg_hold_time, bias, delta;
	2325	int loopcount = 0;
	2326	uint i, prev_owner_cpu;
	2327	int total_hold_time_samples, window_hold_time_samples, unfairness;
	2328	bool owner_on_core, adjust;
	2329	uintptr_t state, new_state, waiters;
	2330	spinwait_result_t retval = SPINWAIT_DID_SPIN_HIGH_THR;
	2331
	2332	if (__improbable(!(lck_mtx_adaptive_spin_mode & ADAPTIVE_SPIN_ENABLE))) {
	2333	if (!has_interlock) {
	2334	interlock_lock(lock);
	2335	}
	2336
	2337	return SPINWAIT_DID_NOT_SPIN;
	2338	}
	2339
	2340	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) \| DBG_FUNC_START,
	2341	trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(LCK_MTX_STATE_TO_THREAD(state)), lock->lck_mtx_waiters, 0, 0);
	2342
	2343	start_time = mach_absolute_time();
	2344	/*
	2345	* window_deadline represents the "learning" phase.
	2346	* The thread collects statistics about the lock during
	2347	* window_deadline and then it makes a decision on whether to spin more
	2348	* or block according to the concurrency behavior
	2349	* observed.
	2350	*
	2351	* Every thread can spin at least low_MutexSpin.
	2352	*/
	2353	window_deadline = start_time + low_MutexSpin;
	2354	/*
	2355	* Sliding_deadline is the adjusted spin deadline
	2356	* computed after the "learning" phase.
	2357	*/
	2358	sliding_deadline = window_deadline;
	2359	/*
	2360	* High_deadline is a hard deadline. No thread
	2361	* can spin more than this deadline.
	2362	*/
	2363	if (high_MutexSpin >= 0) {
	2364	high_deadline = start_time + high_MutexSpin;
	2365	} else {
	2366	high_deadline = start_time + low_MutexSpin * real_ncpus;
	2367	}
	2368
	2369	/*
	2370	* Do not know yet which is the owner cpu.
	2371	* Initialize prev_owner_cpu with next cpu.
	2372	*/
	2373	prev_owner_cpu = (cpu_number() + 1) % real_ncpus;
	2374	total_hold_time_samples = 0;
	2375	window_hold_time_samples = 0;
	2376	avg_hold_time = 0;
	2377	adjust = TRUE;
	2378	bias = (os_hash_kernel_pointer(lock) + cpu_number()) % real_ncpus;
	2379
	2380	/* Snoop the lock state */
	2381	state = ordered_load_mtx(lock);
	2382	owner = LCK_MTX_STATE_TO_THREAD(state);
	2383	prev_owner = owner;
	2384
	2385	if (has_interlock) {
	2386	if (owner == NULL) {
	2387	retval = SPINWAIT_INTERLOCK;
	2388	goto done_spinning;
	2389	} else {
	2390	/*
	2391	* We are holding the interlock, so
	2392	* we can safely dereference owner.
	2393	*/
	2394	if (!machine_thread_on_core(owner) \|\| (owner->state & TH_IDLE)) {
	2395	retval = SPINWAIT_DID_NOT_SPIN;
	2396	goto done_spinning;
	2397	}
	2398	}
	2399	interlock_unlock(lock);
	2400	has_interlock = 0;
	2401	}
	2402
	2403	/*
	2404	* Spin while:
	2405	* - mutex is locked, and
	2406	* - it's locked as a spin lock, and
	2407	* - owner is running on another processor, and
	2408	* - we haven't spun for long enough.
	2409	*/
	2410	do {
	2411	/*
	2412	* Try to acquire the lock.
	2413	*/
	2414	owner = LCK_MTX_STATE_TO_THREAD(state);
	2415	if (owner == NULL) {
	2416	waiters = state & ARM_LCK_WAITERS;
	2417	if (waiters) {
	2418	/*
	2419	* preserve the waiter bit
	2420	* and try acquire the interlock.
	2421	* Note: we will successfully acquire
	2422	* the interlock only if we can also
	2423	* acquire the lock.
	2424	*/
	2425	new_state = ARM_LCK_WAITERS \| LCK_ILOCK;
	2426	has_interlock = 1;
	2427	retval = SPINWAIT_INTERLOCK;
	2428	disable_preemption();
	2429	} else {
	2430	new_state = LCK_MTX_THREAD_TO_STATE(thread);
	2431	retval = SPINWAIT_ACQUIRED;
	2432	}
	2433
	2434	/*
	2435	* The cmpxchg will succed only if the lock
	2436	* is not owned (doesn't have an owner set)
	2437	* and it is not interlocked.
	2438	* It will not fail if there are waiters.
	2439	*/
	2440	if (os_atomic_cmpxchgv(&lock->lck_mtx_data,
	2441	waiters, new_state, &state, acquire)) {
	2442	goto done_spinning;
	2443	} else {
	2444	if (waiters) {
	2445	has_interlock = 0;
	2446	enable_preemption();
	2447	}
	2448	}
	2449	}
	2450
	2451	cur_time = mach_absolute_time();
	2452
	2453	/*
	2454	* Never spin past high_deadline.
	2455	*/
	2456	if (cur_time >= high_deadline) {
	2457	retval = SPINWAIT_DID_SPIN_HIGH_THR;
	2458	break;
	2459	}
	2460
	2461	/*
	2462	* Check if owner is on core. If not block.
	2463	*/
	2464	owner = LCK_MTX_STATE_TO_THREAD(state);
	2465	if (owner) {
	2466	i = prev_owner_cpu;
	2467	owner_on_core = FALSE;
	2468
	2469	disable_preemption();
	2470	state = ordered_load_mtx(lock);
	2471	owner = LCK_MTX_STATE_TO_THREAD(state);
	2472
	2473	/*
	2474	* For scalability we want to check if the owner is on core
	2475	* without locking the mutex interlock.
	2476	* If we do not lock the mutex interlock, the owner that we see might be
	2477	* invalid, so we cannot dereference it. Therefore we cannot check
	2478	* any field of the thread to tell us if it is on core.
	2479	* Check if the thread that is running on the other cpus matches the owner.
	2480	*/
	2481	if (owner) {
	2482	do {
	2483	cpu_data_t *cpu_data_ptr = CpuDataEntries[i].cpu_data_vaddr;
	2484	if ((cpu_data_ptr != NULL) && (cpu_data_ptr->cpu_active_thread == owner)) {
	2485	owner_on_core = TRUE;
	2486	break;
	2487	}
	2488	if (++i >= real_ncpus) {
	2489	i = 0;
	2490	}
	2491	} while (i != prev_owner_cpu);
	2492	enable_preemption();
	2493
	2494	if (owner_on_core) {
	2495	prev_owner_cpu = i;
	2496	} else {
	2497	prev_owner = owner;
	2498	state = ordered_load_mtx(lock);
	2499	owner = LCK_MTX_STATE_TO_THREAD(state);
	2500	if (owner == prev_owner) {
	2501	/*
	2502	* Owner is not on core.
	2503	* Stop spinning.
	2504	*/
	2505	if (loopcount == 0) {
	2506	retval = SPINWAIT_DID_NOT_SPIN;
	2507	} else {
	2508	retval = SPINWAIT_DID_SPIN_OWNER_NOT_CORE;
	2509	}
	2510	break;
	2511	}
	2512	/*
	2513	* Fall through if the owner changed while we were scanning.
	2514	* The new owner could potentially be on core, so loop
	2515	* again.
	2516	*/
	2517	}
	2518	} else {
	2519	enable_preemption();
	2520	}
	2521	}
	2522
	2523	/*
	2524	* Save how many times we see the owner changing.
	2525	* We can roughly estimate the the mutex hold
	2526	* time and the fairness with that.
	2527	*/
	2528	if (owner != prev_owner) {
	2529	prev_owner = owner;
	2530	total_hold_time_samples++;
	2531	window_hold_time_samples++;
	2532	}
	2533
	2534	/*
	2535	* Learning window expired.
	2536	* Try to adjust the sliding_deadline.
	2537	*/
	2538	if (cur_time >= window_deadline) {
	2539	/*
	2540	* If there was not contention during the window
	2541	* stop spinning.
	2542	*/
	2543	if (window_hold_time_samples < 1) {
	2544	retval = SPINWAIT_DID_SPIN_NO_WINDOW_CONTENTION;
	2545	break;
	2546	}
	2547
	2548	if (adjust) {
	2549	/*
	2550	* For a fair lock, we'd wait for at most (NCPU-1) periods,
	2551	* but the lock is unfair, so let's try to estimate by how much.
	2552	*/
	2553	unfairness = total_hold_time_samples / real_ncpus;
	2554
	2555	if (unfairness == 0) {
	2556	/*
	2557	* We observed the owner changing `total_hold_time_samples` times which
	2558	* let us estimate the average hold time of this mutex for the duration
	2559	* of the spin time.
	2560	* avg_hold_time = (cur_time - start_time) / total_hold_time_samples;
	2561	*
	2562	* In this case spin at max avg_hold_time * (real_ncpus - 1)
	2563	*/
	2564	delta = cur_time - start_time;
	2565	sliding_deadline = start_time + (delta * (real_ncpus - 1)) / total_hold_time_samples;
	2566	} else {
	2567	/*
	2568	* In this case at least one of the other cpus was able to get the lock twice
	2569	* while I was spinning.
	2570	* We could spin longer but it won't necessarily help if the system is unfair.
	2571	* Try to randomize the wait to reduce contention.
	2572	*
	2573	* We compute how much time we could potentially spin
	2574	* and distribute it over the cpus.
	2575	*
	2576	* bias is an integer between 0 and real_ncpus.
	2577	* distributed_increment = ((high_deadline - cur_time) / real_ncpus) * bias
	2578	*/
	2579	delta = high_deadline - cur_time;
	2580	sliding_deadline = cur_time + ((delta * bias) / real_ncpus);
	2581	adjust = FALSE;
	2582	}
	2583	}
	2584
	2585	window_deadline += low_MutexSpin;
	2586	window_hold_time_samples = 0;
	2587	}
	2588
	2589	/*
	2590	* Stop spinning if we past
	2591	* the adjusted deadline.
	2592	*/
	2593	if (cur_time >= sliding_deadline) {
	2594	retval = SPINWAIT_DID_SPIN_SLIDING_THR;
	2595	break;
	2596	}
	2597
	2598	/*
	2599	* We want to arm the monitor for wfe,
	2600	* so load exclusively the lock.
	2601	*
	2602	* NOTE:
	2603	* we rely on the fact that wfe will
	2604	* eventually return even if the cache line
	2605	* is not modified. This way we will keep
	2606	* looping and checking if the deadlines expired.
	2607	*/
	2608	state = os_atomic_load_exclusive(&lock->lck_mtx_data, relaxed);
	2609	owner = LCK_MTX_STATE_TO_THREAD(state);
	2610	if (owner != NULL) {
	2611	wait_for_event();
	2612	state = ordered_load_mtx(lock);
	2613	} else {
	2614	atomic_exchange_abort();
	2615	}
	2616
	2617	loopcount++;
	2618	} while (TRUE);
	2619
	2620	done_spinning:
	2621	#if CONFIG_DTRACE
	2622	/*
	2623	* Note that we record a different probe id depending on whether
	2624	* this is a direct or indirect mutex. This allows us to
	2625	* penalize only lock groups that have debug/stats enabled
	2626	* with dtrace processing if desired.
	2627	*/
	2628	if (__probable(lock->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)) {
	2629	LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN, lock,
	2630	mach_absolute_time() - start_time);
	2631	} else {
	2632	LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_SPIN, lock,
	2633	mach_absolute_time() - start_time);
	2634	}
	2635	/* The lockstat acquire event is recorded by the caller. */
	2636	#endif
	2637
	2638	state = ordered_load_mtx(lock);
	2639
	2640	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) \| DBG_FUNC_END,
	2641	trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(LCK_MTX_STATE_TO_THREAD(state)), lock->lck_mtx_waiters, retval, 0);
	2642	if ((!has_interlock) && (retval != SPINWAIT_ACQUIRED)) {
	2643	/* We must own either the lock or the interlock on return. */
	2644	interlock_lock(lock);
	2645	}
	2646
	2647	return retval;
	2648	}
	2649
	2650
	2651	/*
	2652	* Common code for mutex locking as spinlock
	2653	*/
	2654	static inline void
	2655	lck_mtx_lock_spin_internal(lck_mtx_t *lock, boolean_t allow_held_as_mutex)
	2656	{
	2657	uintptr_t state;
	2658
	2659	interlock_lock(lock);
	2660	state = ordered_load_mtx(lock);
	2661	if (LCK_MTX_STATE_TO_THREAD(state)) {
	2662	if (allow_held_as_mutex) {
	2663	lck_mtx_lock_contended(lock, current_thread(), TRUE);
	2664	} else {
	2665	// "Always" variants can never block. If the lock is held and blocking is not allowed
	2666	// then someone is mixing always and non-always calls on the same lock, which is
	2667	// forbidden.
	2668	panic("Attempting to block on a lock taken as spin-always %p", lock);
	2669	}
	2670	return;
	2671	}
	2672	state &= ARM_LCK_WAITERS; // Preserve waiters bit
	2673	state \|= (LCK_MTX_SPIN_TAG \| LCK_ILOCK); // Add spin tag and maintain interlock
	2674	ordered_store_mtx(lock, state);
	2675	load_memory_barrier();
	2676
	2677	#if CONFIG_DTRACE
	2678	LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN_ACQUIRE, lock, 0);
	2679	#endif /* CONFIG_DTRACE */
	2680	}
	2681
	2682	/*
	2683	* Routine: lck_mtx_lock_spin
	2684	*/
	2685	void
	2686	lck_mtx_lock_spin(lck_mtx_t *lock)
	2687	{
	2688	lck_mtx_check_preemption(lock);
	2689	lck_mtx_lock_spin_internal(lock, TRUE);
	2690	}
	2691
	2692	/*
	2693	* Routine: lck_mtx_lock_spin_always
	2694	*/
	2695	void
	2696	lck_mtx_lock_spin_always(lck_mtx_t *lock)
	2697	{
	2698	lck_mtx_lock_spin_internal(lock, FALSE);
	2699	}
	2700
	2701	/*
	2702	* Routine: lck_mtx_try_lock
	2703	*/
	2704	boolean_t
	2705	lck_mtx_try_lock(lck_mtx_t *lock)
	2706	{
	2707	thread_t thread = current_thread();
	2708
	2709	lck_mtx_verify(lock);
	2710	if (os_atomic_cmpxchg(&lock->lck_mtx_data,
	2711	0, LCK_MTX_THREAD_TO_STATE(thread), acquire)) {
	2712	#if CONFIG_DTRACE
	2713	LOCKSTAT_RECORD(LS_LCK_MTX_TRY_LOCK_ACQUIRE, lock, 0);
	2714	#endif /* CONFIG_DTRACE */
	2715	return TRUE;
	2716	}
	2717	return lck_mtx_try_lock_contended(lock, thread);
	2718	}
	2719
	2720	static boolean_t NOINLINE
	2721	lck_mtx_try_lock_contended(lck_mtx_t *lock, thread_t thread)
	2722	{
	2723	thread_t holding_thread;
	2724	uintptr_t state;
	2725	int waiters;
	2726
	2727	interlock_lock(lock);
	2728	state = ordered_load_mtx(lock);
	2729	holding_thread = LCK_MTX_STATE_TO_THREAD(state);
	2730	if (holding_thread) {
	2731	interlock_unlock(lock);
	2732	return FALSE;
	2733	}
	2734	waiters = lck_mtx_lock_acquire(lock, NULL);
	2735	state = LCK_MTX_THREAD_TO_STATE(thread);
	2736	if (waiters != 0) {
	2737	state \|= ARM_LCK_WAITERS;
	2738	}
	2739	state \|= LCK_ILOCK; // Preserve interlock
	2740	ordered_store_mtx(lock, state); // Set ownership
	2741	interlock_unlock(lock); // Release interlock, enable preemption
	2742	load_memory_barrier();
	2743
	2744	turnstile_cleanup();
	2745
	2746	return TRUE;
	2747	}
	2748
	2749	static inline boolean_t
	2750	lck_mtx_try_lock_spin_internal(lck_mtx_t *lock, boolean_t allow_held_as_mutex)
	2751	{
	2752	uintptr_t state;
	2753
	2754	if (!interlock_try(lock)) {
	2755	return FALSE;
	2756	}
	2757	state = ordered_load_mtx(lock);
	2758	if (LCK_MTX_STATE_TO_THREAD(state)) {
	2759	// Lock is held as mutex
	2760	if (allow_held_as_mutex) {
	2761	interlock_unlock(lock);
	2762	} else {
	2763	// "Always" variants can never block. If the lock is held as a normal mutex
	2764	// then someone is mixing always and non-always calls on the same lock, which is
	2765	// forbidden.
	2766	panic("Spin-mutex held as full mutex %p", lock);
	2767	}
	2768	return FALSE;
	2769	}
	2770	state &= ARM_LCK_WAITERS; // Preserve waiters bit
	2771	state \|= (LCK_MTX_SPIN_TAG \| LCK_ILOCK); // Add spin tag and maintain interlock
	2772	ordered_store_mtx(lock, state);
	2773	load_memory_barrier();
	2774
	2775	#if CONFIG_DTRACE
	2776	LOCKSTAT_RECORD(LS_LCK_MTX_TRY_SPIN_LOCK_ACQUIRE, lock, 0);
	2777	#endif /* CONFIG_DTRACE */
	2778	return TRUE;
	2779	}
	2780
	2781	/*
	2782	* Routine: lck_mtx_try_lock_spin
	2783	*/
	2784	boolean_t
	2785	lck_mtx_try_lock_spin(lck_mtx_t *lock)
	2786	{
	2787	return lck_mtx_try_lock_spin_internal(lock, TRUE);
	2788	}
	2789
	2790	/*
	2791	* Routine: lck_mtx_try_lock_spin_always
	2792	*/
	2793	boolean_t
	2794	lck_mtx_try_lock_spin_always(lck_mtx_t *lock)
	2795	{
	2796	return lck_mtx_try_lock_spin_internal(lock, FALSE);
	2797	}
	2798
	2799
	2800
	2801	/*
	2802	* Routine: lck_mtx_unlock
	2803	*/
	2804	void
	2805	lck_mtx_unlock(lck_mtx_t *lock)
	2806	{
	2807	thread_t thread = current_thread();
	2808	uintptr_t state;
	2809	boolean_t ilk_held = FALSE;
	2810
	2811	lck_mtx_verify(lock);
	2812
	2813	state = ordered_load_mtx(lock);
	2814	if (state & LCK_ILOCK) {
	2815	if (LCK_MTX_STATE_TO_THREAD(state) == (thread_t)LCK_MTX_SPIN_TAG) {
	2816	ilk_held = TRUE; // Interlock is held by (presumably) this thread
	2817	}
	2818	goto slow_case;
	2819	}
	2820	// Locked as a mutex
	2821	if (os_atomic_cmpxchg(&lock->lck_mtx_data,
	2822	LCK_MTX_THREAD_TO_STATE(thread), 0, release)) {
	2823	#if CONFIG_DTRACE
	2824	LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, lock, 0);
	2825	#endif /* CONFIG_DTRACE */
	2826	return;
	2827	}
	2828	slow_case:
	2829	lck_mtx_unlock_contended(lock, thread, ilk_held);
	2830	}
	2831
	2832	static void NOINLINE
	2833	lck_mtx_unlock_contended(lck_mtx_t *lock, thread_t thread, boolean_t ilk_held)
	2834	{
	2835	uintptr_t state;
	2836	boolean_t cleanup = FALSE;
	2837
	2838	if (ilk_held) {
	2839	state = ordered_load_mtx(lock);
	2840	} else {
	2841	interlock_lock(lock);
	2842	state = ordered_load_mtx(lock);
	2843	if (thread != LCK_MTX_STATE_TO_THREAD(state)) {
	2844	panic("lck_mtx_unlock(): Attempt to release lock not owned by thread (%p)", lock);
	2845	}
	2846	if (state & ARM_LCK_WAITERS) {
	2847	if (lck_mtx_unlock_wakeup(lock, thread)) {
	2848	state = ARM_LCK_WAITERS;
	2849	} else {
	2850	state = 0;
	2851	}
	2852	cleanup = TRUE;
	2853	goto unlock;
	2854	}
	2855	}
	2856	state &= ARM_LCK_WAITERS; /* Clear state, retain waiters bit */
	2857	unlock:
	2858	state \|= LCK_ILOCK;
	2859	ordered_store_mtx(lock, state);
	2860	interlock_unlock(lock);
	2861	if (cleanup) {
	2862	/*
	2863	* Do not do any turnstile operations outside of this block.
	2864	* lock/unlock is called at early stage of boot with single thread,
	2865	* when turnstile is not yet initialized.
	2866	* Even without contention we can come throught the slow path
	2867	* if the mutex is acquired as a spin lock.
	2868	*/
	2869	turnstile_cleanup();
	2870	}
	2871
	2872	#if CONFIG_DTRACE
	2873	LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, lock, 0);
	2874	#endif /* CONFIG_DTRACE */
	2875	}
	2876
	2877	/*
	2878	* Routine: lck_mtx_assert
	2879	*/
	2880	void
	2881	lck_mtx_assert(lck_mtx_t *lock, unsigned int type)
	2882	{
	2883	thread_t thread, holder;
	2884	uintptr_t state;
	2885
	2886	state = ordered_load_mtx(lock);
	2887	holder = LCK_MTX_STATE_TO_THREAD(state);
	2888	if (holder == (thread_t)LCK_MTX_SPIN_TAG) {
	2889	// Lock is held in spin mode, owner is unknown.
	2890	return; // Punt
	2891	}
	2892	thread = current_thread();
	2893	if (type == LCK_MTX_ASSERT_OWNED) {
	2894	if (thread != holder) {
	2895	panic("lck_mtx_assert(): mutex (%p) owned", lock);
	2896	}
	2897	} else if (type == LCK_MTX_ASSERT_NOTOWNED) {
	2898	if (thread == holder) {
	2899	panic("lck_mtx_assert(): mutex (%p) not owned", lock);
	2900	}
	2901	} else {
	2902	panic("lck_mtx_assert(): invalid arg (%u)", type);
	2903	}
	2904	}
	2905
	2906	/*
	2907	* Routine: lck_mtx_ilk_unlock
	2908	*/
	2909	boolean_t
	2910	lck_mtx_ilk_unlock(lck_mtx_t *lock)
	2911	{
	2912	interlock_unlock(lock);
	2913	return TRUE;
	2914	}
	2915
	2916	/*
	2917	* Routine: lck_mtx_convert_spin
	2918	*
	2919	* Convert a mutex held for spin into a held full mutex
	2920	*/
	2921	void
	2922	lck_mtx_convert_spin(lck_mtx_t *lock)
	2923	{
	2924	thread_t thread = current_thread();
	2925	uintptr_t state;
	2926	int waiters;
	2927
	2928	state = ordered_load_mtx(lock);
	2929	if (LCK_MTX_STATE_TO_THREAD(state) == thread) {
	2930	return; // Already owned as mutex, return
	2931	}
	2932	if ((state & LCK_ILOCK) == 0 \|\| (LCK_MTX_STATE_TO_THREAD(state) != (thread_t)LCK_MTX_SPIN_TAG)) {
	2933	panic("lck_mtx_convert_spin: Not held as spinlock (%p)", lock);
	2934	}
	2935	state &= ~(LCK_MTX_THREAD_MASK); // Clear the spin tag
	2936	ordered_store_mtx(lock, state);
	2937	waiters = lck_mtx_lock_acquire(lock, NULL); // Acquire to manage priority boosts
	2938	state = LCK_MTX_THREAD_TO_STATE(thread);
	2939	if (waiters != 0) {
	2940	state \|= ARM_LCK_WAITERS;
	2941	}
	2942	state \|= LCK_ILOCK;
	2943	ordered_store_mtx(lock, state); // Set ownership
	2944	interlock_unlock(lock); // Release interlock, enable preemption
	2945	turnstile_cleanup();
	2946	}
	2947
	2948
	2949	/*
	2950	* Routine: lck_mtx_destroy
	2951	*/
	2952	void
	2953	lck_mtx_destroy(
	2954	lck_mtx_t * lck,
	2955	lck_grp_t * grp)
	2956	{
	2957	if (lck->lck_mtx_type != LCK_MTX_TYPE) {
	2958	panic("Destroying invalid mutex %p", lck);
	2959	}
	2960	if (lck->lck_mtx_tag == LCK_MTX_TAG_DESTROYED) {
	2961	panic("Destroying previously destroyed lock %p", lck);
	2962	}
	2963	lck_mtx_assert(lck, LCK_MTX_ASSERT_NOTOWNED);
	2964	lck->lck_mtx_tag = LCK_MTX_TAG_DESTROYED;
	2965	lck_grp_lckcnt_decr(grp, LCK_TYPE_MTX);
	2966	lck_grp_deallocate(grp);
	2967	return;
	2968	}
	2969
	2970	/*
	2971	* Routine: lck_spin_assert
	2972	*/
	2973	void
	2974	lck_spin_assert(lck_spin_t *lock, unsigned int type)
	2975	{
	2976	thread_t thread, holder;
	2977	uintptr_t state;
	2978
	2979	if (lock->type != LCK_SPIN_TYPE) {
	2980	panic("Invalid spinlock %p", lock);
	2981	}
	2982
	2983	state = lock->lck_spin_data;
	2984	holder = (thread_t)(state & ~LCK_ILOCK);
	2985	thread = current_thread();
	2986	if (type == LCK_ASSERT_OWNED) {
	2987	if (holder == 0) {
	2988	panic("Lock not owned %p = %lx", lock, state);
	2989	}
	2990	if (holder != thread) {
	2991	panic("Lock not owned by current thread %p = %lx", lock, state);
	2992	}
	2993	if ((state & LCK_ILOCK) == 0) {
	2994	panic("Lock bit not set %p = %lx", lock, state);
	2995	}
	2996	} else if (type == LCK_ASSERT_NOTOWNED) {
	2997	if (holder != 0) {
	2998	if (holder == thread) {
	2999	panic("Lock owned by current thread %p = %lx", lock, state);
	3000	}
	3001	}
	3002	} else {
	3003	panic("lck_spin_assert(): invalid arg (%u)", type);
	3004	}
	3005	}
	3006
	3007	boolean_t
	3008	lck_rw_lock_yield_shared(lck_rw_t *lck, boolean_t force_yield)
	3009	{
	3010	lck_rw_word_t word;
	3011
	3012	lck_rw_assert(lck, LCK_RW_ASSERT_SHARED);
	3013
	3014	word.data = ordered_load_rw(lck);
	3015	if (word.want_excl \|\| word.want_upgrade \|\| force_yield) {
	3016	lck_rw_unlock_shared(lck);
	3017	mutex_pause(2);
	3018	lck_rw_lock_shared(lck);
	3019	return TRUE;
	3020	}
	3021
	3022	return FALSE;
	3023	}
	3024
	3025	/*
	3026	* Routine: kdp_lck_mtx_lock_spin_is_acquired
	3027	* NOT SAFE: To be used only by kernel debugger to avoid deadlock.
	3028	*/
	3029	boolean_t
	3030	kdp_lck_mtx_lock_spin_is_acquired(lck_mtx_t *lck)
	3031	{
	3032	uintptr_t state;
	3033
	3034	if (not_in_kdp) {
	3035	panic("panic: spinlock acquired check done outside of kernel debugger");
	3036	}
	3037	state = ordered_load_mtx(lck);
	3038	if (state == LCK_MTX_TAG_DESTROYED) {
	3039	return FALSE;
	3040	}
	3041	if (LCK_MTX_STATE_TO_THREAD(state) \|\| (state & LCK_ILOCK)) {
	3042	return TRUE;
	3043	}
	3044	return FALSE;
	3045	}
	3046
	3047	void
	3048	kdp_lck_mtx_find_owner(__unused struct waitq * waitq, event64_t event, thread_waitinfo_t * waitinfo)
	3049	{
	3050	lck_mtx_t * mutex = LCK_EVENT_TO_MUTEX(event);
	3051	waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(mutex);
	3052	uintptr_t state = ordered_load_mtx(mutex);
	3053	thread_t holder = LCK_MTX_STATE_TO_THREAD(state);
	3054	if ((uintptr_t)holder == (uintptr_t)LCK_MTX_SPIN_TAG) {
	3055	waitinfo->owner = STACKSHOT_WAITOWNER_MTXSPIN;
	3056	} else {
	3057	assertf(state != (uintptr_t)LCK_MTX_TAG_DESTROYED, "state=0x%llx", (uint64_t)state);
	3058	assertf(state != (uintptr_t)LCK_MTX_TAG_INDIRECT, "state=0x%llx", (uint64_t)state);
	3059	waitinfo->owner = thread_tid(holder);
	3060	}
	3061	}
	3062
	3063	void
	3064	kdp_rwlck_find_owner(__unused struct waitq * waitq, event64_t event, thread_waitinfo_t * waitinfo)
	3065	{
	3066	lck_rw_t *rwlck = NULL;
	3067	switch (waitinfo->wait_type) {
	3068	case kThreadWaitKernelRWLockRead:
	3069	rwlck = READ_EVENT_TO_RWLOCK(event);
	3070	break;
	3071	case kThreadWaitKernelRWLockWrite:
	3072	case kThreadWaitKernelRWLockUpgrade:
	3073	rwlck = WRITE_EVENT_TO_RWLOCK(event);
	3074	break;
	3075	default:
	3076	panic("%s was called with an invalid blocking type", __FUNCTION__);
	3077	break;
	3078	}
	3079	waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(rwlck);
	3080	waitinfo->owner = thread_tid(rwlck->lck_rw_owner);
	3081	}