/*
 * Copyright (c) 2007-2018 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright notice
 * and this permission notice appear in all copies of the software,
 * derivative works or modified versions, and any portions thereof, and that
 * both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.
 * CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES
 * WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon the
 * rights to redistribute these changes.
 */
/*
 *	File:	kern/lock.c
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *	Date:	1985
 *
 *	Locking primitives implementation
 */

#define LOCK_PRIVATE 1

#include <mach_ldebug.h>

#include <kern/kalloc.h>
#include <kern/lock_stat.h>
#include <kern/locks.h>
#include <kern/misc_protos.h>
#include <kern/thread.h>
#include <kern/processor.h>
#include <kern/sched_prim.h>
#include <kern/debug.h>
#include <kern/kcdata.h>
#include <string.h>

#include <arm/cpu_data_internal.h>
#include <arm/proc_reg.h>
#include <arm/smp.h>
#include <machine/atomic.h>
#include <machine/machine_cpu.h>

#include <sys/kdebug.h>

#if CONFIG_DTRACE
#define DTRACE_RW_SHARED        0x0     // reader
#define DTRACE_RW_EXCL          0x1     // writer
#define DTRACE_NO_FLAG          0x0     // not applicable
#endif /* CONFIG_DTRACE */

#define LCK_RW_LCK_EXCLUSIVE_CODE       0x100
#define LCK_RW_LCK_EXCLUSIVE1_CODE      0x101
#define LCK_RW_LCK_SHARED_CODE          0x102
#define LCK_RW_LCK_SH_TO_EX_CODE        0x103
#define LCK_RW_LCK_SH_TO_EX1_CODE       0x104
#define LCK_RW_LCK_EX_TO_SH_CODE        0x105


#define ANY_LOCK_DEBUG  (USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG)

// Panic in tests that check lock usage correctness.
// These are undesirable when in a panic or a debugger is running.
#define LOCK_CORRECTNESS_PANIC() (kernel_debugger_entry_count == 0)

unsigned int    LcksOpts = 0;

#define ADAPTIVE_SPIN_ENABLE 0x1

#if __SMP__
int lck_mtx_adaptive_spin_mode = ADAPTIVE_SPIN_ENABLE;
#else /* __SMP__ */
int lck_mtx_adaptive_spin_mode = 0;
#endif /* __SMP__ */

#define SPINWAIT_OWNER_CHECK_COUNT 4

typedef enum {
	SPINWAIT_ACQUIRED,     /* Got the lock. */
	SPINWAIT_INTERLOCK,    /* Got the interlock, no owner, but caller must finish acquiring the lock. */
	SPINWAIT_DID_SPIN,     /* Got the interlock, spun, but failed to get the lock. */
	SPINWAIT_DID_NOT_SPIN, /* Got the interlock, did not spin. */
} spinwait_result_t;

#if CONFIG_DTRACE && __SMP__
extern uint64_t dtrace_spin_threshold;
#endif

/* Forwards */

extern unsigned int not_in_kdp;

/*
 * We often want to know the addresses of the callers
 * of the various lock routines. However, this information
 * is only used for debugging and statistics.
 */
typedef void   *pc_t;
#define INVALID_PC      ((void *) VM_MAX_KERNEL_ADDRESS)
#define INVALID_THREAD  ((void *) VM_MAX_KERNEL_ADDRESS)

#ifdef lint
/*
 * Eliminate lint complaints about unused local pc variables.
 */
#define OBTAIN_PC(pc, l) ++pc
#else   /* lint */
#define OBTAIN_PC(pc, l)
#endif  /* lint */


/*
 * Portable lock package implementation of usimple_locks.
 */

/*
 * Owner thread pointer when lock held in spin mode
 */
#define LCK_MTX_SPIN_TAG  0xfffffff0


#define interlock_lock(lock)    hw_lock_bit    ((hw_lock_bit_t*)(&(lock)->lck_mtx_data), LCK_ILOCK_BIT, LCK_GRP_NULL)
#define interlock_try(lock)     hw_lock_bit_try((hw_lock_bit_t*)(&(lock)->lck_mtx_data), LCK_ILOCK_BIT, LCK_GRP_NULL)
#define interlock_unlock(lock)  hw_unlock_bit  ((hw_lock_bit_t*)(&(lock)->lck_mtx_data), LCK_ILOCK_BIT)
#define lck_rw_ilk_lock(lock)   hw_lock_bit  ((hw_lock_bit_t*)(&(lock)->lck_rw_tag), LCK_RW_INTERLOCK_BIT, LCK_GRP_NULL)
#define lck_rw_ilk_unlock(lock) hw_unlock_bit((hw_lock_bit_t*)(&(lock)->lck_rw_tag), LCK_RW_INTERLOCK_BIT)

#define load_memory_barrier()   os_atomic_thread_fence(acquire)

// Enforce program order of loads and stores.
#define ordered_load(target) \
	os_atomic_load(target, compiler_acq_rel)
#define ordered_store(target, value) \
	os_atomic_store(target, value, compiler_acq_rel)

#define ordered_load_mtx(lock)                  ordered_load(&(lock)->lck_mtx_data)
#define ordered_store_mtx(lock, value)          ordered_store(&(lock)->lck_mtx_data, (value))
#define ordered_load_rw(lock)                   ordered_load(&(lock)->lck_rw_data)
#define ordered_store_rw(lock, value)           ordered_store(&(lock)->lck_rw_data, (value))
#define ordered_load_rw_owner(lock)             ordered_load(&(lock)->lck_rw_owner)
#define ordered_store_rw_owner(lock, value)     ordered_store(&(lock)->lck_rw_owner, (value))
#define ordered_load_hw(lock)                   ordered_load(&(lock)->lock_data)
#define ordered_store_hw(lock, value)           ordered_store(&(lock)->lock_data, (value))
#define ordered_load_bit(lock)                  ordered_load((lock))
#define ordered_store_bit(lock, value)          ordered_store((lock), (value))


// Prevent the compiler from reordering memory operations around this
#define compiler_memory_fence() __asm__ volatile ("" ::: "memory")

#define LOCK_PANIC_TIMEOUT      0xc00000
#define NOINLINE                __attribute__((noinline))


#if __arm__
#define interrupts_disabled(mask) (mask & PSR_INTMASK)
#else
#define interrupts_disabled(mask) (mask & DAIF_IRQF)
#endif


#if __arm__
#define enable_fiq()            __asm__ volatile ("cpsie f" ::: "memory");
#define enable_interrupts()     __asm__ volatile ("cpsie if" ::: "memory");
#endif

/*
 * Forward declarations
 */

static void lck_rw_lock_shared_gen(lck_rw_t *lck);
static void lck_rw_lock_exclusive_gen(lck_rw_t *lck);
static boolean_t lck_rw_lock_shared_to_exclusive_success(lck_rw_t *lck);
static boolean_t lck_rw_lock_shared_to_exclusive_failure(lck_rw_t *lck, uint32_t prior_lock_state);
static void lck_rw_lock_exclusive_to_shared_gen(lck_rw_t *lck, uint32_t prior_lock_state);
static lck_rw_type_t lck_rw_done_gen(lck_rw_t *lck, uint32_t prior_lock_state);
static boolean_t lck_rw_grab(lck_rw_t *lock, int mode, boolean_t wait);

/*
 * atomic exchange API is a low level abstraction of the operations
 * to atomically read, modify, and write a pointer. This abstraction works
 * for both Intel and ARMv8.1 compare and exchange atomic instructions as
 * well as the ARM exclusive instructions.
 *
 * atomic_exchange_begin() - begin exchange and retrieve current value
 * atomic_exchange_complete() - conclude an exchange
 * atomic_exchange_abort() - cancel an exchange started with atomic_exchange_begin()
 */
__unused static uint32_t
load_exclusive32(uint32_t *target, enum memory_order ord)
{
	uint32_t value;

#if __arm__
	if (memory_order_has_release(ord)) {
		// Pre-load release barrier
		atomic_thread_fence(memory_order_release);
	}
	value = __builtin_arm_ldrex(target);
#else
	if (memory_order_has_acquire(ord)) {
		value = __builtin_arm_ldaex(target);    // ldaxr
	} else {
		value = __builtin_arm_ldrex(target);    // ldxr
	}
#endif  // __arm__
	return value;
}

__unused static boolean_t
store_exclusive32(uint32_t *target, uint32_t value, enum memory_order ord)
{
	boolean_t err;

#if __arm__
	err = __builtin_arm_strex(value, target);
	if (memory_order_has_acquire(ord)) {
		// Post-store acquire barrier
		atomic_thread_fence(memory_order_acquire);
	}
#else
	if (memory_order_has_release(ord)) {
		err = __builtin_arm_stlex(value, target);       // stlxr
	} else {
		err = __builtin_arm_strex(value, target);       // stxr
	}
#endif  // __arm__
	return !err;
}

static uint32_t
atomic_exchange_begin32(uint32_t *target, uint32_t *previous, enum memory_order ord)
{
	uint32_t        val;

#if __ARM_ATOMICS_8_1
	ord = memory_order_relaxed;
#endif
	val = load_exclusive32(target, ord);
	*previous = val;
	return val;
}

static boolean_t
atomic_exchange_complete32(uint32_t *target, uint32_t previous, uint32_t newval, enum memory_order ord)
{
#if __ARM_ATOMICS_8_1
	return __c11_atomic_compare_exchange_strong((_Atomic uint32_t *)target, &previous, newval, ord, memory_order_relaxed);
#else
	(void)previous;         // Previous not needed, monitor is held
	return store_exclusive32(target, newval, ord);
#endif
}

static void
atomic_exchange_abort(void)
{
	os_atomic_clear_exclusive();
}
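
/*
 * Illustrative sketch (not part of the original source): a typical caller of
 * the atomic exchange API loops over begin/complete until the store succeeds,
 * and calls atomic_exchange_abort() whenever it decides not to publish a new
 * value, so the exclusive monitor is always released. The helper name below is
 * hypothetical; atomic_test_and_set32() immediately below is the real in-tree
 * user of this pattern.
 */
#if 0 /* example only */
static boolean_t
atomic_clear_bits32_example(uint32_t *target, uint32_t clear_mask, enum memory_order ord)
{
	uint32_t value, prev;

	for (;;) {
		value = atomic_exchange_begin32(target, &prev, ord);
		if ((value & clear_mask) == 0) {
			atomic_exchange_abort();        // Nothing to clear, release the monitor
			return FALSE;
		}
		value &= ~clear_mask;
		if (atomic_exchange_complete32(target, prev, value, ord)) {
			return TRUE;
		}
		cpu_pause();                            // Lost the race, retry
	}
}
#endif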

static boolean_t
atomic_test_and_set32(uint32_t *target, uint32_t test_mask, uint32_t set_mask, enum memory_order ord, boolean_t wait)
{
	uint32_t        value, prev;

	for (;;) {
		value = atomic_exchange_begin32(target, &prev, ord);
		if (value & test_mask) {
			if (wait) {
				wait_for_event();       // Wait with monitor held
			} else {
				atomic_exchange_abort();        // Clear exclusive monitor
			}
			return FALSE;
		}
		value |= set_mask;
		if (atomic_exchange_complete32(target, prev, value, ord)) {
			return TRUE;
		}
	}
}

inline boolean_t
hw_atomic_test_and_set32(uint32_t *target, uint32_t test_mask, uint32_t set_mask, enum memory_order ord, boolean_t wait)
{
	return atomic_test_and_set32(target, test_mask, set_mask, ord, wait);
}

void
_disable_preemption(void)
{
	thread_t     thread = current_thread();
	unsigned int count  = thread->machine.preemption_count;

	count += 1;
	if (__improbable(count == 0)) {
		panic("Preemption count overflow");
	}

	os_atomic_store(&thread->machine.preemption_count, count, compiler_acq_rel);
}

/*
 * This function checks whether an AST_URGENT has been pended.
 *
 * It is called once the preemption has been reenabled, which means the thread
 * may have been preempted right before this was called, and when this function
 * actually performs the check, we've changed CPU.
 *
 * This race is however benign: the point of AST_URGENT is to trigger a context
 * switch, so if one happened, there's nothing left to check for, and AST_URGENT
 * was cleared in the process.
 *
 * It follows that this check cannot have false negatives, which allows us
 * to avoid fiddling with interrupt state for the vast majority of cases
 * when the check will actually be negative.
 */
static NOINLINE void
kernel_preempt_check(thread_t thread)
{
	cpu_data_t *cpu_data_ptr;
	long        state;

#if __arm__
#define INTERRUPT_MASK PSR_IRQF
#else // __arm__
#define INTERRUPT_MASK DAIF_IRQF
#endif // __arm__

	/*
	 * This check is racy and could load from another CPU's pending_ast mask,
	 * but as described above, this can't have false negatives.
	 */
	cpu_data_ptr = os_atomic_load(&thread->machine.CpuDatap, compiler_acq_rel);
	if (__probable((cpu_data_ptr->cpu_pending_ast & AST_URGENT) == 0)) {
		return;
	}

	/* If interrupts are masked, we can't take an AST here */
	state = get_interrupts();
	if ((state & INTERRUPT_MASK) == 0) {
		disable_interrupts_noread();                    // Disable interrupts

		/*
		 * Reload cpu_data_ptr: a context switch would cause it to change.
		 * Now that interrupts are disabled, this will debounce false positives.
		 */
		cpu_data_ptr = os_atomic_load(&thread->machine.CpuDatap, compiler_acq_rel);
		if (thread->machine.CpuDatap->cpu_pending_ast & AST_URGENT) {
#if __arm__
#if __ARM_USER_PROTECT__
			uintptr_t up = arm_user_protect_begin(thread);
#endif // __ARM_USER_PROTECT__
			enable_fiq();
#endif // __arm__
			ast_taken_kernel();                     // Handle urgent AST
#if __arm__
#if __ARM_USER_PROTECT__
			arm_user_protect_end(thread, up, TRUE);
#endif // __ARM_USER_PROTECT__
			enable_interrupts();
			return;                                 // Return early on arm only due to FIQ enabling
#endif // __arm__
		}
		restore_interrupts(state);                      // Enable interrupts
	}
}

void
_enable_preemption(void)
{
	thread_t     thread = current_thread();
	unsigned int count  = thread->machine.preemption_count;

	if (__improbable(count == 0)) {
		panic("Preemption count underflow");
	}
	count -= 1;

	os_atomic_store(&thread->machine.preemption_count, count, compiler_acq_rel);
	if (count == 0) {
		kernel_preempt_check(thread);
	}
}

int
get_preemption_level(void)
{
	return current_thread()->machine.preemption_count;
}
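
/*
 * Illustrative sketch (not part of the original source): code that touches
 * per-CPU state typically brackets the access with a preemption-disabled
 * window so the thread cannot migrate between CPUs mid-update. The per-CPU
 * field below is hypothetical; getCpuDatap() comes from the arm cpu_data
 * headers included above.
 */
#if 0 /* example only */
static void
example_bump_per_cpu_counter(void)
{
	_disable_preemption();                  // Pin this thread to the current CPU
	getCpuDatap()->cpu_example_counter++;   // Hypothetical per-CPU field
	_enable_preemption();                   // May take an urgent AST if one is pending
}
#endif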

#if __SMP__
static inline boolean_t
interlock_try_disable_interrupts(
	lck_mtx_t *mutex,
	boolean_t *istate)
{
	*istate = ml_set_interrupts_enabled(FALSE);

	if (interlock_try(mutex)) {
		return 1;
	} else {
		ml_set_interrupts_enabled(*istate);
		return 0;
	}
}

static inline void
interlock_unlock_enable_interrupts(
	lck_mtx_t *mutex,
	boolean_t istate)
{
	interlock_unlock(mutex);
	ml_set_interrupts_enabled(istate);
}
#endif /* __SMP__ */

/*
 * Routine:	lck_spin_alloc_init
 */
lck_spin_t *
lck_spin_alloc_init(
	lck_grp_t * grp,
	lck_attr_t * attr)
{
	lck_spin_t *lck;

	if ((lck = (lck_spin_t *) kalloc(sizeof(lck_spin_t))) != 0) {
		lck_spin_init(lck, grp, attr);
	}

	return lck;
}

/*
 * Routine:	lck_spin_free
 */
void
lck_spin_free(
	lck_spin_t * lck,
	lck_grp_t * grp)
{
	lck_spin_destroy(lck, grp);
	kfree(lck, sizeof(lck_spin_t));
}

/*
 * Routine:	lck_spin_init
 */
void
lck_spin_init(
	lck_spin_t * lck,
	lck_grp_t * grp,
	__unused lck_attr_t * attr)
{
	lck->type = LCK_SPIN_TYPE;
	hw_lock_init(&lck->hwlock);
	if (grp) {
		lck_grp_reference(grp);
		lck_grp_lckcnt_incr(grp, LCK_TYPE_SPIN);
	}
}

/*
 * arm_usimple_lock is a lck_spin_t without a group or attributes
 */
void inline
arm_usimple_lock_init(simple_lock_t lck, __unused unsigned short initial_value)
{
	lck->type = LCK_SPIN_TYPE;
	hw_lock_init(&lck->hwlock);
}


/*
 * Routine:	lck_spin_lock
 */
void
lck_spin_lock(lck_spin_t *lock)
{
#if DEVELOPMENT || DEBUG
	if (lock->type != LCK_SPIN_TYPE) {
		panic("Invalid spinlock %p", lock);
	}
#endif  // DEVELOPMENT || DEBUG
	hw_lock_lock(&lock->hwlock, LCK_GRP_NULL);
}

void
lck_spin_lock_grp(lck_spin_t *lock, lck_grp_t *grp)
{
#pragma unused(grp)
#if DEVELOPMENT || DEBUG
	if (lock->type != LCK_SPIN_TYPE) {
		panic("Invalid spinlock %p", lock);
	}
#endif  // DEVELOPMENT || DEBUG
	hw_lock_lock(&lock->hwlock, grp);
}

/*
 * Routine:	lck_spin_lock_nopreempt
 */
void
lck_spin_lock_nopreempt(lck_spin_t *lock)
{
#if DEVELOPMENT || DEBUG
	if (lock->type != LCK_SPIN_TYPE) {
		panic("Invalid spinlock %p", lock);
	}
#endif  // DEVELOPMENT || DEBUG
	hw_lock_lock_nopreempt(&lock->hwlock, LCK_GRP_NULL);
}

void
lck_spin_lock_nopreempt_grp(lck_spin_t *lock, lck_grp_t *grp)
{
#pragma unused(grp)
#if DEVELOPMENT || DEBUG
	if (lock->type != LCK_SPIN_TYPE) {
		panic("Invalid spinlock %p", lock);
	}
#endif  // DEVELOPMENT || DEBUG
	hw_lock_lock_nopreempt(&lock->hwlock, grp);
}

/*
 * Routine:	lck_spin_try_lock
 */
int
lck_spin_try_lock(lck_spin_t *lock)
{
	return hw_lock_try(&lock->hwlock, LCK_GRP_NULL);
}

int
lck_spin_try_lock_grp(lck_spin_t *lock, lck_grp_t *grp)
{
#pragma unused(grp)
	return hw_lock_try(&lock->hwlock, grp);
}

/*
 * Routine:	lck_spin_try_lock_nopreempt
 */
int
lck_spin_try_lock_nopreempt(lck_spin_t *lock)
{
	return hw_lock_try_nopreempt(&lock->hwlock, LCK_GRP_NULL);
}

int
lck_spin_try_lock_nopreempt_grp(lck_spin_t *lock, lck_grp_t *grp)
{
#pragma unused(grp)
	return hw_lock_try_nopreempt(&lock->hwlock, grp);
}

/*
 * Routine:	lck_spin_unlock
 */
void
lck_spin_unlock(lck_spin_t *lock)
{
#if DEVELOPMENT || DEBUG
	if ((LCK_MTX_STATE_TO_THREAD(lock->lck_spin_data) != current_thread()) && LOCK_CORRECTNESS_PANIC()) {
		panic("Spinlock not owned by thread %p = %lx", lock, lock->lck_spin_data);
	}
	if (lock->type != LCK_SPIN_TYPE) {
		panic("Invalid spinlock type %p", lock);
	}
#endif  // DEVELOPMENT || DEBUG
	hw_lock_unlock(&lock->hwlock);
}

/*
 * Routine:	lck_spin_unlock_nopreempt
 */
void
lck_spin_unlock_nopreempt(lck_spin_t *lock)
{
#if DEVELOPMENT || DEBUG
	if ((LCK_MTX_STATE_TO_THREAD(lock->lck_spin_data) != current_thread()) && LOCK_CORRECTNESS_PANIC()) {
		panic("Spinlock not owned by thread %p = %lx", lock, lock->lck_spin_data);
	}
	if (lock->type != LCK_SPIN_TYPE) {
		panic("Invalid spinlock type %p", lock);
	}
#endif  // DEVELOPMENT || DEBUG
	hw_lock_unlock_nopreempt(&lock->hwlock);
}

/*
 * Routine:	lck_spin_destroy
 */
void
lck_spin_destroy(
	lck_spin_t * lck,
	lck_grp_t * grp)
{
	if (lck->lck_spin_data == LCK_SPIN_TAG_DESTROYED) {
		return;
	}
	lck->lck_spin_data = LCK_SPIN_TAG_DESTROYED;
	if (grp) {
		lck_grp_lckcnt_decr(grp, LCK_TYPE_SPIN);
		lck_grp_deallocate(grp);
	}
}
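
/*
 * Illustrative sketch (not part of the original source): typical lifecycle of
 * a dynamically allocated spin lock. The lock group, lock, and counter are
 * hypothetical; on DEVELOPMENT/DEBUG kernels the type checks above will panic
 * if anything other than an initialized lck_spin_t is passed in.
 */
#if 0 /* example only */
static lck_grp_t *example_grp;          // Assumed set up elsewhere via lck_grp_alloc_init()
static lck_spin_t *example_lock;
static int example_counter;

static void
example_spin_lock_usage(void)
{
	example_lock = lck_spin_alloc_init(example_grp, LCK_ATTR_NULL);

	lck_spin_lock(example_lock);            // Disables preemption via hw_lock_lock()
	example_counter++;                      // Keep the critical section short
	lck_spin_unlock(example_lock);

	lck_spin_free(example_lock, example_grp);
}
#endif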

/*
 * Routine: kdp_lck_spin_is_acquired
 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
 */
boolean_t
kdp_lck_spin_is_acquired(lck_spin_t *lck)
{
	if (not_in_kdp) {
		panic("panic: spinlock acquired check done outside of kernel debugger");
	}
	return ((lck->lck_spin_data & ~LCK_SPIN_TAG_DESTROYED) != 0) ? TRUE:FALSE;
}

/*
 * Initialize a usimple_lock.
 *
 * No change in preemption state.
 */
void
usimple_lock_init(
	usimple_lock_t l,
	unsigned short tag)
{
	simple_lock_init((simple_lock_t) l, tag);
}


/*
 * Acquire a usimple_lock.
 *
 * Returns with preemption disabled.  Note
 * that the hw_lock routines are responsible for
 * maintaining preemption state.
 */
void
(usimple_lock)(
	usimple_lock_t l
	LCK_GRP_ARG(lck_grp_t *grp))
{
	simple_lock((simple_lock_t) l, LCK_GRP_PROBEARG(grp));
}


extern void sync(void);

/*
 * Release a usimple_lock.
 *
 * Returns with preemption enabled.  Note
 * that the hw_lock routines are responsible for
 * maintaining preemption state.
 */
void
(usimple_unlock)(
	usimple_lock_t l)
{
	simple_unlock((simple_lock_t)l);
}


/*
 * Conditionally acquire a usimple_lock.
 *
 * On success, returns with preemption disabled.
 * On failure, returns with preemption in the same state
 * as when first invoked.  Note that the hw_lock routines
 * are responsible for maintaining preemption state.
 *
 * XXX No stats are gathered on a miss; I preserved this
 * behavior from the original assembly-language code, but
 * doesn't it make sense to log misses?  XXX
 */
unsigned
int
(usimple_lock_try)(
	usimple_lock_t l
	LCK_GRP_ARG(lck_grp_t *grp))
{
	return simple_lock_try((simple_lock_t) l, grp);
}
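
/*
 * Illustrative sketch (not part of the original source): usimple_lock_try()
 * returns with preemption disabled only when it succeeds, so the unlock
 * belongs on the success path alone. The lock object and counter are
 * hypothetical, and the call forms below are an assumption that mirrors how
 * the routines are declared in this file (LCK_GRP_ARG() expands to the
 * optional lock-group argument).
 */
#if 0 /* example only */
static usimple_lock_data_t example_usl;         // Assumed initialized via usimple_lock_init()
static unsigned int example_count;

static boolean_t
example_try_update(void)
{
	if (!(usimple_lock_try)(&example_usl LCK_GRP_ARG(LCK_GRP_NULL))) {
		return FALSE;                   // Preemption state unchanged on failure
	}
	example_count++;                        // Brief critical section
	(usimple_unlock)(&example_usl);         // Re-enables preemption
	return TRUE;
}
#endif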

/*
 * The C portion of the shared/exclusive locks package.
 */

/*
 * compute the deadline to spin against when
 * waiting for a change of state on a lck_rw_t
 */
#if __SMP__
static inline uint64_t
lck_rw_deadline_for_spin(lck_rw_t *lck)
{
	lck_rw_word_t   word;

	word.data = ordered_load_rw(lck);
	if (word.can_sleep) {
		if (word.r_waiting || word.w_waiting || (word.shared_count > machine_info.max_cpus)) {
			/*
			 * there are already threads waiting on this lock... this
			 * implies that they have spun beyond their deadlines waiting for
			 * the desired state to show up so we will not bother spinning at this time...
			 * or
			 * the current number of threads sharing this lock exceeds our capacity to run them
			 * concurrently and since all states we're going to spin for require the rw_shared_count
			 * to be at 0, we'll not bother spinning since the latency for this to happen is
			 * unpredictable...
			 */
			return mach_absolute_time();
		}
		return mach_absolute_time() + MutexSpin;
	} else {
		return mach_absolute_time() + (100000LL * 1000000000LL);
	}
}
#endif  // __SMP__

static boolean_t
lck_rw_drain_status(lck_rw_t *lock, uint32_t status_mask, boolean_t wait __unused)
{
#if __SMP__
	uint64_t        deadline = 0;
	uint32_t        data;

	if (wait) {
		deadline = lck_rw_deadline_for_spin(lock);
	}

	for (;;) {
		data = load_exclusive32(&lock->lck_rw_data, memory_order_acquire_smp);
		if ((data & status_mask) == 0) {
			break;
		}
		if (wait) {
			wait_for_event();
		} else {
			os_atomic_clear_exclusive();
		}
		if (!wait || (mach_absolute_time() >= deadline)) {
			return FALSE;
		}
	}
	os_atomic_clear_exclusive();
	return TRUE;
#else
	uint32_t        data;

	data = ordered_load_rw(lock);
	if ((data & status_mask) == 0) {
		return TRUE;
	} else {
		return FALSE;
	}
#endif  // __SMP__
}

/*
 * Spin while interlock is held.
 */
static inline void
lck_rw_interlock_spin(lck_rw_t *lock)
{
#if __SMP__
	uint32_t        data;

	for (;;) {
		data = load_exclusive32(&lock->lck_rw_data, memory_order_relaxed);
		if (data & LCK_RW_INTERLOCK) {
			wait_for_event();
		} else {
			os_atomic_clear_exclusive();
			return;
		}
	}
#else
	panic("lck_rw_interlock_spin(): Interlock locked %p %x", lock, lock->lck_rw_data);
#endif
}

/*
 * We disable interrupts while holding the RW interlock to prevent an
 * interrupt from exacerbating hold time.
 * Hence, local helper functions lck_interlock_lock()/lck_interlock_unlock().
 */
static inline boolean_t
lck_interlock_lock(lck_rw_t *lck)
{
	boolean_t       istate;

	istate = ml_set_interrupts_enabled(FALSE);
	lck_rw_ilk_lock(lck);
	return istate;
}

static inline void
lck_interlock_unlock(lck_rw_t *lck, boolean_t istate)
{
	lck_rw_ilk_unlock(lck);
	ml_set_interrupts_enabled(istate);
}


#define LCK_RW_GRAB_WANT        0
#define LCK_RW_GRAB_SHARED      1

static boolean_t
lck_rw_grab(lck_rw_t *lock, int mode, boolean_t wait)
{
	uint64_t        deadline = 0;
	uint32_t        data, prev;
	boolean_t       do_exch;

#if __SMP__
	if (wait) {
		deadline = lck_rw_deadline_for_spin(lock);
	}
#else
	wait = FALSE;   // Don't spin on UP systems
#endif

	for (;;) {
		data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
		if (data & LCK_RW_INTERLOCK) {
			atomic_exchange_abort();
			lck_rw_interlock_spin(lock);
			continue;
		}
		do_exch = FALSE;
		if (mode == LCK_RW_GRAB_WANT) {
			if ((data & LCK_RW_WANT_EXCL) == 0) {
				data |= LCK_RW_WANT_EXCL;
				do_exch = TRUE;
			}
		} else {        // LCK_RW_GRAB_SHARED
			if (((data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) == 0) ||
			    (((data & LCK_RW_SHARED_MASK)) && ((data & LCK_RW_PRIV_EXCL) == 0))) {
				data += LCK_RW_SHARED_READER;
				do_exch = TRUE;
			}
		}
		if (do_exch) {
			if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
				return TRUE;
			}
		} else {
			if (wait) {
				wait_for_event();               // Wait with monitor held
			} else {
				atomic_exchange_abort();        // Clear exclusive monitor
			}
			if (!wait || (mach_absolute_time() >= deadline)) {
				return FALSE;
			}
		}
	}
}


/*
 * Routine:	lck_rw_alloc_init
 */
lck_rw_t *
lck_rw_alloc_init(
	lck_grp_t       *grp,
	lck_attr_t      *attr)
{
	lck_rw_t        *lck;

	if ((lck = (lck_rw_t *)kalloc(sizeof(lck_rw_t))) != 0) {
		lck_rw_init(lck, grp, attr);
	}

	return lck;
}

/*
 * Routine:	lck_rw_free
 */
void
lck_rw_free(
	lck_rw_t        *lck,
	lck_grp_t       *grp)
{
	lck_rw_destroy(lck, grp);
	kfree(lck, sizeof(lck_rw_t));
}

/*
 * Routine:	lck_rw_init
 */
void
lck_rw_init(
	lck_rw_t        *lck,
	lck_grp_t       *grp,
	lck_attr_t      *attr)
{
	if (attr == LCK_ATTR_NULL) {
		attr = &LockDefaultLckAttr;
	}
	memset(lck, 0, sizeof(lck_rw_t));
	lck->lck_rw_can_sleep = TRUE;
	if ((attr->lck_attr_val & LCK_ATTR_RW_SHARED_PRIORITY) == 0) {
		lck->lck_rw_priv_excl = TRUE;
	}

	lck_grp_reference(grp);
	lck_grp_lckcnt_incr(grp, LCK_TYPE_RW);
}


/*
 * Routine:	lck_rw_destroy
 */
void
lck_rw_destroy(
	lck_rw_t        *lck,
	lck_grp_t       *grp)
{
	if (lck->lck_rw_tag == LCK_RW_TAG_DESTROYED) {
		return;
	}
#if MACH_LDEBUG
	lck_rw_assert(lck, LCK_RW_ASSERT_NOTHELD);
#endif
	lck->lck_rw_tag = LCK_RW_TAG_DESTROYED;
	lck_grp_lckcnt_decr(grp, LCK_TYPE_RW);
	lck_grp_deallocate(grp);
	return;
}

/*
 * Routine:	lck_rw_lock
 */
void
lck_rw_lock(
	lck_rw_t        *lck,
	lck_rw_type_t   lck_rw_type)
{
	if (lck_rw_type == LCK_RW_TYPE_SHARED) {
		lck_rw_lock_shared(lck);
	} else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) {
		lck_rw_lock_exclusive(lck);
	} else {
		panic("lck_rw_lock(): Invalid RW lock type: %x", lck_rw_type);
	}
}
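
/*
 * Illustrative sketch (not part of the original source): readers take the lock
 * shared and writers take it exclusive; both paths release through the
 * matching unlock, which funnels into lck_rw_done(). The lock and protected
 * value are hypothetical.
 */
#if 0 /* example only */
static lck_rw_t example_rw;             // Assumed initialized with lck_rw_init()
static uint64_t example_value;

static uint64_t
example_reader(void)
{
	uint64_t v;

	lck_rw_lock_shared(&example_rw);
	v = example_value;                      // Many readers may be here concurrently
	lck_rw_unlock_shared(&example_rw);
	return v;
}

static void
example_writer(uint64_t v)
{
	lck_rw_lock_exclusive(&example_rw);
	example_value = v;                      // Writer has exclusive access
	lck_rw_unlock_exclusive(&example_rw);
}
#endif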

/*
 * Routine:	lck_rw_lock_exclusive
 */
void
lck_rw_lock_exclusive(lck_rw_t *lock)
{
	thread_t        thread = current_thread();

	thread->rwlock_count++;
	if (atomic_test_and_set32(&lock->lck_rw_data,
	    (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK),
	    LCK_RW_WANT_EXCL, memory_order_acquire_smp, FALSE)) {
#if CONFIG_DTRACE
		LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
#endif  /* CONFIG_DTRACE */
	} else {
		lck_rw_lock_exclusive_gen(lock);
	}
#if MACH_ASSERT
	thread_t owner = ordered_load_rw_owner(lock);
	assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
#endif
	ordered_store_rw_owner(lock, thread);
}

/*
 * Routine:	lck_rw_lock_shared
 */
void
lck_rw_lock_shared(lck_rw_t *lock)
{
	uint32_t        data, prev;

	current_thread()->rwlock_count++;
	for (;;) {
		data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
		if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK)) {
			atomic_exchange_abort();
			lck_rw_lock_shared_gen(lock);
			break;
		}
		data += LCK_RW_SHARED_READER;
		if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
			break;
		}
		cpu_pause();
	}
#if MACH_ASSERT
	thread_t owner = ordered_load_rw_owner(lock);
	assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
#endif
#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED);
#endif  /* CONFIG_DTRACE */
	return;
}

/*
 * Routine:	lck_rw_lock_shared_to_exclusive
 *
 * False returned upon failure, in this case the shared lock is dropped.
 */
boolean_t
lck_rw_lock_shared_to_exclusive(lck_rw_t *lock)
{
	uint32_t        data, prev;

	for (;;) {
		data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
		if (data & LCK_RW_INTERLOCK) {
			atomic_exchange_abort();
			lck_rw_interlock_spin(lock);
			continue;
		}
		if (data & LCK_RW_WANT_UPGRADE) {
			data -= LCK_RW_SHARED_READER;
			if ((data & LCK_RW_SHARED_MASK) == 0) {         /* we were the last reader */
				data &= ~(LCK_RW_W_WAITING);            /* so clear the wait indicator */
			}
			if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
				return lck_rw_lock_shared_to_exclusive_failure(lock, prev);
			}
		} else {
			data |= LCK_RW_WANT_UPGRADE;            /* ask for WANT_UPGRADE */
			data -= LCK_RW_SHARED_READER;           /* and shed our read count */
			if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
				break;
			}
		}
		cpu_pause();
	}
	/* we now own the WANT_UPGRADE */
	if (data & LCK_RW_SHARED_MASK) {                        /* check to see if all of the readers are drained */
		lck_rw_lock_shared_to_exclusive_success(lock);  /* if not, we need to go wait */
	}
#if MACH_ASSERT
	thread_t owner = ordered_load_rw_owner(lock);
	assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
#endif
	ordered_store_rw_owner(lock, current_thread());
#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lock, 0);
#endif  /* CONFIG_DTRACE */
	return TRUE;
}
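
/*
 * Illustrative sketch (not part of the original source): because a FALSE
 * return from lck_rw_lock_shared_to_exclusive() means the shared hold has
 * already been dropped, the caller must re-acquire the lock and usually
 * revalidate whatever it learned under the shared hold. The lock (same
 * hypothetical example_rw as in the earlier sketch) and the revalidation
 * step are illustrative.
 */
#if 0 /* example only */
static void
example_upgrade(void)
{
	lck_rw_lock_shared(&example_rw);
	/* ... decide that the protected structure needs modification ... */
	if (!lck_rw_lock_shared_to_exclusive(&example_rw)) {
		/* Lock was dropped: take it exclusive and recheck the decision. */
		lck_rw_lock_exclusive(&example_rw);
	}
	/* ... modify under the exclusive hold ... */
	lck_rw_unlock_exclusive(&example_rw);
}
#endif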


/*
 * Routine:	lck_rw_lock_shared_to_exclusive_failure
 * Function:
 *	Fast path code has already dropped our read
 *	count and determined that someone else owns 'lck_rw_want_upgrade'
 *	if 'lck_rw_shared_count' == 0, its also already dropped 'lck_w_waiting'
 *	all we need to do here is determine if a wakeup is needed
 */
static boolean_t
lck_rw_lock_shared_to_exclusive_failure(
	lck_rw_t        *lck,
	uint32_t        prior_lock_state)
{
	thread_t        thread = current_thread();
	uint32_t        rwlock_count;

	/* Check if dropping the lock means that we need to unpromote */
	rwlock_count = thread->rwlock_count--;
#if MACH_LDEBUG
	if (rwlock_count == 0) {
		panic("rw lock count underflow for thread %p", thread);
	}
#endif
	if ((prior_lock_state & LCK_RW_W_WAITING) &&
	    ((prior_lock_state & LCK_RW_SHARED_MASK) == LCK_RW_SHARED_READER)) {
		/*
		 *	Someone else has requested upgrade.
		 *	Since we've released the read lock, wake
		 *	him up if he's blocked waiting
		 */
		thread_wakeup(LCK_RW_WRITER_EVENT(lck));
	}

	if ((rwlock_count == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
		/* sched_flags checked without lock, but will be rechecked while clearing */
		lck_rw_clear_promotion(thread, unslide_for_kdebug(lck));
	}

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_NONE,
	    VM_KERNEL_UNSLIDE_OR_PERM(lck), lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0);

	return FALSE;
}

/*
 * Routine:	lck_rw_lock_shared_to_exclusive_success
 * Function:
 *	Fast path code has already dropped our read
 *	count and successfully acquired 'lck_rw_want_upgrade'
 *	we just need to wait for the rest of the readers to drain
 *	and then we can return as the exclusive holder of this lock
 */
static boolean_t
lck_rw_lock_shared_to_exclusive_success(
	lck_rw_t        *lock)
{
	__kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock);
	int             slept = 0;
	lck_rw_word_t   word;
	wait_result_t   res;
	boolean_t       istate;
	boolean_t       not_shared;

#if CONFIG_DTRACE
	uint64_t        wait_interval = 0;
	int             readers_at_sleep = 0;
	boolean_t       dtrace_ls_initialized = FALSE;
	boolean_t       dtrace_rwl_shared_to_excl_spin, dtrace_rwl_shared_to_excl_block, dtrace_ls_enabled = FALSE;
#endif

	while (!lck_rw_drain_status(lock, LCK_RW_SHARED_MASK, FALSE)) {
		word.data = ordered_load_rw(lock);
#if CONFIG_DTRACE
		if (dtrace_ls_initialized == FALSE) {
			dtrace_ls_initialized = TRUE;
			dtrace_rwl_shared_to_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN] != 0);
			dtrace_rwl_shared_to_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK] != 0);
			dtrace_ls_enabled = dtrace_rwl_shared_to_excl_spin || dtrace_rwl_shared_to_excl_block;
			if (dtrace_ls_enabled) {
				/*
				 * Either sleeping or spinning is happening,
				 * start a timing of our delay interval now.
				 */
				readers_at_sleep = word.shared_count;
				wait_interval = mach_absolute_time();
			}
		}
#endif

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_START,
		    trace_lck, word.shared_count, 0, 0, 0);

		not_shared = lck_rw_drain_status(lock, LCK_RW_SHARED_MASK, TRUE);

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_END,
		    trace_lck, lock->lck_rw_shared_count, 0, 0, 0);

		if (not_shared) {
			break;
		}

		/*
		 * if we get here, the spin deadline in lck_rw_drain_status()
		 * has expired w/o the rw_shared_count having drained to 0
		 * check to see if we're allowed to do a thread_block
		 */
		if (word.can_sleep) {
			istate = lck_interlock_lock(lock);

			word.data = ordered_load_rw(lock);
			if (word.shared_count != 0) {
				KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_START,
				    trace_lck, word.shared_count, 0, 0, 0);

				word.w_waiting = 1;
				ordered_store_rw(lock, word.data);

				thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockUpgrade);
				res = assert_wait(LCK_RW_WRITER_EVENT(lock),
				    THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
				lck_interlock_unlock(lock, istate);

				if (res == THREAD_WAITING) {
					res = thread_block(THREAD_CONTINUE_NULL);
					slept++;
				}
				KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_END,
				    trace_lck, res, slept, 0, 0);
			} else {
				lck_interlock_unlock(lock, istate);
				break;
			}
		}
	}
#if CONFIG_DTRACE
	/*
	 * We infer whether we took the sleep/spin path above by checking readers_at_sleep.
	 */
	if (dtrace_ls_enabled == TRUE) {
		if (slept == 0) {
			LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN, lock, mach_absolute_time() - wait_interval, 0);
		} else {
			LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK, lock,
			    mach_absolute_time() - wait_interval, 1,
			    (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
		}
	}
	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lock, 1);
#endif
	return TRUE;
}


/*
 * Routine:	lck_rw_lock_exclusive_to_shared
 */

void
lck_rw_lock_exclusive_to_shared(lck_rw_t *lock)
{
	uint32_t        data, prev;

	assertf(lock->lck_rw_owner == current_thread(), "state=0x%x, owner=%p", lock->lck_rw_data, lock->lck_rw_owner);
	ordered_store_rw_owner(lock, THREAD_NULL);
	for (;;) {
		data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_release_smp);
		if (data & LCK_RW_INTERLOCK) {
#if __SMP__
			atomic_exchange_abort();
			lck_rw_interlock_spin(lock);    /* wait for interlock to clear */
			continue;
#else
			panic("lck_rw_lock_exclusive_to_shared(): Interlock locked (%p): %x", lock, data);
#endif // __SMP__
		}
		data += LCK_RW_SHARED_READER;
		if (data & LCK_RW_WANT_UPGRADE) {
			data &= ~(LCK_RW_WANT_UPGRADE);
		} else {
			data &= ~(LCK_RW_WANT_EXCL);
		}
		if (!((prev & LCK_RW_W_WAITING) && (prev & LCK_RW_PRIV_EXCL))) {
			data &= ~(LCK_RW_W_WAITING);
		}
		if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_release_smp)) {
			break;
		}
		cpu_pause();
	}
	return lck_rw_lock_exclusive_to_shared_gen(lock, prev);
}

/*
 * Routine:	lck_rw_lock_exclusive_to_shared_gen
 * Function:
 *	Fast path has already dropped
 *	our exclusive state and bumped lck_rw_shared_count
 *	all we need to do here is determine if anyone
 *	needs to be awakened.
 */
static void
lck_rw_lock_exclusive_to_shared_gen(
	lck_rw_t        *lck,
	uint32_t        prior_lock_state)
{
	__kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
	lck_rw_word_t   fake_lck;

	/*
	 * prior_lock state is a snapshot of the 1st word of the
	 * lock in question... we'll fake up a pointer to it
	 * and carefully not access anything beyond whats defined
	 * in the first word of a lck_rw_t
	 */
	fake_lck.data = prior_lock_state;

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_START,
	    trace_lck, fake_lck.want_excl, fake_lck.want_upgrade, 0, 0);

	/*
	 * don't wake up anyone waiting to take the lock exclusively
	 * since we hold a read count... when the read count drops to 0,
	 * the writers will be woken.
	 *
	 * wake up any waiting readers if we don't have any writers waiting,
	 * or the lock is NOT marked as rw_priv_excl (writers have privilege)
	 */
	if (!(fake_lck.priv_excl && fake_lck.w_waiting) && fake_lck.r_waiting) {
		thread_wakeup(LCK_RW_READER_EVENT(lck));
	}

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_END,
	    trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, lck->lck_rw_shared_count, 0);

#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_TO_SHARED_DOWNGRADE, lck, 0);
#endif
}


/*
 * Routine:	lck_rw_try_lock
 */
boolean_t
lck_rw_try_lock(
	lck_rw_t        *lck,
	lck_rw_type_t   lck_rw_type)
{
	if (lck_rw_type == LCK_RW_TYPE_SHARED) {
		return lck_rw_try_lock_shared(lck);
	} else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) {
		return lck_rw_try_lock_exclusive(lck);
	} else {
		panic("lck_rw_try_lock(): Invalid rw lock type: %x", lck_rw_type);
	}
	return FALSE;
}

/*
 * Routine:	lck_rw_try_lock_shared
 */

boolean_t
lck_rw_try_lock_shared(lck_rw_t *lock)
{
	uint32_t        data, prev;

	for (;;) {
		data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
		if (data & LCK_RW_INTERLOCK) {
#if __SMP__
			atomic_exchange_abort();
			lck_rw_interlock_spin(lock);
			continue;
#else
			panic("lck_rw_try_lock_shared(): Interlock locked (%p): %x", lock, data);
#endif
		}
		if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) {
			atomic_exchange_abort();
			return FALSE;                           /* lock is busy */
		}
		data += LCK_RW_SHARED_READER;                   /* Increment reader refcount */
		if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
			break;
		}
		cpu_pause();
	}
#if MACH_ASSERT
	thread_t owner = ordered_load_rw_owner(lock);
	assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
#endif
	current_thread()->rwlock_count++;
#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED);
#endif  /* CONFIG_DTRACE */
	return TRUE;
}


/*
 * Routine:	lck_rw_try_lock_exclusive
 */

boolean_t
lck_rw_try_lock_exclusive(lck_rw_t *lock)
{
	uint32_t        data, prev;
	thread_t        thread;

	for (;;) {
		data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
		if (data & LCK_RW_INTERLOCK) {
#if __SMP__
			atomic_exchange_abort();
			lck_rw_interlock_spin(lock);
			continue;
#else
			panic("lck_rw_try_lock_exclusive(): Interlock locked (%p): %x", lock, data);
#endif
		}
		if (data & (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) {
			atomic_exchange_abort();
			return FALSE;
		}
		data |= LCK_RW_WANT_EXCL;
		if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
			break;
		}
		cpu_pause();
	}
	thread = current_thread();
	thread->rwlock_count++;
#if MACH_ASSERT
	thread_t owner = ordered_load_rw_owner(lock);
	assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
#endif
	ordered_store_rw_owner(lock, thread);
#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
#endif  /* CONFIG_DTRACE */
	return TRUE;
}
1447
1448
1449/*
1450 * Routine: lck_rw_unlock
1451 */
1452void
1453lck_rw_unlock(
0a7de745
A
1454 lck_rw_t *lck,
1455 lck_rw_type_t lck_rw_type)
5ba3f43e 1456{
0a7de745 1457 if (lck_rw_type == LCK_RW_TYPE_SHARED) {
5ba3f43e 1458 lck_rw_unlock_shared(lck);
0a7de745 1459 } else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) {
5ba3f43e 1460 lck_rw_unlock_exclusive(lck);
0a7de745 1461 } else {
5ba3f43e 1462 panic("lck_rw_unlock(): Invalid RW lock type: %d", lck_rw_type);
0a7de745 1463 }
5ba3f43e
A
1464}
1465
1466
1467/*
1468 * Routine: lck_rw_unlock_shared
1469 */
1470void
1471lck_rw_unlock_shared(
0a7de745 1472 lck_rw_t *lck)
5ba3f43e 1473{
0a7de745 1474 lck_rw_type_t ret;
5ba3f43e
A
1475
1476 assertf(lck->lck_rw_owner == THREAD_NULL, "state=0x%x, owner=%p", lck->lck_rw_data, lck->lck_rw_owner);
1477 assertf(lck->lck_rw_shared_count > 0, "shared_count=0x%x", lck->lck_rw_shared_count);
1478 ret = lck_rw_done(lck);
1479
0a7de745 1480 if (ret != LCK_RW_TYPE_SHARED) {
5ba3f43e 1481 panic("lck_rw_unlock_shared(): lock %p held in mode: %d", lck, ret);
0a7de745 1482 }
5ba3f43e
A
1483}
1484
1485
1486/*
1487 * Routine: lck_rw_unlock_exclusive
1488 */
1489void
1490lck_rw_unlock_exclusive(
0a7de745 1491 lck_rw_t *lck)
5ba3f43e 1492{
0a7de745 1493 lck_rw_type_t ret;
5ba3f43e
A
1494
1495 assertf(lck->lck_rw_owner == current_thread(), "state=0x%x, owner=%p", lck->lck_rw_data, lck->lck_rw_owner);
1496 ret = lck_rw_done(lck);
1497
0a7de745 1498 if (ret != LCK_RW_TYPE_EXCLUSIVE) {
5ba3f43e 1499 panic("lck_rw_unlock_exclusive(): lock %p held in mode: %d", lck, ret);
0a7de745 1500 }
5ba3f43e
A
1501}
1502
1503
1504/*
1505 * Routine: lck_rw_lock_exclusive_gen
1506 */
1507static void
1508lck_rw_lock_exclusive_gen(
0a7de745 1509 lck_rw_t *lock)
5ba3f43e 1510{
0a7de745
A
1511 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock);
1512 lck_rw_word_t word;
1513 int slept = 0;
1514 boolean_t gotlock = 0;
1515 boolean_t not_shared_or_upgrade = 0;
1516 wait_result_t res = 0;
1517 boolean_t istate;
5ba3f43e 1518
0a7de745 1519#if CONFIG_DTRACE
5ba3f43e 1520 boolean_t dtrace_ls_initialized = FALSE;
0a7de745 1521 boolean_t dtrace_rwl_excl_spin, dtrace_rwl_excl_block, dtrace_ls_enabled = FALSE;
5ba3f43e
A
1522 uint64_t wait_interval = 0;
1523 int readers_at_sleep = 0;
1524#endif
1525
1526 /*
1527 * Try to acquire the lck_rw_want_excl bit.
1528 */
1529 while (!lck_rw_grab(lock, LCK_RW_GRAB_WANT, FALSE)) {
0a7de745 1530#if CONFIG_DTRACE
5ba3f43e
A
1531 if (dtrace_ls_initialized == FALSE) {
1532 dtrace_ls_initialized = TRUE;
1533 dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
1534 dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
1535 dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
1536 if (dtrace_ls_enabled) {
1537 /*
1538 * Either sleeping or spinning is happening,
1539 * start a timing of our delay interval now.
1540 */
1541 readers_at_sleep = lock->lck_rw_shared_count;
1542 wait_interval = mach_absolute_time();
1543 }
1544 }
1545#endif
1546
1547 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
1548
1549 gotlock = lck_rw_grab(lock, LCK_RW_GRAB_WANT, TRUE);
1550
1551 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_END, trace_lck, 0, 0, gotlock, 0);
1552
0a7de745 1553 if (gotlock) {
5ba3f43e 1554 break;
0a7de745 1555 }
5ba3f43e
A
1556 /*
1557 * if we get here, the deadline has expired w/o us
1558 * being able to grab the lock exclusively
1559 * check to see if we're allowed to do a thread_block
1560 */
1561 word.data = ordered_load_rw(lock);
1562 if (word.can_sleep) {
5ba3f43e
A
1563 istate = lck_interlock_lock(lock);
1564 word.data = ordered_load_rw(lock);
1565
1566 if (word.want_excl) {
5ba3f43e
A
1567 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
1568
1569 word.w_waiting = 1;
1570 ordered_store_rw(lock, word.data);
1571
1572 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite);
d9a64523 1573 res = assert_wait(LCK_RW_WRITER_EVENT(lock),
0a7de745 1574 THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
5ba3f43e
A
1575 lck_interlock_unlock(lock, istate);
1576
1577 if (res == THREAD_WAITING) {
1578 res = thread_block(THREAD_CONTINUE_NULL);
1579 slept++;
1580 }
1581 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_END, trace_lck, res, slept, 0, 0);
1582 } else {
1583 word.want_excl = 1;
1584 ordered_store_rw(lock, word.data);
1585 lck_interlock_unlock(lock, istate);
1586 break;
1587 }
1588 }
1589 }
1590 /*
1591 * Wait for readers (and upgrades) to finish...
1592 */
1593 while (!lck_rw_drain_status(lock, LCK_RW_SHARED_MASK | LCK_RW_WANT_UPGRADE, FALSE)) {
0a7de745 1594#if CONFIG_DTRACE
5ba3f43e
A
1595 /*
1596 * Either sleeping or spinning is happening, start
1597 * a timing of our delay interval now. If we set it
1598 * to -1 we don't have accurate data so we cannot later
1599 * decide to record a dtrace spin or sleep event.
1600 */
1601 if (dtrace_ls_initialized == FALSE) {
1602 dtrace_ls_initialized = TRUE;
1603 dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
1604 dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
1605 dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
1606 if (dtrace_ls_enabled) {
1607 /*
1608 * Either sleeping or spinning is happening,
1609 * start a timing of our delay interval now.
1610 */
1611 readers_at_sleep = lock->lck_rw_shared_count;
1612 wait_interval = mach_absolute_time();
1613 }
1614 }
1615#endif
1616
1617 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
1618
1619 not_shared_or_upgrade = lck_rw_drain_status(lock, LCK_RW_SHARED_MASK | LCK_RW_WANT_UPGRADE, TRUE);
1620
1621 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_END, trace_lck, 0, 0, not_shared_or_upgrade, 0);
1622
0a7de745 1623 if (not_shared_or_upgrade) {
5ba3f43e 1624 break;
0a7de745 1625 }
5ba3f43e
A
1626 /*
1627 * if we get here, the deadline has expired w/o us
1628 * being able to grab the lock exclusively
1629 * check to see if we're allowed to do a thread_block
1630 */
1631 word.data = ordered_load_rw(lock);
1632 if (word.can_sleep) {
5ba3f43e
A
1633 istate = lck_interlock_lock(lock);
1634 word.data = ordered_load_rw(lock);
1635
1636 if (word.shared_count != 0 || word.want_upgrade) {
1637 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
1638
1639 word.w_waiting = 1;
1640 ordered_store_rw(lock, word.data);
1641
1642 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite);
d9a64523 1643 res = assert_wait(LCK_RW_WRITER_EVENT(lock),
0a7de745 1644 THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
5ba3f43e
A
1645 lck_interlock_unlock(lock, istate);
1646
1647 if (res == THREAD_WAITING) {
1648 res = thread_block(THREAD_CONTINUE_NULL);
1649 slept++;
1650 }
1651 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_END, trace_lck, res, slept, 0, 0);
1652 } else {
1653 lck_interlock_unlock(lock, istate);
1654 /*
1655 * must own the lock now, since we checked for
1656 * readers or upgrade owner behind the interlock
1657 * no need for a call to 'lck_rw_drain_status'
1658 */
1659 break;
1660 }
1661 }
1662 }
1663
0a7de745 1664#if CONFIG_DTRACE
5ba3f43e
A
1665 /*
1666 * Decide what latencies we suffered that are Dtrace events.
1667 * If we have set wait_interval, then we either spun or slept.
1668 * At least we get out from under the interlock before we record
1669 * which is the best we can do here to minimize the impact
1670 * of the tracing.
1671 * If we have set wait_interval to -1, then dtrace was not enabled when we
1672 * started sleeping/spinning so we don't record this event.
1673 */
1674 if (dtrace_ls_enabled == TRUE) {
1675 if (slept == 0) {
0a7de745 1676 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_SPIN, lock,
5ba3f43e
A
1677 mach_absolute_time() - wait_interval, 1);
1678 } else {
1679 /*
1680 * For the blocking case, we also record if when we blocked
1681 * it was held for read or write, and how many readers.
1682 * Notice that above we recorded this before we dropped
1683 * the interlock so the count is accurate.
1684 */
0a7de745 1685 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_BLOCK, lock,
5ba3f43e
A
1686 mach_absolute_time() - wait_interval, 1,
1687 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1688 }
1689 }
1690 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, 1);
0a7de745 1691#endif /* CONFIG_DTRACE */
5ba3f43e
A
1692}
1693
1694/*
1695 * Routine: lck_rw_done
1696 */
1697
0a7de745
A
1698lck_rw_type_t
1699lck_rw_done(lck_rw_t *lock)
5ba3f43e 1700{
0a7de745
A
1701 uint32_t data, prev;
1702 boolean_t once = FALSE;
5ba3f43e 1703
0a7de745 1704 for (;;) {
5ba3f43e 1705 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_release_smp);
0a7de745 1706 if (data & LCK_RW_INTERLOCK) { /* wait for interlock to clear */
5ba3f43e
A
1707#if __SMP__
1708 atomic_exchange_abort();
1709 lck_rw_interlock_spin(lock);
1710 continue;
1711#else
1712 panic("lck_rw_done(): Interlock locked (%p): %x", lock, data);
1713#endif // __SMP__
1714 }
0a7de745 1715 if (data & LCK_RW_SHARED_MASK) { /* lock is held shared */
5ba3f43e
A
1716 assertf(lock->lck_rw_owner == THREAD_NULL, "state=0x%x, owner=%p", lock->lck_rw_data, lock->lck_rw_owner);
1717 data -= LCK_RW_SHARED_READER;
0a7de745 1718 if ((data & LCK_RW_SHARED_MASK) == 0) { /* if reader count has now gone to 0, check for waiters */
5ba3f43e 1719 goto check_waiters;
0a7de745
A
1720 }
1721 } else { /* if reader count == 0, must be exclusive lock */
5ba3f43e
A
1722 if (data & LCK_RW_WANT_UPGRADE) {
1723 data &= ~(LCK_RW_WANT_UPGRADE);
1724 } else {
0a7de745 1725 if (data & LCK_RW_WANT_EXCL) {
5ba3f43e 1726 data &= ~(LCK_RW_WANT_EXCL);
0a7de745 1727 } else { /* lock is not 'owned', panic */
5ba3f43e 1728 panic("Releasing non-exclusive RW lock without a reader refcount!");
0a7de745 1729 }
5ba3f43e
A
1730 }
1731 if (!once) {
1732 // Only check for holder and clear it once
1733 assertf(lock->lck_rw_owner == current_thread(), "state=0x%x, owner=%p", lock->lck_rw_data, lock->lck_rw_owner);
1734 ordered_store_rw_owner(lock, THREAD_NULL);
1735 once = TRUE;
1736 }
1737check_waiters:
1738 /*
1739 * test the original values to match what
1740 * lck_rw_done_gen is going to do to determine
1741 * which wakeups need to happen...
1742 *
1743 * if !(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting)
1744 */
1745 if (prev & LCK_RW_W_WAITING) {
1746 data &= ~(LCK_RW_W_WAITING);
0a7de745 1747 if ((prev & LCK_RW_PRIV_EXCL) == 0) {
5ba3f43e 1748 data &= ~(LCK_RW_R_WAITING);
0a7de745
A
1749 }
1750 } else {
5ba3f43e 1751 data &= ~(LCK_RW_R_WAITING);
0a7de745 1752 }
5ba3f43e 1753 }
0a7de745 1754 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_release_smp)) {
5ba3f43e 1755 break;
0a7de745 1756 }
5ba3f43e
A
1757 cpu_pause();
1758 }
1759 return lck_rw_done_gen(lock, prev);
1760}
1761
1762/*
1763 * Routine: lck_rw_done_gen
1764 *
1765 * called from the assembly language wrapper...
1766 * prior_lock_state is the value in the 1st
0a7de745 1767 * word of the lock at the time of a successful
5ba3f43e 1768 * atomic compare and exchange with the new value...
0a7de745 1769 * it represents the state of the lock before we
5ba3f43e 1770 * decremented the rw_shared_count or cleared either
0a7de745 1771 * rw_want_upgrade or rw_want_write and
5ba3f43e 1772 * the lck_x_waiting bits... since the wrapper
0a7de745 1773 * routine has already changed the state atomically,
5ba3f43e
A
1774 * we just need to decide if we should
1775 * wake up anyone and what value to return... we do
1776 * this by examining the state of the lock before
1777 * we changed it
1778 */
1779static lck_rw_type_t
1780lck_rw_done_gen(
0a7de745
A
1781 lck_rw_t *lck,
1782 uint32_t prior_lock_state)
5ba3f43e 1783{
0a7de745
A
1784 lck_rw_word_t fake_lck;
1785 lck_rw_type_t lock_type;
1786 thread_t thread;
1787 uint32_t rwlock_count;
5ba3f43e
A
1788
1789 /*
 1790 * prior_lock_state is a snapshot of the 1st word of the
1791 * lock in question... we'll fake up a pointer to it
 1792 * and carefully not access anything beyond what's defined
1793 * in the first word of a lck_rw_t
1794 */
1795 fake_lck.data = prior_lock_state;
1796
1797 if (fake_lck.shared_count <= 1) {
0a7de745 1798 if (fake_lck.w_waiting) {
5ba3f43e 1799 thread_wakeup(LCK_RW_WRITER_EVENT(lck));
0a7de745 1800 }
5ba3f43e 1801
0a7de745 1802 if (!(fake_lck.priv_excl && fake_lck.w_waiting) && fake_lck.r_waiting) {
5ba3f43e 1803 thread_wakeup(LCK_RW_READER_EVENT(lck));
0a7de745 1804 }
5ba3f43e 1805 }
0a7de745 1806 if (fake_lck.shared_count) {
5ba3f43e 1807 lock_type = LCK_RW_TYPE_SHARED;
0a7de745 1808 } else {
5ba3f43e 1809 lock_type = LCK_RW_TYPE_EXCLUSIVE;
0a7de745 1810 }
5ba3f43e
A
1811
1812 /* Check if dropping the lock means that we need to unpromote */
1813 thread = current_thread();
1814 rwlock_count = thread->rwlock_count--;
1815#if MACH_LDEBUG
0a7de745 1816 if (rwlock_count == 0) {
5ba3f43e 1817 panic("rw lock count underflow for thread %p", thread);
0a7de745 1818 }
5ba3f43e
A
1819#endif
1820 if ((rwlock_count == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
1821 /* sched_flags checked without lock, but will be rechecked while clearing */
d9a64523 1822 lck_rw_clear_promotion(thread, unslide_for_kdebug(lck));
5ba3f43e
A
1823 }
1824#if CONFIG_DTRACE
1825 LOCKSTAT_RECORD(LS_LCK_RW_DONE_RELEASE, lck, lock_type == LCK_RW_TYPE_SHARED ? 0 : 1);
1826#endif
1827 return lock_type;
1828}
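/*
 * Usage sketch (illustrative only; compiled out): a typical client of the
 * lck_rw_* KPI from <kern/locks.h>.  lck_rw_done() releases whichever mode the
 * calling thread holds and returns it; that return value is what
 * lck_rw_done_gen() above derives from the pre-release lock state.  All
 * example_* identifiers are hypothetical.
 */
#if 0
static lck_rw_t *example_rw;    /* assumed initialized with lck_rw_alloc_init() */

static void
example_reader(void)
{
	lck_rw_type_t dropped;

	lck_rw_lock_shared(example_rw);
	/* ... read the data protected by example_rw ... */
	dropped = lck_rw_done(example_rw);
	assert(dropped == LCK_RW_TYPE_SHARED);
}

static void
example_writer(void)
{
	lck_rw_type_t dropped;

	lck_rw_lock_exclusive(example_rw);
	/* ... modify the data protected by example_rw ... */
	dropped = lck_rw_done(example_rw);
	assert(dropped == LCK_RW_TYPE_EXCLUSIVE);
}
#endif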
1829
1830/*
1831 * Routine: lck_rw_lock_shared_gen
1832 * Function:
1833 * Fast path code has determined that this lock
1834 * is held exclusively... this is where we spin/block
1835 * until we can acquire the lock in the shared mode
1836 */
1837static void
1838lck_rw_lock_shared_gen(
0a7de745 1839 lck_rw_t *lck)
5ba3f43e 1840{
0a7de745
A
1841 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
1842 lck_rw_word_t word;
1843 boolean_t gotlock = 0;
1844 int slept = 0;
1845 wait_result_t res = 0;
1846 boolean_t istate;
5ba3f43e 1847
0a7de745 1848#if CONFIG_DTRACE
5ba3f43e
A
1849 uint64_t wait_interval = 0;
1850 int readers_at_sleep = 0;
1851 boolean_t dtrace_ls_initialized = FALSE;
1852 boolean_t dtrace_rwl_shared_spin, dtrace_rwl_shared_block, dtrace_ls_enabled = FALSE;
1853#endif /* CONFIG_DTRACE */
1854
0a7de745
A
1855 while (!lck_rw_grab(lck, LCK_RW_GRAB_SHARED, FALSE)) {
1856#if CONFIG_DTRACE
5ba3f43e
A
1857 if (dtrace_ls_initialized == FALSE) {
1858 dtrace_ls_initialized = TRUE;
1859 dtrace_rwl_shared_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_SPIN] != 0);
1860 dtrace_rwl_shared_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_BLOCK] != 0);
1861 dtrace_ls_enabled = dtrace_rwl_shared_spin || dtrace_rwl_shared_block;
1862 if (dtrace_ls_enabled) {
1863 /*
1864 * Either sleeping or spinning is happening,
 1865 * start timing our delay interval now.
1866 */
1867 readers_at_sleep = lck->lck_rw_shared_count;
1868 wait_interval = mach_absolute_time();
1869 }
1870 }
1871#endif
1872
1873 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_START,
0a7de745 1874 trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, 0, 0);
5ba3f43e
A
1875
1876 gotlock = lck_rw_grab(lck, LCK_RW_GRAB_SHARED, TRUE);
1877
1878 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_END,
0a7de745 1879 trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, gotlock, 0);
5ba3f43e 1880
0a7de745 1881 if (gotlock) {
5ba3f43e 1882 break;
0a7de745 1883 }
5ba3f43e
A
1884 /*
1885 * if we get here, the deadline has expired w/o us
1886 * being able to grab the lock for read
1887 * check to see if we're allowed to do a thread_block
1888 */
1889 if (lck->lck_rw_can_sleep) {
5ba3f43e
A
1890 istate = lck_interlock_lock(lck);
1891
1892 word.data = ordered_load_rw(lck);
1893 if ((word.want_excl || word.want_upgrade) &&
1894 ((word.shared_count == 0) || word.priv_excl)) {
5ba3f43e 1895 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_START,
0a7de745 1896 trace_lck, word.want_excl, word.want_upgrade, 0, 0);
5ba3f43e
A
1897
1898 word.r_waiting = 1;
1899 ordered_store_rw(lck, word.data);
1900
1901 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockRead);
d9a64523 1902 res = assert_wait(LCK_RW_READER_EVENT(lck),
0a7de745 1903 THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
5ba3f43e
A
1904 lck_interlock_unlock(lck, istate);
1905
1906 if (res == THREAD_WAITING) {
1907 res = thread_block(THREAD_CONTINUE_NULL);
1908 slept++;
1909 }
1910 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_END,
0a7de745 1911 trace_lck, res, slept, 0, 0);
5ba3f43e
A
1912 } else {
1913 word.shared_count++;
1914 ordered_store_rw(lck, word.data);
1915 lck_interlock_unlock(lck, istate);
1916 break;
1917 }
1918 }
1919 }
1920
0a7de745 1921#if CONFIG_DTRACE
5ba3f43e
A
1922 if (dtrace_ls_enabled == TRUE) {
1923 if (slept == 0) {
0a7de745 1924 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_SPIN, lck, mach_absolute_time() - wait_interval, 0);
5ba3f43e 1925 } else {
0a7de745 1926 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_BLOCK, lck,
5ba3f43e
A
1927 mach_absolute_time() - wait_interval, 0,
1928 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1929 }
1930 }
1931 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lck, 0);
0a7de745 1932#endif /* CONFIG_DTRACE */
5ba3f43e
A
1933}
1934
1935
1936void
1937lck_rw_assert(
0a7de745
A
1938 lck_rw_t *lck,
1939 unsigned int type)
5ba3f43e
A
1940{
1941 switch (type) {
1942 case LCK_RW_ASSERT_SHARED:
1943 if ((lck->lck_rw_shared_count != 0) &&
1944 (lck->lck_rw_owner == THREAD_NULL)) {
1945 return;
1946 }
1947 break;
1948 case LCK_RW_ASSERT_EXCLUSIVE:
1949 if ((lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
0a7de745 1950 (lck->lck_rw_shared_count == 0) &&
5ba3f43e
A
1951 (lck->lck_rw_owner == current_thread())) {
1952 return;
1953 }
1954 break;
1955 case LCK_RW_ASSERT_HELD:
0a7de745
A
1956 if (lck->lck_rw_shared_count != 0) {
1957 return; // Held shared
1958 }
5ba3f43e
A
1959 if ((lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
1960 (lck->lck_rw_owner == current_thread())) {
0a7de745 1961 return; // Held exclusive
5ba3f43e
A
1962 }
1963 break;
1964 case LCK_RW_ASSERT_NOTHELD:
1965 if ((lck->lck_rw_shared_count == 0) &&
0a7de745 1966 !(lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
5ba3f43e
A
1967 (lck->lck_rw_owner == THREAD_NULL)) {
1968 return;
1969 }
1970 break;
1971 default:
1972 break;
1973 }
1974 panic("rw lock (%p)%s held (mode=%u)", lck, (type == LCK_RW_ASSERT_NOTHELD ? "" : " not"), type);
1975}
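/*
 * Usage sketch (illustrative only; compiled out): lck_rw_assert() lets a helper
 * document and enforce its locking contract at runtime.  The helper below
 * assumes its caller already holds the (hypothetical) example_rw lock in
 * either shared or exclusive mode.
 */
#if 0
static lck_rw_t *example_rw;

static int
example_lookup_locked(int key)
{
	/* Caller must hold example_rw; otherwise lck_rw_assert() panics. */
	lck_rw_assert(example_rw, LCK_RW_ASSERT_HELD);
	/* ... walk a structure protected by example_rw ... */
	return key;
}
#endif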
1976
1977
1978/*
1979 * Routine: kdp_lck_rw_lock_is_acquired_exclusive
1980 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
1981 */
1982boolean_t
0a7de745
A
1983kdp_lck_rw_lock_is_acquired_exclusive(lck_rw_t *lck)
1984{
5ba3f43e
A
1985 if (not_in_kdp) {
1986 panic("panic: rw lock exclusive check done outside of kernel debugger");
1987 }
1988 return ((lck->lck_rw_want_upgrade || lck->lck_rw_want_excl) && (lck->lck_rw_shared_count == 0)) ? TRUE : FALSE;
1989}
1990
1991/*
1992 * The C portion of the mutex package. These routines are only invoked
1993 * if the optimized assembler routines can't do the work.
1994 */
1995
1996/*
1997 * Forward declaration
1998 */
1999
0a7de745 2000void
5ba3f43e 2001lck_mtx_ext_init(
0a7de745
A
2002 lck_mtx_ext_t * lck,
2003 lck_grp_t * grp,
2004 lck_attr_t * attr);
5ba3f43e
A
2005
2006/*
2007 * Routine: lck_mtx_alloc_init
2008 */
2009lck_mtx_t *
2010lck_mtx_alloc_init(
0a7de745
A
2011 lck_grp_t * grp,
2012 lck_attr_t * attr)
5ba3f43e
A
2013{
2014 lck_mtx_t *lck;
2015
0a7de745 2016 if ((lck = (lck_mtx_t *) kalloc(sizeof(lck_mtx_t))) != 0) {
5ba3f43e 2017 lck_mtx_init(lck, grp, attr);
0a7de745 2018 }
5ba3f43e 2019
0a7de745 2020 return lck;
5ba3f43e
A
2021}
2022
2023/*
2024 * Routine: lck_mtx_free
2025 */
2026void
2027lck_mtx_free(
0a7de745
A
2028 lck_mtx_t * lck,
2029 lck_grp_t * grp)
5ba3f43e
A
2030{
2031 lck_mtx_destroy(lck, grp);
0a7de745 2032 kfree(lck, sizeof(lck_mtx_t));
5ba3f43e
A
2033}
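/*
 * Usage sketch (illustrative only; compiled out): the allocation/teardown
 * lifecycle around lck_mtx_alloc_init()/lck_mtx_free().  The group and
 * attribute objects come from the standard lck_grp_* / lck_attr_* KPI;
 * every identifier beginning with example_ is hypothetical.
 */
#if 0
static lck_grp_t  *example_grp;
static lck_attr_t *example_attr;
static lck_mtx_t  *example_mtx;

static void
example_locks_init(void)
{
	example_grp  = lck_grp_alloc_init("example", LCK_GRP_ATTR_NULL);
	example_attr = lck_attr_alloc_init();
	example_mtx  = lck_mtx_alloc_init(example_grp, example_attr);
}

static void
example_locks_fini(void)
{
	lck_mtx_free(example_mtx, example_grp);   /* destroys, then frees */
	lck_attr_free(example_attr);
	lck_grp_free(example_grp);
}
#endif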
2034
2035/*
2036 * Routine: lck_mtx_init
2037 */
2038void
2039lck_mtx_init(
0a7de745
A
2040 lck_mtx_t * lck,
2041 lck_grp_t * grp,
2042 lck_attr_t * attr)
5ba3f43e 2043{
0a7de745 2044#ifdef BER_XXX
5ba3f43e
A
2045 lck_mtx_ext_t *lck_ext;
2046#endif
2047 lck_attr_t *lck_attr;
2048
0a7de745 2049 if (attr != LCK_ATTR_NULL) {
5ba3f43e 2050 lck_attr = attr;
0a7de745 2051 } else {
5ba3f43e 2052 lck_attr = &LockDefaultLckAttr;
0a7de745 2053 }
5ba3f43e 2054
0a7de745 2055#ifdef BER_XXX
5ba3f43e
A
2056 if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
2057 if ((lck_ext = (lck_mtx_ext_t *) kalloc(sizeof(lck_mtx_ext_t))) != 0) {
2058 lck_mtx_ext_init(lck_ext, grp, lck_attr);
2059 lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
2060 lck->lck_mtx_ptr = lck_ext;
2061 lck->lck_mtx_type = LCK_MTX_TYPE;
2062 }
2063 } else
2064#endif
2065 {
0a7de745 2066 lck->lck_mtx_ptr = NULL; // Clear any padding in the union fields below
5ba3f43e 2067 lck->lck_mtx_waiters = 0;
5ba3f43e
A
2068 lck->lck_mtx_type = LCK_MTX_TYPE;
2069 ordered_store_mtx(lck, 0);
2070 }
2071 lck_grp_reference(grp);
2072 lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
2073}
2074
2075/*
2076 * Routine: lck_mtx_init_ext
2077 */
2078void
2079lck_mtx_init_ext(
0a7de745
A
2080 lck_mtx_t * lck,
2081 lck_mtx_ext_t * lck_ext,
2082 lck_grp_t * grp,
2083 lck_attr_t * attr)
5ba3f43e
A
2084{
2085 lck_attr_t *lck_attr;
2086
0a7de745 2087 if (attr != LCK_ATTR_NULL) {
5ba3f43e 2088 lck_attr = attr;
0a7de745 2089 } else {
5ba3f43e 2090 lck_attr = &LockDefaultLckAttr;
0a7de745 2091 }
5ba3f43e
A
2092
2093 if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
2094 lck_mtx_ext_init(lck_ext, grp, lck_attr);
2095 lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
2096 lck->lck_mtx_ptr = lck_ext;
2097 lck->lck_mtx_type = LCK_MTX_TYPE;
2098 } else {
2099 lck->lck_mtx_waiters = 0;
5ba3f43e
A
2100 lck->lck_mtx_type = LCK_MTX_TYPE;
2101 ordered_store_mtx(lck, 0);
2102 }
2103 lck_grp_reference(grp);
2104 lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
2105}
2106
2107/*
2108 * Routine: lck_mtx_ext_init
2109 */
2110void
2111lck_mtx_ext_init(
0a7de745
A
2112 lck_mtx_ext_t * lck,
2113 lck_grp_t * grp,
2114 lck_attr_t * attr)
5ba3f43e
A
2115{
2116 bzero((void *) lck, sizeof(lck_mtx_ext_t));
2117
2118 lck->lck_mtx.lck_mtx_type = LCK_MTX_TYPE;
2119
2120 if ((attr->lck_attr_val) & LCK_ATTR_DEBUG) {
2121 lck->lck_mtx_deb.type = MUTEX_TAG;
2122 lck->lck_mtx_attr |= LCK_MTX_ATTR_DEBUG;
2123 }
2124 lck->lck_mtx_grp = grp;
2125
0a7de745 2126 if (grp->lck_grp_attr & LCK_GRP_ATTR_STAT) {
5ba3f43e 2127 lck->lck_mtx_attr |= LCK_MTX_ATTR_STAT;
0a7de745 2128 }
5ba3f43e
A
2129}
2130
2131/* The slow versions */
2132static void lck_mtx_lock_contended(lck_mtx_t *lock, thread_t thread, boolean_t interlocked);
2133static boolean_t lck_mtx_try_lock_contended(lck_mtx_t *lock, thread_t thread);
2134static void lck_mtx_unlock_contended(lck_mtx_t *lock, thread_t thread, boolean_t interlocked);
2135
0a7de745
A
2136/* The adaptive spin function */
2137static spinwait_result_t lck_mtx_lock_contended_spinwait_arm(lck_mtx_t *lock, thread_t thread, boolean_t interlocked);
2138
5ba3f43e
A
2139/*
2140 * Routine: lck_mtx_verify
2141 *
2142 * Verify if a mutex is valid
2143 */
2144static inline void
2145lck_mtx_verify(lck_mtx_t *lock)
2146{
0a7de745 2147 if (lock->lck_mtx_type != LCK_MTX_TYPE) {
5ba3f43e 2148 panic("Invalid mutex %p", lock);
0a7de745
A
2149 }
2150#if DEVELOPMENT || DEBUG
2151 if (lock->lck_mtx_tag == LCK_MTX_TAG_DESTROYED) {
5ba3f43e 2152 panic("Mutex destroyed %p", lock);
0a7de745
A
2153 }
2154#endif /* DEVELOPMENT || DEBUG */
5ba3f43e
A
2155}
2156
2157/*
2158 * Routine: lck_mtx_check_preemption
2159 *
2160 * Verify preemption is enabled when attempting to acquire a mutex.
2161 */
2162
2163static inline void
2164lck_mtx_check_preemption(lck_mtx_t *lock)
2165{
0a7de745 2166#if DEVELOPMENT || DEBUG
5ba3f43e
A
2167 int pl = get_preemption_level();
2168
0a7de745 2169 if (pl != 0) {
5ba3f43e 2170 panic("Attempt to take mutex with preemption disabled. Lock=%p, level=%d", lock, pl);
0a7de745 2171 }
5ba3f43e
A
2172#else
2173 (void)lock;
2174#endif
2175}
2176
2177/*
2178 * Routine: lck_mtx_lock
2179 */
2180void
2181lck_mtx_lock(lck_mtx_t *lock)
2182{
0a7de745 2183 thread_t thread;
5ba3f43e
A
2184
2185 lck_mtx_verify(lock);
2186 lck_mtx_check_preemption(lock);
2187 thread = current_thread();
cb323159
A
2188 if (os_atomic_cmpxchg(&lock->lck_mtx_data,
2189 0, LCK_MTX_THREAD_TO_STATE(thread), acquire)) {
0a7de745 2190#if CONFIG_DTRACE
5ba3f43e
A
2191 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, lock, 0);
2192#endif /* CONFIG_DTRACE */
2193 return;
2194 }
2195 lck_mtx_lock_contended(lock, thread, FALSE);
2196}
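/*
 * Usage sketch (illustrative only; compiled out): the common caller pattern for
 * lck_mtx_lock()/lck_mtx_unlock().  The uncontended case is the single
 * os_atomic_cmpxchg() above; contention is handled transparently in
 * lck_mtx_lock_contended().  example_mtx and example_count are hypothetical.
 */
#if 0
static lck_mtx_t *example_mtx;
static uint64_t   example_count;

static void
example_increment(void)
{
	lck_mtx_lock(example_mtx);      /* may block; preemption must be enabled */
	example_count++;
	lck_mtx_unlock(example_mtx);
}
#endif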
2197
2198/*
0a7de745 2199 * This is the slow version of mutex locking.
5ba3f43e
A
2200 */
2201static void NOINLINE
2202lck_mtx_lock_contended(lck_mtx_t *lock, thread_t thread, boolean_t interlocked)
2203{
0a7de745
A
2204 thread_t holding_thread;
2205 uintptr_t state;
2206 int waiters = 0;
2207 spinwait_result_t sw_res;
cb323159 2208 struct turnstile *ts = NULL;
d9a64523
A
2209
2210 /* Loop waiting until I see that the mutex is unowned */
0a7de745
A
2211 for (;;) {
2212 sw_res = lck_mtx_lock_contended_spinwait_arm(lock, thread, interlocked);
2213 interlocked = FALSE;
2214
2215 switch (sw_res) {
2216 case SPINWAIT_ACQUIRED:
cb323159
A
2217 if (ts != NULL) {
2218 interlock_lock(lock);
2219 turnstile_complete((uintptr_t)lock, NULL, NULL, TURNSTILE_KERNEL_MUTEX);
2220 interlock_unlock(lock);
2221 }
0a7de745
A
2222 goto done;
2223 case SPINWAIT_INTERLOCK:
2224 goto set_owner;
2225 default:
2226 break;
2227 }
2228
5ba3f43e
A
2229 state = ordered_load_mtx(lock);
2230 holding_thread = LCK_MTX_STATE_TO_THREAD(state);
0a7de745 2231 if (holding_thread == NULL) {
5ba3f43e 2232 break;
0a7de745 2233 }
5ba3f43e 2234 ordered_store_mtx(lock, (state | LCK_ILOCK | ARM_LCK_WAITERS)); // Set waiters bit and wait
cb323159 2235 lck_mtx_lock_wait(lock, holding_thread, &ts);
d9a64523 2236 /* returns interlock unlocked */
5ba3f43e 2237 }
d9a64523 2238
0a7de745 2239set_owner:
d9a64523 2240 /* Hooray, I'm the new owner! */
0a7de745
A
2241 state = ordered_load_mtx(lock);
2242
2243 if (state & ARM_LCK_WAITERS) {
2244 /* Skip lck_mtx_lock_acquire if there are no waiters. */
cb323159
A
2245 waiters = lck_mtx_lock_acquire(lock, ts);
2246 /*
2247 * lck_mtx_lock_acquire will call
2248 * turnstile_complete
2249 */
2250 } else {
2251 if (ts != NULL) {
2252 turnstile_complete((uintptr_t)lock, NULL, NULL, TURNSTILE_KERNEL_MUTEX);
2253 }
0a7de745
A
2254 }
2255
5ba3f43e 2256 state = LCK_MTX_THREAD_TO_STATE(thread);
0a7de745 2257 if (waiters != 0) {
5ba3f43e 2258 state |= ARM_LCK_WAITERS;
0a7de745 2259 }
5ba3f43e 2260#if __SMP__
0a7de745
A
2261 state |= LCK_ILOCK; // Preserve interlock
2262 ordered_store_mtx(lock, state); // Set ownership
2263 interlock_unlock(lock); // Release interlock, enable preemption
5ba3f43e 2264#else
0a7de745 2265 ordered_store_mtx(lock, state); // Set ownership
5ba3f43e
A
2266 enable_preemption();
2267#endif
0a7de745
A
2268
2269done:
5ba3f43e
A
2270 load_memory_barrier();
2271
cb323159
A
2272 assert(thread->turnstile != NULL);
2273
2274 if (ts != NULL) {
2275 turnstile_cleanup();
2276 }
2277
0a7de745 2278#if CONFIG_DTRACE
5ba3f43e
A
2279 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, lock, 0);
2280#endif /* CONFIG_DTRACE */
2281}
2282
0a7de745
A
2283/*
 2284 * Routine: lck_mtx_lock_contended_spinwait_arm
2285 *
2286 * Invoked trying to acquire a mutex when there is contention but
2287 * the holder is running on another processor. We spin for up to a maximum
2288 * time waiting for the lock to be released.
2289 */
2290static spinwait_result_t
2291lck_mtx_lock_contended_spinwait_arm(lck_mtx_t *lock, thread_t thread, boolean_t interlocked)
2292{
2293 int has_interlock = (int)interlocked;
2294#if __SMP__
2295 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock);
2296 thread_t holder;
2297 uint64_t overall_deadline;
2298 uint64_t check_owner_deadline;
2299 uint64_t cur_time;
2300 spinwait_result_t retval = SPINWAIT_DID_SPIN;
2301 int loopcount = 0;
2302 uintptr_t state;
2303 boolean_t istate;
2304
2305 if (__improbable(!(lck_mtx_adaptive_spin_mode & ADAPTIVE_SPIN_ENABLE))) {
2306 if (!has_interlock) {
2307 interlock_lock(lock);
2308 }
2309
2310 return SPINWAIT_DID_NOT_SPIN;
2311 }
2312
2313 state = ordered_load_mtx(lock);
2314
2315 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_START,
2316 trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(LCK_MTX_STATE_TO_THREAD(state)), lock->lck_mtx_waiters, 0, 0);
2317
2318 cur_time = mach_absolute_time();
2319 overall_deadline = cur_time + MutexSpin;
2320 check_owner_deadline = cur_time;
2321
2322 if (has_interlock) {
2323 istate = ml_get_interrupts_enabled();
2324 }
2325
2326 /* Snoop the lock state */
2327 state = ordered_load_mtx(lock);
2328
2329 /*
2330 * Spin while:
2331 * - mutex is locked, and
2332 * - it's locked as a spin lock, and
2333 * - owner is running on another processor, and
2334 * - owner (processor) is not idling, and
2335 * - we haven't spun for long enough.
2336 */
2337 do {
2338 if (!(state & LCK_ILOCK) || has_interlock) {
2339 if (!has_interlock) {
2340 has_interlock = interlock_try_disable_interrupts(lock, &istate);
2341 }
2342
2343 if (has_interlock) {
2344 state = ordered_load_mtx(lock);
2345 holder = LCK_MTX_STATE_TO_THREAD(state);
2346
2347 if (holder == NULL) {
2348 retval = SPINWAIT_INTERLOCK;
2349
2350 if (istate) {
2351 ml_set_interrupts_enabled(istate);
2352 }
2353
2354 break;
2355 }
2356
2357 if (!(holder->machine.machine_thread_flags & MACHINE_THREAD_FLAGS_ON_CPU) ||
2358 (holder->state & TH_IDLE)) {
2359 if (loopcount == 0) {
2360 retval = SPINWAIT_DID_NOT_SPIN;
2361 }
2362
2363 if (istate) {
2364 ml_set_interrupts_enabled(istate);
2365 }
2366
2367 break;
2368 }
2369
2370 interlock_unlock_enable_interrupts(lock, istate);
2371 has_interlock = 0;
2372 }
2373 }
2374
2375 cur_time = mach_absolute_time();
2376
2377 if (cur_time >= overall_deadline) {
2378 break;
2379 }
2380
2381 check_owner_deadline = cur_time + (MutexSpin / SPINWAIT_OWNER_CHECK_COUNT);
2382
2383 if (cur_time < check_owner_deadline) {
2384 machine_delay_until(check_owner_deadline - cur_time, check_owner_deadline);
2385 }
2386
2387 /* Snoop the lock state */
2388 state = ordered_load_mtx(lock);
2389
2390 if (state == 0) {
2391 /* Try to grab the lock. */
2392 if (os_atomic_cmpxchg(&lock->lck_mtx_data,
2393 0, LCK_MTX_THREAD_TO_STATE(thread), acquire)) {
2394 retval = SPINWAIT_ACQUIRED;
2395 break;
2396 }
2397 }
2398
2399 loopcount++;
2400 } while (TRUE);
2401
2402#if CONFIG_DTRACE
2403 /*
 2404 * We did not record a separate spin-start timestamp; overall_deadline was
 2405 * set to (start time + MutexSpin), so if dtrace is active we compute
 2406 * backwards from it to decide how long we spun.
2407 *
2408 * Note that we record a different probe id depending on whether
2409 * this is a direct or indirect mutex. This allows us to
2410 * penalize only lock groups that have debug/stats enabled
2411 * with dtrace processing if desired.
2412 */
2413 if (__probable(lock->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)) {
2414 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN, lock,
2415 mach_absolute_time() - (overall_deadline - MutexSpin));
2416 } else {
2417 LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_SPIN, lock,
2418 mach_absolute_time() - (overall_deadline - MutexSpin));
2419 }
2420 /* The lockstat acquire event is recorded by the caller. */
2421#endif
2422
2423 state = ordered_load_mtx(lock);
2424
2425 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_END,
2426 trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(LCK_MTX_STATE_TO_THREAD(state)), lock->lck_mtx_waiters, retval, 0);
2427#else /* __SMP__ */
2428 /* Spinwaiting is not useful on UP systems. */
2429#pragma unused(lock, thread)
2430 int retval = SPINWAIT_DID_NOT_SPIN;
2431#endif /* __SMP__ */
2432 if ((!has_interlock) && (retval != SPINWAIT_ACQUIRED)) {
2433 /* We must own either the lock or the interlock on return. */
2434 interlock_lock(lock);
2435 }
2436
2437 return retval;
2438}
2439
5ba3f43e
A
2440/*
2441 * Common code for mutex locking as spinlock
2442 */
2443static inline void
2444lck_mtx_lock_spin_internal(lck_mtx_t *lock, boolean_t allow_held_as_mutex)
2445{
0a7de745 2446 uintptr_t state;
5ba3f43e
A
2447
2448 interlock_lock(lock);
2449 state = ordered_load_mtx(lock);
2450 if (LCK_MTX_STATE_TO_THREAD(state)) {
0a7de745 2451 if (allow_held_as_mutex) {
5ba3f43e 2452 lck_mtx_lock_contended(lock, current_thread(), TRUE);
0a7de745 2453 } else {
5ba3f43e
A
2454 // "Always" variants can never block. If the lock is held and blocking is not allowed
2455 // then someone is mixing always and non-always calls on the same lock, which is
2456 // forbidden.
2457 panic("Attempting to block on a lock taken as spin-always %p", lock);
0a7de745 2458 }
5ba3f43e
A
2459 return;
2460 }
0a7de745
A
2461 state &= ARM_LCK_WAITERS; // Preserve waiters bit
2462 state |= (LCK_MTX_SPIN_TAG | LCK_ILOCK); // Add spin tag and maintain interlock
5ba3f43e
A
2463 ordered_store_mtx(lock, state);
2464 load_memory_barrier();
2465
0a7de745 2466#if CONFIG_DTRACE
5ba3f43e
A
2467 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN_ACQUIRE, lock, 0);
2468#endif /* CONFIG_DTRACE */
2469}
2470
2471/*
2472 * Routine: lck_mtx_lock_spin
2473 */
2474void
2475lck_mtx_lock_spin(lck_mtx_t *lock)
2476{
2477 lck_mtx_check_preemption(lock);
2478 lck_mtx_lock_spin_internal(lock, TRUE);
2479}
2480
2481/*
2482 * Routine: lck_mtx_lock_spin_always
2483 */
2484void
2485lck_mtx_lock_spin_always(lck_mtx_t *lock)
2486{
2487 lck_mtx_lock_spin_internal(lock, FALSE);
2488}
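/*
 * Usage sketch (illustrative only; compiled out): taking a mutex in spin mode.
 * lck_mtx_lock_spin() may fall back to blocking if the lock is currently held
 * as a full mutex, while lck_mtx_lock_spin_always() must only be used on locks
 * that are never taken as a sleeping mutex (mixing the two panics, per the
 * comments in lck_mtx_lock_spin_internal()).  example_mtx is hypothetical.
 */
#if 0
static lck_mtx_t *example_mtx;

static void
example_brief_update(void)
{
	lck_mtx_lock_spin(example_mtx);         /* returns with interlock held, preemption disabled */
	/* ... very short critical section, no blocking allowed ... */
	lck_mtx_unlock(example_mtx);            /* handles the spin-held case via the slow path */
}
#endif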
2489
2490/*
2491 * Routine: lck_mtx_try_lock
2492 */
2493boolean_t
2494lck_mtx_try_lock(lck_mtx_t *lock)
2495{
0a7de745 2496 thread_t thread = current_thread();
5ba3f43e
A
2497
2498 lck_mtx_verify(lock);
cb323159
A
2499 if (os_atomic_cmpxchg(&lock->lck_mtx_data,
2500 0, LCK_MTX_THREAD_TO_STATE(thread), acquire)) {
0a7de745 2501#if CONFIG_DTRACE
5ba3f43e
A
2502 LOCKSTAT_RECORD(LS_LCK_MTX_TRY_LOCK_ACQUIRE, lock, 0);
2503#endif /* CONFIG_DTRACE */
2504 return TRUE;
2505 }
2506 return lck_mtx_try_lock_contended(lock, thread);
2507}
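/*
 * Usage sketch (illustrative only; compiled out): lck_mtx_try_lock() returns
 * FALSE instead of blocking when the mutex is owned, so a caller that cannot
 * sleep, or that wants to avoid a lock-ordering inversion, can back off and
 * retry later.  example_mtx is hypothetical.
 */
#if 0
static lck_mtx_t *example_mtx;

static boolean_t
example_try_flush(void)
{
	if (!lck_mtx_try_lock(example_mtx)) {
		return FALSE;           /* owner is active; caller retries later */
	}
	/* ... do the work that required the mutex ... */
	lck_mtx_unlock(example_mtx);
	return TRUE;
}
#endif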
2508
2509static boolean_t NOINLINE
2510lck_mtx_try_lock_contended(lck_mtx_t *lock, thread_t thread)
2511{
0a7de745
A
2512 thread_t holding_thread;
2513 uintptr_t state;
2514 int waiters;
5ba3f43e 2515
0a7de745 2516#if __SMP__
5ba3f43e
A
2517 interlock_lock(lock);
2518 state = ordered_load_mtx(lock);
2519 holding_thread = LCK_MTX_STATE_TO_THREAD(state);
2520 if (holding_thread) {
2521 interlock_unlock(lock);
2522 return FALSE;
2523 }
2524#else
2525 disable_preemption_for_thread(thread);
2526 state = ordered_load_mtx(lock);
0a7de745 2527 if (state & LCK_ILOCK) {
5ba3f43e 2528 panic("Unexpected interlock set (%p)", lock);
0a7de745 2529 }
5ba3f43e
A
2530 holding_thread = LCK_MTX_STATE_TO_THREAD(state);
2531 if (holding_thread) {
2532 enable_preemption();
2533 return FALSE;
2534 }
2535 state |= LCK_ILOCK;
2536 ordered_store_mtx(lock, state);
0a7de745 2537#endif // __SMP__
cb323159 2538 waiters = lck_mtx_lock_acquire(lock, NULL);
5ba3f43e 2539 state = LCK_MTX_THREAD_TO_STATE(thread);
0a7de745 2540 if (waiters != 0) {
5ba3f43e 2541 state |= ARM_LCK_WAITERS;
0a7de745 2542 }
5ba3f43e 2543#if __SMP__
0a7de745
A
2544 state |= LCK_ILOCK; // Preserve interlock
2545 ordered_store_mtx(lock, state); // Set ownership
2546 interlock_unlock(lock); // Release interlock, enable preemption
5ba3f43e 2547#else
0a7de745 2548 ordered_store_mtx(lock, state); // Set ownership
5ba3f43e
A
2549 enable_preemption();
2550#endif
2551 load_memory_barrier();
cb323159
A
2552
2553 turnstile_cleanup();
2554
5ba3f43e
A
2555 return TRUE;
2556}
2557
2558static inline boolean_t
2559lck_mtx_try_lock_spin_internal(lck_mtx_t *lock, boolean_t allow_held_as_mutex)
2560{
0a7de745 2561 uintptr_t state;
5ba3f43e 2562
0a7de745 2563 if (!interlock_try(lock)) {
5ba3f43e 2564 return FALSE;
0a7de745 2565 }
5ba3f43e 2566 state = ordered_load_mtx(lock);
0a7de745 2567 if (LCK_MTX_STATE_TO_THREAD(state)) {
5ba3f43e 2568 // Lock is held as mutex
0a7de745 2569 if (allow_held_as_mutex) {
5ba3f43e 2570 interlock_unlock(lock);
0a7de745 2571 } else {
5ba3f43e
A
2572 // "Always" variants can never block. If the lock is held as a normal mutex
2573 // then someone is mixing always and non-always calls on the same lock, which is
2574 // forbidden.
2575 panic("Spin-mutex held as full mutex %p", lock);
0a7de745 2576 }
5ba3f43e
A
2577 return FALSE;
2578 }
0a7de745
A
2579 state &= ARM_LCK_WAITERS; // Preserve waiters bit
2580 state |= (LCK_MTX_SPIN_TAG | LCK_ILOCK); // Add spin tag and maintain interlock
5ba3f43e
A
2581 ordered_store_mtx(lock, state);
2582 load_memory_barrier();
2583
0a7de745 2584#if CONFIG_DTRACE
5ba3f43e
A
2585 LOCKSTAT_RECORD(LS_LCK_MTX_TRY_SPIN_LOCK_ACQUIRE, lock, 0);
2586#endif /* CONFIG_DTRACE */
2587 return TRUE;
2588}
2589
2590/*
2591 * Routine: lck_mtx_try_lock_spin
2592 */
2593boolean_t
2594lck_mtx_try_lock_spin(lck_mtx_t *lock)
2595{
2596 return lck_mtx_try_lock_spin_internal(lock, TRUE);
2597}
2598
2599/*
2600 * Routine: lck_mtx_try_lock_spin_always
2601 */
2602boolean_t
2603lck_mtx_try_lock_spin_always(lck_mtx_t *lock)
2604{
2605 return lck_mtx_try_lock_spin_internal(lock, FALSE);
2606}
2607
2608
2609
2610/*
2611 * Routine: lck_mtx_unlock
2612 */
2613void
2614lck_mtx_unlock(lck_mtx_t *lock)
2615{
0a7de745
A
2616 thread_t thread = current_thread();
2617 uintptr_t state;
2618 boolean_t ilk_held = FALSE;
5ba3f43e
A
2619
2620 lck_mtx_verify(lock);
2621
2622 state = ordered_load_mtx(lock);
2623 if (state & LCK_ILOCK) {
0a7de745
A
2624 if (LCK_MTX_STATE_TO_THREAD(state) == (thread_t)LCK_MTX_SPIN_TAG) {
2625 ilk_held = TRUE; // Interlock is held by (presumably) this thread
2626 }
5ba3f43e
A
2627 goto slow_case;
2628 }
2629 // Locked as a mutex
cb323159
A
2630 if (os_atomic_cmpxchg(&lock->lck_mtx_data,
2631 LCK_MTX_THREAD_TO_STATE(thread), 0, release)) {
0a7de745 2632#if CONFIG_DTRACE
5ba3f43e
A
2633 LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, lock, 0);
2634#endif /* CONFIG_DTRACE */
2635 return;
2636 }
2637slow_case:
2638 lck_mtx_unlock_contended(lock, thread, ilk_held);
2639}
2640
2641static void NOINLINE
2642lck_mtx_unlock_contended(lck_mtx_t *lock, thread_t thread, boolean_t ilk_held)
2643{
0a7de745 2644 uintptr_t state;
cb323159 2645 boolean_t cleanup = FALSE;
5ba3f43e
A
2646
2647 if (ilk_held) {
2648 state = ordered_load_mtx(lock);
2649 } else {
0a7de745 2650#if __SMP__
5ba3f43e
A
2651 interlock_lock(lock);
2652 state = ordered_load_mtx(lock);
0a7de745 2653 if (thread != LCK_MTX_STATE_TO_THREAD(state)) {
5ba3f43e 2654 panic("lck_mtx_unlock(): Attempt to release lock not owned by thread (%p)", lock);
0a7de745 2655 }
5ba3f43e
A
2656#else
2657 disable_preemption_for_thread(thread);
2658 state = ordered_load_mtx(lock);
0a7de745 2659 if (state & LCK_ILOCK) {
5ba3f43e 2660 panic("lck_mtx_unlock(): Unexpected interlock set (%p)", lock);
0a7de745
A
2661 }
2662 if (thread != LCK_MTX_STATE_TO_THREAD(state)) {
5ba3f43e 2663 panic("lck_mtx_unlock(): Attempt to release lock not owned by thread (%p)", lock);
0a7de745 2664 }
5ba3f43e
A
2665 state |= LCK_ILOCK;
2666 ordered_store_mtx(lock, state);
2667#endif
d9a64523 2668 if (state & ARM_LCK_WAITERS) {
cb323159
A
2669 if (lck_mtx_unlock_wakeup(lock, thread)) {
2670 state = ARM_LCK_WAITERS;
2671 } else {
2672 state = 0;
2673 }
2674 cleanup = TRUE;
2675 goto unlock;
d9a64523 2676 }
5ba3f43e 2677 }
d9a64523 2678 state &= ARM_LCK_WAITERS; /* Clear state, retain waiters bit */
cb323159 2679unlock:
5ba3f43e
A
2680#if __SMP__
2681 state |= LCK_ILOCK;
2682 ordered_store_mtx(lock, state);
2683 interlock_unlock(lock);
2684#else
2685 ordered_store_mtx(lock, state);
2686 enable_preemption();
2687#endif
cb323159
A
2688 if (cleanup) {
2689 /*
2690 * Do not do any turnstile operations outside of this block.
 2691 * lock/unlock is called at an early stage of boot, with a single thread,
 2692 * when the turnstile subsystem is not yet initialized.
 2693 * Even without contention we can come through the slow path
2694 * if the mutex is acquired as a spin lock.
2695 */
2696 turnstile_cleanup();
2697 }
5ba3f43e 2698
0a7de745 2699#if CONFIG_DTRACE
5ba3f43e
A
2700 LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, lock, 0);
2701#endif /* CONFIG_DTRACE */
2702}
2703
2704/*
2705 * Routine: lck_mtx_assert
2706 */
2707void
2708lck_mtx_assert(lck_mtx_t *lock, unsigned int type)
2709{
0a7de745
A
2710 thread_t thread, holder;
2711 uintptr_t state;
5ba3f43e
A
2712
2713 state = ordered_load_mtx(lock);
2714 holder = LCK_MTX_STATE_TO_THREAD(state);
2715 if (holder == (thread_t)LCK_MTX_SPIN_TAG) {
0a7de745
A
2716 // Lock is held in spin mode, owner is unknown.
2717 return; // Punt
5ba3f43e
A
2718 }
2719 thread = current_thread();
2720 if (type == LCK_MTX_ASSERT_OWNED) {
0a7de745 2721 if (thread != holder) {
5ba3f43e 2722 panic("lck_mtx_assert(): mutex (%p) owned", lock);
0a7de745 2723 }
5ba3f43e 2724 } else if (type == LCK_MTX_ASSERT_NOTOWNED) {
0a7de745 2725 if (thread == holder) {
5ba3f43e 2726 panic("lck_mtx_assert(): mutex (%p) not owned", lock);
0a7de745
A
2727 }
2728 } else {
5ba3f43e 2729 panic("lck_mtx_assert(): invalid arg (%u)", type);
0a7de745 2730 }
5ba3f43e
A
2731}
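/*
 * Usage sketch (illustrative only; compiled out): lck_mtx_assert() documents a
 * function's locking contract at runtime.  As noted above, ownership cannot be
 * checked while the lock is held in spin mode.  example_mtx is hypothetical.
 */
#if 0
static lck_mtx_t *example_mtx;

static void
example_update_locked(void)
{
	lck_mtx_assert(example_mtx, LCK_MTX_ASSERT_OWNED);
	/* ... mutate state that example_mtx protects ... */
}
#endif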
2732
2733/*
2734 * Routine: lck_mtx_ilk_unlock
2735 */
2736boolean_t
2737lck_mtx_ilk_unlock(lck_mtx_t *lock)
2738{
2739 interlock_unlock(lock);
2740 return TRUE;
2741}
2742
2743/*
2744 * Routine: lck_mtx_convert_spin
2745 *
2746 * Convert a mutex held for spin into a held full mutex
2747 */
2748void
2749lck_mtx_convert_spin(lck_mtx_t *lock)
2750{
0a7de745
A
2751 thread_t thread = current_thread();
2752 uintptr_t state;
2753 int waiters;
5ba3f43e
A
2754
2755 state = ordered_load_mtx(lock);
0a7de745
A
2756 if (LCK_MTX_STATE_TO_THREAD(state) == thread) {
2757 return; // Already owned as mutex, return
2758 }
2759 if ((state & LCK_ILOCK) == 0 || (LCK_MTX_STATE_TO_THREAD(state) != (thread_t)LCK_MTX_SPIN_TAG)) {
5ba3f43e 2760 panic("lck_mtx_convert_spin: Not held as spinlock (%p)", lock);
0a7de745
A
2761 }
2762 state &= ~(LCK_MTX_THREAD_MASK); // Clear the spin tag
5ba3f43e 2763 ordered_store_mtx(lock, state);
cb323159 2764 waiters = lck_mtx_lock_acquire(lock, NULL); // Acquire to manage priority boosts
5ba3f43e 2765 state = LCK_MTX_THREAD_TO_STATE(thread);
0a7de745 2766 if (waiters != 0) {
5ba3f43e 2767 state |= ARM_LCK_WAITERS;
0a7de745 2768 }
5ba3f43e
A
2769#if __SMP__
2770 state |= LCK_ILOCK;
0a7de745
A
2771 ordered_store_mtx(lock, state); // Set ownership
2772 interlock_unlock(lock); // Release interlock, enable preemption
5ba3f43e 2773#else
0a7de745 2774 ordered_store_mtx(lock, state); // Set ownership
5ba3f43e
A
2775 enable_preemption();
2776#endif
cb323159 2777 turnstile_cleanup();
5ba3f43e
A
2778}
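/*
 * Usage sketch (illustrative only; compiled out): a caller that takes the lock
 * in spin mode for a quick check and converts it to a full mutex only when it
 * discovers it must do work that may block.  example_* identifiers are
 * hypothetical.
 */
#if 0
static lck_mtx_t *example_mtx;
static boolean_t  example_needs_slow_work;

static void
example_maybe_slow_path(void)
{
	lck_mtx_lock_spin(example_mtx);
	if (example_needs_slow_work) {
		lck_mtx_convert_spin(example_mtx);      /* now held as a full mutex */
		/* ... work that may block ... */
	}
	lck_mtx_unlock(example_mtx);
}
#endif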
2779
2780
2781/*
2782 * Routine: lck_mtx_destroy
2783 */
2784void
2785lck_mtx_destroy(
0a7de745
A
2786 lck_mtx_t * lck,
2787 lck_grp_t * grp)
5ba3f43e 2788{
0a7de745 2789 if (lck->lck_mtx_type != LCK_MTX_TYPE) {
5ba3f43e 2790 panic("Destroying invalid mutex %p", lck);
0a7de745
A
2791 }
2792 if (lck->lck_mtx_tag == LCK_MTX_TAG_DESTROYED) {
5ba3f43e 2793 panic("Destroying previously destroyed lock %p", lck);
0a7de745 2794 }
5ba3f43e
A
2795 lck_mtx_assert(lck, LCK_MTX_ASSERT_NOTOWNED);
2796 lck->lck_mtx_tag = LCK_MTX_TAG_DESTROYED;
2797 lck_grp_lckcnt_decr(grp, LCK_TYPE_MTX);
2798 lck_grp_deallocate(grp);
2799 return;
2800}
2801
2802/*
2803 * Routine: lck_spin_assert
2804 */
2805void
2806lck_spin_assert(lck_spin_t *lock, unsigned int type)
2807{
0a7de745
A
2808 thread_t thread, holder;
2809 uintptr_t state;
5ba3f43e 2810
0a7de745 2811 if (lock->type != LCK_SPIN_TYPE) {
5ba3f43e 2812 panic("Invalid spinlock %p", lock);
0a7de745 2813 }
5ba3f43e
A
2814
2815 state = lock->lck_spin_data;
2816 holder = (thread_t)(state & ~LCK_ILOCK);
2817 thread = current_thread();
2818 if (type == LCK_ASSERT_OWNED) {
0a7de745 2819 if (holder == 0) {
5ba3f43e 2820 panic("Lock not owned %p = %lx", lock, state);
0a7de745
A
2821 }
2822 if (holder != thread) {
5ba3f43e 2823 panic("Lock not owned by current thread %p = %lx", lock, state);
0a7de745
A
2824 }
2825 if ((state & LCK_ILOCK) == 0) {
5ba3f43e 2826 panic("Lock bit not set %p = %lx", lock, state);
0a7de745 2827 }
5ba3f43e
A
2828 } else if (type == LCK_ASSERT_NOTOWNED) {
2829 if (holder != 0) {
0a7de745 2830 if (holder == thread) {
5ba3f43e 2831 panic("Lock owned by current thread %p = %lx", lock, state);
0a7de745 2832 }
5ba3f43e 2833 }
0a7de745 2834 } else {
5ba3f43e 2835 panic("lck_spin_assert(): invalid arg (%u)", type);
0a7de745 2836 }
5ba3f43e
A
2837}
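/*
 * Usage sketch (illustrative only; compiled out): lck_spin_assert() with the
 * LCK_ASSERT_OWNED/LCK_ASSERT_NOTOWNED checks handled above.  example_spin is
 * hypothetical.
 */
#if 0
static lck_spin_t *example_spin;

static void
example_spin_section(void)
{
	lck_spin_assert(example_spin, LCK_ASSERT_NOTOWNED);
	lck_spin_lock(example_spin);
	lck_spin_assert(example_spin, LCK_ASSERT_OWNED);
	/* ... short, non-blocking critical section ... */
	lck_spin_unlock(example_spin);
}
#endif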
2838
2839boolean_t
2840lck_rw_lock_yield_shared(lck_rw_t *lck, boolean_t force_yield)
2841{
0a7de745 2842 lck_rw_word_t word;
5ba3f43e
A
2843
2844 lck_rw_assert(lck, LCK_RW_ASSERT_SHARED);
2845
2846 word.data = ordered_load_rw(lck);
2847 if (word.want_excl || word.want_upgrade || force_yield) {
2848 lck_rw_unlock_shared(lck);
2849 mutex_pause(2);
2850 lck_rw_lock_shared(lck);
2851 return TRUE;
2852 }
2853
2854 return FALSE;
2855}
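/*
 * Usage sketch (illustrative only; compiled out): a long read-side iteration
 * that periodically offers the lock to waiting writers via
 * lck_rw_lock_yield_shared().  If the routine yielded, the lock was dropped
 * and re-acquired, so the caller must revalidate the protected structure.
 * example_* identifiers are hypothetical.
 */
#if 0
static lck_rw_t *example_rw;

static void
example_scan(void)
{
	int i;

	lck_rw_lock_shared(example_rw);
	for (i = 0; i < 1024; i++) {
		/* ... examine one element protected by example_rw ... */
		if (lck_rw_lock_yield_shared(example_rw, FALSE)) {
			/* Lock was dropped and re-taken: revalidate before continuing. */
		}
	}
	lck_rw_unlock_shared(example_rw);
}
#endif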
2856
2857/*
2858 * Routine: kdp_lck_mtx_lock_spin_is_acquired
2859 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
2860 */
2861boolean_t
2862kdp_lck_mtx_lock_spin_is_acquired(lck_mtx_t *lck)
2863{
0a7de745 2864 uintptr_t state;
5ba3f43e
A
2865
2866 if (not_in_kdp) {
2867 panic("panic: spinlock acquired check done outside of kernel debugger");
2868 }
2869 state = ordered_load_mtx(lck);
0a7de745 2870 if (state == LCK_MTX_TAG_DESTROYED) {
5ba3f43e 2871 return FALSE;
0a7de745
A
2872 }
2873 if (LCK_MTX_STATE_TO_THREAD(state) || (state & LCK_ILOCK)) {
5ba3f43e 2874 return TRUE;
0a7de745 2875 }
5ba3f43e
A
2876 return FALSE;
2877}
2878
2879void
2880kdp_lck_mtx_find_owner(__unused struct waitq * waitq, event64_t event, thread_waitinfo_t * waitinfo)
2881{
2882 lck_mtx_t * mutex = LCK_EVENT_TO_MUTEX(event);
2883 waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(mutex);
2884 uintptr_t state = ordered_load_mtx(mutex);
2885 thread_t holder = LCK_MTX_STATE_TO_THREAD(state);
2886 if ((uintptr_t)holder == (uintptr_t)LCK_MTX_SPIN_TAG) {
2887 waitinfo->owner = STACKSHOT_WAITOWNER_MTXSPIN;
2888 } else {
2889 assertf(state != (uintptr_t)LCK_MTX_TAG_DESTROYED, "state=0x%llx", (uint64_t)state);
2890 assertf(state != (uintptr_t)LCK_MTX_TAG_INDIRECT, "state=0x%llx", (uint64_t)state);
2891 waitinfo->owner = thread_tid(holder);
2892 }
2893}
2894
2895void
2896kdp_rwlck_find_owner(__unused struct waitq * waitq, event64_t event, thread_waitinfo_t * waitinfo)
2897{
0a7de745
A
2898 lck_rw_t *rwlck = NULL;
2899 switch (waitinfo->wait_type) {
2900 case kThreadWaitKernelRWLockRead:
2901 rwlck = READ_EVENT_TO_RWLOCK(event);
2902 break;
2903 case kThreadWaitKernelRWLockWrite:
2904 case kThreadWaitKernelRWLockUpgrade:
2905 rwlck = WRITE_EVENT_TO_RWLOCK(event);
2906 break;
2907 default:
2908 panic("%s was called with an invalid blocking type", __FUNCTION__);
2909 break;
5ba3f43e
A
2910 }
2911 waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(rwlck);
2912 waitinfo->owner = thread_tid(rwlck->lck_rw_owner);
2913}