/*
 * Copyright (c) 2007-2018 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright notice
 * and this permission notice appear in all copies of the software,
 * derivative works or modified versions, and any portions thereof, and that
 * both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.
 * CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES
 * WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 * Author: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Locking primitives implementation
 */
#define LOCK_PRIVATE 1

#include <mach_ldebug.h>

#include <kern/zalloc.h>
#include <kern/lock_stat.h>
#include <kern/locks.h>
#include <kern/misc_protos.h>
#include <kern/thread.h>
#include <kern/processor.h>
#include <kern/sched_prim.h>
#include <kern/debug.h>
#include <kern/kcdata.h>

#include <arm/cpu_internal.h>
#include <arm/cpu_data.h>
#include <arm/cpu_data_internal.h>
#include <arm/proc_reg.h>

#include <machine/atomic.h>
#include <machine/machine_cpu.h>

#include <sys/kdebug.h>
#if CONFIG_DTRACE
#define DTRACE_RW_SHARED        0x0     /* reader */
#define DTRACE_RW_EXCL          0x1     /* writer */
#define DTRACE_NO_FLAG          0x0     /* not applicable */
#endif /* CONFIG_DTRACE */

#define LCK_RW_LCK_EXCLUSIVE_CODE       0x100
#define LCK_RW_LCK_EXCLUSIVE1_CODE      0x101
#define LCK_RW_LCK_SHARED_CODE          0x102
#define LCK_RW_LCK_SH_TO_EX_CODE        0x103
#define LCK_RW_LCK_SH_TO_EX1_CODE       0x104
#define LCK_RW_LCK_EX_TO_SH_CODE        0x105
#define ANY_LOCK_DEBUG  (USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG)

// Panic in tests that check lock usage correctness.
// These are undesirable when in a panic or a debugger is running.
#define LOCK_CORRECTNESS_PANIC() (kernel_debugger_entry_count == 0)

#define ADAPTIVE_SPIN_ENABLE 0x1

int lck_mtx_adaptive_spin_mode = ADAPTIVE_SPIN_ENABLE;

#define SPINWAIT_OWNER_CHECK_COUNT 4
typedef enum {
	SPINWAIT_ACQUIRED,                      /* Got the lock. */
	SPINWAIT_INTERLOCK,                     /* Got the interlock, no owner, but caller must finish acquiring the lock. */
	SPINWAIT_DID_SPIN_HIGH_THR,             /* Got the interlock, spun, but failed to get the lock. */
	SPINWAIT_DID_SPIN_OWNER_NOT_CORE,       /* Got the interlock, spun, but failed to get the lock. */
	SPINWAIT_DID_SPIN_NO_WINDOW_CONTENTION, /* Got the interlock, spun, but failed to get the lock. */
	SPINWAIT_DID_SPIN_SLIDING_THR,          /* Got the interlock, spun, but failed to get the lock. */
	SPINWAIT_DID_NOT_SPIN,                  /* Got the interlock, did not spin. */
} spinwait_result_t;
#if CONFIG_DTRACE
extern uint64_t dtrace_spin_threshold;
#endif

extern unsigned int not_in_kdp;

/*
 * We often want to know the addresses of the callers
 * of the various lock routines. However, this information
 * is only used for debugging and statistics.
 */
#define INVALID_PC      ((void *) VM_MAX_KERNEL_ADDRESS)
#define INVALID_THREAD  ((void *) VM_MAX_KERNEL_ADDRESS)

#ifdef  lint
/*
 * Eliminate lint complaints about unused local pc variables.
 */
#define OBTAIN_PC(pc, l)        ++pc
#else   /* lint */
#define OBTAIN_PC(pc, l)
#endif  /* lint */
/*
 * Portable lock package implementation of usimple_locks.
 */

/*
 * Owner thread pointer when lock held in spin mode
 */
#define LCK_MTX_SPIN_TAG  0xfffffff0

#define interlock_lock(lock)    hw_lock_bit((hw_lock_bit_t*)(&(lock)->lck_mtx_data), LCK_ILOCK_BIT, LCK_GRP_NULL)
#define interlock_try(lock)     hw_lock_bit_try((hw_lock_bit_t*)(&(lock)->lck_mtx_data), LCK_ILOCK_BIT, LCK_GRP_NULL)
#define interlock_unlock(lock)  hw_unlock_bit((hw_lock_bit_t*)(&(lock)->lck_mtx_data), LCK_ILOCK_BIT)
#define lck_rw_ilk_lock(lock)   hw_lock_bit((hw_lock_bit_t*)(&(lock)->lck_rw_tag), LCK_RW_INTERLOCK_BIT, LCK_GRP_NULL)
#define lck_rw_ilk_unlock(lock) hw_unlock_bit((hw_lock_bit_t*)(&(lock)->lck_rw_tag), LCK_RW_INTERLOCK_BIT)
#define load_memory_barrier()   os_atomic_thread_fence(acquire)

// Enforce program order of loads and stores.
#define ordered_load(target) \
	os_atomic_load(target, compiler_acq_rel)
#define ordered_store(target, value) \
	os_atomic_store(target, value, compiler_acq_rel)

#define ordered_load_mtx(lock)                  ordered_load(&(lock)->lck_mtx_data)
#define ordered_store_mtx(lock, value)          ordered_store(&(lock)->lck_mtx_data, (value))
#define ordered_load_rw(lock)                   ordered_load(&(lock)->lck_rw_data)
#define ordered_store_rw(lock, value)           ordered_store(&(lock)->lck_rw_data, (value))
#define ordered_load_rw_owner(lock)             ordered_load(&(lock)->lck_rw_owner)
#define ordered_store_rw_owner(lock, value)     ordered_store(&(lock)->lck_rw_owner, (value))
#define ordered_load_hw(lock)                   ordered_load(&(lock)->lock_data)
#define ordered_store_hw(lock, value)           ordered_store(&(lock)->lock_data, (value))
#define ordered_load_bit(lock)                  ordered_load((lock))
#define ordered_store_bit(lock, value)          ordered_store((lock), (value))

// Prevent the compiler from reordering memory operations around this
#define compiler_memory_fence() __asm__ volatile ("" ::: "memory")

#define LOCK_PANIC_TIMEOUT      0xc00000
#define NOINLINE                __attribute__((noinline))

#if __arm__
#define interrupts_disabled(mask) (mask & PSR_INTMASK)
#else
#define interrupts_disabled(mask) (mask & DAIF_IRQF)
#endif

#if __arm__
#define enable_fiq()            __asm__ volatile ("cpsie  f" ::: "memory");
#define enable_interrupts()     __asm__ volatile ("cpsie if" ::: "memory");
#endif
ZONE_VIEW_DEFINE(ZV_LCK_SPIN, "lck_spin",
    KHEAP_ID_DEFAULT, sizeof(lck_spin_t));

ZONE_VIEW_DEFINE(ZV_LCK_MTX, "lck_mtx",
    KHEAP_ID_DEFAULT, sizeof(lck_mtx_t));

ZONE_VIEW_DEFINE(ZV_LCK_MTX_EXT, "lck_mtx_ext",
    KHEAP_ID_DEFAULT, sizeof(lck_mtx_ext_t));

ZONE_VIEW_DEFINE(ZV_LCK_RW, "lck_rw",
    KHEAP_ID_DEFAULT, sizeof(lck_rw_t));
/*
 * Forward declarations
 */

static void lck_rw_lock_shared_gen(lck_rw_t *lck);
static void lck_rw_lock_exclusive_gen(lck_rw_t *lck);
static boolean_t lck_rw_lock_shared_to_exclusive_success(lck_rw_t *lck);
static boolean_t lck_rw_lock_shared_to_exclusive_failure(lck_rw_t *lck, uint32_t prior_lock_state);
static void lck_rw_lock_exclusive_to_shared_gen(lck_rw_t *lck, uint32_t prior_lock_state);
static lck_rw_type_t lck_rw_done_gen(lck_rw_t *lck, uint32_t prior_lock_state);
static boolean_t lck_rw_grab(lck_rw_t *lock, int mode, boolean_t wait);
/*
 * atomic exchange API is a low level abstraction of the operations
 * to atomically read, modify, and write a pointer.  This abstraction works
 * for both Intel and ARMv8.1 compare and exchange atomic instructions as
 * well as the ARM exclusive instructions.
 *
 * atomic_exchange_begin() - begin exchange and retrieve current value
 * atomic_exchange_complete() - conclude an exchange
 * atomic_exchange_abort() - cancel an exchange started with atomic_exchange_begin()
 */
__unused static uint32_t
load_exclusive32(uint32_t *target, enum memory_order ord)
{
	uint32_t value;

#if __arm__
	if (_os_atomic_mo_has_release(ord)) {
		// Pre-load release barrier
		atomic_thread_fence(memory_order_release);
	}
	value = __builtin_arm_ldrex(target);
#else
	if (_os_atomic_mo_has_acquire(ord)) {
		value = __builtin_arm_ldaex(target);    // ldaxr
	} else {
		value = __builtin_arm_ldrex(target);    // ldxr
	}
#endif  // __arm__
	return value;
}

__unused static boolean_t
store_exclusive32(uint32_t *target, uint32_t value, enum memory_order ord)
{
	boolean_t err;

#if __arm__
	err = __builtin_arm_strex(value, target);
	if (_os_atomic_mo_has_acquire(ord)) {
		// Post-store acquire barrier
		atomic_thread_fence(memory_order_acquire);
	}
#else
	if (_os_atomic_mo_has_release(ord)) {
		err = __builtin_arm_stlex(value, target);       // stlxr
	} else {
		err = __builtin_arm_strex(value, target);       // stxr
	}
#endif  // __arm__
	return !err;
}

static uint32_t
atomic_exchange_begin32(uint32_t *target, uint32_t *previous, enum memory_order ord)
{
	uint32_t val;

#if __ARM_ATOMICS_8_1
	ord = memory_order_relaxed;
#endif
	val = load_exclusive32(target, ord);
	*previous = val;
	return val;
}

static boolean_t
atomic_exchange_complete32(uint32_t *target, uint32_t previous, uint32_t newval, enum memory_order ord)
{
#if __ARM_ATOMICS_8_1
	return __c11_atomic_compare_exchange_strong((_Atomic uint32_t *)target, &previous, newval, ord, memory_order_relaxed);
#else
	(void)previous;         // Previous not needed, monitor is held
	return store_exclusive32(target, newval, ord);
#endif
}

static void
atomic_exchange_abort(void)
{
	os_atomic_clear_exclusive();
}

static boolean_t
atomic_test_and_set32(uint32_t *target, uint32_t test_mask, uint32_t set_mask, enum memory_order ord, boolean_t wait)
{
	uint32_t value, prev;

	for (;;) {
		value = atomic_exchange_begin32(target, &prev, ord);
		if (value & test_mask) {
			if (wait) {
				wait_for_event();       // Wait with monitor held
			} else {
				atomic_exchange_abort();        // Clear exclusive monitor
			}
			return FALSE;
		}
		value |= set_mask;
		if (atomic_exchange_complete32(target, prev, value, ord)) {
			return TRUE;
		}
	}
}

boolean_t
hw_atomic_test_and_set32(uint32_t *target, uint32_t test_mask, uint32_t set_mask, enum memory_order ord, boolean_t wait)
{
	return atomic_test_and_set32(target, test_mask, set_mask, ord, wait);
}
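/*
 * Illustrative sketch (not part of the original source): the typical
 * begin/complete retry loop built on the exchange primitives above.
 * The helper name atomic_or32_example is hypothetical.
 */
__unused static uint32_t
atomic_or32_example(uint32_t *target, uint32_t bits, enum memory_order ord)
{
	uint32_t value, prev;

	for (;;) {
		value = atomic_exchange_begin32(target, &prev, ord);
		value |= bits;
		if (atomic_exchange_complete32(target, prev, value, ord)) {
			return prev;    // exchange committed; return the prior value
		}
		// The store-exclusive (or CAS) failed: start the exchange over.
	}
}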
/*
 * To help _disable_preemption() inline everywhere with LTO,
 * we keep these nice non inlineable functions as the panic()
 * codegen setup is quite large and for weird reasons causes a frame.
 */
static void NOINLINE
_disable_preemption_overflow(void)
{
	panic("Preemption count overflow");
}

void
_disable_preemption(void)
{
	thread_t     thread = current_thread();
	unsigned int count  = thread->machine.preemption_count;

	if (__improbable(++count == 0)) {
		_disable_preemption_overflow();
	}

	os_atomic_store(&thread->machine.preemption_count, count, compiler_acq_rel);
}
/*
 * This function checks whether an AST_URGENT has been pended.
 *
 * It is called once the preemption has been reenabled, which means the thread
 * may have been preempted right before this was called, and when this function
 * actually performs the check, we've changed CPU.
 *
 * This race is however benign: the point of AST_URGENT is to trigger a context
 * switch, so if one happened, there's nothing left to check for, and AST_URGENT
 * was cleared in the process.
 *
 * It follows that this check cannot have false negatives, which allows us
 * to avoid fiddling with interrupt state for the vast majority of cases
 * when the check will actually be negative.
 */
static inline void
kernel_preempt_check(thread_t thread)
{
	cpu_data_t *cpu_data_ptr;
	long        state;

#if __arm__
#define INTERRUPT_MASK PSR_IRQF
#else   // __arm__
#define INTERRUPT_MASK DAIF_IRQF
#endif  // __arm__

	/*
	 * This check is racy and could load from another CPU's pending_ast mask,
	 * but as described above, this can't have false negatives.
	 */
	cpu_data_ptr = os_atomic_load(&thread->machine.CpuDatap, compiler_acq_rel);
	if (__probable((cpu_data_ptr->cpu_pending_ast & AST_URGENT) == 0)) {
		return;
	}

	/* If interrupts are masked, we can't take an AST here */
	state = get_interrupts();
	if ((state & INTERRUPT_MASK) == 0) {
		disable_interrupts_noread();                    // Disable interrupts

		/*
		 * Reload cpu_data_ptr: a context switch would cause it to change.
		 * Now that interrupts are disabled, this will debounce false positives.
		 */
		cpu_data_ptr = os_atomic_load(&thread->machine.CpuDatap, compiler_acq_rel);
		if (thread->machine.CpuDatap->cpu_pending_ast & AST_URGENT) {
#if __ARM_USER_PROTECT__
			uintptr_t up = arm_user_protect_begin(thread);
#endif  // __ARM_USER_PROTECT__
			ast_taken_kernel();                     // Handle urgent AST
#if __ARM_USER_PROTECT__
			arm_user_protect_end(thread, up, TRUE);
#endif  // __ARM_USER_PROTECT__
#if __arm__
			return;                                 // Return early on arm only due to FIQ enabling
#endif  // __arm__
		}
		restore_interrupts(state);                      // Enable interrupts
	}
}
/*
 * To help _enable_preemption() inline everywhere with LTO,
 * we keep these nice non inlineable functions as the panic()
 * codegen setup is quite large and for weird reasons causes a frame.
 */
static void NOINLINE
_enable_preemption_underflow(void)
{
	panic("Preemption count underflow");
}

void
_enable_preemption(void)
{
	thread_t     thread = current_thread();
	unsigned int count  = thread->machine.preemption_count;

	if (__improbable(count == 0)) {
		_enable_preemption_underflow();
	}
	count -= 1;

	os_atomic_store(&thread->machine.preemption_count, count, compiler_acq_rel);
	if (count == 0) {
		kernel_preempt_check(thread);
	}

	os_compiler_barrier();
}

int
get_preemption_level(void)
{
	return current_thread()->machine.preemption_count;
}
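/*
 * Illustrative sketch (not part of the original source): how the preemption
 * helpers above are typically paired around a short critical section. The
 * function name preemption_bracket_example is hypothetical.
 */
__unused static void
preemption_bracket_example(void)
{
	_disable_preemption();
	/* ... touch per-CPU state that must not migrate between CPUs ... */
	_enable_preemption();   // may run kernel_preempt_check() once the count hits 0
}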
/*
 * Routine: lck_spin_alloc_init
 */
lck_spin_t *
lck_spin_alloc_init(
	lck_grp_t * grp,
	lck_attr_t * attr)
{
	lck_spin_t *lck;

	lck = zalloc(ZV_LCK_SPIN);
	lck_spin_init(lck, grp, attr);
	return lck;
}

/*
 * Routine: lck_spin_free
 */
void
lck_spin_free(
	lck_spin_t * lck,
	lck_grp_t * grp)
{
	lck_spin_destroy(lck, grp);
	zfree(ZV_LCK_SPIN, lck);
}

/*
 * Routine: lck_spin_init
 */
void
lck_spin_init(
	lck_spin_t * lck,
	lck_grp_t * grp,
	__unused lck_attr_t * attr)
{
	lck->type = LCK_SPIN_TYPE;
	hw_lock_init(&lck->hwlock);
	if (grp) {
		lck_grp_reference(grp);
		lck_grp_lckcnt_incr(grp, LCK_TYPE_SPIN);
	}
}

/*
 * arm_usimple_lock is a lck_spin_t without a group or attributes
 */
MARK_AS_HIBERNATE_TEXT void inline
arm_usimple_lock_init(simple_lock_t lck, __unused unsigned short initial_value)
{
	lck->type = LCK_SPIN_TYPE;
	hw_lock_init(&lck->hwlock);
}
/*
 * Routine: lck_spin_lock
 */
void
lck_spin_lock(lck_spin_t *lock)
{
#if DEVELOPMENT || DEBUG
	if (lock->type != LCK_SPIN_TYPE) {
		panic("Invalid spinlock %p", lock);
	}
#endif  // DEVELOPMENT || DEBUG
	hw_lock_lock(&lock->hwlock, LCK_GRP_NULL);
}

void
lck_spin_lock_grp(lck_spin_t *lock, lck_grp_t *grp)
{
#if DEVELOPMENT || DEBUG
	if (lock->type != LCK_SPIN_TYPE) {
		panic("Invalid spinlock %p", lock);
	}
#endif  // DEVELOPMENT || DEBUG
	hw_lock_lock(&lock->hwlock, grp);
}

/*
 * Routine: lck_spin_lock_nopreempt
 */
void
lck_spin_lock_nopreempt(lck_spin_t *lock)
{
#if DEVELOPMENT || DEBUG
	if (lock->type != LCK_SPIN_TYPE) {
		panic("Invalid spinlock %p", lock);
	}
#endif  // DEVELOPMENT || DEBUG
	hw_lock_lock_nopreempt(&lock->hwlock, LCK_GRP_NULL);
}

void
lck_spin_lock_nopreempt_grp(lck_spin_t *lock, lck_grp_t *grp)
{
#if DEVELOPMENT || DEBUG
	if (lock->type != LCK_SPIN_TYPE) {
		panic("Invalid spinlock %p", lock);
	}
#endif  // DEVELOPMENT || DEBUG
	hw_lock_lock_nopreempt(&lock->hwlock, grp);
}
/*
 * Routine: lck_spin_try_lock
 */
int
lck_spin_try_lock(lck_spin_t *lock)
{
	return hw_lock_try(&lock->hwlock, LCK_GRP_NULL);
}

int
lck_spin_try_lock_grp(lck_spin_t *lock, lck_grp_t *grp)
{
	return hw_lock_try(&lock->hwlock, grp);
}

/*
 * Routine: lck_spin_try_lock_nopreempt
 */
int
lck_spin_try_lock_nopreempt(lck_spin_t *lock)
{
	return hw_lock_try_nopreempt(&lock->hwlock, LCK_GRP_NULL);
}

int
lck_spin_try_lock_nopreempt_grp(lck_spin_t *lock, lck_grp_t *grp)
{
	return hw_lock_try_nopreempt(&lock->hwlock, grp);
}
/*
 * Routine: lck_spin_unlock
 */
void
lck_spin_unlock(lck_spin_t *lock)
{
#if DEVELOPMENT || DEBUG
	if ((LCK_MTX_STATE_TO_THREAD(lock->lck_spin_data) != current_thread()) && LOCK_CORRECTNESS_PANIC()) {
		panic("Spinlock not owned by thread %p = %lx", lock, lock->lck_spin_data);
	}
	if (lock->type != LCK_SPIN_TYPE) {
		panic("Invalid spinlock type %p", lock);
	}
#endif  // DEVELOPMENT || DEBUG
	hw_lock_unlock(&lock->hwlock);
}

/*
 * Routine: lck_spin_unlock_nopreempt
 */
void
lck_spin_unlock_nopreempt(lck_spin_t *lock)
{
#if DEVELOPMENT || DEBUG
	if ((LCK_MTX_STATE_TO_THREAD(lock->lck_spin_data) != current_thread()) && LOCK_CORRECTNESS_PANIC()) {
		panic("Spinlock not owned by thread %p = %lx", lock, lock->lck_spin_data);
	}
	if (lock->type != LCK_SPIN_TYPE) {
		panic("Invalid spinlock type %p", lock);
	}
#endif  // DEVELOPMENT || DEBUG
	hw_lock_unlock_nopreempt(&lock->hwlock);
}

/*
 * Routine: lck_spin_destroy
 */
void
lck_spin_destroy(
	lck_spin_t * lck,
	lck_grp_t * grp)
{
	if (lck->lck_spin_data == LCK_SPIN_TAG_DESTROYED) {
		return;
	}
	lck->lck_spin_data = LCK_SPIN_TAG_DESTROYED;
	if (grp) {
		lck_grp_lckcnt_decr(grp, LCK_TYPE_SPIN);
		lck_grp_deallocate(grp);
	}
}
/*
 * Routine: kdp_lck_spin_is_acquired
 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
 */
boolean_t
kdp_lck_spin_is_acquired(lck_spin_t *lck)
{
	if (not_in_kdp) {
		panic("panic: spinlock acquired check done outside of kernel debugger");
	}
	return ((lck->lck_spin_data & ~LCK_SPIN_TAG_DESTROYED) != 0) ? TRUE : FALSE;
}

/*
 * Initialize a usimple_lock.
 *
 * No change in preemption state.
 */
void
usimple_lock_init(
	usimple_lock_t l,
	unsigned short tag)
{
	simple_lock_init((simple_lock_t) l, tag);
}

/*
 * Acquire a usimple_lock.
 *
 * Returns with preemption disabled.  Note
 * that the hw_lock routines are responsible for
 * maintaining preemption state.
 */
void
(usimple_lock)(
	usimple_lock_t l
	LCK_GRP_ARG(lck_grp_t *grp))
{
	simple_lock((simple_lock_t) l, LCK_GRP_PROBEARG(grp));
}

extern void sync(void);

/*
 * Release a usimple_lock.
 *
 * Returns with preemption enabled.  Note
 * that the hw_lock routines are responsible for
 * maintaining preemption state.
 */
void
(usimple_unlock)(
	usimple_lock_t l)
{
	simple_unlock((simple_lock_t)l);
}

/*
 * Conditionally acquire a usimple_lock.
 *
 * On success, returns with preemption disabled.
 * On failure, returns with preemption in the same state
 * as when first invoked.  Note that the hw_lock routines
 * are responsible for maintaining preemption state.
 *
 * XXX No stats are gathered on a miss; I preserved this
 * behavior from the original assembly-language code, but
 * doesn't it make sense to log misses?  XXX
 */
unsigned int
(usimple_lock_try)(
	usimple_lock_t l
	LCK_GRP_ARG(lck_grp_t *grp))
{
	return simple_lock_try((simple_lock_t) l, grp);
}
/*
 * The C portion of the shared/exclusive locks package.
 */

/*
 * compute the deadline to spin against when
 * waiting for a change of state on a lck_rw_t
 */
static inline uint64_t
lck_rw_deadline_for_spin(lck_rw_t *lck)
{
	lck_rw_word_t word;

	word.data = ordered_load_rw(lck);
	if (word.can_sleep) {
		if (word.r_waiting || word.w_waiting || (word.shared_count > machine_info.max_cpus)) {
			/*
			 * there are already threads waiting on this lock... this
			 * implies that they have spun beyond their deadlines waiting for
			 * the desired state to show up so we will not bother spinning at this time...
			 *   or
			 * the current number of threads sharing this lock exceeds our capacity to run them
			 * concurrently and since all states we're going to spin for require the rw_shared_count
			 * to be at 0, we'll not bother spinning since the latency for this to happen is
			 * unpredictable...
			 */
			return mach_absolute_time();
		}
		return mach_absolute_time() + MutexSpin;
	} else {
		return mach_absolute_time() + (100000LL * 1000000000LL);
	}
}
static boolean_t
lck_rw_drain_status(lck_rw_t *lock, uint32_t status_mask, boolean_t wait __unused)
{
	uint64_t deadline = 0;
	uint32_t data;

	if (wait) {
		deadline = lck_rw_deadline_for_spin(lock);
	}

	for (;;) {
		data = load_exclusive32(&lock->lck_rw_data, memory_order_acquire_smp);
		if ((data & status_mask) == 0) {
			break;
		}
		if (wait) {
			wait_for_event();
		} else {
			os_atomic_clear_exclusive();
		}
		if (!wait || (mach_absolute_time() >= deadline)) {
			return FALSE;
		}
	}
	os_atomic_clear_exclusive();
	return TRUE;
}

/*
 * Spin while interlock is held.
 */
static inline void
lck_rw_interlock_spin(lck_rw_t *lock)
{
	uint32_t data;

	for (;;) {
		data = load_exclusive32(&lock->lck_rw_data, memory_order_relaxed);
		if (data & LCK_RW_INTERLOCK) {
			wait_for_event();
		} else {
			os_atomic_clear_exclusive();
			return;
		}
	}
}

/*
 * We disable interrupts while holding the RW interlock to prevent an
 * interrupt from exacerbating hold time.
 * Hence, local helper functions lck_interlock_lock()/lck_interlock_unlock().
 */
static inline boolean_t
lck_interlock_lock(lck_rw_t *lck)
{
	boolean_t istate;

	istate = ml_set_interrupts_enabled(FALSE);
	lck_rw_ilk_lock(lck);
	return istate;
}

static inline void
lck_interlock_unlock(lck_rw_t *lck, boolean_t istate)
{
	lck_rw_ilk_unlock(lck);
	ml_set_interrupts_enabled(istate);
}
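/*
 * Illustrative sketch (not part of the original source): how the interlock
 * helpers above are paired, preserving the caller's interrupt state. The
 * function name rw_interlock_bracket_example is hypothetical.
 */
__unused static void
rw_interlock_bracket_example(lck_rw_t *lck)
{
	boolean_t istate;

	istate = lck_interlock_lock(lck);       /* interrupts are disabled here */
	/* ... inspect or update the lck_rw_t word under the interlock ... */
	lck_interlock_unlock(lck, istate);      /* restores the saved interrupt state */
}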
#define LCK_RW_GRAB_WANT        0
#define LCK_RW_GRAB_SHARED      1

static boolean_t
lck_rw_grab(lck_rw_t *lock, int mode, boolean_t wait)
{
	uint64_t  deadline = 0;
	uint32_t  data, prev;
	boolean_t do_exch;

	if (wait) {
		deadline = lck_rw_deadline_for_spin(lock);
	}

	for (;;) {
		data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
		if (data & LCK_RW_INTERLOCK) {
			atomic_exchange_abort();
			lck_rw_interlock_spin(lock);
			continue;
		}
		do_exch = FALSE;
		if (mode == LCK_RW_GRAB_WANT) {
			if ((data & LCK_RW_WANT_EXCL) == 0) {
				data |= LCK_RW_WANT_EXCL;
				do_exch = TRUE;
			}
		} else {        // LCK_RW_GRAB_SHARED
			if (((data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) == 0) ||
			    (((data & LCK_RW_SHARED_MASK)) && ((data & LCK_RW_PRIV_EXCL) == 0))) {
				data += LCK_RW_SHARED_READER;
				do_exch = TRUE;
			}
		}
		if (do_exch) {
			if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
				return TRUE;
			}
		} else {
			if (wait) {                             // Non-waiting
				wait_for_event();
			} else {
				atomic_exchange_abort();
			}
			if (!wait || (mach_absolute_time() >= deadline)) {
				return FALSE;
			}
		}
	}
}
/*
 * Routine: lck_rw_alloc_init
 */
lck_rw_t *
lck_rw_alloc_init(
	lck_grp_t  *grp,
	lck_attr_t *attr)
{
	lck_rw_t *lck;

	lck = zalloc_flags(ZV_LCK_RW, Z_WAITOK | Z_ZERO);
	lck_rw_init(lck, grp, attr);
	return lck;
}

/*
 * Routine: lck_rw_free
 */
void
lck_rw_free(
	lck_rw_t  *lck,
	lck_grp_t *grp)
{
	lck_rw_destroy(lck, grp);
	zfree(ZV_LCK_RW, lck);
}

/*
 * Routine: lck_rw_init
 */
void
lck_rw_init(
	lck_rw_t   *lck,
	lck_grp_t  *grp,
	lck_attr_t *attr)
{
	if (attr == LCK_ATTR_NULL) {
		attr = &LockDefaultLckAttr;
	}
	memset(lck, 0, sizeof(lck_rw_t));
	lck->lck_rw_can_sleep = TRUE;
	if ((attr->lck_attr_val & LCK_ATTR_RW_SHARED_PRIORITY) == 0) {
		lck->lck_rw_priv_excl = TRUE;
	}

	lck_grp_reference(grp);
	lck_grp_lckcnt_incr(grp, LCK_TYPE_RW);
}

/*
 * Routine: lck_rw_destroy
 */
void
lck_rw_destroy(
	lck_rw_t  *lck,
	lck_grp_t *grp)
{
	if (lck->lck_rw_tag == LCK_RW_TAG_DESTROYED) {
		return;
	}
#if MACH_LDEBUG
	lck_rw_assert(lck, LCK_RW_ASSERT_NOTHELD);
#endif
	lck->lck_rw_tag = LCK_RW_TAG_DESTROYED;
	lck_grp_lckcnt_decr(grp, LCK_TYPE_RW);
	lck_grp_deallocate(grp);
}
/*
 * Routine: lck_rw_lock
 */
void
lck_rw_lock(
	lck_rw_t      *lck,
	lck_rw_type_t lck_rw_type)
{
	if (lck_rw_type == LCK_RW_TYPE_SHARED) {
		lck_rw_lock_shared(lck);
	} else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) {
		lck_rw_lock_exclusive(lck);
	} else {
		panic("lck_rw_lock(): Invalid RW lock type: %x", lck_rw_type);
	}
}

#define LCK_RW_LOCK_EXCLUSIVE_TAS(lck) (atomic_test_and_set32(&(lck)->lck_rw_data, \
	(LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK), \
	LCK_RW_WANT_EXCL, memory_order_acquire_smp, FALSE))
/*
 * Routine: lck_rw_lock_exclusive_check_contended
 */
bool
lck_rw_lock_exclusive_check_contended(lck_rw_t *lock)
{
	thread_t thread    = current_thread();
	bool     contended = false;

	if (lock->lck_rw_can_sleep) {
		thread->rwlock_count++;
	} else if (get_preemption_level() == 0) {
		panic("Taking non-sleepable RW lock with preemption enabled");
	}
	if (LCK_RW_LOCK_EXCLUSIVE_TAS(lock)) {
#if     CONFIG_DTRACE
		LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
#endif  /* CONFIG_DTRACE */
	} else {
		contended = true;
		lck_rw_lock_exclusive_gen(lock);
	}
#if MACH_ASSERT
	thread_t owner = ordered_load_rw_owner(lock);
	assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
#endif
	ordered_store_rw_owner(lock, thread);

	return contended;
}
/*
 * Routine: lck_rw_lock_exclusive
 */
void
lck_rw_lock_exclusive(lck_rw_t *lock)
{
	thread_t thread = current_thread();

	if (lock->lck_rw_can_sleep) {
		thread->rwlock_count++;
	} else if (get_preemption_level() == 0) {
		panic("Taking non-sleepable RW lock with preemption enabled");
	}
	if (LCK_RW_LOCK_EXCLUSIVE_TAS(lock)) {
#if     CONFIG_DTRACE
		LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
#endif  /* CONFIG_DTRACE */
	} else {
		lck_rw_lock_exclusive_gen(lock);
	}
#if MACH_ASSERT
	thread_t owner = ordered_load_rw_owner(lock);
	assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
#endif
	ordered_store_rw_owner(lock, thread);
}
/*
 * Routine: lck_rw_lock_shared
 */
void
lck_rw_lock_shared(lck_rw_t *lock)
{
	uint32_t data, prev;

	if (lock->lck_rw_can_sleep) {
		current_thread()->rwlock_count++;
	} else if (get_preemption_level() == 0) {
		panic("Taking non-sleepable RW lock with preemption enabled");
	}
	for (;;) {
		data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
		if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK)) {
			atomic_exchange_abort();
			lck_rw_lock_shared_gen(lock);
			break;
		}
		data += LCK_RW_SHARED_READER;
		if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
			break;
		}
		cpu_pause();
	}
#if MACH_ASSERT
	thread_t owner = ordered_load_rw_owner(lock);
	assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
#endif
#if     CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED);
#endif  /* CONFIG_DTRACE */
}
/*
 * Routine: lck_rw_lock_shared_to_exclusive
 *
 * False returned upon failure, in this case the shared lock is dropped.
 */
boolean_t
lck_rw_lock_shared_to_exclusive(lck_rw_t *lock)
{
	uint32_t data, prev;

	for (;;) {
		data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
		if (data & LCK_RW_INTERLOCK) {
			atomic_exchange_abort();
			lck_rw_interlock_spin(lock);
			continue;
		}
		if (data & LCK_RW_WANT_UPGRADE) {
			data -= LCK_RW_SHARED_READER;
			if ((data & LCK_RW_SHARED_MASK) == 0) {         /* we were the last reader */
				data &= ~(LCK_RW_W_WAITING);            /* so clear the wait indicator */
			}
			if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
				return lck_rw_lock_shared_to_exclusive_failure(lock, prev);
			}
		} else {
			data |= LCK_RW_WANT_UPGRADE;            /* ask for WANT_UPGRADE */
			data -= LCK_RW_SHARED_READER;           /* and shed our read count */
			if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
				break;
			}
		}
		cpu_pause();
	}
	/* we now own the WANT_UPGRADE */
	if (data & LCK_RW_SHARED_MASK) {                        /* check to see if all of the readers are drained */
		lck_rw_lock_shared_to_exclusive_success(lock);  /* if not, we need to go wait */
	}
#if MACH_ASSERT
	thread_t owner = ordered_load_rw_owner(lock);
	assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
#endif
	ordered_store_rw_owner(lock, current_thread());
#if     CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lock, 0);
#endif  /* CONFIG_DTRACE */
	return TRUE;
}
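/*
 * Illustrative sketch (not part of the original source): typical caller-side
 * handling of a failed shared-to-exclusive upgrade, which, as noted above,
 * drops the shared hold. The function name rw_upgrade_usage_example is
 * hypothetical.
 */
__unused static void
rw_upgrade_usage_example(lck_rw_t *lck)
{
	lck_rw_lock_shared(lck);
	/* ... decide that an exclusive update is needed ... */
	if (!lck_rw_lock_shared_to_exclusive(lck)) {
		/* upgrade failed: the shared hold was dropped, take it exclusive */
		lck_rw_lock_exclusive(lck);
	}
	/* ... perform the update ... */
	lck_rw_unlock_exclusive(lck);
}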
/*
 * Routine: lck_rw_lock_shared_to_exclusive_failure
 *
 * Fast path code has already dropped our read
 * count and determined that someone else owns 'lck_rw_want_upgrade'
 * if 'lck_rw_shared_count' == 0, it's also already dropped 'lck_w_waiting'
 * all we need to do here is determine if a wakeup is needed
 */
static boolean_t
lck_rw_lock_shared_to_exclusive_failure(
	lck_rw_t *lck,
	uint32_t  prior_lock_state)
{
	thread_t thread = current_thread();
	uint32_t rwlock_count;

	/* Check if dropping the lock means that we need to unpromote */
	if (lck->lck_rw_can_sleep) {
		rwlock_count = thread->rwlock_count--;
	} else {
		rwlock_count = UINT32_MAX;
	}

	if (rwlock_count == 0) {
		panic("rw lock count underflow for thread %p", thread);
	}

	if ((prior_lock_state & LCK_RW_W_WAITING) &&
	    ((prior_lock_state & LCK_RW_SHARED_MASK) == LCK_RW_SHARED_READER)) {
		/*
		 * Someone else has requested upgrade.
		 * Since we've released the read lock, wake
		 * him up if he's blocked waiting
		 */
		thread_wakeup(LCK_RW_WRITER_EVENT(lck));
	}

	if ((rwlock_count == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
		/* sched_flags checked without lock, but will be rechecked while clearing */
		lck_rw_clear_promotion(thread, unslide_for_kdebug(lck));
	}

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_NONE,
	    VM_KERNEL_UNSLIDE_OR_PERM(lck), lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0);

	return FALSE;
}
1179 * Routine: lck_rw_lock_shared_to_exclusive_success
1181 * assembly fast path code has already dropped our read
1182 * count and successfully acquired 'lck_rw_want_upgrade'
1183 * we just need to wait for the rest of the readers to drain
1184 * and then we can return as the exclusive holder of this lock
1187 lck_rw_lock_shared_to_exclusive_success(
1190 __kdebug_only
uintptr_t trace_lck
= VM_KERNEL_UNSLIDE_OR_PERM(lock
);
1195 boolean_t not_shared
;
1198 uint64_t wait_interval
= 0;
1199 int readers_at_sleep
= 0;
1200 boolean_t dtrace_ls_initialized
= FALSE
;
1201 boolean_t dtrace_rwl_shared_to_excl_spin
, dtrace_rwl_shared_to_excl_block
, dtrace_ls_enabled
= FALSE
;
1204 while (!lck_rw_drain_status(lock
, LCK_RW_SHARED_MASK
, FALSE
)) {
1205 word
.data
= ordered_load_rw(lock
);
1207 if (dtrace_ls_initialized
== FALSE
) {
1208 dtrace_ls_initialized
= TRUE
;
1209 dtrace_rwl_shared_to_excl_spin
= (lockstat_probemap
[LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN
] != 0);
1210 dtrace_rwl_shared_to_excl_block
= (lockstat_probemap
[LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK
] != 0);
1211 dtrace_ls_enabled
= dtrace_rwl_shared_to_excl_spin
|| dtrace_rwl_shared_to_excl_block
;
1212 if (dtrace_ls_enabled
) {
1214 * Either sleeping or spinning is happening,
1215 * start a timing of our delay interval now.
1217 readers_at_sleep
= word
.shared_count
;
1218 wait_interval
= mach_absolute_time();
1223 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_SH_TO_EX_SPIN_CODE
) | DBG_FUNC_START
,
1224 trace_lck
, word
.shared_count
, 0, 0, 0);
1226 not_shared
= lck_rw_drain_status(lock
, LCK_RW_SHARED_MASK
, TRUE
);
1228 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_SH_TO_EX_SPIN_CODE
) | DBG_FUNC_END
,
1229 trace_lck
, lock
->lck_rw_shared_count
, 0, 0, 0);
1236 * if we get here, the spin deadline in lck_rw_wait_on_status()
1237 * has expired w/o the rw_shared_count having drained to 0
1238 * check to see if we're allowed to do a thread_block
1240 if (word
.can_sleep
) {
1241 istate
= lck_interlock_lock(lock
);
1243 word
.data
= ordered_load_rw(lock
);
1244 if (word
.shared_count
!= 0) {
1245 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_SH_TO_EX_WAIT_CODE
) | DBG_FUNC_START
,
1246 trace_lck
, word
.shared_count
, 0, 0, 0);
1249 ordered_store_rw(lock
, word
.data
);
1251 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockUpgrade
);
1252 res
= assert_wait(LCK_RW_WRITER_EVENT(lock
),
1253 THREAD_UNINT
| THREAD_WAIT_NOREPORT_USER
);
1254 lck_interlock_unlock(lock
, istate
);
1256 if (res
== THREAD_WAITING
) {
1257 res
= thread_block(THREAD_CONTINUE_NULL
);
1260 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_SH_TO_EX_WAIT_CODE
) | DBG_FUNC_END
,
1261 trace_lck
, res
, slept
, 0, 0);
1263 lck_interlock_unlock(lock
, istate
);
1270 * We infer whether we took the sleep/spin path above by checking readers_at_sleep.
1272 if (dtrace_ls_enabled
== TRUE
) {
1274 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN
, lock
, mach_absolute_time() - wait_interval
, 0);
1276 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK
, lock
,
1277 mach_absolute_time() - wait_interval
, 1,
1278 (readers_at_sleep
== 0 ? 1 : 0), readers_at_sleep
);
1281 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE
, lock
, 1);
1288 * Routine: lck_rw_lock_exclusive_to_shared
1292 lck_rw_lock_exclusive_to_shared(lck_rw_t
*lock
)
1294 uint32_t data
, prev
;
1296 assertf(lock
->lck_rw_owner
== current_thread(), "state=0x%x, owner=%p", lock
->lck_rw_data
, lock
->lck_rw_owner
);
1297 ordered_store_rw_owner(lock
, THREAD_NULL
);
1299 data
= atomic_exchange_begin32(&lock
->lck_rw_data
, &prev
, memory_order_release_smp
);
1300 if (data
& LCK_RW_INTERLOCK
) {
1301 atomic_exchange_abort();
1302 lck_rw_interlock_spin(lock
); /* wait for interlock to clear */
1305 data
+= LCK_RW_SHARED_READER
;
1306 if (data
& LCK_RW_WANT_UPGRADE
) {
1307 data
&= ~(LCK_RW_WANT_UPGRADE
);
1309 data
&= ~(LCK_RW_WANT_EXCL
);
1311 if (!((prev
& LCK_RW_W_WAITING
) && (prev
& LCK_RW_PRIV_EXCL
))) {
1312 data
&= ~(LCK_RW_W_WAITING
);
1314 if (atomic_exchange_complete32(&lock
->lck_rw_data
, prev
, data
, memory_order_release_smp
)) {
1319 return lck_rw_lock_exclusive_to_shared_gen(lock
, prev
);
1323 * Routine: lck_rw_lock_exclusive_to_shared_gen
1325 * Fast path has already dropped
1326 * our exclusive state and bumped lck_rw_shared_count
1327 * all we need to do here is determine if anyone
1328 * needs to be awakened.
1331 lck_rw_lock_exclusive_to_shared_gen(
1333 uint32_t prior_lock_state
)
1335 __kdebug_only
uintptr_t trace_lck
= VM_KERNEL_UNSLIDE_OR_PERM(lck
);
1336 lck_rw_word_t fake_lck
;
1339 * prior_lock state is a snapshot of the 1st word of the
1340 * lock in question... we'll fake up a pointer to it
1341 * and carefully not access anything beyond whats defined
1342 * in the first word of a lck_rw_t
1344 fake_lck
.data
= prior_lock_state
;
1346 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_TO_SH_CODE
) | DBG_FUNC_START
,
1347 trace_lck
, fake_lck
->want_excl
, fake_lck
->want_upgrade
, 0, 0);
1350 * don't wake up anyone waiting to take the lock exclusively
1351 * since we hold a read count... when the read count drops to 0,
1352 * the writers will be woken.
1354 * wake up any waiting readers if we don't have any writers waiting,
1355 * or the lock is NOT marked as rw_priv_excl (writers have privilege)
1357 if (!(fake_lck
.priv_excl
&& fake_lck
.w_waiting
) && fake_lck
.r_waiting
) {
1358 thread_wakeup(LCK_RW_READER_EVENT(lck
));
1361 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_TO_SH_CODE
) | DBG_FUNC_END
,
1362 trace_lck
, lck
->lck_rw_want_excl
, lck
->lck_rw_want_upgrade
, lck
->lck_rw_shared_count
, 0);
1365 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_TO_SHARED_DOWNGRADE
, lck
, 0);
1371 * Routine: lck_rw_try_lock
1376 lck_rw_type_t lck_rw_type
)
1378 if (lck_rw_type
== LCK_RW_TYPE_SHARED
) {
1379 return lck_rw_try_lock_shared(lck
);
1380 } else if (lck_rw_type
== LCK_RW_TYPE_EXCLUSIVE
) {
1381 return lck_rw_try_lock_exclusive(lck
);
1383 panic("lck_rw_try_lock(): Invalid rw lock type: %x", lck_rw_type
);
1389 * Routine: lck_rw_try_lock_shared
1393 lck_rw_try_lock_shared(lck_rw_t
*lock
)
1395 uint32_t data
, prev
;
1398 data
= atomic_exchange_begin32(&lock
->lck_rw_data
, &prev
, memory_order_acquire_smp
);
1399 if (data
& LCK_RW_INTERLOCK
) {
1400 atomic_exchange_abort();
1401 lck_rw_interlock_spin(lock
);
1404 if (data
& (LCK_RW_WANT_EXCL
| LCK_RW_WANT_UPGRADE
)) {
1405 atomic_exchange_abort();
1406 return FALSE
; /* lock is busy */
1408 data
+= LCK_RW_SHARED_READER
; /* Increment reader refcount */
1409 if (atomic_exchange_complete32(&lock
->lck_rw_data
, prev
, data
, memory_order_acquire_smp
)) {
1415 thread_t owner
= ordered_load_rw_owner(lock
);
1416 assertf(owner
== THREAD_NULL
, "state=0x%x, owner=%p", ordered_load_rw(lock
), owner
);
1419 if (lock
->lck_rw_can_sleep
) {
1420 current_thread()->rwlock_count
++;
1421 } else if (get_preemption_level() == 0) {
1422 panic("Taking non-sleepable RW lock with preemption enabled");
1426 LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE
, lock
, DTRACE_RW_SHARED
);
1427 #endif /* CONFIG_DTRACE */
1433 * Routine: lck_rw_try_lock_exclusive
1437 lck_rw_try_lock_exclusive(lck_rw_t
*lock
)
1439 uint32_t data
, prev
;
1443 data
= atomic_exchange_begin32(&lock
->lck_rw_data
, &prev
, memory_order_acquire_smp
);
1444 if (data
& LCK_RW_INTERLOCK
) {
1445 atomic_exchange_abort();
1446 lck_rw_interlock_spin(lock
);
1449 if (data
& (LCK_RW_SHARED_MASK
| LCK_RW_WANT_EXCL
| LCK_RW_WANT_UPGRADE
)) {
1450 atomic_exchange_abort();
1453 data
|= LCK_RW_WANT_EXCL
;
1454 if (atomic_exchange_complete32(&lock
->lck_rw_data
, prev
, data
, memory_order_acquire_smp
)) {
1459 thread
= current_thread();
1460 if (lock
->lck_rw_can_sleep
) {
1461 thread
->rwlock_count
++;
1462 } else if (get_preemption_level() == 0) {
1463 panic("Taking non-sleepable RW lock with preemption enabled");
1466 thread_t owner
= ordered_load_rw_owner(lock
);
1467 assertf(owner
== THREAD_NULL
, "state=0x%x, owner=%p", ordered_load_rw(lock
), owner
);
1469 ordered_store_rw_owner(lock
, thread
);
1471 LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE
, lock
, DTRACE_RW_EXCL
);
1472 #endif /* CONFIG_DTRACE */
1478 * Routine: lck_rw_unlock
1483 lck_rw_type_t lck_rw_type
)
1485 if (lck_rw_type
== LCK_RW_TYPE_SHARED
) {
1486 lck_rw_unlock_shared(lck
);
1487 } else if (lck_rw_type
== LCK_RW_TYPE_EXCLUSIVE
) {
1488 lck_rw_unlock_exclusive(lck
);
1490 panic("lck_rw_unlock(): Invalid RW lock type: %d", lck_rw_type
);
1496 * Routine: lck_rw_unlock_shared
1499 lck_rw_unlock_shared(
1504 assertf(lck
->lck_rw_owner
== THREAD_NULL
, "state=0x%x, owner=%p", lck
->lck_rw_data
, lck
->lck_rw_owner
);
1505 assertf(lck
->lck_rw_shared_count
> 0, "shared_count=0x%x", lck
->lck_rw_shared_count
);
1506 ret
= lck_rw_done(lck
);
1508 if (ret
!= LCK_RW_TYPE_SHARED
) {
1509 panic("lck_rw_unlock_shared(): lock %p held in mode: %d", lck
, ret
);
1515 * Routine: lck_rw_unlock_exclusive
1518 lck_rw_unlock_exclusive(
1523 assertf(lck
->lck_rw_owner
== current_thread(), "state=0x%x, owner=%p", lck
->lck_rw_data
, lck
->lck_rw_owner
);
1524 ret
= lck_rw_done(lck
);
1526 if (ret
!= LCK_RW_TYPE_EXCLUSIVE
) {
1527 panic("lck_rw_unlock_exclusive(): lock %p held in mode: %d", lck
, ret
);
1533 * Routine: lck_rw_lock_exclusive_gen
1536 lck_rw_lock_exclusive_gen(
1539 __kdebug_only
uintptr_t trace_lck
= VM_KERNEL_UNSLIDE_OR_PERM(lock
);
1542 boolean_t gotlock
= 0;
1543 boolean_t not_shared_or_upgrade
= 0;
1544 wait_result_t res
= 0;
1548 boolean_t dtrace_ls_initialized
= FALSE
;
1549 boolean_t dtrace_rwl_excl_spin
, dtrace_rwl_excl_block
, dtrace_ls_enabled
= FALSE
;
1550 uint64_t wait_interval
= 0;
1551 int readers_at_sleep
= 0;
1555 * Try to acquire the lck_rw_want_excl bit.
1557 while (!lck_rw_grab(lock
, LCK_RW_GRAB_WANT
, FALSE
)) {
1559 if (dtrace_ls_initialized
== FALSE
) {
1560 dtrace_ls_initialized
= TRUE
;
1561 dtrace_rwl_excl_spin
= (lockstat_probemap
[LS_LCK_RW_LOCK_EXCL_SPIN
] != 0);
1562 dtrace_rwl_excl_block
= (lockstat_probemap
[LS_LCK_RW_LOCK_EXCL_BLOCK
] != 0);
1563 dtrace_ls_enabled
= dtrace_rwl_excl_spin
|| dtrace_rwl_excl_block
;
1564 if (dtrace_ls_enabled
) {
1566 * Either sleeping or spinning is happening,
1567 * start a timing of our delay interval now.
1569 readers_at_sleep
= lock
->lck_rw_shared_count
;
1570 wait_interval
= mach_absolute_time();
1575 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_WRITER_SPIN_CODE
) | DBG_FUNC_START
, trace_lck
, 0, 0, 0, 0);
1577 gotlock
= lck_rw_grab(lock
, LCK_RW_GRAB_WANT
, TRUE
);
1579 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_WRITER_SPIN_CODE
) | DBG_FUNC_END
, trace_lck
, 0, 0, gotlock
, 0);
1585 * if we get here, the deadline has expired w/o us
1586 * being able to grab the lock exclusively
1587 * check to see if we're allowed to do a thread_block
1589 word
.data
= ordered_load_rw(lock
);
1590 if (word
.can_sleep
) {
1591 istate
= lck_interlock_lock(lock
);
1592 word
.data
= ordered_load_rw(lock
);
1594 if (word
.want_excl
) {
1595 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_WRITER_WAIT_CODE
) | DBG_FUNC_START
, trace_lck
, 0, 0, 0, 0);
1598 ordered_store_rw(lock
, word
.data
);
1600 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite
);
1601 res
= assert_wait(LCK_RW_WRITER_EVENT(lock
),
1602 THREAD_UNINT
| THREAD_WAIT_NOREPORT_USER
);
1603 lck_interlock_unlock(lock
, istate
);
1605 if (res
== THREAD_WAITING
) {
1606 res
= thread_block(THREAD_CONTINUE_NULL
);
1609 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_WRITER_WAIT_CODE
) | DBG_FUNC_END
, trace_lck
, res
, slept
, 0, 0);
1612 ordered_store_rw(lock
, word
.data
);
1613 lck_interlock_unlock(lock
, istate
);
1619 * Wait for readers (and upgrades) to finish...
1621 while (!lck_rw_drain_status(lock
, LCK_RW_SHARED_MASK
| LCK_RW_WANT_UPGRADE
, FALSE
)) {
1624 * Either sleeping or spinning is happening, start
1625 * a timing of our delay interval now. If we set it
1626 * to -1 we don't have accurate data so we cannot later
1627 * decide to record a dtrace spin or sleep event.
1629 if (dtrace_ls_initialized
== FALSE
) {
1630 dtrace_ls_initialized
= TRUE
;
1631 dtrace_rwl_excl_spin
= (lockstat_probemap
[LS_LCK_RW_LOCK_EXCL_SPIN
] != 0);
1632 dtrace_rwl_excl_block
= (lockstat_probemap
[LS_LCK_RW_LOCK_EXCL_BLOCK
] != 0);
1633 dtrace_ls_enabled
= dtrace_rwl_excl_spin
|| dtrace_rwl_excl_block
;
1634 if (dtrace_ls_enabled
) {
1636 * Either sleeping or spinning is happening,
1637 * start a timing of our delay interval now.
1639 readers_at_sleep
= lock
->lck_rw_shared_count
;
1640 wait_interval
= mach_absolute_time();
1645 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_READER_SPIN_CODE
) | DBG_FUNC_START
, trace_lck
, 0, 0, 0, 0);
1647 not_shared_or_upgrade
= lck_rw_drain_status(lock
, LCK_RW_SHARED_MASK
| LCK_RW_WANT_UPGRADE
, TRUE
);
1649 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_READER_SPIN_CODE
) | DBG_FUNC_END
, trace_lck
, 0, 0, not_shared_or_upgrade
, 0);
1651 if (not_shared_or_upgrade
) {
1655 * if we get here, the deadline has expired w/o us
1656 * being able to grab the lock exclusively
1657 * check to see if we're allowed to do a thread_block
1659 word
.data
= ordered_load_rw(lock
);
1660 if (word
.can_sleep
) {
1661 istate
= lck_interlock_lock(lock
);
1662 word
.data
= ordered_load_rw(lock
);
1664 if (word
.shared_count
!= 0 || word
.want_upgrade
) {
1665 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_READER_WAIT_CODE
) | DBG_FUNC_START
, trace_lck
, 0, 0, 0, 0);
1668 ordered_store_rw(lock
, word
.data
);
1670 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite
);
1671 res
= assert_wait(LCK_RW_WRITER_EVENT(lock
),
1672 THREAD_UNINT
| THREAD_WAIT_NOREPORT_USER
);
1673 lck_interlock_unlock(lock
, istate
);
1675 if (res
== THREAD_WAITING
) {
1676 res
= thread_block(THREAD_CONTINUE_NULL
);
1679 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_READER_WAIT_CODE
) | DBG_FUNC_END
, trace_lck
, res
, slept
, 0, 0);
1681 lck_interlock_unlock(lock
, istate
);
1683 * must own the lock now, since we checked for
1684 * readers or upgrade owner behind the interlock
1685 * no need for a call to 'lck_rw_drain_status'
1694 * Decide what latencies we suffered that are Dtrace events.
1695 * If we have set wait_interval, then we either spun or slept.
1696 * At least we get out from under the interlock before we record
1697 * which is the best we can do here to minimize the impact
1699 * If we have set wait_interval to -1, then dtrace was not enabled when we
1700 * started sleeping/spinning so we don't record this event.
1702 if (dtrace_ls_enabled
== TRUE
) {
1704 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_SPIN
, lock
,
1705 mach_absolute_time() - wait_interval
, 1);
1708 * For the blocking case, we also record if when we blocked
1709 * it was held for read or write, and how many readers.
1710 * Notice that above we recorded this before we dropped
1711 * the interlock so the count is accurate.
1713 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_BLOCK
, lock
,
1714 mach_absolute_time() - wait_interval
, 1,
1715 (readers_at_sleep
== 0 ? 1 : 0), readers_at_sleep
);
1718 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE
, lock
, 1);
1719 #endif /* CONFIG_DTRACE */
1723 * Routine: lck_rw_done
1727 lck_rw_done(lck_rw_t
*lock
)
1729 uint32_t data
, prev
;
1730 boolean_t once
= FALSE
;
1733 data
= atomic_exchange_begin32(&lock
->lck_rw_data
, &prev
, memory_order_release_smp
);
1734 if (data
& LCK_RW_INTERLOCK
) { /* wait for interlock to clear */
1735 atomic_exchange_abort();
1736 lck_rw_interlock_spin(lock
);
1739 if (data
& LCK_RW_SHARED_MASK
) { /* lock is held shared */
1740 assertf(lock
->lck_rw_owner
== THREAD_NULL
, "state=0x%x, owner=%p", lock
->lck_rw_data
, lock
->lck_rw_owner
);
1741 data
-= LCK_RW_SHARED_READER
;
1742 if ((data
& LCK_RW_SHARED_MASK
) == 0) { /* if reader count has now gone to 0, check for waiters */
1745 } else { /* if reader count == 0, must be exclusive lock */
1746 if (data
& LCK_RW_WANT_UPGRADE
) {
1747 data
&= ~(LCK_RW_WANT_UPGRADE
);
1749 if (data
& LCK_RW_WANT_EXCL
) {
1750 data
&= ~(LCK_RW_WANT_EXCL
);
1751 } else { /* lock is not 'owned', panic */
1752 panic("Releasing non-exclusive RW lock without a reader refcount!");
1756 // Only check for holder and clear it once
1757 assertf(lock
->lck_rw_owner
== current_thread(), "state=0x%x, owner=%p", lock
->lck_rw_data
, lock
->lck_rw_owner
);
1758 ordered_store_rw_owner(lock
, THREAD_NULL
);
1763 * test the original values to match what
1764 * lck_rw_done_gen is going to do to determine
1765 * which wakeups need to happen...
1767 * if !(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting)
1769 if (prev
& LCK_RW_W_WAITING
) {
1770 data
&= ~(LCK_RW_W_WAITING
);
1771 if ((prev
& LCK_RW_PRIV_EXCL
) == 0) {
1772 data
&= ~(LCK_RW_R_WAITING
);
1775 data
&= ~(LCK_RW_R_WAITING
);
1778 if (atomic_exchange_complete32(&lock
->lck_rw_data
, prev
, data
, memory_order_release_smp
)) {
1783 return lck_rw_done_gen(lock
, prev
);
1787 * Routine: lck_rw_done_gen
1789 * called from the assembly language wrapper...
1790 * prior_lock_state is the value in the 1st
1791 * word of the lock at the time of a successful
1792 * atomic compare and exchange with the new value...
1793 * it represents the state of the lock before we
1794 * decremented the rw_shared_count or cleared either
1795 * rw_want_upgrade or rw_want_write and
1796 * the lck_x_waiting bits... since the wrapper
1797 * routine has already changed the state atomically,
1798 * we just need to decide if we should
1799 * wake up anyone and what value to return... we do
1800 * this by examining the state of the lock before
1803 static lck_rw_type_t
1806 uint32_t prior_lock_state
)
1808 lck_rw_word_t fake_lck
;
1809 lck_rw_type_t lock_type
;
1811 uint32_t rwlock_count
;
1814 * prior_lock state is a snapshot of the 1st word of the
1815 * lock in question... we'll fake up a pointer to it
1816 * and carefully not access anything beyond whats defined
1817 * in the first word of a lck_rw_t
1819 fake_lck
.data
= prior_lock_state
;
1821 if (fake_lck
.shared_count
<= 1) {
1822 if (fake_lck
.w_waiting
) {
1823 thread_wakeup(LCK_RW_WRITER_EVENT(lck
));
1826 if (!(fake_lck
.priv_excl
&& fake_lck
.w_waiting
) && fake_lck
.r_waiting
) {
1827 thread_wakeup(LCK_RW_READER_EVENT(lck
));
1830 if (fake_lck
.shared_count
) {
1831 lock_type
= LCK_RW_TYPE_SHARED
;
1833 lock_type
= LCK_RW_TYPE_EXCLUSIVE
;
1836 /* Check if dropping the lock means that we need to unpromote */
1837 thread
= current_thread();
1838 if (fake_lck
.can_sleep
) {
1839 rwlock_count
= thread
->rwlock_count
--;
1841 rwlock_count
= UINT32_MAX
;
1844 if (rwlock_count
== 0) {
1845 panic("rw lock count underflow for thread %p", thread
);
1848 if ((rwlock_count
== 1 /* field now 0 */) && (thread
->sched_flags
& TH_SFLAG_RW_PROMOTED
)) {
1849 /* sched_flags checked without lock, but will be rechecked while clearing */
1850 lck_rw_clear_promotion(thread
, unslide_for_kdebug(lck
));
1853 LOCKSTAT_RECORD(LS_LCK_RW_DONE_RELEASE
, lck
, lock_type
== LCK_RW_TYPE_SHARED
? 0 : 1);
1859 * Routine: lck_rw_lock_shared_gen
1861 * Fast path code has determined that this lock
1862 * is held exclusively... this is where we spin/block
1863 * until we can acquire the lock in the shared mode
1866 lck_rw_lock_shared_gen(
1869 __kdebug_only
uintptr_t trace_lck
= VM_KERNEL_UNSLIDE_OR_PERM(lck
);
1871 boolean_t gotlock
= 0;
1873 wait_result_t res
= 0;
1877 uint64_t wait_interval
= 0;
1878 int readers_at_sleep
= 0;
1879 boolean_t dtrace_ls_initialized
= FALSE
;
1880 boolean_t dtrace_rwl_shared_spin
, dtrace_rwl_shared_block
, dtrace_ls_enabled
= FALSE
;
1881 #endif /* CONFIG_DTRACE */
1883 while (!lck_rw_grab(lck
, LCK_RW_GRAB_SHARED
, FALSE
)) {
1885 if (dtrace_ls_initialized
== FALSE
) {
1886 dtrace_ls_initialized
= TRUE
;
1887 dtrace_rwl_shared_spin
= (lockstat_probemap
[LS_LCK_RW_LOCK_SHARED_SPIN
] != 0);
1888 dtrace_rwl_shared_block
= (lockstat_probemap
[LS_LCK_RW_LOCK_SHARED_BLOCK
] != 0);
1889 dtrace_ls_enabled
= dtrace_rwl_shared_spin
|| dtrace_rwl_shared_block
;
1890 if (dtrace_ls_enabled
) {
1892 * Either sleeping or spinning is happening,
1893 * start a timing of our delay interval now.
1895 readers_at_sleep
= lck
->lck_rw_shared_count
;
1896 wait_interval
= mach_absolute_time();
1901 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_SHARED_SPIN_CODE
) | DBG_FUNC_START
,
1902 trace_lck
, lck
->lck_rw_want_excl
, lck
->lck_rw_want_upgrade
, 0, 0);
1904 gotlock
= lck_rw_grab(lck
, LCK_RW_GRAB_SHARED
, TRUE
);
1906 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_SHARED_SPIN_CODE
) | DBG_FUNC_END
,
1907 trace_lck
, lck
->lck_rw_want_excl
, lck
->lck_rw_want_upgrade
, gotlock
, 0);
1913 * if we get here, the deadline has expired w/o us
1914 * being able to grab the lock for read
1915 * check to see if we're allowed to do a thread_block
1917 if (lck
->lck_rw_can_sleep
) {
1918 istate
= lck_interlock_lock(lck
);
1920 word
.data
= ordered_load_rw(lck
);
1921 if ((word
.want_excl
|| word
.want_upgrade
) &&
1922 ((word
.shared_count
== 0) || word
.priv_excl
)) {
1923 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_SHARED_WAIT_CODE
) | DBG_FUNC_START
,
1924 trace_lck
, word
.want_excl
, word
.want_upgrade
, 0, 0);
1927 ordered_store_rw(lck
, word
.data
);
1929 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockRead
);
1930 res
= assert_wait(LCK_RW_READER_EVENT(lck
),
1931 THREAD_UNINT
| THREAD_WAIT_NOREPORT_USER
);
1932 lck_interlock_unlock(lck
, istate
);
1934 if (res
== THREAD_WAITING
) {
1935 res
= thread_block(THREAD_CONTINUE_NULL
);
1938 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_SHARED_WAIT_CODE
) | DBG_FUNC_END
,
1939 trace_lck
, res
, slept
, 0, 0);
1941 word
.shared_count
++;
1942 ordered_store_rw(lck
, word
.data
);
1943 lck_interlock_unlock(lck
, istate
);
1950 if (dtrace_ls_enabled
== TRUE
) {
1952 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_SPIN
, lck
, mach_absolute_time() - wait_interval
, 0);
1954 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_BLOCK
, lck
,
1955 mach_absolute_time() - wait_interval
, 0,
1956 (readers_at_sleep
== 0 ? 1 : 0), readers_at_sleep
);
1959 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE
, lck
, 0);
1960 #endif /* CONFIG_DTRACE */
1964 * Required to verify thread ownership for exclusive locks by virtue of PPL
1973 case LCK_RW_ASSERT_SHARED
:
1974 if ((lck
->lck_rw_shared_count
!= 0) &&
1975 (lck
->lck_rw_owner
== THREAD_NULL
)) {
1979 case LCK_RW_ASSERT_EXCLUSIVE
:
1980 if ((lck
->lck_rw_want_excl
|| lck
->lck_rw_want_upgrade
) &&
1981 (lck
->lck_rw_shared_count
== 0) &&
1982 (lck
->lck_rw_owner
== current_thread())) {
1986 case LCK_RW_ASSERT_HELD
:
1987 if (lck
->lck_rw_shared_count
!= 0) {
1988 return; // Held shared
1990 if ((lck
->lck_rw_want_excl
|| lck
->lck_rw_want_upgrade
) &&
1991 (lck
->lck_rw_owner
== current_thread())) {
1992 return; // Held exclusive
1995 case LCK_RW_ASSERT_NOTHELD
:
1996 if ((lck
->lck_rw_shared_count
== 0) &&
1997 !(lck
->lck_rw_want_excl
|| lck
->lck_rw_want_upgrade
) &&
1998 (lck
->lck_rw_owner
== THREAD_NULL
)) {
2005 panic("rw lock (%p)%s held (mode=%u)", lck
, (type
== LCK_RW_ASSERT_NOTHELD
? "" : " not"), type
);
2010 * Routine: kdp_lck_rw_lock_is_acquired_exclusive
2011 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
2014 kdp_lck_rw_lock_is_acquired_exclusive(lck_rw_t
*lck
)
2017 panic("panic: rw lock exclusive check done outside of kernel debugger");
2019 return ((lck
->lck_rw_want_upgrade
|| lck
->lck_rw_want_excl
) && (lck
->lck_rw_shared_count
== 0)) ? TRUE
: FALSE
;
2023 * The C portion of the mutex package. These routines are only invoked
2024 * if the optimized assembler routines can't do the work.
2028 * Forward declaration
2033 lck_mtx_ext_t
* lck
,
2038 * Routine: lck_mtx_alloc_init
2047 lck
= zalloc(ZV_LCK_MTX
);
2048 lck_mtx_init(lck
, grp
, attr
);
2053 * Routine: lck_mtx_free
2060 lck_mtx_destroy(lck
, grp
);
2061 zfree(ZV_LCK_MTX
, lck
);
2065 * Routine: lck_mtx_init
2074 lck_mtx_ext_t
*lck_ext
;
2076 lck_attr_t
*lck_attr
;
2078 if (attr
!= LCK_ATTR_NULL
) {
2081 lck_attr
= &LockDefaultLckAttr
;
2085 if ((lck_attr
->lck_attr_val
) & LCK_ATTR_DEBUG
) {
2086 lck_ext
= zalloc(ZV_LCK_MTX_EXT
);
2087 lck_mtx_ext_init(lck_ext
, grp
, lck_attr
);
2088 lck
->lck_mtx_tag
= LCK_MTX_TAG_INDIRECT
;
2089 lck
->lck_mtx_ptr
= lck_ext
;
2090 lck
->lck_mtx_type
= LCK_MTX_TYPE
;
2094 lck
->lck_mtx_ptr
= NULL
; // Clear any padding in the union fields below
2095 lck
->lck_mtx_waiters
= 0;
2096 lck
->lck_mtx_type
= LCK_MTX_TYPE
;
2097 ordered_store_mtx(lck
, 0);
2099 lck_grp_reference(grp
);
2100 lck_grp_lckcnt_incr(grp
, LCK_TYPE_MTX
);
2104 * Routine: lck_mtx_init_ext
2109 lck_mtx_ext_t
* lck_ext
,
2113 lck_attr_t
*lck_attr
;
2115 if (attr
!= LCK_ATTR_NULL
) {
2118 lck_attr
= &LockDefaultLckAttr
;
2121 if ((lck_attr
->lck_attr_val
) & LCK_ATTR_DEBUG
) {
2122 lck_mtx_ext_init(lck_ext
, grp
, lck_attr
);
2123 lck
->lck_mtx_tag
= LCK_MTX_TAG_INDIRECT
;
2124 lck
->lck_mtx_ptr
= lck_ext
;
2125 lck
->lck_mtx_type
= LCK_MTX_TYPE
;
2127 lck
->lck_mtx_waiters
= 0;
2128 lck
->lck_mtx_type
= LCK_MTX_TYPE
;
2129 ordered_store_mtx(lck
, 0);
2131 lck_grp_reference(grp
);
2132 lck_grp_lckcnt_incr(grp
, LCK_TYPE_MTX
);
2136 * Routine: lck_mtx_ext_init
2140 lck_mtx_ext_t
* lck
,
2144 bzero((void *) lck
, sizeof(lck_mtx_ext_t
));
2146 lck
->lck_mtx
.lck_mtx_type
= LCK_MTX_TYPE
;
2148 if ((attr
->lck_attr_val
) & LCK_ATTR_DEBUG
) {
2149 lck
->lck_mtx_deb
.type
= MUTEX_TAG
;
2150 lck
->lck_mtx_attr
|= LCK_MTX_ATTR_DEBUG
;
2152 lck
->lck_mtx_grp
= grp
;
2154 if (grp
->lck_grp_attr
& LCK_GRP_ATTR_STAT
) {
2155 lck
->lck_mtx_attr
|= LCK_MTX_ATTR_STAT
;
2159 /* The slow versions */
2160 static void lck_mtx_lock_contended(lck_mtx_t
*lock
, thread_t thread
, boolean_t interlocked
);
2161 static boolean_t
lck_mtx_try_lock_contended(lck_mtx_t
*lock
, thread_t thread
);
2162 static void lck_mtx_unlock_contended(lck_mtx_t
*lock
, thread_t thread
, boolean_t interlocked
);
2164 /* The adaptive spin function */
2165 static spinwait_result_t
lck_mtx_lock_contended_spinwait_arm(lck_mtx_t
*lock
, thread_t thread
, boolean_t interlocked
);
2168 * Routine: lck_mtx_verify
2170 * Verify if a mutex is valid
2173 lck_mtx_verify(lck_mtx_t
*lock
)
2175 if (lock
->lck_mtx_type
!= LCK_MTX_TYPE
) {
2176 panic("Invalid mutex %p", lock
);
2178 #if DEVELOPMENT || DEBUG
2179 if (lock
->lck_mtx_tag
== LCK_MTX_TAG_DESTROYED
) {
2180 panic("Mutex destroyed %p", lock
);
2182 #endif /* DEVELOPMENT || DEBUG */
2186 * Routine: lck_mtx_check_preemption
2188 * Verify preemption is enabled when attempting to acquire a mutex.
2192 lck_mtx_check_preemption(lck_mtx_t
*lock
)
2194 #if DEVELOPMENT || DEBUG
2195 if (current_cpu_datap()->cpu_hibernate
) {
2199 int pl
= get_preemption_level();
2202 panic("Attempt to take mutex with preemption disabled. Lock=%p, level=%d", lock
, pl
);
2210 * Routine: lck_mtx_lock
2213 lck_mtx_lock(lck_mtx_t
*lock
)
2217 lck_mtx_verify(lock
);
2218 lck_mtx_check_preemption(lock
);
2219 thread
= current_thread();
2220 if (os_atomic_cmpxchg(&lock
->lck_mtx_data
,
2221 0, LCK_MTX_THREAD_TO_STATE(thread
), acquire
)) {
2223 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE
, lock
, 0);
2224 #endif /* CONFIG_DTRACE */
2227 lck_mtx_lock_contended(lock
, thread
, FALSE
);
/*
 * This is the slow version of mutex locking.
 */
static void NOINLINE
lck_mtx_lock_contended(lck_mtx_t *lock, thread_t thread, boolean_t interlocked)
{
	thread_t        holding_thread;
	uintptr_t       state;
	int             waiters = 0;
	spinwait_result_t sw_res;
	struct turnstile *ts = NULL;

	/* Loop waiting until I see that the mutex is unowned */
	for (;;) {
		sw_res = lck_mtx_lock_contended_spinwait_arm(lock, thread, interlocked);
		interlocked = FALSE;

		switch (sw_res) {
		case SPINWAIT_ACQUIRED:
			if (ts != NULL) {
				interlock_lock(lock);
				turnstile_complete((uintptr_t)lock, NULL, NULL, TURNSTILE_KERNEL_MUTEX);
				interlock_unlock(lock);
			}
			goto done;
		case SPINWAIT_INTERLOCK:
			goto set_owner;
		default:
			break;
		}

		state = ordered_load_mtx(lock);
		holding_thread = LCK_MTX_STATE_TO_THREAD(state);
		if (holding_thread == NULL) {
			break;
		}
		ordered_store_mtx(lock, (state | LCK_ILOCK | ARM_LCK_WAITERS)); // Set waiters bit and wait
		lck_mtx_lock_wait(lock, holding_thread, &ts);
		/* returns interlock unlocked */
	}

set_owner:
	/* Hooray, I'm the new owner! */
	state = ordered_load_mtx(lock);

	if (state & ARM_LCK_WAITERS) {
		/* Skip lck_mtx_lock_acquire if there are no waiters. */
		waiters = lck_mtx_lock_acquire(lock, ts);
		/*
		 * lck_mtx_lock_acquire will call
		 * turnstile_complete
		 */
	} else {
		if (ts != NULL) {
			turnstile_complete((uintptr_t)lock, NULL, NULL, TURNSTILE_KERNEL_MUTEX);
		}
	}

	state = LCK_MTX_THREAD_TO_STATE(thread);
	if (waiters != 0) {
		state |= ARM_LCK_WAITERS;
	}
	state |= LCK_ILOCK;             // Preserve interlock
	ordered_store_mtx(lock, state); // Set ownership
	interlock_unlock(lock);         // Release interlock, enable preemption

done:
	load_memory_barrier();

	assert(thread->turnstile != NULL);

	if (ts != NULL) {
		turnstile_cleanup();
	}

#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, lock, 0);
#endif /* CONFIG_DTRACE */
}
/*
 * Routine: lck_mtx_lock_spinwait_arm
 *
 * Invoked trying to acquire a mutex when there is contention but
 * the holder is running on another processor. We spin for up to a maximum
 * time waiting for the lock to be released.
 */
static spinwait_result_t
lck_mtx_lock_contended_spinwait_arm(lck_mtx_t *lock, thread_t thread, boolean_t interlocked)
{
	int                     has_interlock = (int)interlocked;
	__kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock);
	thread_t                owner, prev_owner;
	uint64_t                window_deadline, sliding_deadline, high_deadline;
	uint64_t                start_time, cur_time, avg_hold_time, bias, delta;
	int                     loopcount = 0;
	uint                    i, prev_owner_cpu;
	int                     total_hold_time_samples, window_hold_time_samples, unfairness;
	bool                    owner_on_core, adjust;
	uintptr_t               state, new_state, waiters;
	spinwait_result_t       retval = SPINWAIT_DID_SPIN_HIGH_THR;

	if (__improbable(!(lck_mtx_adaptive_spin_mode & ADAPTIVE_SPIN_ENABLE))) {
		if (!has_interlock) {
			interlock_lock(lock);
		}

		return SPINWAIT_DID_NOT_SPIN;
	}

	state = ordered_load_mtx(lock);

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_START,
	    trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(LCK_MTX_STATE_TO_THREAD(state)), lock->lck_mtx_waiters, 0, 0);

	start_time = mach_absolute_time();
	/*
	 * window_deadline represents the "learning" phase.
	 * The thread collects statistics about the lock during
	 * window_deadline and then it makes a decision on whether to spin more
	 * or block according to the concurrency behavior it has observed.
	 *
	 * Every thread can spin at least low_MutexSpin.
	 */
	window_deadline = start_time + low_MutexSpin;
	/*
	 * Sliding_deadline is the adjusted spin deadline
	 * computed after the "learning" phase.
	 */
	sliding_deadline = window_deadline;
	/*
	 * High_deadline is a hard deadline. No thread
	 * can spin more than this deadline.
	 */
	if (high_MutexSpin >= 0) {
		high_deadline = start_time + high_MutexSpin;
	} else {
		high_deadline = start_time + low_MutexSpin * real_ncpus;
	}

	/*
	 * Do not know yet which is the owner cpu.
	 * Initialize prev_owner_cpu with next cpu.
	 */
	prev_owner_cpu = (cpu_number() + 1) % real_ncpus;
	total_hold_time_samples = 0;
	window_hold_time_samples = 0;
	avg_hold_time = 0;
	adjust = TRUE;
	bias = (os_hash_kernel_pointer(lock) + cpu_number()) % real_ncpus;

	/* Snoop the lock state */
	state = ordered_load_mtx(lock);
	owner = LCK_MTX_STATE_TO_THREAD(state);
	prev_owner = owner;

	if (has_interlock) {
		if (owner == NULL) {
			retval = SPINWAIT_INTERLOCK;
			goto done_spinning;
		} else {
			/*
			 * We are holding the interlock, so
			 * we can safely dereference owner.
			 */
			if (!machine_thread_on_core(owner) || (owner->state & TH_IDLE)) {
				retval = SPINWAIT_DID_NOT_SPIN;
				goto done_spinning;
			}
		}
		interlock_unlock(lock);
		has_interlock = 0;
	}

	do {
		/*
		 * - mutex is locked, and
		 * - it's locked as a spin lock, and
		 * - owner is running on another processor, and
		 * - we haven't spun for long enough.
		 */

		/*
		 * Try to acquire the lock.
		 */
		owner = LCK_MTX_STATE_TO_THREAD(state);
		if (owner == NULL) {
			waiters = state & ARM_LCK_WAITERS;
			if (waiters) {
				/*
				 * preserve the waiter bit
				 * and try acquire the interlock.
				 * Note: we will successfully acquire
				 * the interlock only if we can also
				 * acquire the lock.
				 */
				new_state = ARM_LCK_WAITERS | LCK_ILOCK;
				has_interlock = 1;
				retval = SPINWAIT_INTERLOCK;
				disable_preemption();
			} else {
				new_state = LCK_MTX_THREAD_TO_STATE(thread);
				retval = SPINWAIT_ACQUIRED;
			}

			/*
			 * The cmpxchg will succeed only if the lock
			 * is not owned (doesn't have an owner set)
			 * and it is not interlocked.
			 * It will not fail if there are waiters.
			 */
			if (os_atomic_cmpxchgv(&lock->lck_mtx_data,
			    waiters, new_state, &state, acquire)) {
				goto done_spinning;
			} else {
				if (waiters) {
					has_interlock = 0;
					enable_preemption();
				}
			}
		}

		cur_time = mach_absolute_time();

		/*
		 * Never spin past high_deadline.
		 */
		if (cur_time >= high_deadline) {
			retval = SPINWAIT_DID_SPIN_HIGH_THR;
			break;
		}

		/*
		 * Check if owner is on core. If not block.
		 */
		owner = LCK_MTX_STATE_TO_THREAD(state);
		if (owner) {
			i = prev_owner_cpu;
			owner_on_core = FALSE;

			disable_preemption();
			state = ordered_load_mtx(lock);
			owner = LCK_MTX_STATE_TO_THREAD(state);

			/*
			 * For scalability we want to check if the owner is on core
			 * without locking the mutex interlock.
			 * If we do not lock the mutex interlock, the owner that we see might be
			 * invalid, so we cannot dereference it. Therefore we cannot check
			 * any field of the thread to tell us if it is on core.
			 * Check if the thread that is running on the other cpus matches the owner.
			 */
			if (owner) {
				do {
					cpu_data_t *cpu_data_ptr = CpuDataEntries[i].cpu_data_vaddr;
					if ((cpu_data_ptr != NULL) && (cpu_data_ptr->cpu_active_thread == owner)) {
						owner_on_core = TRUE;
						break;
					}
					if (++i >= real_ncpus) {
						i = 0;
					}
				} while (i != prev_owner_cpu);
				enable_preemption();

				if (owner_on_core) {
					prev_owner_cpu = i;
				} else {
					prev_owner = owner;
					state = ordered_load_mtx(lock);
					owner = LCK_MTX_STATE_TO_THREAD(state);
					if (owner == prev_owner) {
						/*
						 * Owner is not on core.
						 * Stop spinning.
						 */
						if (loopcount == 0) {
							retval = SPINWAIT_DID_NOT_SPIN;
						} else {
							retval = SPINWAIT_DID_SPIN_OWNER_NOT_CORE;
						}
						break;
					}
					/*
					 * Fall through if the owner changed while we were scanning.
					 * The new owner could potentially be on core, so loop
					 * again.
					 */
				}
			} else {
				enable_preemption();
			}
		}

		/*
		 * Save how many times we see the owner changing.
		 * We can roughly estimate the mutex hold
		 * time and the fairness with that.
		 */
		if (owner != prev_owner) {
			prev_owner = owner;
			total_hold_time_samples++;
			window_hold_time_samples++;
		}

		/*
		 * Learning window expired.
		 * Try to adjust the sliding_deadline.
		 */
		if (cur_time >= window_deadline) {
			/*
			 * If there was no contention during the window
			 * stop spinning.
			 */
			if (window_hold_time_samples < 1) {
				retval = SPINWAIT_DID_SPIN_NO_WINDOW_CONTENTION;
				break;
			}

			if (adjust) {
				/*
				 * For a fair lock, we'd wait for at most (NCPU-1) periods,
				 * but the lock is unfair, so let's try to estimate by how much.
				 */
				unfairness = total_hold_time_samples / real_ncpus;

				if (unfairness == 0) {
					/*
					 * We observed the owner changing `total_hold_time_samples` times which
					 * let us estimate the average hold time of this mutex for the duration
					 * of the spin time.
					 * avg_hold_time = (cur_time - start_time) / total_hold_time_samples;
					 *
					 * In this case spin at max avg_hold_time * (real_ncpus - 1)
					 */
					delta = cur_time - start_time;
					sliding_deadline = start_time + (delta * (real_ncpus - 1)) / total_hold_time_samples;
				} else {
					/*
					 * In this case at least one of the other cpus was able to get the lock twice
					 * while I was spinning.
					 * We could spin longer but it won't necessarily help if the system is unfair.
					 * Try to randomize the wait to reduce contention.
					 *
					 * We compute how much time we could potentially spin
					 * and distribute it over the cpus.
					 *
					 * bias is an integer between 0 and real_ncpus.
					 * distributed_increment = ((high_deadline - cur_time) / real_ncpus) * bias
					 */
					delta = high_deadline - cur_time;
					sliding_deadline = cur_time + ((delta * bias) / real_ncpus);
					adjust = FALSE;
				}
			}

			window_deadline += low_MutexSpin;
			window_hold_time_samples = 0;
		}

		/*
		 * Stop spinning if we are past
		 * the adjusted deadline.
		 */
		if (cur_time >= sliding_deadline) {
			retval = SPINWAIT_DID_SPIN_SLIDING_THR;
			break;
		}

		/*
		 * We want to arm the monitor for wfe,
		 * so load exclusively the lock.
		 *
		 * NOTE:
		 * we rely on the fact that wfe will
		 * eventually return even if the cache line
		 * is not modified. This way we will keep
		 * looping and checking if the deadlines expired.
		 */
		state = os_atomic_load_exclusive(&lock->lck_mtx_data, relaxed);
		owner = LCK_MTX_STATE_TO_THREAD(state);
		if (owner != NULL) {
			wait_for_event();
			state = ordered_load_mtx(lock);
		} else {
			atomic_exchange_abort();
		}

		loopcount++;
	} while (TRUE);

done_spinning:
#if CONFIG_DTRACE
	/*
	 * Note that we record a different probe id depending on whether
	 * this is a direct or indirect mutex. This allows us to
	 * penalize only lock groups that have debug/stats enabled
	 * with dtrace processing if desired.
	 */
	if (__probable(lock->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)) {
		LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN, lock,
		    mach_absolute_time() - start_time);
	} else {
		LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_SPIN, lock,
		    mach_absolute_time() - start_time);
	}
	/* The lockstat acquire event is recorded by the caller. */
#endif /* CONFIG_DTRACE */

	state = ordered_load_mtx(lock);

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_END,
	    trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(LCK_MTX_STATE_TO_THREAD(state)), lock->lck_mtx_waiters, retval, 0);
	if ((!has_interlock) && (retval != SPINWAIT_ACQUIRED)) {
		/* We must own either the lock or the interlock on return. */
		interlock_lock(lock);
	}

	return retval;
}
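
/*
 * Worked example of the sliding-deadline arithmetic above (added for
 * clarity; the numbers are hypothetical). Suppose real_ncpus = 4 and,
 * when the learning window expires, total_hold_time_samples = 3 with
 * delta = cur_time - start_time equal to 90us worth of abstime:
 *
 *   unfairness       = 3 / 4 = 0
 *   sliding_deadline = start_time + (90us * (4 - 1)) / 3
 *                    = start_time + 90us
 *
 * i.e. we keep spinning for roughly (NCPU - 1) average hold times.
 * If instead total_hold_time_samples were 8, unfairness = 8 / 4 = 2,
 * and the randomized branch spreads the remaining spin budget
 * (high_deadline - cur_time) across the cpus using `bias`.
 */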
/*
 * Common code for mutex locking as spinlock
 */
static inline void
lck_mtx_lock_spin_internal(lck_mtx_t *lock, boolean_t allow_held_as_mutex)
{
	uintptr_t state;

	interlock_lock(lock);
	state = ordered_load_mtx(lock);
	if (LCK_MTX_STATE_TO_THREAD(state)) {
		if (allow_held_as_mutex) {
			lck_mtx_lock_contended(lock, current_thread(), TRUE);
		} else {
			// "Always" variants can never block. If the lock is held and blocking is not allowed
			// then someone is mixing always and non-always calls on the same lock, which is
			// forbidden.
			panic("Attempting to block on a lock taken as spin-always %p", lock);
		}
		return;
	}
	state &= ARM_LCK_WAITERS;                       // Preserve waiters bit
	state |= (LCK_MTX_SPIN_TAG | LCK_ILOCK);        // Add spin tag and maintain interlock
	ordered_store_mtx(lock, state);
	load_memory_barrier();

#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN_ACQUIRE, lock, 0);
#endif /* CONFIG_DTRACE */
}
/*
 * Routine: lck_mtx_lock_spin
 */
void
lck_mtx_lock_spin(lck_mtx_t *lock)
{
	lck_mtx_check_preemption(lock);
	lck_mtx_lock_spin_internal(lock, TRUE);
}
/*
 * Routine: lck_mtx_lock_spin_always
 */
void
lck_mtx_lock_spin_always(lck_mtx_t *lock)
{
	lck_mtx_lock_spin_internal(lock, FALSE);
}
/*
 * Routine: lck_mtx_try_lock
 */
boolean_t
lck_mtx_try_lock(lck_mtx_t *lock)
{
	thread_t        thread = current_thread();

	lck_mtx_verify(lock);
	if (os_atomic_cmpxchg(&lock->lck_mtx_data,
	    0, LCK_MTX_THREAD_TO_STATE(thread), acquire)) {
#if CONFIG_DTRACE
		LOCKSTAT_RECORD(LS_LCK_MTX_TRY_LOCK_ACQUIRE, lock, 0);
#endif /* CONFIG_DTRACE */
		return TRUE;
	}
	return lck_mtx_try_lock_contended(lock, thread);
}
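
/*
 * Illustrative sketch (not part of the original sources): a caller that
 * cannot block can use the try variant and fall back to deferring the work.
 * `example_lck`, `example_do_work` and `example_defer_work` are hypothetical.
 */
#if 0
static void example_do_work(void);
static void example_defer_work(void);

static void
example_try_path(lck_mtx_t *example_lck)
{
	if (lck_mtx_try_lock(example_lck)) {
		example_do_work();
		lck_mtx_unlock(example_lck);
	} else {
		example_defer_work();   /* lock was owned; do not spin or block here */
	}
}
#endif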
static boolean_t NOINLINE
lck_mtx_try_lock_contended(lck_mtx_t *lock, thread_t thread)
{
	thread_t        holding_thread;
	uintptr_t       state;
	int             waiters;

	interlock_lock(lock);
	state = ordered_load_mtx(lock);
	holding_thread = LCK_MTX_STATE_TO_THREAD(state);
	if (holding_thread) {
		interlock_unlock(lock);
		return FALSE;
	}
	waiters = lck_mtx_lock_acquire(lock, NULL);
	state = LCK_MTX_THREAD_TO_STATE(thread);
	if (waiters != 0) {
		state |= ARM_LCK_WAITERS;
	}
	state |= LCK_ILOCK;             // Preserve interlock
	ordered_store_mtx(lock, state); // Set ownership
	interlock_unlock(lock);         // Release interlock, enable preemption
	load_memory_barrier();

	turnstile_cleanup();

	return TRUE;
}
static inline boolean_t
lck_mtx_try_lock_spin_internal(lck_mtx_t *lock, boolean_t allow_held_as_mutex)
{
	uintptr_t state;

	if (!interlock_try(lock)) {
		return FALSE;
	}
	state = ordered_load_mtx(lock);
	if (LCK_MTX_STATE_TO_THREAD(state)) {
		// Lock is held as mutex
		if (allow_held_as_mutex) {
			interlock_unlock(lock);
		} else {
			// "Always" variants can never block. If the lock is held as a normal mutex
			// then someone is mixing always and non-always calls on the same lock, which is
			// forbidden.
			panic("Spin-mutex held as full mutex %p", lock);
		}
		return FALSE;
	}
	state &= ARM_LCK_WAITERS;                       // Preserve waiters bit
	state |= (LCK_MTX_SPIN_TAG | LCK_ILOCK);        // Add spin tag and maintain interlock
	ordered_store_mtx(lock, state);
	load_memory_barrier();

#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_MTX_TRY_SPIN_LOCK_ACQUIRE, lock, 0);
#endif /* CONFIG_DTRACE */
	return TRUE;
}
/*
 * Routine: lck_mtx_try_lock_spin
 */
boolean_t
lck_mtx_try_lock_spin(lck_mtx_t *lock)
{
	return lck_mtx_try_lock_spin_internal(lock, TRUE);
}
/*
 * Routine: lck_mtx_try_lock_spin_always
 */
boolean_t
lck_mtx_try_lock_spin_always(lck_mtx_t *lock)
{
	return lck_mtx_try_lock_spin_internal(lock, FALSE);
}
/*
 * Routine: lck_mtx_unlock
 */
void
lck_mtx_unlock(lck_mtx_t *lock)
{
	thread_t        thread = current_thread();
	uintptr_t       state;
	boolean_t       ilk_held = FALSE;

	lck_mtx_verify(lock);

	state = ordered_load_mtx(lock);
	if (state & LCK_ILOCK) {
		if (LCK_MTX_STATE_TO_THREAD(state) == (thread_t)LCK_MTX_SPIN_TAG) {
			ilk_held = TRUE;        // Interlock is held by (presumably) this thread
		}
		goto slow_case;
	}
	// Locked as a mutex
	if (os_atomic_cmpxchg(&lock->lck_mtx_data,
	    LCK_MTX_THREAD_TO_STATE(thread), 0, release)) {
#if CONFIG_DTRACE
		LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, lock, 0);
#endif /* CONFIG_DTRACE */
		return;
	}
slow_case:
	lck_mtx_unlock_contended(lock, thread, ilk_held);
}
static void NOINLINE
lck_mtx_unlock_contended(lck_mtx_t *lock, thread_t thread, boolean_t ilk_held)
{
	uintptr_t       state;
	boolean_t       cleanup = FALSE;

	if (ilk_held) {
		state = ordered_load_mtx(lock);
	} else {
		interlock_lock(lock);
		state = ordered_load_mtx(lock);
		if (thread != LCK_MTX_STATE_TO_THREAD(state)) {
			panic("lck_mtx_unlock(): Attempt to release lock not owned by thread (%p)", lock);
		}
		if (state & ARM_LCK_WAITERS) {
			if (lck_mtx_unlock_wakeup(lock, thread)) {
				state = ARM_LCK_WAITERS;
			} else {
				state = 0;
			}
			cleanup = TRUE;
			goto unlock;
		}
	}
	state &= ARM_LCK_WAITERS;       /* Clear state, retain waiters bit */
unlock:
	state |= LCK_ILOCK;
	ordered_store_mtx(lock, state);
	interlock_unlock(lock);
	if (cleanup) {
		/*
		 * Do not do any turnstile operations outside of this block.
		 * lock/unlock is called at early stage of boot with single thread,
		 * when turnstile is not yet initialized.
		 * Even without contention we can come through the slow path
		 * if the mutex is acquired as a spin lock.
		 */
		turnstile_cleanup();
	}

#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, lock, 0);
#endif /* CONFIG_DTRACE */
}
/*
 * Routine: lck_mtx_assert
 */
void
lck_mtx_assert(lck_mtx_t *lock, unsigned int type)
{
	thread_t        thread, holder;
	uintptr_t       state;

	state = ordered_load_mtx(lock);
	holder = LCK_MTX_STATE_TO_THREAD(state);
	if (holder == (thread_t)LCK_MTX_SPIN_TAG) {
		// Lock is held in spin mode, owner is unknown.
		return;
	}
	thread = current_thread();
	if (type == LCK_MTX_ASSERT_OWNED) {
		if (thread != holder) {
			panic("lck_mtx_assert(): mutex (%p) owned", lock);
		}
	} else if (type == LCK_MTX_ASSERT_NOTOWNED) {
		if (thread == holder) {
			panic("lck_mtx_assert(): mutex (%p) not owned", lock);
		}
	} else {
		panic("lck_mtx_assert(): invalid arg (%u)", type);
	}
}
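
/*
 * Illustrative sketch (not part of the original sources): using the assert
 * to document and enforce a locking precondition. `example_lck` and
 * `example_update_locked` are hypothetical.
 */
#if 0
static void
example_update_locked(lck_mtx_t *example_lck)
{
	/* Caller must already hold example_lck as a full mutex. */
	lck_mtx_assert(example_lck, LCK_MTX_ASSERT_OWNED);
	/* ... modify state protected by example_lck ... */
}
#endif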
/*
 * Routine: lck_mtx_ilk_unlock
 */
boolean_t
lck_mtx_ilk_unlock(lck_mtx_t *lock)
{
	interlock_unlock(lock);
	return TRUE;
}
/*
 * Routine: lck_mtx_convert_spin
 *
 * Convert a mutex held for spin into a held full mutex
 */
void
lck_mtx_convert_spin(lck_mtx_t *lock)
{
	thread_t        thread = current_thread();
	uintptr_t       state;
	int             waiters;

	state = ordered_load_mtx(lock);
	if (LCK_MTX_STATE_TO_THREAD(state) == thread) {
		return;         // Already owned as mutex, return
	}
	if ((state & LCK_ILOCK) == 0 || (LCK_MTX_STATE_TO_THREAD(state) != (thread_t)LCK_MTX_SPIN_TAG)) {
		panic("lck_mtx_convert_spin: Not held as spinlock (%p)", lock);
	}
	state &= ~(LCK_MTX_THREAD_MASK);        // Clear the spin tag
	ordered_store_mtx(lock, state);
	waiters = lck_mtx_lock_acquire(lock, NULL);     // Acquire to manage priority boosts
	state = LCK_MTX_THREAD_TO_STATE(thread);
	if (waiters != 0) {
		state |= ARM_LCK_WAITERS;
	}
	state |= LCK_ILOCK;             // Preserve interlock
	ordered_store_mtx(lock, state); // Set ownership
	interlock_unlock(lock);         // Release interlock, enable preemption
	turnstile_cleanup();
}
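
/*
 * Illustrative sketch (not part of the original sources): take the lock in
 * spin mode for a short lookup, then convert to a full mutex before doing
 * work that may block. `example_lck` and `example_might_block` are hypothetical.
 */
#if 0
static bool example_might_block(void);

static void
example_spin_then_convert(lck_mtx_t *example_lck)
{
	lck_mtx_lock_spin(example_lck);         /* held as a spinlock, interlock held */
	if (example_might_block()) {
		lck_mtx_convert_spin(example_lck);      /* now held as a full mutex */
		/* ... work that may block ... */
	}
	lck_mtx_unlock(example_lck);            /* handles either mode */
}
#endif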
/*
 * Routine: lck_mtx_destroy
 */
void
lck_mtx_destroy(
	lck_mtx_t       *lck,
	lck_grp_t       *grp)
{
	if (lck->lck_mtx_type != LCK_MTX_TYPE) {
		panic("Destroying invalid mutex %p", lck);
	}
	if (lck->lck_mtx_tag == LCK_MTX_TAG_DESTROYED) {
		panic("Destroying previously destroyed lock %p", lck);
	}
	lck_mtx_assert(lck, LCK_MTX_ASSERT_NOTOWNED);
	lck->lck_mtx_tag = LCK_MTX_TAG_DESTROYED;
	lck_grp_lckcnt_decr(grp, LCK_TYPE_MTX);
	lck_grp_deallocate(grp);
}
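
/*
 * Illustrative sketch (not part of the original sources): the full lifecycle
 * of a group-tracked mutex. `example_lifecycle`, `example_grp` and
 * `example_lck` are hypothetical.
 */
#if 0
static void
example_lifecycle(void)
{
	lck_grp_t *example_grp = lck_grp_alloc_init("example", LCK_GRP_ATTR_NULL);
	lck_mtx_t  example_lck;

	lck_mtx_init(&example_lck, example_grp, LCK_ATTR_NULL);

	lck_mtx_lock(&example_lck);
	/* ... critical section ... */
	lck_mtx_unlock(&example_lck);

	/* Must not be owned at destroy time; this also drops the group's lock count. */
	lck_mtx_destroy(&example_lck, example_grp);
	lck_grp_free(example_grp);
}
#endif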
/*
 * Routine: lck_spin_assert
 */
void
lck_spin_assert(lck_spin_t *lock, unsigned int type)
{
	thread_t        thread, holder;
	uintptr_t       state;

	if (lock->type != LCK_SPIN_TYPE) {
		panic("Invalid spinlock %p", lock);
	}

	state = lock->lck_spin_data;
	holder = (thread_t)(state & ~LCK_ILOCK);
	thread = current_thread();
	if (type == LCK_ASSERT_OWNED) {
		if (holder == 0) {
			panic("Lock not owned %p = %lx", lock, state);
		}
		if (holder != thread) {
			panic("Lock not owned by current thread %p = %lx", lock, state);
		}
		if ((state & LCK_ILOCK) == 0) {
			panic("Lock bit not set %p = %lx", lock, state);
		}
	} else if (type == LCK_ASSERT_NOTOWNED) {
		if (holder == thread) {
			panic("Lock owned by current thread %p = %lx", lock, state);
		}
	} else {
		panic("lck_spin_assert(): invalid arg (%u)", type);
	}
}
boolean_t
lck_rw_lock_yield_shared(lck_rw_t *lck, boolean_t force_yield)
{
	lck_rw_word_t   word;

	lck_rw_assert(lck, LCK_RW_ASSERT_SHARED);

	word.data = ordered_load_rw(lck);
	if (word.want_excl || word.want_upgrade || force_yield) {
		lck_rw_unlock_shared(lck);
		mutex_pause(2);
		lck_rw_lock_shared(lck);
		return TRUE;
	}

	return FALSE;
}
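
/*
 * Illustrative sketch (not part of the original sources): periodically
 * yielding a shared (read) lock inside a long scan so that pending writers
 * can make progress. `example_rw_lck` and `example_scan_one` are hypothetical;
 * a real caller would revalidate its scan state whenever the yield returns TRUE.
 */
#if 0
static void example_scan_one(int index);

static void
example_long_scan(lck_rw_t *example_rw_lck, int example_nitems)
{
	lck_rw_lock_shared(example_rw_lck);
	for (int i = 0; i < example_nitems; i++) {
		example_scan_one(i);
		/* Drop and retake the read lock if a writer is waiting. */
		(void) lck_rw_lock_yield_shared(example_rw_lck, FALSE);
	}
	lck_rw_unlock_shared(example_rw_lck);
}
#endif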
/*
 * Routine: kdp_lck_mtx_lock_spin_is_acquired
 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
 */
boolean_t
kdp_lck_mtx_lock_spin_is_acquired(lck_mtx_t *lck)
{
	uintptr_t       state;

	if (not_in_kdp) {
		panic("panic: spinlock acquired check done outside of kernel debugger");
	}
	state = ordered_load_mtx(lck);
	if (state == LCK_MTX_TAG_DESTROYED) {
		return FALSE;
	}
	if (LCK_MTX_STATE_TO_THREAD(state) || (state & LCK_ILOCK)) {
		return TRUE;
	}
	return FALSE;
}
void
kdp_lck_mtx_find_owner(__unused struct waitq * waitq, event64_t event, thread_waitinfo_t * waitinfo)
{
	lck_mtx_t * mutex = LCK_EVENT_TO_MUTEX(event);
	waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(mutex);
	uintptr_t state   = ordered_load_mtx(mutex);
	thread_t holder   = LCK_MTX_STATE_TO_THREAD(state);
	if ((uintptr_t)holder == (uintptr_t)LCK_MTX_SPIN_TAG) {
		waitinfo->owner = STACKSHOT_WAITOWNER_MTXSPIN;
	} else {
		assertf(state != (uintptr_t)LCK_MTX_TAG_DESTROYED, "state=0x%llx", (uint64_t)state);
		assertf(state != (uintptr_t)LCK_MTX_TAG_INDIRECT, "state=0x%llx", (uint64_t)state);
		waitinfo->owner = thread_tid(holder);
	}
}
void
kdp_rwlck_find_owner(__unused struct waitq * waitq, event64_t event, thread_waitinfo_t * waitinfo)
{
	lck_rw_t        *rwlck = NULL;

	switch (waitinfo->wait_type) {
	case kThreadWaitKernelRWLockRead:
		rwlck = READ_EVENT_TO_RWLOCK(event);
		break;
	case kThreadWaitKernelRWLockWrite:
	case kThreadWaitKernelRWLockUpgrade:
		rwlck = WRITE_EVENT_TO_RWLOCK(event);
		break;
	default:
		panic("%s was called with an invalid blocking type", __FUNCTION__);
		break;
	}
	waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(rwlck);
	waitinfo->owner = thread_tid(rwlck->lck_rw_owner);
}