/*
 * Copyright (c) 2007-2018 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright notice
 * and this permission notice appear in all copies of the software,
 * derivative works or modified versions, and any portions thereof, and that
 * both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.
 * CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES
 * WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA
 *
 * any improvements or extensions that they make and grant Carnegie Mellon the
 * rights to redistribute these changes.
 */
/*
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *
 *	Locking primitives implementation
 */
#define LOCK_PRIVATE 1

#include <mach_ldebug.h>

#include <kern/kalloc.h>
#include <kern/lock_stat.h>
#include <kern/locks.h>
#include <kern/misc_protos.h>
#include <kern/thread.h>
#include <kern/processor.h>
#include <kern/sched_prim.h>
#include <kern/debug.h>
#include <kern/kcdata.h>

#include <arm/cpu_data_internal.h>
#include <arm/proc_reg.h>

#include <machine/atomic.h>
#include <machine/machine_cpu.h>

#include <sys/kdebug.h>

#if CONFIG_DTRACE
#define DTRACE_RW_SHARED        0x0     // reader
#define DTRACE_RW_EXCL          0x1     // writer
#define DTRACE_NO_FLAG          0x0     // not applicable
#endif  /* CONFIG_DTRACE */
#define LCK_RW_LCK_EXCLUSIVE_CODE       0x100
#define LCK_RW_LCK_EXCLUSIVE1_CODE      0x101
#define LCK_RW_LCK_SHARED_CODE          0x102
#define LCK_RW_LCK_SH_TO_EX_CODE        0x103
#define LCK_RW_LCK_SH_TO_EX1_CODE       0x104
#define LCK_RW_LCK_EX_TO_SH_CODE        0x105
#define ANY_LOCK_DEBUG  (USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG)

// Panic in tests that check lock usage correctness.
// Such panics are undesirable while we are already in a panic or a debugger is running.
#define LOCK_CORRECTNESS_PANIC() (kernel_debugger_entry_count == 0)
unsigned int    LcksOpts = 0;

#define ADAPTIVE_SPIN_ENABLE 0x1

#if __SMP__
int lck_mtx_adaptive_spin_mode = ADAPTIVE_SPIN_ENABLE;
#else /* __SMP__ */
int lck_mtx_adaptive_spin_mode = 0;
#endif /* __SMP__ */

#define SPINWAIT_OWNER_CHECK_COUNT 4
typedef enum {
	SPINWAIT_ACQUIRED,              /* Got the lock. */
	SPINWAIT_INTERLOCK,             /* Got the interlock, no owner, but caller must finish acquiring the lock. */
	SPINWAIT_DID_SPIN,              /* Got the interlock, spun, but failed to get the lock. */
	SPINWAIT_DID_NOT_SPIN,          /* Got the interlock, did not spin. */
} spinwait_result_t;

#if CONFIG_DTRACE && __SMP__
extern uint64_t dtrace_spin_threshold;
#endif

extern unsigned int not_in_kdp;
/*
 * We often want to know the addresses of the callers
 * of the various lock routines. However, this information
 * is only used for debugging and statistics.
 */
#define INVALID_PC      ((void *) VM_MAX_KERNEL_ADDRESS)
#define INVALID_THREAD  ((void *) VM_MAX_KERNEL_ADDRESS)

#ifdef  lint
/*
 * Eliminate lint complaints about unused local pc variables.
 */
#define OBTAIN_PC(pc, l)        ++pc
#else   /* lint */
#define OBTAIN_PC(pc, l)
#endif  /* lint */
/*
 * Portable lock package implementation of usimple_locks.
 */

/*
 * Owner thread pointer when lock held in spin mode
 */
#define LCK_MTX_SPIN_TAG        0xfffffff0

#define interlock_lock(lock)    hw_lock_bit    ((hw_lock_bit_t*)(&(lock)->lck_mtx_data), LCK_ILOCK_BIT, LCK_GRP_NULL)
#define interlock_try(lock)     hw_lock_bit_try((hw_lock_bit_t*)(&(lock)->lck_mtx_data), LCK_ILOCK_BIT, LCK_GRP_NULL)
#define interlock_unlock(lock)  hw_unlock_bit  ((hw_lock_bit_t*)(&(lock)->lck_mtx_data), LCK_ILOCK_BIT)
#define lck_rw_ilk_lock(lock)   hw_lock_bit  ((hw_lock_bit_t*)(&(lock)->lck_rw_tag), LCK_RW_INTERLOCK_BIT, LCK_GRP_NULL)
#define lck_rw_ilk_unlock(lock) hw_unlock_bit((hw_lock_bit_t*)(&(lock)->lck_rw_tag), LCK_RW_INTERLOCK_BIT)

#define load_memory_barrier()   os_atomic_thread_fence(acquire)

// Enforce program order of loads and stores.
#define ordered_load(target) \
	os_atomic_load(target, compiler_acq_rel)
#define ordered_store(target, value) \
	os_atomic_store(target, value, compiler_acq_rel)

#define ordered_load_mtx(lock)                  ordered_load(&(lock)->lck_mtx_data)
#define ordered_store_mtx(lock, value)          ordered_store(&(lock)->lck_mtx_data, (value))
#define ordered_load_rw(lock)                   ordered_load(&(lock)->lck_rw_data)
#define ordered_store_rw(lock, value)           ordered_store(&(lock)->lck_rw_data, (value))
#define ordered_load_rw_owner(lock)             ordered_load(&(lock)->lck_rw_owner)
#define ordered_store_rw_owner(lock, value)     ordered_store(&(lock)->lck_rw_owner, (value))
#define ordered_load_hw(lock)                   ordered_load(&(lock)->lock_data)
#define ordered_store_hw(lock, value)           ordered_store(&(lock)->lock_data, (value))
#define ordered_load_bit(lock)                  ordered_load((lock))
#define ordered_store_bit(lock, value)          ordered_store((lock), (value))

// Prevent the compiler from reordering memory operations around this
#define compiler_memory_fence() __asm__ volatile ("" ::: "memory")

#define LOCK_PANIC_TIMEOUT      0xc00000
#define NOINLINE                __attribute__((noinline))
#if __arm__
#define interrupts_disabled(mask) (mask & PSR_INTMASK)
#else
#define interrupts_disabled(mask) (mask & DAIF_IRQF)
#endif

#if __arm__
#define enable_fiq()            __asm__ volatile ("cpsie f" ::: "memory");
#define enable_interrupts()     __asm__ volatile ("cpsie if" ::: "memory");
#endif
/*
 * Forward declarations
 */

static void lck_rw_lock_shared_gen(lck_rw_t *lck);
static void lck_rw_lock_exclusive_gen(lck_rw_t *lck);
static boolean_t lck_rw_lock_shared_to_exclusive_success(lck_rw_t *lck);
static boolean_t lck_rw_lock_shared_to_exclusive_failure(lck_rw_t *lck, uint32_t prior_lock_state);
static void lck_rw_lock_exclusive_to_shared_gen(lck_rw_t *lck, uint32_t prior_lock_state);
static lck_rw_type_t lck_rw_done_gen(lck_rw_t *lck, uint32_t prior_lock_state);
static boolean_t lck_rw_grab(lck_rw_t *lock, int mode, boolean_t wait);
/*
 * atomic exchange API is a low level abstraction of the operations
 * to atomically read, modify, and write a pointer. This abstraction works
 * for both Intel and ARMv8.1 compare and exchange atomic instructions as
 * well as the ARM exclusive instructions.
 *
 * atomic_exchange_begin() - begin exchange and retrieve current value
 * atomic_exchange_complete() - conclude an exchange
 * atomic_exchange_abort() - cancel an exchange started with atomic_exchange_begin()
 */
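/*
 * Illustrative usage sketch of the exchange API (not part of this file;
 * 'word' and 'MY_BIT' are hypothetical): retry until the read-modify-write
 * completes, or abort to release the exclusive monitor when bailing out.
 *
 *	uint32_t data, prev;
 *	for (;;) {
 *		data = atomic_exchange_begin32(&word, &prev, memory_order_relaxed);
 *		if (data & MY_BIT) {
 *			atomic_exchange_abort();	// give up; clears the monitor
 *			break;
 *		}
 *		data |= MY_BIT;
 *		if (atomic_exchange_complete32(&word, prev, data, memory_order_relaxed)) {
 *			break;				// bit was set atomically
 *		}
 *	}
 */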
__unused static uint32_t
load_exclusive32(uint32_t *target, enum memory_order ord)
{
	uint32_t        value;

#if __arm__
	if (memory_order_has_release(ord)) {
		// Pre-load release barrier
		atomic_thread_fence(memory_order_release);
	}
	value = __builtin_arm_ldrex(target);
#else
	if (memory_order_has_acquire(ord)) {
		value = __builtin_arm_ldaex(target);    // ldaxr
	} else {
		value = __builtin_arm_ldrex(target);    // ldxr
	}
#endif
	return value;
}

__unused static boolean_t
store_exclusive32(uint32_t *target, uint32_t value, enum memory_order ord)
{
	boolean_t       err;

#if __arm__
	err = __builtin_arm_strex(value, target);
	if (memory_order_has_acquire(ord)) {
		// Post-store acquire barrier
		atomic_thread_fence(memory_order_acquire);
	}
#else
	if (memory_order_has_release(ord)) {
		err = __builtin_arm_stlex(value, target);       // stlxr
	} else {
		err = __builtin_arm_strex(value, target);       // stxr
	}
#endif
	return !err;
}

static uint32_t
atomic_exchange_begin32(uint32_t *target, uint32_t *previous, enum memory_order ord)
{
	uint32_t        val;

#if __ARM_ATOMICS_8_1
	ord = memory_order_relaxed;
#endif
	val = load_exclusive32(target, ord);
	*previous = val;
	return val;
}

static boolean_t
atomic_exchange_complete32(uint32_t *target, uint32_t previous, uint32_t newval, enum memory_order ord)
{
#if __ARM_ATOMICS_8_1
	return __c11_atomic_compare_exchange_strong((_Atomic uint32_t *)target, &previous, newval, ord, memory_order_relaxed);
#else
	(void)previous;         // Previous not needed, monitor is held
	return store_exclusive32(target, newval, ord);
#endif
}

static void
atomic_exchange_abort(void)
{
	os_atomic_clear_exclusive();
}
boolean_t
atomic_test_and_set32(uint32_t *target, uint32_t test_mask, uint32_t set_mask, enum memory_order ord, boolean_t wait)
{
	uint32_t        value, prev;

	for (;;) {
		value = atomic_exchange_begin32(target, &prev, ord);
		if (value & test_mask) {
			if (wait) {
				wait_for_event();       // Wait with monitor held
			} else {
				atomic_exchange_abort();        // Clear exclusive monitor
			}
			return FALSE;
		}
		value |= set_mask;
		if (atomic_exchange_complete32(target, prev, value, ord)) {
			return TRUE;
		}
	}
}

boolean_t
hw_atomic_test_and_set32(uint32_t *target, uint32_t test_mask, uint32_t set_mask, enum memory_order ord, boolean_t wait)
{
	return atomic_test_and_set32(target, test_mask, set_mask, ord, wait);
}
329 _disable_preemption(void)
331 thread_t thread
= current_thread();
332 unsigned int count
= thread
->machine
.preemption_count
;
335 if (__improbable(count
== 0)) {
336 panic("Preemption count overflow");
339 os_atomic_store(&thread
->machine
.preemption_count
, count
, compiler_acq_rel
);
343 * This function checks whether an AST_URGENT has been pended.
345 * It is called once the preemption has been reenabled, which means the thread
346 * may have been preempted right before this was called, and when this function
347 * actually performs the check, we've changed CPU.
349 * This race is however benign: the point of AST_URGENT is to trigger a context
350 * switch, so if one happened, there's nothing left to check for, and AST_URGENT
351 * was cleared in the process.
353 * It follows that this check cannot have false negatives, which allows us
354 * to avoid fiddling with interrupt state for the vast majority of cases
355 * when the check will actually be negative.
358 kernel_preempt_check(thread_t thread
)
360 cpu_data_t
*cpu_data_ptr
;
364 #define INTERRUPT_MASK PSR_IRQF
366 #define INTERRUPT_MASK DAIF_IRQF
370 * This check is racy and could load from another CPU's pending_ast mask,
371 * but as described above, this can't have false negatives.
373 cpu_data_ptr
= os_atomic_load(&thread
->machine
.CpuDatap
, compiler_acq_rel
);
374 if (__probable((cpu_data_ptr
->cpu_pending_ast
& AST_URGENT
) == 0)) {
378 /* If interrupts are masked, we can't take an AST here */
379 state
= get_interrupts();
380 if ((state
& INTERRUPT_MASK
) == 0) {
381 disable_interrupts_noread(); // Disable interrupts
384 * Reload cpu_data_ptr: a context switch would cause it to change.
385 * Now that interrupts are disabled, this will debounce false positives.
387 cpu_data_ptr
= os_atomic_load(&thread
->machine
.CpuDatap
, compiler_acq_rel
);
388 if (thread
->machine
.CpuDatap
->cpu_pending_ast
& AST_URGENT
) {
390 #if __ARM_USER_PROTECT__
391 uintptr_t up
= arm_user_protect_begin(thread
);
392 #endif // __ARM_USER_PROTECT__
395 ast_taken_kernel(); // Handle urgent AST
397 #if __ARM_USER_PROTECT__
398 arm_user_protect_end(thread
, up
, TRUE
);
399 #endif // __ARM_USER_PROTECT__
401 return; // Return early on arm only due to FIQ enabling
404 restore_interrupts(state
); // Enable interrupts
409 _enable_preemption(void)
411 thread_t thread
= current_thread();
412 unsigned int count
= thread
->machine
.preemption_count
;
414 if (__improbable(count
== 0)) {
415 panic("Preemption count underflow");
419 os_atomic_store(&thread
->machine
.preemption_count
, count
, compiler_acq_rel
);
421 kernel_preempt_check(thread
);
426 get_preemption_level(void)
428 return current_thread()->machine
.preemption_count
;
432 static inline boolean_t
433 interlock_try_disable_interrupts(
437 *istate
= ml_set_interrupts_enabled(FALSE
);
439 if (interlock_try(mutex
)) {
442 ml_set_interrupts_enabled(*istate
);
448 interlock_unlock_enable_interrupts(
452 interlock_unlock(mutex
);
453 ml_set_interrupts_enabled(istate
);
458 * Routine: lck_spin_alloc_init
467 if ((lck
= (lck_spin_t
*) kalloc(sizeof(lck_spin_t
))) != 0) {
468 lck_spin_init(lck
, grp
, attr
);
475 * Routine: lck_spin_free
482 lck_spin_destroy(lck
, grp
);
483 kfree(lck
, sizeof(lck_spin_t
));
487 * Routine: lck_spin_init
493 __unused lck_attr_t
* attr
)
495 lck
->type
= LCK_SPIN_TYPE
;
496 hw_lock_init(&lck
->hwlock
);
498 lck_grp_reference(grp
);
499 lck_grp_lckcnt_incr(grp
, LCK_TYPE_SPIN
);
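/*
 * Illustrative lifecycle sketch (not part of this file; 'my_grp' is a
 * hypothetical lck_grp_t the caller has allocated elsewhere):
 *
 *	lck_spin_t *sl = lck_spin_alloc_init(my_grp, LCK_ATTR_NULL);
 *	lck_spin_lock(sl);
 *	// ... short critical section; preemption stays disabled while held ...
 *	lck_spin_unlock(sl);
 *	lck_spin_free(sl, my_grp);
 */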
504 * arm_usimple_lock is a lck_spin_t without a group or attributes
507 arm_usimple_lock_init(simple_lock_t lck
, __unused
unsigned short initial_value
)
509 lck
->type
= LCK_SPIN_TYPE
;
510 hw_lock_init(&lck
->hwlock
);
515 * Routine: lck_spin_lock
518 lck_spin_lock(lck_spin_t
*lock
)
520 #if DEVELOPMENT || DEBUG
521 if (lock
->type
!= LCK_SPIN_TYPE
) {
522 panic("Invalid spinlock %p", lock
);
524 #endif // DEVELOPMENT || DEBUG
525 hw_lock_lock(&lock
->hwlock
, LCK_GRP_NULL
);
529 lck_spin_lock_grp(lck_spin_t
*lock
, lck_grp_t
*grp
)
532 #if DEVELOPMENT || DEBUG
533 if (lock
->type
!= LCK_SPIN_TYPE
) {
534 panic("Invalid spinlock %p", lock
);
536 #endif // DEVELOPMENT || DEBUG
537 hw_lock_lock(&lock
->hwlock
, grp
);
541 * Routine: lck_spin_lock_nopreempt
544 lck_spin_lock_nopreempt(lck_spin_t
*lock
)
546 #if DEVELOPMENT || DEBUG
547 if (lock
->type
!= LCK_SPIN_TYPE
) {
548 panic("Invalid spinlock %p", lock
);
550 #endif // DEVELOPMENT || DEBUG
551 hw_lock_lock_nopreempt(&lock
->hwlock
, LCK_GRP_NULL
);
555 lck_spin_lock_nopreempt_grp(lck_spin_t
*lock
, lck_grp_t
*grp
)
558 #if DEVELOPMENT || DEBUG
559 if (lock
->type
!= LCK_SPIN_TYPE
) {
560 panic("Invalid spinlock %p", lock
);
562 #endif // DEVELOPMENT || DEBUG
563 hw_lock_lock_nopreempt(&lock
->hwlock
, grp
);
567 * Routine: lck_spin_try_lock
570 lck_spin_try_lock(lck_spin_t
*lock
)
572 return hw_lock_try(&lock
->hwlock
, LCK_GRP_NULL
);
576 lck_spin_try_lock_grp(lck_spin_t
*lock
, lck_grp_t
*grp
)
579 return hw_lock_try(&lock
->hwlock
, grp
);
583 * Routine: lck_spin_try_lock_nopreempt
586 lck_spin_try_lock_nopreempt(lck_spin_t
*lock
)
588 return hw_lock_try_nopreempt(&lock
->hwlock
, LCK_GRP_NULL
);
592 lck_spin_try_lock_nopreempt_grp(lck_spin_t
*lock
, lck_grp_t
*grp
)
595 return hw_lock_try_nopreempt(&lock
->hwlock
, grp
);
599 * Routine: lck_spin_unlock
602 lck_spin_unlock(lck_spin_t
*lock
)
604 #if DEVELOPMENT || DEBUG
605 if ((LCK_MTX_STATE_TO_THREAD(lock
->lck_spin_data
) != current_thread()) && LOCK_CORRECTNESS_PANIC()) {
606 panic("Spinlock not owned by thread %p = %lx", lock
, lock
->lck_spin_data
);
608 if (lock
->type
!= LCK_SPIN_TYPE
) {
609 panic("Invalid spinlock type %p", lock
);
611 #endif // DEVELOPMENT || DEBUG
612 hw_lock_unlock(&lock
->hwlock
);
616 * Routine: lck_spin_unlock_nopreempt
619 lck_spin_unlock_nopreempt(lck_spin_t
*lock
)
621 #if DEVELOPMENT || DEBUG
622 if ((LCK_MTX_STATE_TO_THREAD(lock
->lck_spin_data
) != current_thread()) && LOCK_CORRECTNESS_PANIC()) {
623 panic("Spinlock not owned by thread %p = %lx", lock
, lock
->lck_spin_data
);
625 if (lock
->type
!= LCK_SPIN_TYPE
) {
626 panic("Invalid spinlock type %p", lock
);
628 #endif // DEVELOPMENT || DEBUG
629 hw_lock_unlock_nopreempt(&lock
->hwlock
);
633 * Routine: lck_spin_destroy
640 if (lck
->lck_spin_data
== LCK_SPIN_TAG_DESTROYED
) {
643 lck
->lck_spin_data
= LCK_SPIN_TAG_DESTROYED
;
645 lck_grp_lckcnt_decr(grp
, LCK_TYPE_SPIN
);
646 lck_grp_deallocate(grp
);
651 * Routine: kdp_lck_spin_is_acquired
652 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
655 kdp_lck_spin_is_acquired(lck_spin_t
*lck
)
658 panic("panic: spinlock acquired check done outside of kernel debugger");
660 return ((lck
->lck_spin_data
& ~LCK_SPIN_TAG_DESTROYED
) != 0) ? TRUE
:FALSE
;
664 * Initialize a usimple_lock.
666 * No change in preemption state.
673 simple_lock_init((simple_lock_t
) l
, tag
);
678 * Acquire a usimple_lock.
680 * Returns with preemption disabled. Note
681 * that the hw_lock routines are responsible for
682 * maintaining preemption state.
687 LCK_GRP_ARG(lck_grp_t
*grp
))
689 simple_lock((simple_lock_t
) l
, LCK_GRP_PROBEARG(grp
));
693 extern void sync(void);
696 * Release a usimple_lock.
698 * Returns with preemption enabled. Note
699 * that the hw_lock routines are responsible for
700 * maintaining preemption state.
706 simple_unlock((simple_lock_t
)l
);
711 * Conditionally acquire a usimple_lock.
713 * On success, returns with preemption disabled.
714 * On failure, returns with preemption in the same state
715 * as when first invoked. Note that the hw_lock routines
716 * are responsible for maintaining preemption state.
718 * XXX No stats are gathered on a miss; I preserved this
719 * behavior from the original assembly-language code, but
720 * doesn't it make sense to log misses? XXX
726 LCK_GRP_ARG(lck_grp_t
*grp
))
728 return simple_lock_try((simple_lock_t
) l
, grp
);
732 * The C portion of the shared/exclusive locks package.
736 * compute the deadline to spin against when
737 * waiting for a change of state on a lck_rw_t
740 static inline uint64_t
741 lck_rw_deadline_for_spin(lck_rw_t
*lck
)
745 word
.data
= ordered_load_rw(lck
);
746 if (word
.can_sleep
) {
747 if (word
.r_waiting
|| word
.w_waiting
|| (word
.shared_count
> machine_info
.max_cpus
)) {
749 * there are already threads waiting on this lock... this
750 * implies that they have spun beyond their deadlines waiting for
751 * the desired state to show up so we will not bother spinning at this time...
753 * the current number of threads sharing this lock exceeds our capacity to run them
754 * concurrently and since all states we're going to spin for require the rw_shared_count
755 * to be at 0, we'll not bother spinning since the latency for this to happen is
758 return mach_absolute_time();
760 return mach_absolute_time() + MutexSpin
;
762 return mach_absolute_time() + (100000LL * 1000000000LL);
768 lck_rw_drain_status(lck_rw_t
*lock
, uint32_t status_mask
, boolean_t wait __unused
)
771 uint64_t deadline
= 0;
775 deadline
= lck_rw_deadline_for_spin(lock
);
779 data
= load_exclusive32(&lock
->lck_rw_data
, memory_order_acquire_smp
);
780 if ((data
& status_mask
) == 0) {
786 os_atomic_clear_exclusive();
788 if (!wait
|| (mach_absolute_time() >= deadline
)) {
792 os_atomic_clear_exclusive();
797 data
= ordered_load_rw(lock
);
798 if ((data
& status_mask
) == 0) {
807 * Spin while interlock is held.
810 lck_rw_interlock_spin(lck_rw_t
*lock
)
816 data
= load_exclusive32(&lock
->lck_rw_data
, memory_order_relaxed
);
817 if (data
& LCK_RW_INTERLOCK
) {
820 os_atomic_clear_exclusive();
825 panic("lck_rw_interlock_spin(): Interlock locked %p %x", lock
, lock
->lck_rw_data
);
830 * We disable interrupts while holding the RW interlock to prevent an
831 * interrupt from exacerbating hold time.
832 * Hence, local helper functions lck_interlock_lock()/lck_interlock_unlock().
834 static inline boolean_t
835 lck_interlock_lock(lck_rw_t
*lck
)
839 istate
= ml_set_interrupts_enabled(FALSE
);
840 lck_rw_ilk_lock(lck
);
845 lck_interlock_unlock(lck_rw_t
*lck
, boolean_t istate
)
847 lck_rw_ilk_unlock(lck
);
848 ml_set_interrupts_enabled(istate
);
852 #define LCK_RW_GRAB_WANT 0
853 #define LCK_RW_GRAB_SHARED 1
856 lck_rw_grab(lck_rw_t
*lock
, int mode
, boolean_t wait
)
858 uint64_t deadline
= 0;
864 deadline
= lck_rw_deadline_for_spin(lock
);
867 wait
= FALSE
; // Don't spin on UP systems
871 data
= atomic_exchange_begin32(&lock
->lck_rw_data
, &prev
, memory_order_acquire_smp
);
872 if (data
& LCK_RW_INTERLOCK
) {
873 atomic_exchange_abort();
874 lck_rw_interlock_spin(lock
);
878 if (mode
== LCK_RW_GRAB_WANT
) {
879 if ((data
& LCK_RW_WANT_EXCL
) == 0) {
880 data
|= LCK_RW_WANT_EXCL
;
883 } else { // LCK_RW_GRAB_SHARED
884 if (((data
& (LCK_RW_WANT_EXCL
| LCK_RW_WANT_UPGRADE
)) == 0) ||
885 (((data
& LCK_RW_SHARED_MASK
)) && ((data
& LCK_RW_PRIV_EXCL
) == 0))) {
886 data
+= LCK_RW_SHARED_READER
;
891 if (atomic_exchange_complete32(&lock
->lck_rw_data
, prev
, data
, memory_order_acquire_smp
)) {
895 if (wait
) { // Non-waiting
898 atomic_exchange_abort();
900 if (!wait
|| (mach_absolute_time() >= deadline
)) {
909 * Routine: lck_rw_alloc_init
918 if ((lck
= (lck_rw_t
*)kalloc(sizeof(lck_rw_t
))) != 0) {
919 lck_rw_init(lck
, grp
, attr
);
926 * Routine: lck_rw_free
933 lck_rw_destroy(lck
, grp
);
934 kfree(lck
, sizeof(lck_rw_t
));
938 * Routine: lck_rw_init
946 if (attr
== LCK_ATTR_NULL
) {
947 attr
= &LockDefaultLckAttr
;
949 memset(lck
, 0, sizeof(lck_rw_t
));
950 lck
->lck_rw_can_sleep
= TRUE
;
951 if ((attr
->lck_attr_val
& LCK_ATTR_RW_SHARED_PRIORITY
) == 0) {
952 lck
->lck_rw_priv_excl
= TRUE
;
955 lck_grp_reference(grp
);
956 lck_grp_lckcnt_incr(grp
, LCK_TYPE_RW
);
961 * Routine: lck_rw_destroy
968 if (lck
->lck_rw_tag
== LCK_RW_TAG_DESTROYED
) {
972 lck_rw_assert(lck
, LCK_RW_ASSERT_NOTHELD
);
974 lck
->lck_rw_tag
= LCK_RW_TAG_DESTROYED
;
975 lck_grp_lckcnt_decr(grp
, LCK_TYPE_RW
);
976 lck_grp_deallocate(grp
);
981 * Routine: lck_rw_lock
986 lck_rw_type_t lck_rw_type
)
988 if (lck_rw_type
== LCK_RW_TYPE_SHARED
) {
989 lck_rw_lock_shared(lck
);
990 } else if (lck_rw_type
== LCK_RW_TYPE_EXCLUSIVE
) {
991 lck_rw_lock_exclusive(lck
);
993 panic("lck_rw_lock(): Invalid RW lock type: %x", lck_rw_type
);
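/*
 * Illustrative usage sketch (not part of this file; 'my_rw' is hypothetical
 * and assumed to have been set up elsewhere with lck_rw_init()):
 *
 *	lck_rw_lock(&my_rw, LCK_RW_TYPE_SHARED);	// concurrent readers allowed
 *	// ... read-only access ...
 *	lck_rw_unlock(&my_rw, LCK_RW_TYPE_SHARED);
 *
 *	lck_rw_lock(&my_rw, LCK_RW_TYPE_EXCLUSIVE);	// single writer
 *	// ... modify protected state ...
 *	lck_rw_unlock(&my_rw, LCK_RW_TYPE_EXCLUSIVE);
 */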
998 * Routine: lck_rw_lock_exclusive
1001 lck_rw_lock_exclusive(lck_rw_t
*lock
)
1003 thread_t thread
= current_thread();
1005 thread
->rwlock_count
++;
1006 if (atomic_test_and_set32(&lock
->lck_rw_data
,
1007 (LCK_RW_SHARED_MASK
| LCK_RW_WANT_EXCL
| LCK_RW_WANT_UPGRADE
| LCK_RW_INTERLOCK
),
1008 LCK_RW_WANT_EXCL
, memory_order_acquire_smp
, FALSE
)) {
1010 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE
, lock
, DTRACE_RW_EXCL
);
1011 #endif /* CONFIG_DTRACE */
1013 lck_rw_lock_exclusive_gen(lock
);
1016 thread_t owner
= ordered_load_rw_owner(lock
);
1017 assertf(owner
== THREAD_NULL
, "state=0x%x, owner=%p", ordered_load_rw(lock
), owner
);
1019 ordered_store_rw_owner(lock
, thread
);
1023 * Routine: lck_rw_lock_shared
1026 lck_rw_lock_shared(lck_rw_t
*lock
)
1028 uint32_t data
, prev
;
1030 current_thread()->rwlock_count
++;
1032 data
= atomic_exchange_begin32(&lock
->lck_rw_data
, &prev
, memory_order_acquire_smp
);
1033 if (data
& (LCK_RW_WANT_EXCL
| LCK_RW_WANT_UPGRADE
| LCK_RW_INTERLOCK
)) {
1034 atomic_exchange_abort();
1035 lck_rw_lock_shared_gen(lock
);
1038 data
+= LCK_RW_SHARED_READER
;
1039 if (atomic_exchange_complete32(&lock
->lck_rw_data
, prev
, data
, memory_order_acquire_smp
)) {
1045 thread_t owner
= ordered_load_rw_owner(lock
);
1046 assertf(owner
== THREAD_NULL
, "state=0x%x, owner=%p", ordered_load_rw(lock
), owner
);
1049 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE
, lock
, DTRACE_RW_SHARED
);
1050 #endif /* CONFIG_DTRACE */
1055 * Routine: lck_rw_lock_shared_to_exclusive
1057 * False returned upon failure, in this case the shared lock is dropped.
1060 lck_rw_lock_shared_to_exclusive(lck_rw_t
*lock
)
1062 uint32_t data
, prev
;
1065 data
= atomic_exchange_begin32(&lock
->lck_rw_data
, &prev
, memory_order_acquire_smp
);
1066 if (data
& LCK_RW_INTERLOCK
) {
1067 atomic_exchange_abort();
1068 lck_rw_interlock_spin(lock
);
1071 if (data
& LCK_RW_WANT_UPGRADE
) {
1072 data
-= LCK_RW_SHARED_READER
;
1073 if ((data
& LCK_RW_SHARED_MASK
) == 0) { /* we were the last reader */
1074 data
&= ~(LCK_RW_W_WAITING
); /* so clear the wait indicator */
1076 if (atomic_exchange_complete32(&lock
->lck_rw_data
, prev
, data
, memory_order_acquire_smp
)) {
1077 return lck_rw_lock_shared_to_exclusive_failure(lock
, prev
);
1080 data
|= LCK_RW_WANT_UPGRADE
; /* ask for WANT_UPGRADE */
1081 data
-= LCK_RW_SHARED_READER
; /* and shed our read count */
1082 if (atomic_exchange_complete32(&lock
->lck_rw_data
, prev
, data
, memory_order_acquire_smp
)) {
1088 /* we now own the WANT_UPGRADE */
1089 if (data
& LCK_RW_SHARED_MASK
) { /* check to see if all of the readers are drained */
1090 lck_rw_lock_shared_to_exclusive_success(lock
); /* if not, we need to go wait */
1093 thread_t owner
= ordered_load_rw_owner(lock
);
1094 assertf(owner
== THREAD_NULL
, "state=0x%x, owner=%p", ordered_load_rw(lock
), owner
);
1096 ordered_store_rw_owner(lock
, current_thread());
1098 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE
, lock
, 0);
1099 #endif /* CONFIG_DTRACE */
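/*
 * Illustrative sketch of the upgrade path (not part of this file; 'my_rw' is
 * hypothetical). On failure the shared hold has already been dropped, so the
 * caller must reacquire from scratch:
 *
 *	lck_rw_lock_shared(&my_rw);
 *	if (!lck_rw_lock_shared_to_exclusive(&my_rw)) {
 *		// shared hold was dropped; take the lock exclusively again
 *		lck_rw_lock_exclusive(&my_rw);
 *	}
 *	// ... exclusive access either way ...
 *	lck_rw_unlock_exclusive(&my_rw);
 */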
 * Routine:	lck_rw_lock_shared_to_exclusive_failure
 *	Fast path code has already dropped our read
 *	count and determined that someone else owns 'lck_rw_want_upgrade';
 *	if 'lck_rw_shared_count' == 0, it has also already dropped 'lck_w_waiting'.
 *	All we need to do here is determine if a wakeup is needed.
1113 lck_rw_lock_shared_to_exclusive_failure(
1115 uint32_t prior_lock_state
)
1117 thread_t thread
= current_thread();
1118 uint32_t rwlock_count
;
1120 /* Check if dropping the lock means that we need to unpromote */
1121 rwlock_count
= thread
->rwlock_count
--;
1123 if (rwlock_count
== 0) {
1124 panic("rw lock count underflow for thread %p", thread
);
1127 if ((prior_lock_state
& LCK_RW_W_WAITING
) &&
1128 ((prior_lock_state
& LCK_RW_SHARED_MASK
) == LCK_RW_SHARED_READER
)) {
1130 * Someone else has requested upgrade.
1131 * Since we've released the read lock, wake
1132 * him up if he's blocked waiting
1134 thread_wakeup(LCK_RW_WRITER_EVENT(lck
));
1137 if ((rwlock_count
== 1 /* field now 0 */) && (thread
->sched_flags
& TH_SFLAG_RW_PROMOTED
)) {
1138 /* sched_flags checked without lock, but will be rechecked while clearing */
1139 lck_rw_clear_promotion(thread
, unslide_for_kdebug(lck
));
1142 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_SH_TO_EX_CODE
) | DBG_FUNC_NONE
,
1143 VM_KERNEL_UNSLIDE_OR_PERM(lck
), lck
->lck_rw_shared_count
, lck
->lck_rw_want_upgrade
, 0, 0);
 * Routine:	lck_rw_lock_shared_to_exclusive_success
 *	Fast path code has already dropped our read
 *	count and successfully acquired 'lck_rw_want_upgrade'.
 *	We just need to wait for the rest of the readers to drain
 *	and then we can return as the exclusive holder of this lock.
1157 lck_rw_lock_shared_to_exclusive_success(
1160 __kdebug_only
uintptr_t trace_lck
= VM_KERNEL_UNSLIDE_OR_PERM(lock
);
1165 boolean_t not_shared
;
1168 uint64_t wait_interval
= 0;
1169 int readers_at_sleep
= 0;
1170 boolean_t dtrace_ls_initialized
= FALSE
;
1171 boolean_t dtrace_rwl_shared_to_excl_spin
, dtrace_rwl_shared_to_excl_block
, dtrace_ls_enabled
= FALSE
;
1174 while (!lck_rw_drain_status(lock
, LCK_RW_SHARED_MASK
, FALSE
)) {
1175 word
.data
= ordered_load_rw(lock
);
1177 if (dtrace_ls_initialized
== FALSE
) {
1178 dtrace_ls_initialized
= TRUE
;
1179 dtrace_rwl_shared_to_excl_spin
= (lockstat_probemap
[LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN
] != 0);
1180 dtrace_rwl_shared_to_excl_block
= (lockstat_probemap
[LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK
] != 0);
1181 dtrace_ls_enabled
= dtrace_rwl_shared_to_excl_spin
|| dtrace_rwl_shared_to_excl_block
;
1182 if (dtrace_ls_enabled
) {
1184 * Either sleeping or spinning is happening,
1185 * start a timing of our delay interval now.
1187 readers_at_sleep
= word
.shared_count
;
1188 wait_interval
= mach_absolute_time();
1193 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_SH_TO_EX_SPIN_CODE
) | DBG_FUNC_START
,
1194 trace_lck
, word
.shared_count
, 0, 0, 0);
1196 not_shared
= lck_rw_drain_status(lock
, LCK_RW_SHARED_MASK
, TRUE
);
1198 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_SH_TO_EX_SPIN_CODE
) | DBG_FUNC_END
,
1199 trace_lck
, lock
->lck_rw_shared_count
, 0, 0, 0);
1206 * if we get here, the spin deadline in lck_rw_wait_on_status()
1207 * has expired w/o the rw_shared_count having drained to 0
1208 * check to see if we're allowed to do a thread_block
1210 if (word
.can_sleep
) {
1211 istate
= lck_interlock_lock(lock
);
1213 word
.data
= ordered_load_rw(lock
);
1214 if (word
.shared_count
!= 0) {
1215 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_SH_TO_EX_WAIT_CODE
) | DBG_FUNC_START
,
1216 trace_lck
, word
.shared_count
, 0, 0, 0);
1219 ordered_store_rw(lock
, word
.data
);
1221 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockUpgrade
);
1222 res
= assert_wait(LCK_RW_WRITER_EVENT(lock
),
1223 THREAD_UNINT
| THREAD_WAIT_NOREPORT_USER
);
1224 lck_interlock_unlock(lock
, istate
);
1226 if (res
== THREAD_WAITING
) {
1227 res
= thread_block(THREAD_CONTINUE_NULL
);
1230 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_SH_TO_EX_WAIT_CODE
) | DBG_FUNC_END
,
1231 trace_lck
, res
, slept
, 0, 0);
1233 lck_interlock_unlock(lock
, istate
);
1240 * We infer whether we took the sleep/spin path above by checking readers_at_sleep.
1242 if (dtrace_ls_enabled
== TRUE
) {
1244 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN
, lock
, mach_absolute_time() - wait_interval
, 0);
1246 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK
, lock
,
1247 mach_absolute_time() - wait_interval
, 1,
1248 (readers_at_sleep
== 0 ? 1 : 0), readers_at_sleep
);
1251 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE
, lock
, 1);
1258 * Routine: lck_rw_lock_exclusive_to_shared
1262 lck_rw_lock_exclusive_to_shared(lck_rw_t
*lock
)
1264 uint32_t data
, prev
;
1266 assertf(lock
->lck_rw_owner
== current_thread(), "state=0x%x, owner=%p", lock
->lck_rw_data
, lock
->lck_rw_owner
);
1267 ordered_store_rw_owner(lock
, THREAD_NULL
);
1269 data
= atomic_exchange_begin32(&lock
->lck_rw_data
, &prev
, memory_order_release_smp
);
1270 if (data
& LCK_RW_INTERLOCK
) {
1272 atomic_exchange_abort();
1273 lck_rw_interlock_spin(lock
); /* wait for interlock to clear */
1276 panic("lck_rw_lock_exclusive_to_shared(): Interlock locked (%p): %x", lock
, data
);
1279 data
+= LCK_RW_SHARED_READER
;
1280 if (data
& LCK_RW_WANT_UPGRADE
) {
1281 data
&= ~(LCK_RW_WANT_UPGRADE
);
1283 data
&= ~(LCK_RW_WANT_EXCL
);
1285 if (!((prev
& LCK_RW_W_WAITING
) && (prev
& LCK_RW_PRIV_EXCL
))) {
1286 data
&= ~(LCK_RW_W_WAITING
);
1288 if (atomic_exchange_complete32(&lock
->lck_rw_data
, prev
, data
, memory_order_release_smp
)) {
1293 return lck_rw_lock_exclusive_to_shared_gen(lock
, prev
);
1297 * Routine: lck_rw_lock_exclusive_to_shared_gen
1299 * Fast path has already dropped
1300 * our exclusive state and bumped lck_rw_shared_count
1301 * all we need to do here is determine if anyone
1302 * needs to be awakened.
1305 lck_rw_lock_exclusive_to_shared_gen(
1307 uint32_t prior_lock_state
)
1309 __kdebug_only
uintptr_t trace_lck
= VM_KERNEL_UNSLIDE_OR_PERM(lck
);
1310 lck_rw_word_t fake_lck
;
 * prior_lock_state is a snapshot of the 1st word of the
 * lock in question... we'll fake up a pointer to it
 * and carefully not access anything beyond what's defined
 * in the first word of a lck_rw_t
1318 fake_lck
.data
= prior_lock_state
;
1320 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_TO_SH_CODE
) | DBG_FUNC_START
,
1321 trace_lck
, fake_lck
->want_excl
, fake_lck
->want_upgrade
, 0, 0);
1324 * don't wake up anyone waiting to take the lock exclusively
1325 * since we hold a read count... when the read count drops to 0,
1326 * the writers will be woken.
1328 * wake up any waiting readers if we don't have any writers waiting,
1329 * or the lock is NOT marked as rw_priv_excl (writers have privilege)
1331 if (!(fake_lck
.priv_excl
&& fake_lck
.w_waiting
) && fake_lck
.r_waiting
) {
1332 thread_wakeup(LCK_RW_READER_EVENT(lck
));
1335 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_TO_SH_CODE
) | DBG_FUNC_END
,
1336 trace_lck
, lck
->lck_rw_want_excl
, lck
->lck_rw_want_upgrade
, lck
->lck_rw_shared_count
, 0);
1339 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_TO_SHARED_DOWNGRADE
, lck
, 0);
1345 * Routine: lck_rw_try_lock
1350 lck_rw_type_t lck_rw_type
)
1352 if (lck_rw_type
== LCK_RW_TYPE_SHARED
) {
1353 return lck_rw_try_lock_shared(lck
);
1354 } else if (lck_rw_type
== LCK_RW_TYPE_EXCLUSIVE
) {
1355 return lck_rw_try_lock_exclusive(lck
);
1357 panic("lck_rw_try_lock(): Invalid rw lock type: %x", lck_rw_type
);
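/*
 * Illustrative usage sketch (not part of this file; 'my_rw' is hypothetical):
 * a non-blocking reader path.
 *
 *	if (lck_rw_try_lock(&my_rw, LCK_RW_TYPE_SHARED)) {
 *		// ... read-only access ...
 *		lck_rw_unlock(&my_rw, LCK_RW_TYPE_SHARED);
 *	} else {
 *		// lock was busy; caller proceeds without it
 *	}
 */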
1363 * Routine: lck_rw_try_lock_shared
1367 lck_rw_try_lock_shared(lck_rw_t
*lock
)
1369 uint32_t data
, prev
;
1372 data
= atomic_exchange_begin32(&lock
->lck_rw_data
, &prev
, memory_order_acquire_smp
);
1373 if (data
& LCK_RW_INTERLOCK
) {
1375 atomic_exchange_abort();
1376 lck_rw_interlock_spin(lock
);
1379 panic("lck_rw_try_lock_shared(): Interlock locked (%p): %x", lock
, data
);
1382 if (data
& (LCK_RW_WANT_EXCL
| LCK_RW_WANT_UPGRADE
)) {
1383 atomic_exchange_abort();
1384 return FALSE
; /* lock is busy */
1386 data
+= LCK_RW_SHARED_READER
; /* Increment reader refcount */
1387 if (atomic_exchange_complete32(&lock
->lck_rw_data
, prev
, data
, memory_order_acquire_smp
)) {
1393 thread_t owner
= ordered_load_rw_owner(lock
);
1394 assertf(owner
== THREAD_NULL
, "state=0x%x, owner=%p", ordered_load_rw(lock
), owner
);
1396 current_thread()->rwlock_count
++;
1398 LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE
, lock
, DTRACE_RW_SHARED
);
1399 #endif /* CONFIG_DTRACE */
1405 * Routine: lck_rw_try_lock_exclusive
1409 lck_rw_try_lock_exclusive(lck_rw_t
*lock
)
1411 uint32_t data
, prev
;
1415 data
= atomic_exchange_begin32(&lock
->lck_rw_data
, &prev
, memory_order_acquire_smp
);
1416 if (data
& LCK_RW_INTERLOCK
) {
1418 atomic_exchange_abort();
1419 lck_rw_interlock_spin(lock
);
1422 panic("lck_rw_try_lock_exclusive(): Interlock locked (%p): %x", lock
, data
);
1425 if (data
& (LCK_RW_SHARED_MASK
| LCK_RW_WANT_EXCL
| LCK_RW_WANT_UPGRADE
)) {
1426 atomic_exchange_abort();
1429 data
|= LCK_RW_WANT_EXCL
;
1430 if (atomic_exchange_complete32(&lock
->lck_rw_data
, prev
, data
, memory_order_acquire_smp
)) {
1435 thread
= current_thread();
1436 thread
->rwlock_count
++;
1438 thread_t owner
= ordered_load_rw_owner(lock
);
1439 assertf(owner
== THREAD_NULL
, "state=0x%x, owner=%p", ordered_load_rw(lock
), owner
);
1441 ordered_store_rw_owner(lock
, thread
);
1443 LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE
, lock
, DTRACE_RW_EXCL
);
1444 #endif /* CONFIG_DTRACE */
1450 * Routine: lck_rw_unlock
1455 lck_rw_type_t lck_rw_type
)
1457 if (lck_rw_type
== LCK_RW_TYPE_SHARED
) {
1458 lck_rw_unlock_shared(lck
);
1459 } else if (lck_rw_type
== LCK_RW_TYPE_EXCLUSIVE
) {
1460 lck_rw_unlock_exclusive(lck
);
1462 panic("lck_rw_unlock(): Invalid RW lock type: %d", lck_rw_type
);
1468 * Routine: lck_rw_unlock_shared
1471 lck_rw_unlock_shared(
1476 assertf(lck
->lck_rw_owner
== THREAD_NULL
, "state=0x%x, owner=%p", lck
->lck_rw_data
, lck
->lck_rw_owner
);
1477 assertf(lck
->lck_rw_shared_count
> 0, "shared_count=0x%x", lck
->lck_rw_shared_count
);
1478 ret
= lck_rw_done(lck
);
1480 if (ret
!= LCK_RW_TYPE_SHARED
) {
1481 panic("lck_rw_unlock_shared(): lock %p held in mode: %d", lck
, ret
);
1487 * Routine: lck_rw_unlock_exclusive
1490 lck_rw_unlock_exclusive(
1495 assertf(lck
->lck_rw_owner
== current_thread(), "state=0x%x, owner=%p", lck
->lck_rw_data
, lck
->lck_rw_owner
);
1496 ret
= lck_rw_done(lck
);
1498 if (ret
!= LCK_RW_TYPE_EXCLUSIVE
) {
1499 panic("lck_rw_unlock_exclusive(): lock %p held in mode: %d", lck
, ret
);
1505 * Routine: lck_rw_lock_exclusive_gen
1508 lck_rw_lock_exclusive_gen(
1511 __kdebug_only
uintptr_t trace_lck
= VM_KERNEL_UNSLIDE_OR_PERM(lock
);
1514 boolean_t gotlock
= 0;
1515 boolean_t not_shared_or_upgrade
= 0;
1516 wait_result_t res
= 0;
1520 boolean_t dtrace_ls_initialized
= FALSE
;
1521 boolean_t dtrace_rwl_excl_spin
, dtrace_rwl_excl_block
, dtrace_ls_enabled
= FALSE
;
1522 uint64_t wait_interval
= 0;
1523 int readers_at_sleep
= 0;
1527 * Try to acquire the lck_rw_want_excl bit.
1529 while (!lck_rw_grab(lock
, LCK_RW_GRAB_WANT
, FALSE
)) {
1531 if (dtrace_ls_initialized
== FALSE
) {
1532 dtrace_ls_initialized
= TRUE
;
1533 dtrace_rwl_excl_spin
= (lockstat_probemap
[LS_LCK_RW_LOCK_EXCL_SPIN
] != 0);
1534 dtrace_rwl_excl_block
= (lockstat_probemap
[LS_LCK_RW_LOCK_EXCL_BLOCK
] != 0);
1535 dtrace_ls_enabled
= dtrace_rwl_excl_spin
|| dtrace_rwl_excl_block
;
1536 if (dtrace_ls_enabled
) {
1538 * Either sleeping or spinning is happening,
1539 * start a timing of our delay interval now.
1541 readers_at_sleep
= lock
->lck_rw_shared_count
;
1542 wait_interval
= mach_absolute_time();
1547 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_WRITER_SPIN_CODE
) | DBG_FUNC_START
, trace_lck
, 0, 0, 0, 0);
1549 gotlock
= lck_rw_grab(lock
, LCK_RW_GRAB_WANT
, TRUE
);
1551 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_WRITER_SPIN_CODE
) | DBG_FUNC_END
, trace_lck
, 0, 0, gotlock
, 0);
1557 * if we get here, the deadline has expired w/o us
1558 * being able to grab the lock exclusively
1559 * check to see if we're allowed to do a thread_block
1561 word
.data
= ordered_load_rw(lock
);
1562 if (word
.can_sleep
) {
1563 istate
= lck_interlock_lock(lock
);
1564 word
.data
= ordered_load_rw(lock
);
1566 if (word
.want_excl
) {
1567 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_WRITER_WAIT_CODE
) | DBG_FUNC_START
, trace_lck
, 0, 0, 0, 0);
1570 ordered_store_rw(lock
, word
.data
);
1572 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite
);
1573 res
= assert_wait(LCK_RW_WRITER_EVENT(lock
),
1574 THREAD_UNINT
| THREAD_WAIT_NOREPORT_USER
);
1575 lck_interlock_unlock(lock
, istate
);
1577 if (res
== THREAD_WAITING
) {
1578 res
= thread_block(THREAD_CONTINUE_NULL
);
1581 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_WRITER_WAIT_CODE
) | DBG_FUNC_END
, trace_lck
, res
, slept
, 0, 0);
1584 ordered_store_rw(lock
, word
.data
);
1585 lck_interlock_unlock(lock
, istate
);
1591 * Wait for readers (and upgrades) to finish...
1593 while (!lck_rw_drain_status(lock
, LCK_RW_SHARED_MASK
| LCK_RW_WANT_UPGRADE
, FALSE
)) {
1596 * Either sleeping or spinning is happening, start
1597 * a timing of our delay interval now. If we set it
1598 * to -1 we don't have accurate data so we cannot later
1599 * decide to record a dtrace spin or sleep event.
1601 if (dtrace_ls_initialized
== FALSE
) {
1602 dtrace_ls_initialized
= TRUE
;
1603 dtrace_rwl_excl_spin
= (lockstat_probemap
[LS_LCK_RW_LOCK_EXCL_SPIN
] != 0);
1604 dtrace_rwl_excl_block
= (lockstat_probemap
[LS_LCK_RW_LOCK_EXCL_BLOCK
] != 0);
1605 dtrace_ls_enabled
= dtrace_rwl_excl_spin
|| dtrace_rwl_excl_block
;
1606 if (dtrace_ls_enabled
) {
1608 * Either sleeping or spinning is happening,
1609 * start a timing of our delay interval now.
1611 readers_at_sleep
= lock
->lck_rw_shared_count
;
1612 wait_interval
= mach_absolute_time();
1617 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_READER_SPIN_CODE
) | DBG_FUNC_START
, trace_lck
, 0, 0, 0, 0);
1619 not_shared_or_upgrade
= lck_rw_drain_status(lock
, LCK_RW_SHARED_MASK
| LCK_RW_WANT_UPGRADE
, TRUE
);
1621 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_READER_SPIN_CODE
) | DBG_FUNC_END
, trace_lck
, 0, 0, not_shared_or_upgrade
, 0);
1623 if (not_shared_or_upgrade
) {
1627 * if we get here, the deadline has expired w/o us
1628 * being able to grab the lock exclusively
1629 * check to see if we're allowed to do a thread_block
1631 word
.data
= ordered_load_rw(lock
);
1632 if (word
.can_sleep
) {
1633 istate
= lck_interlock_lock(lock
);
1634 word
.data
= ordered_load_rw(lock
);
1636 if (word
.shared_count
!= 0 || word
.want_upgrade
) {
1637 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_READER_WAIT_CODE
) | DBG_FUNC_START
, trace_lck
, 0, 0, 0, 0);
1640 ordered_store_rw(lock
, word
.data
);
1642 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite
);
1643 res
= assert_wait(LCK_RW_WRITER_EVENT(lock
),
1644 THREAD_UNINT
| THREAD_WAIT_NOREPORT_USER
);
1645 lck_interlock_unlock(lock
, istate
);
1647 if (res
== THREAD_WAITING
) {
1648 res
= thread_block(THREAD_CONTINUE_NULL
);
1651 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_READER_WAIT_CODE
) | DBG_FUNC_END
, trace_lck
, res
, slept
, 0, 0);
1653 lck_interlock_unlock(lock
, istate
);
1655 * must own the lock now, since we checked for
1656 * readers or upgrade owner behind the interlock
1657 * no need for a call to 'lck_rw_drain_status'
1666 * Decide what latencies we suffered that are Dtrace events.
1667 * If we have set wait_interval, then we either spun or slept.
1668 * At least we get out from under the interlock before we record
1669 * which is the best we can do here to minimize the impact
1671 * If we have set wait_interval to -1, then dtrace was not enabled when we
1672 * started sleeping/spinning so we don't record this event.
1674 if (dtrace_ls_enabled
== TRUE
) {
1676 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_SPIN
, lock
,
1677 mach_absolute_time() - wait_interval
, 1);
1680 * For the blocking case, we also record if when we blocked
1681 * it was held for read or write, and how many readers.
1682 * Notice that above we recorded this before we dropped
1683 * the interlock so the count is accurate.
1685 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_BLOCK
, lock
,
1686 mach_absolute_time() - wait_interval
, 1,
1687 (readers_at_sleep
== 0 ? 1 : 0), readers_at_sleep
);
1690 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE
, lock
, 1);
1691 #endif /* CONFIG_DTRACE */
1695 * Routine: lck_rw_done
1699 lck_rw_done(lck_rw_t
*lock
)
1701 uint32_t data
, prev
;
1702 boolean_t once
= FALSE
;
1705 data
= atomic_exchange_begin32(&lock
->lck_rw_data
, &prev
, memory_order_release_smp
);
1706 if (data
& LCK_RW_INTERLOCK
) { /* wait for interlock to clear */
1708 atomic_exchange_abort();
1709 lck_rw_interlock_spin(lock
);
1712 panic("lck_rw_done(): Interlock locked (%p): %x", lock
, data
);
1715 if (data
& LCK_RW_SHARED_MASK
) { /* lock is held shared */
1716 assertf(lock
->lck_rw_owner
== THREAD_NULL
, "state=0x%x, owner=%p", lock
->lck_rw_data
, lock
->lck_rw_owner
);
1717 data
-= LCK_RW_SHARED_READER
;
1718 if ((data
& LCK_RW_SHARED_MASK
) == 0) { /* if reader count has now gone to 0, check for waiters */
1721 } else { /* if reader count == 0, must be exclusive lock */
1722 if (data
& LCK_RW_WANT_UPGRADE
) {
1723 data
&= ~(LCK_RW_WANT_UPGRADE
);
1725 if (data
& LCK_RW_WANT_EXCL
) {
1726 data
&= ~(LCK_RW_WANT_EXCL
);
1727 } else { /* lock is not 'owned', panic */
1728 panic("Releasing non-exclusive RW lock without a reader refcount!");
1732 // Only check for holder and clear it once
1733 assertf(lock
->lck_rw_owner
== current_thread(), "state=0x%x, owner=%p", lock
->lck_rw_data
, lock
->lck_rw_owner
);
1734 ordered_store_rw_owner(lock
, THREAD_NULL
);
1739 * test the original values to match what
1740 * lck_rw_done_gen is going to do to determine
1741 * which wakeups need to happen...
1743 * if !(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting)
1745 if (prev
& LCK_RW_W_WAITING
) {
1746 data
&= ~(LCK_RW_W_WAITING
);
1747 if ((prev
& LCK_RW_PRIV_EXCL
) == 0) {
1748 data
&= ~(LCK_RW_R_WAITING
);
1751 data
&= ~(LCK_RW_R_WAITING
);
1754 if (atomic_exchange_complete32(&lock
->lck_rw_data
, prev
, data
, memory_order_release_smp
)) {
1759 return lck_rw_done_gen(lock
, prev
);
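/*
 * Illustrative usage sketch (not part of this file; 'my_rw' is hypothetical):
 * lck_rw_done() releases a hold taken in either mode and reports which mode
 * was released, so a caller that already knows its mode can ignore the result:
 *
 *	(void) lck_rw_done(&my_rw);
 */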
1763 * Routine: lck_rw_done_gen
1765 * called from the assembly language wrapper...
1766 * prior_lock_state is the value in the 1st
1767 * word of the lock at the time of a successful
1768 * atomic compare and exchange with the new value...
1769 * it represents the state of the lock before we
1770 * decremented the rw_shared_count or cleared either
1771 * rw_want_upgrade or rw_want_write and
1772 * the lck_x_waiting bits... since the wrapper
1773 * routine has already changed the state atomically,
1774 * we just need to decide if we should
1775 * wake up anyone and what value to return... we do
1776 * this by examining the state of the lock before
1779 static lck_rw_type_t
1782 uint32_t prior_lock_state
)
1784 lck_rw_word_t fake_lck
;
1785 lck_rw_type_t lock_type
;
1787 uint32_t rwlock_count
;
 * prior_lock_state is a snapshot of the 1st word of the
 * lock in question... we'll fake up a pointer to it
 * and carefully not access anything beyond what's defined
 * in the first word of a lck_rw_t
1795 fake_lck
.data
= prior_lock_state
;
1797 if (fake_lck
.shared_count
<= 1) {
1798 if (fake_lck
.w_waiting
) {
1799 thread_wakeup(LCK_RW_WRITER_EVENT(lck
));
1802 if (!(fake_lck
.priv_excl
&& fake_lck
.w_waiting
) && fake_lck
.r_waiting
) {
1803 thread_wakeup(LCK_RW_READER_EVENT(lck
));
1806 if (fake_lck
.shared_count
) {
1807 lock_type
= LCK_RW_TYPE_SHARED
;
1809 lock_type
= LCK_RW_TYPE_EXCLUSIVE
;
1812 /* Check if dropping the lock means that we need to unpromote */
1813 thread
= current_thread();
1814 rwlock_count
= thread
->rwlock_count
--;
1816 if (rwlock_count
== 0) {
1817 panic("rw lock count underflow for thread %p", thread
);
1820 if ((rwlock_count
== 1 /* field now 0 */) && (thread
->sched_flags
& TH_SFLAG_RW_PROMOTED
)) {
1821 /* sched_flags checked without lock, but will be rechecked while clearing */
1822 lck_rw_clear_promotion(thread
, unslide_for_kdebug(lck
));
1825 LOCKSTAT_RECORD(LS_LCK_RW_DONE_RELEASE
, lck
, lock_type
== LCK_RW_TYPE_SHARED
? 0 : 1);
1831 * Routine: lck_rw_lock_shared_gen
1833 * Fast path code has determined that this lock
1834 * is held exclusively... this is where we spin/block
1835 * until we can acquire the lock in the shared mode
1838 lck_rw_lock_shared_gen(
1841 __kdebug_only
uintptr_t trace_lck
= VM_KERNEL_UNSLIDE_OR_PERM(lck
);
1843 boolean_t gotlock
= 0;
1845 wait_result_t res
= 0;
1849 uint64_t wait_interval
= 0;
1850 int readers_at_sleep
= 0;
1851 boolean_t dtrace_ls_initialized
= FALSE
;
1852 boolean_t dtrace_rwl_shared_spin
, dtrace_rwl_shared_block
, dtrace_ls_enabled
= FALSE
;
1853 #endif /* CONFIG_DTRACE */
1855 while (!lck_rw_grab(lck
, LCK_RW_GRAB_SHARED
, FALSE
)) {
1857 if (dtrace_ls_initialized
== FALSE
) {
1858 dtrace_ls_initialized
= TRUE
;
1859 dtrace_rwl_shared_spin
= (lockstat_probemap
[LS_LCK_RW_LOCK_SHARED_SPIN
] != 0);
1860 dtrace_rwl_shared_block
= (lockstat_probemap
[LS_LCK_RW_LOCK_SHARED_BLOCK
] != 0);
1861 dtrace_ls_enabled
= dtrace_rwl_shared_spin
|| dtrace_rwl_shared_block
;
1862 if (dtrace_ls_enabled
) {
1864 * Either sleeping or spinning is happening,
1865 * start a timing of our delay interval now.
1867 readers_at_sleep
= lck
->lck_rw_shared_count
;
1868 wait_interval
= mach_absolute_time();
1873 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_SHARED_SPIN_CODE
) | DBG_FUNC_START
,
1874 trace_lck
, lck
->lck_rw_want_excl
, lck
->lck_rw_want_upgrade
, 0, 0);
1876 gotlock
= lck_rw_grab(lck
, LCK_RW_GRAB_SHARED
, TRUE
);
1878 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_SHARED_SPIN_CODE
) | DBG_FUNC_END
,
1879 trace_lck
, lck
->lck_rw_want_excl
, lck
->lck_rw_want_upgrade
, gotlock
, 0);
1885 * if we get here, the deadline has expired w/o us
1886 * being able to grab the lock for read
1887 * check to see if we're allowed to do a thread_block
1889 if (lck
->lck_rw_can_sleep
) {
1890 istate
= lck_interlock_lock(lck
);
1892 word
.data
= ordered_load_rw(lck
);
1893 if ((word
.want_excl
|| word
.want_upgrade
) &&
1894 ((word
.shared_count
== 0) || word
.priv_excl
)) {
1895 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_SHARED_WAIT_CODE
) | DBG_FUNC_START
,
1896 trace_lck
, word
.want_excl
, word
.want_upgrade
, 0, 0);
1899 ordered_store_rw(lck
, word
.data
);
1901 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockRead
);
1902 res
= assert_wait(LCK_RW_READER_EVENT(lck
),
1903 THREAD_UNINT
| THREAD_WAIT_NOREPORT_USER
);
1904 lck_interlock_unlock(lck
, istate
);
1906 if (res
== THREAD_WAITING
) {
1907 res
= thread_block(THREAD_CONTINUE_NULL
);
1910 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_SHARED_WAIT_CODE
) | DBG_FUNC_END
,
1911 trace_lck
, res
, slept
, 0, 0);
1913 word
.shared_count
++;
1914 ordered_store_rw(lck
, word
.data
);
1915 lck_interlock_unlock(lck
, istate
);
1922 if (dtrace_ls_enabled
== TRUE
) {
1924 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_SPIN
, lck
, mach_absolute_time() - wait_interval
, 0);
1926 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_BLOCK
, lck
,
1927 mach_absolute_time() - wait_interval
, 0,
1928 (readers_at_sleep
== 0 ? 1 : 0), readers_at_sleep
);
1931 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE
, lck
, 0);
1932 #endif /* CONFIG_DTRACE */
1942 case LCK_RW_ASSERT_SHARED
:
1943 if ((lck
->lck_rw_shared_count
!= 0) &&
1944 (lck
->lck_rw_owner
== THREAD_NULL
)) {
1948 case LCK_RW_ASSERT_EXCLUSIVE
:
1949 if ((lck
->lck_rw_want_excl
|| lck
->lck_rw_want_upgrade
) &&
1950 (lck
->lck_rw_shared_count
== 0) &&
1951 (lck
->lck_rw_owner
== current_thread())) {
1955 case LCK_RW_ASSERT_HELD
:
1956 if (lck
->lck_rw_shared_count
!= 0) {
1957 return; // Held shared
1959 if ((lck
->lck_rw_want_excl
|| lck
->lck_rw_want_upgrade
) &&
1960 (lck
->lck_rw_owner
== current_thread())) {
1961 return; // Held exclusive
1964 case LCK_RW_ASSERT_NOTHELD
:
1965 if ((lck
->lck_rw_shared_count
== 0) &&
1966 !(lck
->lck_rw_want_excl
|| lck
->lck_rw_want_upgrade
) &&
1967 (lck
->lck_rw_owner
== THREAD_NULL
)) {
1974 panic("rw lock (%p)%s held (mode=%u)", lck
, (type
== LCK_RW_ASSERT_NOTHELD
? "" : " not"), type
);
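/*
 * Illustrative usage sketch (not part of this file; 'my_rw' is hypothetical):
 * a writer-only path can document its locking assumption with
 *
 *	lck_rw_assert(&my_rw, LCK_RW_ASSERT_EXCLUSIVE);
 *
 * which, per the cases above, panics unless the calling thread holds the
 * lock exclusively.
 */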
1979 * Routine: kdp_lck_rw_lock_is_acquired_exclusive
1980 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
1983 kdp_lck_rw_lock_is_acquired_exclusive(lck_rw_t
*lck
)
1986 panic("panic: rw lock exclusive check done outside of kernel debugger");
1988 return ((lck
->lck_rw_want_upgrade
|| lck
->lck_rw_want_excl
) && (lck
->lck_rw_shared_count
== 0)) ? TRUE
: FALSE
;
1992 * The C portion of the mutex package. These routines are only invoked
1993 * if the optimized assembler routines can't do the work.
1997 * Forward declaration
2002 lck_mtx_ext_t
* lck
,
2007 * Routine: lck_mtx_alloc_init
2016 if ((lck
= (lck_mtx_t
*) kalloc(sizeof(lck_mtx_t
))) != 0) {
2017 lck_mtx_init(lck
, grp
, attr
);
2024 * Routine: lck_mtx_free
2031 lck_mtx_destroy(lck
, grp
);
2032 kfree(lck
, sizeof(lck_mtx_t
));
2036 * Routine: lck_mtx_init
2045 lck_mtx_ext_t
*lck_ext
;
2047 lck_attr_t
*lck_attr
;
2049 if (attr
!= LCK_ATTR_NULL
) {
2052 lck_attr
= &LockDefaultLckAttr
;
2056 if ((lck_attr
->lck_attr_val
) & LCK_ATTR_DEBUG
) {
2057 if ((lck_ext
= (lck_mtx_ext_t
*) kalloc(sizeof(lck_mtx_ext_t
))) != 0) {
2058 lck_mtx_ext_init(lck_ext
, grp
, lck_attr
);
2059 lck
->lck_mtx_tag
= LCK_MTX_TAG_INDIRECT
;
2060 lck
->lck_mtx_ptr
= lck_ext
;
2061 lck
->lck_mtx_type
= LCK_MTX_TYPE
;
2066 lck
->lck_mtx_ptr
= NULL
; // Clear any padding in the union fields below
2067 lck
->lck_mtx_waiters
= 0;
2068 lck
->lck_mtx_type
= LCK_MTX_TYPE
;
2069 ordered_store_mtx(lck
, 0);
2071 lck_grp_reference(grp
);
2072 lck_grp_lckcnt_incr(grp
, LCK_TYPE_MTX
);
2076 * Routine: lck_mtx_init_ext
2081 lck_mtx_ext_t
* lck_ext
,
2085 lck_attr_t
*lck_attr
;
2087 if (attr
!= LCK_ATTR_NULL
) {
2090 lck_attr
= &LockDefaultLckAttr
;
2093 if ((lck_attr
->lck_attr_val
) & LCK_ATTR_DEBUG
) {
2094 lck_mtx_ext_init(lck_ext
, grp
, lck_attr
);
2095 lck
->lck_mtx_tag
= LCK_MTX_TAG_INDIRECT
;
2096 lck
->lck_mtx_ptr
= lck_ext
;
2097 lck
->lck_mtx_type
= LCK_MTX_TYPE
;
2099 lck
->lck_mtx_waiters
= 0;
2100 lck
->lck_mtx_type
= LCK_MTX_TYPE
;
2101 ordered_store_mtx(lck
, 0);
2103 lck_grp_reference(grp
);
2104 lck_grp_lckcnt_incr(grp
, LCK_TYPE_MTX
);
2108 * Routine: lck_mtx_ext_init
2112 lck_mtx_ext_t
* lck
,
2116 bzero((void *) lck
, sizeof(lck_mtx_ext_t
));
2118 lck
->lck_mtx
.lck_mtx_type
= LCK_MTX_TYPE
;
2120 if ((attr
->lck_attr_val
) & LCK_ATTR_DEBUG
) {
2121 lck
->lck_mtx_deb
.type
= MUTEX_TAG
;
2122 lck
->lck_mtx_attr
|= LCK_MTX_ATTR_DEBUG
;
2124 lck
->lck_mtx_grp
= grp
;
2126 if (grp
->lck_grp_attr
& LCK_GRP_ATTR_STAT
) {
2127 lck
->lck_mtx_attr
|= LCK_MTX_ATTR_STAT
;
/* The slow versions */
static void lck_mtx_lock_contended(lck_mtx_t *lock, thread_t thread, boolean_t interlocked);
static boolean_t lck_mtx_try_lock_contended(lck_mtx_t *lock, thread_t thread);
static void lck_mtx_unlock_contended(lck_mtx_t *lock, thread_t thread, boolean_t interlocked);

/* The adaptive spin function */
static spinwait_result_t lck_mtx_lock_contended_spinwait_arm(lck_mtx_t *lock, thread_t thread, boolean_t interlocked);
2140 * Routine: lck_mtx_verify
2142 * Verify if a mutex is valid
2145 lck_mtx_verify(lck_mtx_t
*lock
)
2147 if (lock
->lck_mtx_type
!= LCK_MTX_TYPE
) {
2148 panic("Invalid mutex %p", lock
);
2150 #if DEVELOPMENT || DEBUG
2151 if (lock
->lck_mtx_tag
== LCK_MTX_TAG_DESTROYED
) {
2152 panic("Mutex destroyed %p", lock
);
2154 #endif /* DEVELOPMENT || DEBUG */
2158 * Routine: lck_mtx_check_preemption
2160 * Verify preemption is enabled when attempting to acquire a mutex.
2164 lck_mtx_check_preemption(lck_mtx_t
*lock
)
2166 #if DEVELOPMENT || DEBUG
2167 int pl
= get_preemption_level();
2170 panic("Attempt to take mutex with preemption disabled. Lock=%p, level=%d", lock
, pl
);
2178 * Routine: lck_mtx_lock
2181 lck_mtx_lock(lck_mtx_t
*lock
)
2185 lck_mtx_verify(lock
);
2186 lck_mtx_check_preemption(lock
);
2187 thread
= current_thread();
2188 if (os_atomic_cmpxchg(&lock
->lck_mtx_data
,
2189 0, LCK_MTX_THREAD_TO_STATE(thread
), acquire
)) {
2191 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE
, lock
, 0);
2192 #endif /* CONFIG_DTRACE */
2195 lck_mtx_lock_contended(lock
, thread
, FALSE
);
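/*
 * Illustrative usage sketch (not part of this file; 'my_mtx' is hypothetical
 * and assumed to have been created elsewhere with lck_mtx_alloc_init()):
 *
 *	lck_mtx_lock(my_mtx);
 *	// ... may block, so preemption must be enabled on entry ...
 *	lck_mtx_unlock(my_mtx);
 */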
2199 * This is the slow version of mutex locking.
2201 static void NOINLINE
2202 lck_mtx_lock_contended(lck_mtx_t
*lock
, thread_t thread
, boolean_t interlocked
)
2204 thread_t holding_thread
;
2207 spinwait_result_t sw_res
;
2208 struct turnstile
*ts
= NULL
;
2210 /* Loop waiting until I see that the mutex is unowned */
2212 sw_res
= lck_mtx_lock_contended_spinwait_arm(lock
, thread
, interlocked
);
2213 interlocked
= FALSE
;
2216 case SPINWAIT_ACQUIRED
:
2218 interlock_lock(lock
);
2219 turnstile_complete((uintptr_t)lock
, NULL
, NULL
, TURNSTILE_KERNEL_MUTEX
);
2220 interlock_unlock(lock
);
2223 case SPINWAIT_INTERLOCK
:
2229 state
= ordered_load_mtx(lock
);
2230 holding_thread
= LCK_MTX_STATE_TO_THREAD(state
);
2231 if (holding_thread
== NULL
) {
2234 ordered_store_mtx(lock
, (state
| LCK_ILOCK
| ARM_LCK_WAITERS
)); // Set waiters bit and wait
2235 lck_mtx_lock_wait(lock
, holding_thread
, &ts
);
2236 /* returns interlock unlocked */
2240 /* Hooray, I'm the new owner! */
2241 state
= ordered_load_mtx(lock
);
2243 if (state
& ARM_LCK_WAITERS
) {
2244 /* Skip lck_mtx_lock_acquire if there are no waiters. */
2245 waiters
= lck_mtx_lock_acquire(lock
, ts
);
2247 * lck_mtx_lock_acquire will call
2248 * turnstile_complete
2252 turnstile_complete((uintptr_t)lock
, NULL
, NULL
, TURNSTILE_KERNEL_MUTEX
);
2256 state
= LCK_MTX_THREAD_TO_STATE(thread
);
2258 state
|= ARM_LCK_WAITERS
;
2261 state
|= LCK_ILOCK
; // Preserve interlock
2262 ordered_store_mtx(lock
, state
); // Set ownership
2263 interlock_unlock(lock
); // Release interlock, enable preemption
2265 ordered_store_mtx(lock
, state
); // Set ownership
2266 enable_preemption();
2270 load_memory_barrier();
2272 assert(thread
->turnstile
!= NULL
);
2275 turnstile_cleanup();
2279 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE
, lock
, 0);
2280 #endif /* CONFIG_DTRACE */
/*
 * Routine:	lck_mtx_lock_contended_spinwait_arm
 *
 * Invoked trying to acquire a mutex when there is contention but
 * the holder is running on another processor. We spin for up to a maximum
 * time waiting for the lock to be released.
 */
static spinwait_result_t
lck_mtx_lock_contended_spinwait_arm(lck_mtx_t *lock, thread_t thread, boolean_t interlocked)
{
	int has_interlock = (int)interlocked;
#if __SMP__
	__kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock);
	thread_t holder;
	uint64_t overall_deadline;
	uint64_t check_owner_deadline;
	uint64_t cur_time;
	spinwait_result_t retval = SPINWAIT_DID_SPIN;
	int loopcount = 0;
	uintptr_t state;
	boolean_t istate;

	if (__improbable(!(lck_mtx_adaptive_spin_mode & ADAPTIVE_SPIN_ENABLE))) {
		if (!has_interlock) {
			interlock_lock(lock);
		}

		return SPINWAIT_DID_NOT_SPIN;
	}

	state = ordered_load_mtx(lock);

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_START,
	    trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(LCK_MTX_STATE_TO_THREAD(state)), lock->lck_mtx_waiters, 0, 0);

	cur_time = mach_absolute_time();
	overall_deadline = cur_time + MutexSpin;
	check_owner_deadline = cur_time;

	if (has_interlock) {
		istate = ml_get_interrupts_enabled();
	}

	/* Snoop the lock state */
	state = ordered_load_mtx(lock);

	/*
	 * Spin while:
	 *   - mutex is locked, and
	 *   - it's locked as a spin lock, and
	 *   - owner is running on another processor, and
	 *   - owner (processor) is not idling, and
	 *   - we haven't spun for long enough.
	 */
	do {
		if (!(state & LCK_ILOCK) || has_interlock) {
			if (!has_interlock) {
				has_interlock = interlock_try_disable_interrupts(lock, &istate);
			}

			if (has_interlock) {
				state = ordered_load_mtx(lock);
				holder = LCK_MTX_STATE_TO_THREAD(state);

				if (holder == NULL) {
					retval = SPINWAIT_INTERLOCK;

					if (istate) {
						ml_set_interrupts_enabled(istate);
					}

					break;
				}

				if (!(holder->machine.machine_thread_flags & MACHINE_THREAD_FLAGS_ON_CPU) ||
				    (holder->state & TH_IDLE)) {
					if (loopcount == 0) {
						retval = SPINWAIT_DID_NOT_SPIN;
					}

					if (istate) {
						ml_set_interrupts_enabled(istate);
					}

					break;
				}

				interlock_unlock_enable_interrupts(lock, istate);
				has_interlock = 0;
			}
		}

		cur_time = mach_absolute_time();

		if (cur_time >= overall_deadline) {
			break;
		}

		check_owner_deadline = cur_time + (MutexSpin / SPINWAIT_OWNER_CHECK_COUNT);

		if (cur_time < check_owner_deadline) {
			machine_delay_until(check_owner_deadline - cur_time, check_owner_deadline);
		}

		/* Snoop the lock state */
		state = ordered_load_mtx(lock);

		if (state == 0) {
			/* Try to grab the lock. */
			if (os_atomic_cmpxchg(&lock->lck_mtx_data,
			    0, LCK_MTX_THREAD_TO_STATE(thread), acquire)) {
				retval = SPINWAIT_ACQUIRED;
				break;
			}
		}

		loopcount++;
	} while (TRUE);

#if CONFIG_DTRACE
	/*
	 * We've already kept a count via overall_deadline of how long we spun.
	 * If dtrace is active, then we compute backwards to decide how
	 * long we spun.
	 *
	 * Note that we record a different probe id depending on whether
	 * this is a direct or indirect mutex. This allows us to
	 * penalize only lock groups that have debug/stats enabled
	 * with dtrace processing if desired.
	 */
	if (__probable(lock->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)) {
		LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN, lock,
		    mach_absolute_time() - (overall_deadline - MutexSpin));
	} else {
		LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_SPIN, lock,
		    mach_absolute_time() - (overall_deadline - MutexSpin));
	}
	/* The lockstat acquire event is recorded by the caller. */
#endif /* CONFIG_DTRACE */

	state = ordered_load_mtx(lock);

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_END,
	    trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(LCK_MTX_STATE_TO_THREAD(state)), lock->lck_mtx_waiters, retval, 0);
#else /* __SMP__ */
	/* Spinwaiting is not useful on UP systems. */
#pragma unused(lock, thread)
	int retval = SPINWAIT_DID_NOT_SPIN;
#endif /* __SMP__ */
	if ((!has_interlock) && (retval != SPINWAIT_ACQUIRED)) {
		/* We must own either the lock or the interlock on return. */
		interlock_lock(lock);
	}

	return retval;
}

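/*
 * Simplified sketch of the spin budget used above (hypothetical helper, not
 * built): the adaptive spinner derives one overall deadline from MutexSpin,
 * re-checks the owner at bounded intervals within that budget, and gives up
 * once mach_absolute_time() passes the deadline.
 */
#if 0
static boolean_t
hypothetical_bounded_spin(uint64_t spin_budget_abs, boolean_t (*try_acquire)(void))
{
	uint64_t overall_deadline = mach_absolute_time() + spin_budget_abs;

	while (mach_absolute_time() < overall_deadline) {
		if (try_acquire()) {
			return TRUE;	/* acquired within the spin budget */
		}
		/* brief pause / owner re-check would go here */
	}
	return FALSE;			/* budget exhausted; caller must block instead */
}
#endif
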
/*
 * Common code for mutex locking as spinlock
 */
static void
lck_mtx_lock_spin_internal(lck_mtx_t *lock, boolean_t allow_held_as_mutex)
{
	uintptr_t state;

	interlock_lock(lock);
	state = ordered_load_mtx(lock);
	if (LCK_MTX_STATE_TO_THREAD(state)) {
		if (allow_held_as_mutex) {
			lck_mtx_lock_contended(lock, current_thread(), TRUE);
		} else {
			// "Always" variants can never block. If the lock is held and blocking is not allowed
			// then someone is mixing always and non-always calls on the same lock, which is
			// forbidden.
			panic("Attempting to block on a lock taken as spin-always %p", lock);
		}
		return;
	}
	state &= ARM_LCK_WAITERS;                // Preserve waiters bit
	state |= (LCK_MTX_SPIN_TAG | LCK_ILOCK); // Add spin tag and maintain interlock
	ordered_store_mtx(lock, state);
	load_memory_barrier();

#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN_ACQUIRE, lock, 0);
#endif /* CONFIG_DTRACE */
}

/*
 * Routine:	lck_mtx_lock_spin
 */
void
lck_mtx_lock_spin(lck_mtx_t *lock)
{
	lck_mtx_check_preemption(lock);
	lck_mtx_lock_spin_internal(lock, TRUE);
}

/*
 * Routine:	lck_mtx_lock_spin_always
 */
void
lck_mtx_lock_spin_always(lck_mtx_t *lock)
{
	lck_mtx_lock_spin_internal(lock, FALSE);
}

/*
 * Routine:	lck_mtx_try_lock
 */
boolean_t
lck_mtx_try_lock(lck_mtx_t *lock)
{
	thread_t thread = current_thread();

	lck_mtx_verify(lock);
	if (os_atomic_cmpxchg(&lock->lck_mtx_data,
	    0, LCK_MTX_THREAD_TO_STATE(thread), acquire)) {
#if CONFIG_DTRACE
		LOCKSTAT_RECORD(LS_LCK_MTX_TRY_LOCK_ACQUIRE, lock, 0);
#endif /* CONFIG_DTRACE */
		return TRUE;
	}
	return lck_mtx_try_lock_contended(lock, thread);
}

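/*
 * Illustrative try-lock pattern (hypothetical names, not part of this file):
 * callers that must not block use the boolean result of lck_mtx_try_lock()
 * to fall back to deferring the work instead of waiting on a contended mutex.
 */
#if 0
static lck_mtx_t *my_cache_mtx;		/* hypothetical, initialized elsewhere */

static void
my_cache_poke(void)
{
	if (lck_mtx_try_lock(my_cache_mtx)) {
		/* acquired without blocking */
		/* ... update the cache ... */
		lck_mtx_unlock(my_cache_mtx);
	} else {
		/* lock is busy: skip or defer the update rather than blocking */
	}
}
#endif
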
static boolean_t NOINLINE
lck_mtx_try_lock_contended(lck_mtx_t *lock, thread_t thread)
{
	thread_t holding_thread;
	uintptr_t state;
	int waiters;

#if __SMP__
	interlock_lock(lock);
	state = ordered_load_mtx(lock);
	holding_thread = LCK_MTX_STATE_TO_THREAD(state);
	if (holding_thread) {
		interlock_unlock(lock);
		return FALSE;
	}
#else
	disable_preemption_for_thread(thread);
	state = ordered_load_mtx(lock);
	if (state & LCK_ILOCK) {
		panic("Unexpected interlock set (%p)", lock);
	}
	holding_thread = LCK_MTX_STATE_TO_THREAD(state);
	if (holding_thread) {
		enable_preemption();
		return FALSE;
	}
	state |= LCK_ILOCK;
	ordered_store_mtx(lock, state);
#endif /* __SMP__ */
	waiters = lck_mtx_lock_acquire(lock, NULL);
	state = LCK_MTX_THREAD_TO_STATE(thread);
	if (waiters != 0) {
		state |= ARM_LCK_WAITERS;
	}
#if __SMP__
	state |= LCK_ILOCK;             // Preserve interlock
	ordered_store_mtx(lock, state); // Set ownership
	interlock_unlock(lock);         // Release interlock, enable preemption
#else
	ordered_store_mtx(lock, state); // Set ownership
	enable_preemption();
#endif
	load_memory_barrier();

	turnstile_cleanup();

	return TRUE;
}

static inline boolean_t
lck_mtx_try_lock_spin_internal(lck_mtx_t *lock, boolean_t allow_held_as_mutex)
{
	uintptr_t state;

	if (!interlock_try(lock)) {
		return FALSE;
	}
	state = ordered_load_mtx(lock);
	if (LCK_MTX_STATE_TO_THREAD(state)) {
		// Lock is held as mutex
		if (allow_held_as_mutex) {
			interlock_unlock(lock);
		} else {
			// "Always" variants can never block. If the lock is held as a normal mutex
			// then someone is mixing always and non-always calls on the same lock, which is
			// forbidden.
			panic("Spin-mutex held as full mutex %p", lock);
		}
		return FALSE;
	}
	state &= ARM_LCK_WAITERS;                // Preserve waiters bit
	state |= (LCK_MTX_SPIN_TAG | LCK_ILOCK); // Add spin tag and maintain interlock
	ordered_store_mtx(lock, state);
	load_memory_barrier();

#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_MTX_TRY_SPIN_LOCK_ACQUIRE, lock, 0);
#endif /* CONFIG_DTRACE */
	return TRUE;
}

/*
 * Routine:	lck_mtx_try_lock_spin
 */
boolean_t
lck_mtx_try_lock_spin(lck_mtx_t *lock)
{
	return lck_mtx_try_lock_spin_internal(lock, TRUE);
}

/*
 * Routine:	lck_mtx_try_lock_spin_always
 */
boolean_t
lck_mtx_try_lock_spin_always(lck_mtx_t *lock)
{
	return lck_mtx_try_lock_spin_internal(lock, FALSE);
}

/*
 * Routine:	lck_mtx_unlock
 */
void
lck_mtx_unlock(lck_mtx_t *lock)
{
	thread_t thread = current_thread();
	uintptr_t state;
	boolean_t ilk_held = FALSE;

	lck_mtx_verify(lock);

	state = ordered_load_mtx(lock);
	if (state & LCK_ILOCK) {
		if (LCK_MTX_STATE_TO_THREAD(state) == (thread_t)LCK_MTX_SPIN_TAG) {
			ilk_held = TRUE; // Interlock is held by (presumably) this thread
		}
		goto slow_case;
	}
	// Locked as a mutex
	if (os_atomic_cmpxchg(&lock->lck_mtx_data,
	    LCK_MTX_THREAD_TO_STATE(thread), 0, release)) {
#if CONFIG_DTRACE
		LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, lock, 0);
#endif /* CONFIG_DTRACE */
		return;
	}
slow_case:
	lck_mtx_unlock_contended(lock, thread, ilk_held);
}

static void NOINLINE
lck_mtx_unlock_contended(lck_mtx_t *lock, thread_t thread, boolean_t ilk_held)
{
	uintptr_t state;
	boolean_t cleanup = FALSE;

	if (ilk_held) {
		state = ordered_load_mtx(lock);
	} else {
#if __SMP__
		interlock_lock(lock);
		state = ordered_load_mtx(lock);
		if (thread != LCK_MTX_STATE_TO_THREAD(state)) {
			panic("lck_mtx_unlock(): Attempt to release lock not owned by thread (%p)", lock);
		}
#else
		disable_preemption_for_thread(thread);
		state = ordered_load_mtx(lock);
		if (state & LCK_ILOCK) {
			panic("lck_mtx_unlock(): Unexpected interlock set (%p)", lock);
		}
		if (thread != LCK_MTX_STATE_TO_THREAD(state)) {
			panic("lck_mtx_unlock(): Attempt to release lock not owned by thread (%p)", lock);
		}
		state |= LCK_ILOCK;
		ordered_store_mtx(lock, state);
#endif
		if (state & ARM_LCK_WAITERS) {
			if (lck_mtx_unlock_wakeup(lock, thread)) {
				state = ARM_LCK_WAITERS;
			} else {
				state = 0;
			}
			cleanup = TRUE;
			goto unlock;
		}
	}
	state &= ARM_LCK_WAITERS; /* Clear state, retain waiters bit */
unlock:
#if __SMP__
	state |= LCK_ILOCK;
	ordered_store_mtx(lock, state);
	interlock_unlock(lock);
#else
	ordered_store_mtx(lock, state);
	enable_preemption();
#endif
	if (cleanup) {
		/*
		 * Do not do any turnstile operations outside of this block.
		 * lock/unlock is called at early stage of boot with single thread,
		 * when turnstile is not yet initialized.
		 * Even without contention we can come through the slow path
		 * if the mutex is acquired as a spin lock.
		 */
		turnstile_cleanup();
	}

#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, lock, 0);
#endif /* CONFIG_DTRACE */
}

/*
 * Routine:	lck_mtx_assert
 */
void
lck_mtx_assert(lck_mtx_t *lock, unsigned int type)
{
	thread_t thread, holder;
	uintptr_t state;

	state = ordered_load_mtx(lock);
	holder = LCK_MTX_STATE_TO_THREAD(state);
	if (holder == (thread_t)LCK_MTX_SPIN_TAG) {
		// Lock is held in spin mode, owner is unknown.
		return;
	}
	thread = current_thread();
	if (type == LCK_MTX_ASSERT_OWNED) {
		if (thread != holder) {
			panic("lck_mtx_assert(): mutex (%p) owned", lock);
		}
	} else if (type == LCK_MTX_ASSERT_NOTOWNED) {
		if (thread == holder) {
			panic("lck_mtx_assert(): mutex (%p) not owned", lock);
		}
	} else {
		panic("lck_mtx_assert(): invalid arg (%u)", type);
	}
}

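/*
 * Illustrative sketch (hypothetical helper, not part of this file): internal
 * helpers commonly document their locking contract by asserting ownership on
 * entry with the routine above.
 */
#if 0
static lck_mtx_t *my_state_mtx;		/* hypothetical, initialized elsewhere */

static void
my_state_update_locked(void)
{
	/* Caller must already hold my_state_mtx. */
	lck_mtx_assert(my_state_mtx, LCK_MTX_ASSERT_OWNED);
	/* ... mutate state protected by my_state_mtx ... */
}
#endif
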
/*
 * Routine:	lck_mtx_ilk_unlock
 */
boolean_t
lck_mtx_ilk_unlock(lck_mtx_t *lock)
{
	interlock_unlock(lock);
	return TRUE;
}

/*
 * Routine:	lck_mtx_convert_spin
 *
 * Convert a mutex held for spin into a held full mutex
 */
void
lck_mtx_convert_spin(lck_mtx_t *lock)
{
	thread_t thread = current_thread();
	uintptr_t state;
	int waiters;

	state = ordered_load_mtx(lock);
	if (LCK_MTX_STATE_TO_THREAD(state) == thread) {
		return; // Already owned as mutex, return
	}
	if ((state & LCK_ILOCK) == 0 || (LCK_MTX_STATE_TO_THREAD(state) != (thread_t)LCK_MTX_SPIN_TAG)) {
		panic("lck_mtx_convert_spin: Not held as spinlock (%p)", lock);
	}
	state &= ~(LCK_MTX_THREAD_MASK);           // Clear the spin tag
	ordered_store_mtx(lock, state);
	waiters = lck_mtx_lock_acquire(lock, NULL); // Acquire to manage priority boosts
	state = LCK_MTX_THREAD_TO_STATE(thread);
	if (waiters != 0) {
		state |= ARM_LCK_WAITERS;
	}
#if __SMP__
	state |= LCK_ILOCK;
	ordered_store_mtx(lock, state); // Set ownership
	interlock_unlock(lock);         // Release interlock, enable preemption
#else
	ordered_store_mtx(lock, state); // Set ownership
	enable_preemption();
#endif
	turnstile_cleanup();
}

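/*
 * Illustrative sketch (hypothetical names, not part of this file): a caller
 * can take the mutex in spin mode for a quick check and use the conversion
 * routine above only when it turns out that longer, possibly blocking work
 * is needed while the lock is held.
 */
#if 0
static lck_mtx_t *my_obj_mtx;		/* hypothetical, initialized elsewhere */

static void
my_obj_update(boolean_t need_slow_work)
{
	lck_mtx_lock_spin(my_obj_mtx);          /* held in spin mode: interlock held */
	if (need_slow_work) {
		lck_mtx_convert_spin(my_obj_mtx);   /* now held as a full mutex */
		/* ... work that may block ... */
	}
	lck_mtx_unlock(my_obj_mtx);             /* handles either mode */
}
#endif
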
/*
 * Routine:	lck_mtx_destroy
 */
void
lck_mtx_destroy(lck_mtx_t *lck, lck_grp_t *grp)
{
	if (lck->lck_mtx_type != LCK_MTX_TYPE) {
		panic("Destroying invalid mutex %p", lck);
	}
	if (lck->lck_mtx_tag == LCK_MTX_TAG_DESTROYED) {
		panic("Destroying previously destroyed lock %p", lck);
	}
	lck_mtx_assert(lck, LCK_MTX_ASSERT_NOTOWNED);
	lck->lck_mtx_tag = LCK_MTX_TAG_DESTROYED;
	lck_grp_lckcnt_decr(grp, LCK_TYPE_MTX);
	lck_grp_deallocate(grp);
}

/*
 * Routine:	lck_spin_assert
 */
void
lck_spin_assert(lck_spin_t *lock, unsigned int type)
{
	thread_t thread, holder;
	uintptr_t state;

	if (lock->type != LCK_SPIN_TYPE) {
		panic("Invalid spinlock %p", lock);
	}

	state = lock->lck_spin_data;
	holder = (thread_t)(state & ~LCK_ILOCK);
	thread = current_thread();
	if (type == LCK_ASSERT_OWNED) {
		if (holder == 0) {
			panic("Lock not owned %p = %lx", lock, state);
		}
		if (holder != thread) {
			panic("Lock not owned by current thread %p = %lx", lock, state);
		}
		if ((state & LCK_ILOCK) == 0) {
			panic("Lock bit not set %p = %lx", lock, state);
		}
	} else if (type == LCK_ASSERT_NOTOWNED) {
		if (holder != 0) {
			if (holder == thread) {
				panic("Lock owned by current thread %p = %lx", lock, state);
			}
		}
	} else {
		panic("lck_spin_assert(): invalid arg (%u)", type);
	}
}

boolean_t
lck_rw_lock_yield_shared(lck_rw_t *lck, boolean_t force_yield)
{
	lck_rw_word_t word;

	lck_rw_assert(lck, LCK_RW_ASSERT_SHARED);

	word.data = ordered_load_rw(lck);
	if (word.want_excl || word.want_upgrade || force_yield) {
		lck_rw_unlock_shared(lck);
		mutex_pause(2);
		lck_rw_lock_shared(lck);
		return TRUE;
	}

	return FALSE;
}

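/*
 * Illustrative sketch (hypothetical names, not part of this file): a long scan
 * under a shared rw lock can call the yield routine above periodically so that
 * queued writers are not starved; a TRUE return means the lock was dropped and
 * re-taken, so any cached iteration state must be revalidated.
 */
#if 0
static lck_rw_t *my_table_rwlock;	/* hypothetical, initialized elsewhere */

static void
my_table_scan(int nentries)
{
	lck_rw_lock_shared(my_table_rwlock);
	for (int i = 0; i < nentries; i++) {
		/* ... examine entry i ... */
		if ((i % 64) == 0 && lck_rw_lock_yield_shared(my_table_rwlock, FALSE)) {
			/* lock was dropped and reacquired; revalidate before continuing */
		}
	}
	lck_rw_unlock_shared(my_table_rwlock);
}
#endif
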
/*
 * Routine:	kdp_lck_mtx_lock_spin_is_acquired
 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
 */
boolean_t
kdp_lck_mtx_lock_spin_is_acquired(lck_mtx_t *lck)
{
	uintptr_t state;

	if (not_in_kdp) {
		panic("panic: spinlock acquired check done outside of kernel debugger");
	}
	state = ordered_load_mtx(lck);
	if (state == LCK_MTX_TAG_DESTROYED) {
		return FALSE;
	}
	if (LCK_MTX_STATE_TO_THREAD(state) || (state & LCK_ILOCK)) {
		return TRUE;
	}
	return FALSE;
}

void
kdp_lck_mtx_find_owner(__unused struct waitq *waitq, event64_t event, thread_waitinfo_t *waitinfo)
{
	lck_mtx_t *mutex = LCK_EVENT_TO_MUTEX(event);
	waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(mutex);
	uintptr_t state = ordered_load_mtx(mutex);
	thread_t holder = LCK_MTX_STATE_TO_THREAD(state);
	if ((uintptr_t)holder == (uintptr_t)LCK_MTX_SPIN_TAG) {
		waitinfo->owner = STACKSHOT_WAITOWNER_MTXSPIN;
	} else {
		assertf(state != (uintptr_t)LCK_MTX_TAG_DESTROYED, "state=0x%llx", (uint64_t)state);
		assertf(state != (uintptr_t)LCK_MTX_TAG_INDIRECT, "state=0x%llx", (uint64_t)state);
		waitinfo->owner = thread_tid(holder);
	}
}

void
kdp_rwlck_find_owner(__unused struct waitq *waitq, event64_t event, thread_waitinfo_t *waitinfo)
{
	lck_rw_t *rwlck = NULL;
	switch (waitinfo->wait_type) {
	case kThreadWaitKernelRWLockRead:
		rwlck = READ_EVENT_TO_RWLOCK(event);
		break;
	case kThreadWaitKernelRWLockWrite:
	case kThreadWaitKernelRWLockUpgrade:
		rwlck = WRITE_EVENT_TO_RWLOCK(event);
		break;
	default:
		panic("%s was called with an invalid blocking type", __FUNCTION__);
		break;
	}
	waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(rwlck);
	waitinfo->owner = thread_tid(rwlck->lck_rw_owner);
}