/*
 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 * Author: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Locking primitives implementation
 */
#define LOCK_PRIVATE 1

#include <mach_ldebug.h>

#include <kern/lock_stat.h>
#include <kern/locks.h>
#include <kern/kalloc.h>
#include <kern/misc_protos.h>
#include <kern/thread.h>
#include <kern/processor.h>
#include <kern/cpu_data.h>
#include <kern/cpu_number.h>
#include <kern/sched_prim.h>
#include <kern/debug.h>

#include <i386/machine_routines.h> /* machine_timeout_suspended() */
#include <machine/atomic.h>
#include <machine/machine_cpu.h>
#include <sys/kdebug.h>
#include <i386/locks_i386_inlines.h>
#if CONFIG_DTRACE
#define DTRACE_RW_SHARED    0x0     // reader
#define DTRACE_RW_EXCL      0x1     // writer
#define DTRACE_NO_FLAG      0x0     // not applicable
#endif /* CONFIG_DTRACE */
#define LCK_RW_LCK_EXCLUSIVE_CODE       0x100
#define LCK_RW_LCK_EXCLUSIVE1_CODE      0x101
#define LCK_RW_LCK_SHARED_CODE          0x102
#define LCK_RW_LCK_SH_TO_EX_CODE        0x103
#define LCK_RW_LCK_SH_TO_EX1_CODE       0x104
#define LCK_RW_LCK_EX_TO_SH_CODE        0x105

#define LCK_RW_LCK_EX_WRITER_SPIN_CODE  0x106
#define LCK_RW_LCK_EX_WRITER_WAIT_CODE  0x107
#define LCK_RW_LCK_EX_READER_SPIN_CODE  0x108
#define LCK_RW_LCK_EX_READER_WAIT_CODE  0x109
#define LCK_RW_LCK_SHARED_SPIN_CODE     0x110
#define LCK_RW_LCK_SHARED_WAIT_CODE     0x111
#define LCK_RW_LCK_SH_TO_EX_SPIN_CODE   0x112
#define LCK_RW_LCK_SH_TO_EX_WAIT_CODE   0x113

#define ANY_LOCK_DEBUG  (USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG)
unsigned int LcksOpts = 0;

#if DEVELOPMENT || DEBUG
unsigned int LckDisablePreemptCheck = 0;
#endif

#if USLOCK_DEBUG
/*
 * Perform simple lock checks.
 */
int uslock_check = 1;
int max_lock_loops = 100000000;
decl_simple_lock_data(extern, printf_lock);
decl_simple_lock_data(extern, panic_lock);
#endif /* USLOCK_DEBUG */

extern unsigned int not_in_kdp;
/*
 * We often want to know the addresses of the callers
 * of the various lock routines.  However, this information
 * is only used for debugging and statistics.
 */
#define INVALID_PC      ((void *) VM_MAX_KERNEL_ADDRESS)
#define INVALID_THREAD  ((void *) VM_MAX_KERNEL_ADDRESS)
#if ANY_LOCK_DEBUG
#define OBTAIN_PC(pc)   ((pc) = GET_RETURN_PC())
#define DECL_PC(pc)     pc_t pc;
#else /* ANY_LOCK_DEBUG */
#define DECL_PC(pc)
#ifdef lint
/*
 * Eliminate lint complaints about unused local pc variables.
 */
#define OBTAIN_PC(pc)   ++pc
#else /* lint */
#define OBTAIN_PC(pc)
#endif /* lint */
#endif /* ANY_LOCK_DEBUG */
/*
 * atomic exchange API is a low level abstraction of the operations
 * to atomically read, modify, and write a pointer.  This abstraction works
 * for both Intel and ARMv8.1 compare and exchange atomic instructions as
 * well as the ARM exclusive instructions.
 *
 * atomic_exchange_begin() - begin exchange and retrieve current value
 * atomic_exchange_complete() - conclude an exchange
 * atomic_exchange_abort() - cancel an exchange started with atomic_exchange_begin()
 */
static uint32_t
atomic_exchange_begin32(uint32_t *target, uint32_t *previous, enum memory_order ord)
{
    uint32_t val;

    (void)ord;                      // Memory order not used
    val = os_atomic_load(target, relaxed);
    *previous = val;
    return val;
}
static boolean_t
atomic_exchange_complete32(uint32_t *target, uint32_t previous, uint32_t newval, enum memory_order ord)
{
    return __c11_atomic_compare_exchange_strong((_Atomic uint32_t *)target, &previous, newval, ord, memory_order_relaxed);
}

static void
atomic_exchange_abort(void) { }
static boolean_t
atomic_test_and_set32(uint32_t *target, uint32_t test_mask, uint32_t set_mask, enum memory_order ord, boolean_t wait)
{
    uint32_t value, prev;

    for ( ; ; ) {
        value = atomic_exchange_begin32(target, &prev, ord);
        if (value & test_mask) {
            if (wait)
                cpu_pause();            /* wait if requested */
            else
                atomic_exchange_abort();
            return FALSE;
        }
        value |= set_mask;
        if (atomic_exchange_complete32(target, prev, value, ord))
            return TRUE;
    }
}
boolean_t
hw_atomic_test_and_set32(uint32_t *target, uint32_t test_mask, uint32_t set_mask, enum memory_order ord, boolean_t wait)
{
    return atomic_test_and_set32(target, test_mask, set_mask, ord, wait);
}
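
/*
 * Illustrative sketch (not part of the original file): the begin/complete
 * pair above is meant to be used as a compare-and-swap retry loop.  The
 * helper below is a hypothetical example of that pattern; the name
 * example_set_flag32 is invented for illustration only.
 */
#if 0
static boolean_t
example_set_flag32(uint32_t *target, uint32_t flag)
{
    uint32_t data, prev;

    for ( ; ; ) {
        /* snapshot the current value and remember it in prev */
        data = atomic_exchange_begin32(target, &prev, memory_order_relaxed);
        if (data & flag) {
            /* nothing to do: cancel the (conceptual) exchange */
            atomic_exchange_abort();
            return FALSE;
        }
        data |= flag;
        /* publish only if nobody changed the word since the snapshot */
        if (atomic_exchange_complete32(target, prev, data, memory_order_relaxed))
            return TRUE;
    }
}
#endif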
/*
 * Portable lock package implementation of usimple_locks.
 */

#if USLOCK_DEBUG
#define USLDBG(stmt)    stmt
void    usld_lock_init(usimple_lock_t, unsigned short);
void    usld_lock_pre(usimple_lock_t, pc_t);
void    usld_lock_post(usimple_lock_t, pc_t);
void    usld_unlock(usimple_lock_t, pc_t);
void    usld_lock_try_pre(usimple_lock_t, pc_t);
void    usld_lock_try_post(usimple_lock_t, pc_t);
int     usld_lock_common_checks(usimple_lock_t, char *);
#else  /* USLOCK_DEBUG */
#define USLDBG(stmt)
#endif /* USLOCK_DEBUG */
/*
 * Forward definitions
 */

static void lck_rw_lock_shared_gen(lck_rw_t *lck);
static void lck_rw_lock_exclusive_gen(lck_rw_t *lck);
static boolean_t lck_rw_lock_shared_to_exclusive_success(lck_rw_t *lck);
static boolean_t lck_rw_lock_shared_to_exclusive_failure(lck_rw_t *lck, uint32_t prior_lock_state);
static void lck_rw_lock_exclusive_to_shared_gen(lck_rw_t *lck, uint32_t prior_lock_state);
static lck_rw_type_t lck_rw_done_gen(lck_rw_t *lck, uint32_t prior_lock_state);
void lck_rw_clear_promotions_x86(thread_t thread);
static boolean_t lck_rw_held_read_or_upgrade(lck_rw_t *lock);
static boolean_t lck_rw_grab_want(lck_rw_t *lock);
static boolean_t lck_rw_grab_shared(lck_rw_t *lock);
static void lck_mtx_unlock_wakeup_tail(lck_mtx_t *mutex, uint32_t state, boolean_t indirect);
static void lck_mtx_interlock_lock(lck_mtx_t *mutex, uint32_t *new_state);
static void lck_mtx_interlock_lock_clear_flags(lck_mtx_t *mutex, uint32_t and_flags, uint32_t *new_state);
static int lck_mtx_interlock_try_lock(lck_mtx_t *mutex, uint32_t *new_state);
static int lck_mtx_interlock_try_lock_set_flags(lck_mtx_t *mutex, uint32_t or_flags, uint32_t *new_state);
static boolean_t lck_mtx_lock_wait_interlock_to_clear(lck_mtx_t *lock, uint32_t *new_state);
static boolean_t lck_mtx_try_lock_wait_interlock_to_clear(lck_mtx_t *lock, uint32_t *new_state);
/*
 * Routine: lck_spin_alloc_init
 */
lck_spin_t *
lck_spin_alloc_init(lck_grp_t *grp, lck_attr_t *attr)
{
    lck_spin_t *lck;

    if ((lck = (lck_spin_t *)kalloc(sizeof(lck_spin_t))) != 0)
        lck_spin_init(lck, grp, attr);

    return lck;
}

/*
 * Routine: lck_spin_free
 */
void
lck_spin_free(lck_spin_t *lck, lck_grp_t *grp)
{
    lck_spin_destroy(lck, grp);
    kfree(lck, sizeof(lck_spin_t));
}
/*
 * Routine: lck_spin_init
 */
void
lck_spin_init(lck_spin_t *lck, lck_grp_t *grp, __unused lck_attr_t *attr)
{
    usimple_lock_init((usimple_lock_t) lck, 0);
    lck_grp_reference(grp);
    lck_grp_lckcnt_incr(grp, LCK_TYPE_SPIN);
}
/*
 * Routine: lck_spin_destroy
 */
void
lck_spin_destroy(lck_spin_t *lck, lck_grp_t *grp)
{
    if (lck->interlock == LCK_SPIN_TAG_DESTROYED)
        return;
    lck->interlock = LCK_SPIN_TAG_DESTROYED;
    lck_grp_lckcnt_decr(grp, LCK_TYPE_SPIN);
    lck_grp_deallocate(grp);
}
/*
 * Routine: lck_spin_lock
 */
void
lck_spin_lock_grp(lck_spin_t *lck, lck_grp_t *grp)
{
    usimple_lock((usimple_lock_t) lck, grp);
}

void
lck_spin_lock(lck_spin_t *lck)
{
    usimple_lock((usimple_lock_t) lck, NULL);
}

/*
 * Routine: lck_spin_unlock
 */
void
lck_spin_unlock(lck_spin_t *lck)
{
    usimple_unlock((usimple_lock_t) lck);
}
boolean_t
lck_spin_try_lock_grp(lck_spin_t *lck, lck_grp_t *grp)
{
    boolean_t lrval = (boolean_t)usimple_lock_try((usimple_lock_t) lck, grp);
#if DEVELOPMENT || DEBUG
    /* development-only tracing elided in this excerpt */
#endif
    return lrval;
}

/*
 * Routine: lck_spin_try_lock
 */
boolean_t
lck_spin_try_lock(lck_spin_t *lck)
{
    boolean_t lrval = (boolean_t)usimple_lock_try((usimple_lock_t) lck, LCK_GRP_NULL);
#if DEVELOPMENT || DEBUG
    /* development-only tracing elided in this excerpt */
#endif
    return lrval;
}
/*
 * Routine: lck_spin_assert
 */
void
lck_spin_assert(lck_spin_t *lock, unsigned int type)
{
    thread_t thread, holder;
    uintptr_t state;

    if (__improbable(type != LCK_ASSERT_OWNED && type != LCK_ASSERT_NOTOWNED)) {
        panic("lck_spin_assert(): invalid arg (%u)", type);
    }

    state = lock->interlock;
    holder = (thread_t)state;
    thread = current_thread();
    if (type == LCK_ASSERT_OWNED) {
        if (__improbable(holder == THREAD_NULL)) {
            panic("Lock not owned %p = %lx", lock, state);
        }
        if (__improbable(holder != thread)) {
            panic("Lock not owned by current thread %p = %lx", lock, state);
        }
    } else if (type == LCK_ASSERT_NOTOWNED) {
        if (__improbable(holder != THREAD_NULL)) {
            if (holder == thread) {
                panic("Lock owned by current thread %p = %lx", lock, state);
            }
        }
    }
}
/*
 * Routine: kdp_lck_spin_is_acquired
 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
 * Returns: TRUE if lock is acquired.
 */
boolean_t
kdp_lck_spin_is_acquired(lck_spin_t *lck) {
    if (not_in_kdp) {
        panic("panic: spinlock acquired check done outside of kernel debugger");
    }
    return (lck->interlock != 0) ? TRUE : FALSE;
}
/*
 * Initialize a usimple_lock.
 *
 * No change in preemption state.
 */
void
usimple_lock_init(usimple_lock_t l, __unused unsigned short tag)
{
#ifndef MACHINE_SIMPLE_LOCK
    USLDBG(usld_lock_init(l, tag));
    hw_lock_init(&l->interlock);
#else
    simple_lock_init((simple_lock_t)l, tag);
#endif
}
volatile uint32_t spinlock_owner_cpu = ~0;
volatile usimple_lock_t spinlock_timed_out;

uint32_t
spinlock_timeout_NMI(uintptr_t thread_addr) {
    uint32_t i;

    for (i = 0; i < real_ncpus; i++) {
        if ((cpu_data_ptr[i] != NULL) && ((uintptr_t)cpu_data_ptr[i]->cpu_active_thread == thread_addr)) {
            spinlock_owner_cpu = i;
            if ((uint32_t) cpu_number() != i) {
                /* Cause NMI and panic on the owner's cpu */
                NMIPI_panic(cpu_to_cpumask(i), SPINLOCK_TIMEOUT);
            }
            break;
        }
    }

    return spinlock_owner_cpu;
}
/*
 * Acquire a usimple_lock.
 *
 * Returns with preemption disabled.  Note
 * that the hw_lock routines are responsible for
 * maintaining preemption state.
 */
void
(usimple_lock)(usimple_lock_t l
    LCK_GRP_ARG(lck_grp_t *grp))
{
#ifndef MACHINE_SIMPLE_LOCK
    DECL_PC(pc);

    OBTAIN_PC(pc);
    USLDBG(usld_lock_pre(l, pc));

    if (__improbable(hw_lock_to(&l->interlock, LockTimeOutTSC, grp) == 0)) {
        boolean_t uslock_acquired = FALSE;
        while (machine_timeout_suspended()) {
            if ((uslock_acquired = hw_lock_to(&l->interlock, LockTimeOutTSC, grp)))
                break;
        }

        if (uslock_acquired == FALSE) {
            uint32_t lock_cpu;
            uintptr_t lowner = (uintptr_t)l->interlock.lock_data;
            spinlock_timed_out = l;
            lock_cpu = spinlock_timeout_NMI(lowner);
            panic("Spinlock acquisition timed out: lock=%p, lock owner thread=0x%lx, current_thread: %p, lock owner active on CPU 0x%x, current owner: 0x%lx, time: %llu",
                l, lowner, current_thread(), lock_cpu, (uintptr_t)l->interlock.lock_data, mach_absolute_time());
        }
    }
#if DEVELOPMENT || DEBUG
    /* development-only tracing elided in this excerpt */
#endif

    USLDBG(usld_lock_post(l, pc));
#else
    simple_lock((simple_lock_t)l, grp);
#endif
#if CONFIG_DTRACE
    LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE, l, 0, (uintptr_t)LCK_GRP_PROBEARG(grp));
#endif
}
/*
 * Release a usimple_lock.
 *
 * Returns with preemption enabled.  Note
 * that the hw_lock routines are responsible for
 * maintaining preemption state.
 */
void
usimple_unlock(usimple_lock_t l)
{
#ifndef MACHINE_SIMPLE_LOCK
    DECL_PC(pc);

    OBTAIN_PC(pc);
    USLDBG(usld_unlock(l, pc));
#if DEVELOPMENT || DEBUG
    /* development-only tracing elided in this excerpt */
#endif
    hw_lock_unlock(&l->interlock);
#else
    simple_unlock_rwmb((simple_lock_t)l);
#endif
}
/*
 * Conditionally acquire a usimple_lock.
 *
 * On success, returns with preemption disabled.
 * On failure, returns with preemption in the same state
 * as when first invoked.  Note that the hw_lock routines
 * are responsible for maintaining preemption state.
 *
 * XXX No stats are gathered on a miss; I preserved this
 * behavior from the original assembly-language code, but
 * doesn't it make sense to log misses?  XXX
 */
unsigned int
usimple_lock_try(usimple_lock_t l, lck_grp_t *grp)
{
#ifndef MACHINE_SIMPLE_LOCK
    unsigned int success;
    DECL_PC(pc);

    OBTAIN_PC(pc);
    USLDBG(usld_lock_try_pre(l, pc));
    if ((success = hw_lock_try(&l->interlock, grp))) {
#if DEVELOPMENT || DEBUG
        /* development-only tracing elided in this excerpt */
#endif
        USLDBG(usld_lock_try_post(l, pc));
    }
    return success;
#else
    return (simple_lock_try((simple_lock_t)l, grp));
#endif
}
/*
 * Acquire a usimple_lock while polling for pending cpu signals
 * and spinning on a lock.
 */
unsigned int
(usimple_lock_try_lock_mp_signal_safe_loop_deadline)(usimple_lock_t l,
    uint64_t deadline
    LCK_GRP_ARG(lck_grp_t *grp))
{
    boolean_t istate = ml_get_interrupts_enabled();

    if (deadline < mach_absolute_time()) {
        return 0;
    }

    while (!simple_lock_try(l, grp)) {
        if (!istate)
            cpu_signal_handler(NULL);

        if (deadline < mach_absolute_time()) {
            return 0;
        }
    }

    return 1;
}

void
(usimple_lock_try_lock_loop)(usimple_lock_t l
    LCK_GRP_ARG(lck_grp_t *grp))
{
    usimple_lock_try_lock_mp_signal_safe_loop_deadline(l, ULLONG_MAX, grp);
}

unsigned int
(usimple_lock_try_lock_mp_signal_safe_loop_duration)(usimple_lock_t l,
    uint64_t duration
    LCK_GRP_ARG(lck_grp_t *grp))
{
    uint64_t deadline;
    uint64_t base_at = mach_absolute_time();
    uint64_t duration_at;

    nanoseconds_to_absolutetime(duration, &duration_at);
    deadline = base_at + duration_at;
    if (deadline < base_at) {
        /* deadline has overflowed, make it saturate */
        deadline = ULLONG_MAX;
    }

    return usimple_lock_try_lock_mp_signal_safe_loop_deadline(l, deadline, grp);
}
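
/*
 * Illustrative sketch (not part of the original file): how a caller might
 * bound a try-lock by a duration.  The helper name and the 10ms budget are
 * hypothetical, chosen only to show the intended use of the routine above.
 */
#if 0
static boolean_t
example_try_lock_for_10ms(usimple_lock_t l LCK_GRP_ARG(lck_grp_t *grp))
{
    /* 10ms expressed in nanoseconds; converted to absolute time internally */
    return usimple_lock_try_lock_mp_signal_safe_loop_duration(l,
        10 * NSEC_PER_MSEC, grp) ? TRUE : FALSE;
}
#endif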
#if USLOCK_DEBUG
/*
 * States of a usimple_lock.  The default when initializing
 * a usimple_lock is setting it up for debug checking.
 */
#define USLOCK_CHECKED      0x0001      /* lock is being checked */
#define USLOCK_TAKEN        0x0002      /* lock has been taken */
#define USLOCK_INIT         0xBAA0      /* lock has been initialized */
#define USLOCK_INITIALIZED  (USLOCK_INIT|USLOCK_CHECKED)
#define USLOCK_CHECKING(l)  (uslock_check && \
                             ((l)->debug.state & USLOCK_CHECKED))
/*
 * Initialize the debugging information contained
 * in a usimple_lock.
 */
void
usld_lock_init(usimple_lock_t l, __unused unsigned short tag)
{
    if (l == USIMPLE_LOCK_NULL)
        panic("lock initialization: null lock pointer");
    l->lock_type = USLOCK_TAG;
    l->debug.state = uslock_check ? USLOCK_INITIALIZED : 0;
    l->debug.lock_cpu = l->debug.unlock_cpu = 0;
    l->debug.lock_pc = l->debug.unlock_pc = INVALID_PC;
    l->debug.lock_thread = l->debug.unlock_thread = INVALID_THREAD;
    l->debug.duration[0] = l->debug.duration[1] = 0;
    l->debug.unlock_cpu = l->debug.unlock_cpu = 0;
    l->debug.unlock_pc = l->debug.unlock_pc = INVALID_PC;
    l->debug.unlock_thread = l->debug.unlock_thread = INVALID_THREAD;
}
/*
 * These checks apply to all usimple_locks, not just
 * those with USLOCK_CHECKED turned on.
 */
int
usld_lock_common_checks(usimple_lock_t l, char *caller)
{
    if (l == USIMPLE_LOCK_NULL)
        panic("%s: null lock pointer", caller);
    if (l->lock_type != USLOCK_TAG)
        panic("%s: %p is not a usimple lock, 0x%x", caller, l, l->lock_type);
    if (!(l->debug.state & USLOCK_INIT))
        panic("%s: %p is not an initialized lock, 0x%x", caller, l, l->debug.state);
    return USLOCK_CHECKING(l);
}
/*
 * Debug checks on a usimple_lock just before attempting
 * to acquire it.
 */
void
usld_lock_pre(usimple_lock_t l, pc_t pc)
{
    char caller[] = "usimple_lock";

    if (!usld_lock_common_checks(l, caller))
        return;

    /*
     * Note that we have a weird case where we are getting a lock when we are
     * in the process of putting the system to sleep. We are running with no
     * current threads, therefore we can't tell if we are trying to retake a lock
     * we have or someone on the other processor has it.  Therefore we just
     * ignore this test if the locking thread is 0.
     */
    if ((l->debug.state & USLOCK_TAKEN) && l->debug.lock_thread &&
        l->debug.lock_thread == (void *) current_thread()) {
        printf("%s: lock %p already locked (at %p) by",
            caller, l, l->debug.lock_pc);
        printf(" current thread %p (new attempt at pc %p)\n",
            l->debug.lock_thread, pc);
    }
    mp_disable_preemption();
    mp_enable_preemption();
}
/*
 * Debug checks on a usimple_lock just after acquiring it.
 *
 * Pre-emption has been disabled at this point,
 * so we are safe in using cpu_number.
 */
void
usld_lock_post(usimple_lock_t l, pc_t pc)
{
    int mycpu;
    char caller[] = "successful usimple_lock";

    if (!usld_lock_common_checks(l, caller))
        return;

    if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED))
        panic("%s: lock %p became uninitialized",
            caller, l);
    if ((l->debug.state & USLOCK_TAKEN))
        panic("%s: lock 0x%p became TAKEN by someone else",
            caller, l);

    mycpu = cpu_number();
    l->debug.lock_thread = (void *)current_thread();
    l->debug.state |= USLOCK_TAKEN;
    l->debug.lock_pc = pc;
    l->debug.lock_cpu = mycpu;
}
/*
 * Debug checks on a usimple_lock just before
 * releasing it.  Note that the caller has not
 * yet released the hardware lock.
 *
 * Preemption is still disabled, so there's
 * no problem using cpu_number.
 */
void
usld_unlock(usimple_lock_t l, pc_t pc)
{
    int mycpu;
    char caller[] = "usimple_unlock";

    if (!usld_lock_common_checks(l, caller))
        return;

    mycpu = cpu_number();

    if (!(l->debug.state & USLOCK_TAKEN))
        panic("%s: lock 0x%p hasn't been taken",
            caller, l);
    if (l->debug.lock_thread != (void *) current_thread())
        panic("%s: unlocking lock 0x%p, owned by thread %p",
            caller, l, l->debug.lock_thread);
    if (l->debug.lock_cpu != mycpu) {
        printf("%s: unlocking lock 0x%p on cpu 0x%x",
            caller, l, mycpu);
        printf(" (acquired on cpu 0x%x)\n", l->debug.lock_cpu);
    }

    l->debug.unlock_thread = l->debug.lock_thread;
    l->debug.lock_thread = INVALID_PC;
    l->debug.state &= ~USLOCK_TAKEN;
    l->debug.unlock_pc = pc;
    l->debug.unlock_cpu = mycpu;
}
/*
 * Debug checks on a usimple_lock just before
 * attempting to acquire it.
 *
 * Preemption isn't guaranteed to be disabled.
 */
void
usld_lock_try_pre(usimple_lock_t l, pc_t pc)
{
    char caller[] = "usimple_lock_try";

    if (!usld_lock_common_checks(l, caller))
        return;
}
/*
 * Debug checks on a usimple_lock just after
 * successfully attempting to acquire it.
 *
 * Preemption has been disabled by the
 * lock acquisition attempt, so it's safe
 * to use cpu_number.
 */
void
usld_lock_try_post(usimple_lock_t l, pc_t pc)
{
    int mycpu;
    char caller[] = "successful usimple_lock_try";

    if (!usld_lock_common_checks(l, caller))
        return;

    if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED))
        panic("%s: lock 0x%p became uninitialized",
            caller, l);
    if ((l->debug.state & USLOCK_TAKEN))
        panic("%s: lock 0x%p became TAKEN by someone else",
            caller, l);

    mycpu = cpu_number();
    l->debug.lock_thread = (void *) current_thread();
    l->debug.state |= USLOCK_TAKEN;
    l->debug.lock_pc = pc;
    l->debug.lock_cpu = mycpu;
}
#endif /* USLOCK_DEBUG */
/*
 * Routine: lck_rw_alloc_init
 */
lck_rw_t *
lck_rw_alloc_init(lck_grp_t *grp, lck_attr_t *attr)
{
    lck_rw_t *lck;

    if ((lck = (lck_rw_t *)kalloc(sizeof(lck_rw_t))) != 0) {
        bzero(lck, sizeof(lck_rw_t));
        lck_rw_init(lck, grp, attr);
    }

    return lck;
}

/*
 * Routine: lck_rw_free
 */
void
lck_rw_free(lck_rw_t *lck, lck_grp_t *grp)
{
    lck_rw_destroy(lck, grp);
    kfree(lck, sizeof(lck_rw_t));
}
/*
 * Routine: lck_rw_init
 */
void
lck_rw_init(lck_rw_t *lck, lck_grp_t *grp, lck_attr_t *attr)
{
    lck_attr_t *lck_attr = (attr != LCK_ATTR_NULL) ?
        attr : &LockDefaultLckAttr;

    hw_lock_byte_init(&lck->lck_rw_interlock);
    lck->lck_rw_want_write = FALSE;
    lck->lck_rw_want_upgrade = FALSE;
    lck->lck_rw_shared_count = 0;
    lck->lck_rw_can_sleep = TRUE;
    lck->lck_r_waiting = lck->lck_w_waiting = 0;
    lck->lck_rw_priv_excl = ((lck_attr->lck_attr_val &
        LCK_ATTR_RW_SHARED_PRIORITY) == 0);

    lck_grp_reference(grp);
    lck_grp_lckcnt_incr(grp, LCK_TYPE_RW);
}
/*
 * Routine: lck_rw_destroy
 */
void
lck_rw_destroy(lck_rw_t *lck, lck_grp_t *grp)
{
    if (lck->lck_rw_tag == LCK_RW_TAG_DESTROYED)
        return;
    lck_rw_assert(lck, LCK_RW_ASSERT_NOTHELD);
    lck->lck_rw_tag = LCK_RW_TAG_DESTROYED;
    lck_grp_lckcnt_decr(grp, LCK_TYPE_RW);
    lck_grp_deallocate(grp);
}
/*
 * Sleep locks.  These use the same data structure and algorithm
 * as the spin locks, but the process sleeps while it is waiting
 * for the lock.  These work on uniprocessor systems.
 */

#define DECREMENTER_TIMEOUT 1000000

/*
 * We disable interrupts while holding the RW interlock to prevent an
 * interrupt from exacerbating hold time.
 * Hence, local helper functions lck_interlock_lock()/lck_interlock_unlock().
 */
static inline boolean_t
lck_interlock_lock(lck_rw_t *lck)
{
    boolean_t istate;

    istate = ml_set_interrupts_enabled(FALSE);
    hw_lock_byte_lock(&lck->lck_rw_interlock);
    return istate;
}

static inline void
lck_interlock_unlock(lck_rw_t *lck, boolean_t istate)
{
    hw_lock_byte_unlock(&lck->lck_rw_interlock);
    ml_set_interrupts_enabled(istate);
}
/*
 * This inline is used when busy-waiting for an rw lock.
 * If interrupts were disabled when the lock primitive was called,
 * we poll the IPI handler for pending tlb flushes.
 * XXX This is a hack to avoid deadlocking on the pmap_system_lock.
 */
static inline void
lck_rw_lock_pause(boolean_t interrupts_enabled)
{
    if (!interrupts_enabled)
        handle_pending_TLB_flushes();
    cpu_pause();
}
static inline boolean_t
lck_rw_held_read_or_upgrade(lck_rw_t *lock)
{
    if (ordered_load(&lock->data) & (LCK_RW_SHARED_MASK | LCK_RW_INTERLOCK | LCK_RW_WANT_UPGRADE))
        return TRUE;
    return FALSE;
}
/*
 * compute the deadline to spin against when
 * waiting for a change of state on a lck_rw_t
 */
static inline uint64_t
lck_rw_deadline_for_spin(lck_rw_t *lck)
{
    if (lck->lck_rw_can_sleep) {
        if (lck->lck_r_waiting || lck->lck_w_waiting || lck->lck_rw_shared_count > machine_info.max_cpus) {
            /*
             * there are already threads waiting on this lock... this
             * implies that they have spun beyond their deadlines waiting for
             * the desired state to show up so we will not bother spinning at this time...
             *   or
             * the current number of threads sharing this lock exceeds our capacity to run them
             * concurrently and since all states we're going to spin for require the rw_shared_count
             * to be at 0, we'll not bother spinning since the latency for this to happen is
             * unpredictable...
             */
            return (mach_absolute_time());
        }
        return (mach_absolute_time() + MutexSpin);
    } else
        return (mach_absolute_time() + (100000LL * 1000000000LL));
}
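
/*
 * Illustrative sketch (not part of the original file): the generic
 * spin-then-give-up pattern that the deadline above feeds.  The predicate
 * lock_is_busy() is a hypothetical stand-in for the state being waited on.
 */
#if 0
static boolean_t
example_spin_until_deadline(lck_rw_t *lck, boolean_t (*lock_is_busy)(lck_rw_t *))
{
    boolean_t istate = ml_get_interrupts_enabled();
    uint64_t deadline = lck_rw_deadline_for_spin(lck);

    while (lock_is_busy(lck) && mach_absolute_time() < deadline)
        lck_rw_lock_pause(istate);      /* polls pending TLB IPIs if needed */

    return lock_is_busy(lck) ? FALSE : TRUE;    /* FALSE: caller should block */
}
#endif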
/*
 * Spin while interlock is held.
 */
static inline void
lck_rw_interlock_spin(lck_rw_t *lock)
{
    while (ordered_load(&lock->data) & LCK_RW_INTERLOCK) {
        cpu_pause();
    }
}
static boolean_t
lck_rw_grab_want(lck_rw_t *lock)
{
    uint32_t data, prev;

    for ( ; ; ) {
        data = atomic_exchange_begin32(&lock->data, &prev, memory_order_relaxed);
        if ((data & LCK_RW_INTERLOCK) == 0)
            break;
        atomic_exchange_abort();
        lck_rw_interlock_spin(lock);
    }
    if (data & LCK_RW_WANT_WRITE) {
        atomic_exchange_abort();
        return FALSE;
    }
    data |= LCK_RW_WANT_WRITE;
    return atomic_exchange_complete32(&lock->data, prev, data, memory_order_relaxed);
}
static boolean_t
lck_rw_grab_shared(lck_rw_t *lock)
{
    uint32_t data, prev;

    for ( ; ; ) {
        data = atomic_exchange_begin32(&lock->data, &prev, memory_order_acquire_smp);
        if ((data & LCK_RW_INTERLOCK) == 0)
            break;
        atomic_exchange_abort();
        lck_rw_interlock_spin(lock);
    }
    if (data & (LCK_RW_WANT_WRITE | LCK_RW_WANT_UPGRADE)) {
        if (((data & LCK_RW_SHARED_MASK) == 0) || (data & LCK_RW_PRIV_EXCL)) {
            atomic_exchange_abort();
            return FALSE;
        }
    }
    data += LCK_RW_SHARED_READER;
    return atomic_exchange_complete32(&lock->data, prev, data, memory_order_acquire_smp);
}
/*
 * Routine: lck_rw_lock_exclusive
 */
static void
lck_rw_lock_exclusive_gen(lck_rw_t *lck)
{
    __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(lck);
    uint64_t deadline = 0;
    int gotlock = 0;
    int lockheld = 0;
    int slept = 0;
    wait_result_t res = 0;
    boolean_t istate = -1;

#if CONFIG_DTRACE
    boolean_t dtrace_ls_initialized = FALSE;
    boolean_t dtrace_rwl_excl_spin, dtrace_rwl_excl_block, dtrace_ls_enabled = FALSE;
    uint64_t wait_interval = 0;
    int readers_at_sleep = 0;
#endif

    /*
     * Try to acquire the lck_rw_want_write bit.
     */
    while ( !lck_rw_grab_want(lck)) {

#if CONFIG_DTRACE
        if (dtrace_ls_initialized == FALSE) {
            dtrace_ls_initialized = TRUE;
            dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
            dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
            dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
            if (dtrace_ls_enabled) {
                /*
                 * Either sleeping or spinning is happening,
                 * start a timing of our delay interval now.
                 */
                readers_at_sleep = lck->lck_rw_shared_count;
                wait_interval = mach_absolute_time();
            }
        }
#endif
        if (istate == -1)
            istate = ml_get_interrupts_enabled();

        deadline = lck_rw_deadline_for_spin(lck);

        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);

        while (((gotlock = lck_rw_grab_want(lck)) == 0) && mach_absolute_time() < deadline)
            lck_rw_lock_pause(istate);

        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_END, trace_lck, 0, 0, gotlock, 0);

        if (gotlock)
            break;
        /*
         * if we get here, the deadline has expired w/o us
         * being able to grab the lock exclusively
         * check to see if we're allowed to do a thread_block
         */
        if (lck->lck_rw_can_sleep) {

            istate = lck_interlock_lock(lck);

            if (lck->lck_rw_want_write) {

                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);

                lck->lck_w_waiting = TRUE;

                thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite);
                res = assert_wait(RW_LOCK_WRITER_EVENT(lck),
                    THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
                lck_interlock_unlock(lck, istate);

                if (res == THREAD_WAITING) {
                    res = thread_block(THREAD_CONTINUE_NULL);
                    slept++;
                }
                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_END, trace_lck, res, slept, 0, 0);
            } else {
                lck->lck_rw_want_write = TRUE;
                lck_interlock_unlock(lck, istate);
                break;
            }
        }
    }
    /*
     * Wait for readers (and upgrades) to finish...
     * the test for these conditions must be done simultaneously with
     * a check of the interlock not being held since
     * the rw_shared_count will drop to 0 first and then want_upgrade
     * will be set to 1 in the shared_to_exclusive scenario... those
     * adjustments are done behind the interlock and represent an
     * atomic change in state and must be considered as such
     * however, once we see the read count at 0, the want_upgrade not set
     * and the interlock not held, we are safe to proceed
     */
    while (lck_rw_held_read_or_upgrade(lck)) {

#if CONFIG_DTRACE
        /*
         * Either sleeping or spinning is happening, start
         * a timing of our delay interval now.  If we set it
         * to -1 we don't have accurate data so we cannot later
         * decide to record a dtrace spin or sleep event.
         */
        if (dtrace_ls_initialized == FALSE) {
            dtrace_ls_initialized = TRUE;
            dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
            dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
            dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
            if (dtrace_ls_enabled) {
                /*
                 * Either sleeping or spinning is happening,
                 * start a timing of our delay interval now.
                 */
                readers_at_sleep = lck->lck_rw_shared_count;
                wait_interval = mach_absolute_time();
            }
        }
#endif
        if (istate == -1)
            istate = ml_get_interrupts_enabled();

        deadline = lck_rw_deadline_for_spin(lck);

        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);

        while ((lockheld = lck_rw_held_read_or_upgrade(lck)) && mach_absolute_time() < deadline)
            lck_rw_lock_pause(istate);

        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_END, trace_lck, 0, 0, lockheld, 0);

        if ( !lockheld)
            break;
        /*
         * if we get here, the deadline has expired w/o us
         * being able to grab the lock exclusively
         * check to see if we're allowed to do a thread_block
         */
        if (lck->lck_rw_can_sleep) {

            istate = lck_interlock_lock(lck);

            if (lck->lck_rw_shared_count != 0 || lck->lck_rw_want_upgrade) {
                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);

                lck->lck_w_waiting = TRUE;

                thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite);
                res = assert_wait(RW_LOCK_WRITER_EVENT(lck),
                    THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
                lck_interlock_unlock(lck, istate);

                if (res == THREAD_WAITING) {
                    res = thread_block(THREAD_CONTINUE_NULL);
                    slept++;
                }
                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_END, trace_lck, res, slept, 0, 0);
            } else {
                lck_interlock_unlock(lck, istate);
                /*
                 * must own the lock now, since we checked for
                 * readers or upgrade owner behind the interlock
                 * no need for a call to 'lck_rw_held_read_or_upgrade'
                 */
                break;
            }
        }
    }

#if CONFIG_DTRACE
    /*
     * Decide what latencies we suffered that are Dtrace events.
     * If we have set wait_interval, then we either spun or slept.
     * At least we get out from under the interlock before we record
     * which is the best we can do here to minimize the impact
     * of the tracing.
     * If we have set wait_interval to -1, then dtrace was not enabled when we
     * started sleeping/spinning so we don't record this event.
     */
    if (dtrace_ls_enabled == TRUE) {
        if (slept == 0) {
            LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_SPIN, lck,
                mach_absolute_time() - wait_interval, 1);
        } else {
            /*
             * For the blocking case, we also record if when we blocked
             * it was held for read or write, and how many readers.
             * Notice that above we recorded this before we dropped
             * the interlock so the count is accurate.
             */
            LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_BLOCK, lck,
                mach_absolute_time() - wait_interval, 1,
                (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
        }
    }
    LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lck, 1);
#endif
}
/*
 * Routine: lck_rw_done
 */
lck_rw_type_t
lck_rw_done(lck_rw_t *lock)
{
    uint32_t data, prev;

    for ( ; ; ) {
        data = atomic_exchange_begin32(&lock->data, &prev, memory_order_release_smp);
        if (data & LCK_RW_INTERLOCK) {          /* wait for interlock to clear */
            atomic_exchange_abort();
            lck_rw_interlock_spin(lock);
            continue;
        }
        if (data & LCK_RW_SHARED_MASK) {
            data -= LCK_RW_SHARED_READER;
            if ((data & LCK_RW_SHARED_MASK) == 0)   /* if reader count has now gone to 0, check for waiters */
                goto check_waiters;
        } else {                                /* if reader count == 0, must be exclusive lock */
            if (data & LCK_RW_WANT_UPGRADE) {
                data &= ~(LCK_RW_WANT_UPGRADE);
            } else {
                if (data & LCK_RW_WANT_WRITE)
                    data &= ~(LCK_RW_WANT_EXCL);
                else                            /* lock is not 'owned', panic */
                    panic("Releasing non-exclusive RW lock without a reader refcount!");
            }
check_waiters:
            if (prev & LCK_RW_W_WAITING) {
                data &= ~(LCK_RW_W_WAITING);
                if ((prev & LCK_RW_PRIV_EXCL) == 0)
                    data &= ~(LCK_RW_R_WAITING);
            } else
                data &= ~(LCK_RW_R_WAITING);
        }
        if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_release_smp))
            break;
        cpu_pause();
    }
    return lck_rw_done_gen(lock, prev);
}
/*
 * Routine: lck_rw_done_gen
 *
 * called from lck_rw_done()
 * prior_lock_state is the value in the 1st
 * word of the lock at the time of a successful
 * atomic compare and exchange with the new value...
 * it represents the state of the lock before we
 * decremented the rw_shared_count or cleared either
 * rw_want_upgrade or rw_want_write and
 * the lck_x_waiting bits...  since the wrapper
 * routine has already changed the state atomically,
 * we just need to decide if we should
 * wake up anyone and what value to return... we do
 * this by examining the state of the lock before
 * we changed it
 */
static lck_rw_type_t
lck_rw_done_gen(
    lck_rw_t    *lck,
    uint32_t    prior_lock_state)
{
    lck_rw_t        *fake_lck;
    lck_rw_type_t   lock_type;
    thread_t        thread;
    uint32_t        rwlock_count;

    thread = current_thread();
    rwlock_count = thread->rwlock_count--;
    fake_lck = (lck_rw_t *)&prior_lock_state;

    if (lck->lck_rw_can_sleep) {
        /*
         * prior_lock state is a snapshot of the 1st word of the
         * lock in question... we'll fake up a pointer to it
         * and carefully not access anything beyond whats defined
         * in the first word of a lck_rw_t
         */
        if (fake_lck->lck_rw_shared_count <= 1) {
            if (fake_lck->lck_w_waiting) {
                thread_wakeup(RW_LOCK_WRITER_EVENT(lck));
            }

            if (!(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting) && fake_lck->lck_r_waiting) {
                thread_wakeup(RW_LOCK_READER_EVENT(lck));
            }
        }

        if (rwlock_count == 0) {
            panic("rw lock count underflow for thread %p", thread);
        }

        /* Check if dropping the lock means that we need to unpromote */
        if ((rwlock_count == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
            /* sched_flags checked without lock, but will be rechecked while clearing */
            lck_rw_clear_promotion(thread, unslide_for_kdebug(lck));
        }
    }
    if (fake_lck->lck_rw_shared_count) {
        lock_type = LCK_RW_TYPE_SHARED;
    } else {
        lock_type = LCK_RW_TYPE_EXCLUSIVE;
    }

#if CONFIG_DTRACE
    LOCKSTAT_RECORD(LS_LCK_RW_DONE_RELEASE, lck, lock_type == LCK_RW_TYPE_SHARED ? 0 : 1);
#endif

    return lock_type;
}
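
/*
 * Illustrative sketch (not part of the original file): lck_rw_done_gen()
 * above decodes a saved 32-bit snapshot by aliasing it as the first word
 * of a lck_rw_t.  The hypothetical helper below shows the same idiom in
 * isolation; it is an assumption-labeled example, not kernel API.
 */
#if 0
static boolean_t
example_prior_state_had_writer_waiting(uint32_t prior_lock_state)
{
    lck_rw_t *fake_lck = (lck_rw_t *)&prior_lock_state;

    /* only the bit-field view of the first word is consulted */
    return fake_lck->lck_w_waiting ? TRUE : FALSE;
}
#endif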
/*
 * Routine: lck_rw_unlock
 */
void
lck_rw_unlock(lck_rw_t *lck, lck_rw_type_t lck_rw_type)
{
    if (lck_rw_type == LCK_RW_TYPE_SHARED)
        lck_rw_unlock_shared(lck);
    else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
        lck_rw_unlock_exclusive(lck);
    else
        panic("lck_rw_unlock(): Invalid RW lock type: %d\n", lck_rw_type);
}
/*
 * Routine: lck_rw_unlock_shared
 */
void
lck_rw_unlock_shared(lck_rw_t *lck)
{
    lck_rw_type_t ret;

    assertf(lck->lck_rw_shared_count > 0, "lck %p has shared_count=0x%x", lck, lck->lck_rw_shared_count);
    ret = lck_rw_done(lck);

    if (ret != LCK_RW_TYPE_SHARED)
        panic("lck_rw_unlock_shared(): lock %p held in mode: %d\n", lck, ret);
}
/*
 * Routine: lck_rw_unlock_exclusive
 */
void
lck_rw_unlock_exclusive(lck_rw_t *lck)
{
    lck_rw_type_t ret;

    ret = lck_rw_done(lck);

    if (ret != LCK_RW_TYPE_EXCLUSIVE)
        panic("lck_rw_unlock_exclusive(): lock held in mode: %d\n", ret);
}
/*
 * Routine: lck_rw_lock
 */
void
lck_rw_lock(lck_rw_t *lck, lck_rw_type_t lck_rw_type)
{
    if (lck_rw_type == LCK_RW_TYPE_SHARED)
        lck_rw_lock_shared(lck);
    else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
        lck_rw_lock_exclusive(lck);
    else
        panic("lck_rw_lock(): Invalid RW lock type: %x\n", lck_rw_type);
}
/*
 * Routine: lck_rw_lock_shared
 */
void
lck_rw_lock_shared(lck_rw_t *lock)
{
    uint32_t data, prev;

    current_thread()->rwlock_count++;
    for ( ; ; ) {
        data = atomic_exchange_begin32(&lock->data, &prev, memory_order_acquire_smp);
        if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK)) {
            atomic_exchange_abort();
            if (lock->lck_rw_can_sleep) {
                lck_rw_lock_shared_gen(lock);
            } else {
                cpu_pause();
                continue;
            }
            break;
        }
        data += LCK_RW_SHARED_READER;
        if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_acquire_smp))
            break;
        cpu_pause();
    }
#if CONFIG_DTRACE
    LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED);
#endif /* CONFIG_DTRACE */
}
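
/*
 * Illustrative sketch (not part of the original file): typical client use
 * of the shared fast path above.  The data structure and function name are
 * hypothetical; lck_rw_done() is the common unlock for either mode.
 */
#if 0
struct example_table {
    lck_rw_t    lock;
    int         value;
};

static int
example_table_read(struct example_table *t)
{
    int v;

    lck_rw_lock_shared(&t->lock);       /* multiple readers may hold this */
    v = t->value;
    (void)lck_rw_done(&t->lock);        /* returns the mode that was dropped */
    return v;
}
#endif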
/*
 * Routine: lck_rw_lock_shared_gen
 *
 * assembly fast path code has determined that this lock
 * is held exclusively... this is where we spin/block
 * until we can acquire the lock in the shared mode
 */
static void
lck_rw_lock_shared_gen(lck_rw_t *lck)
{
    __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(lck);
    uint64_t deadline = 0;
    int gotlock = 0;
    int slept = 0;
    wait_result_t res = 0;
    boolean_t istate = -1;

#if CONFIG_DTRACE
    uint64_t wait_interval = 0;
    int readers_at_sleep = 0;
    boolean_t dtrace_ls_initialized = FALSE;
    boolean_t dtrace_rwl_shared_spin, dtrace_rwl_shared_block, dtrace_ls_enabled = FALSE;
#endif

    while ( !lck_rw_grab_shared(lck)) {

#if CONFIG_DTRACE
        if (dtrace_ls_initialized == FALSE) {
            dtrace_ls_initialized = TRUE;
            dtrace_rwl_shared_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_SPIN] != 0);
            dtrace_rwl_shared_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_BLOCK] != 0);
            dtrace_ls_enabled = dtrace_rwl_shared_spin || dtrace_rwl_shared_block;
            if (dtrace_ls_enabled) {
                /*
                 * Either sleeping or spinning is happening,
                 * start a timing of our delay interval now.
                 */
                readers_at_sleep = lck->lck_rw_shared_count;
                wait_interval = mach_absolute_time();
            }
        }
#endif
        if (istate == -1)
            istate = ml_get_interrupts_enabled();

        deadline = lck_rw_deadline_for_spin(lck);

        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_START,
            trace_lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, 0, 0);

        while (((gotlock = lck_rw_grab_shared(lck)) == 0) && mach_absolute_time() < deadline)
            lck_rw_lock_pause(istate);

        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_END,
            trace_lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, gotlock, 0);

        if (gotlock)
            break;
        /*
         * if we get here, the deadline has expired w/o us
         * being able to grab the lock for read
         * check to see if we're allowed to do a thread_block
         */
        if (lck->lck_rw_can_sleep) {

            istate = lck_interlock_lock(lck);

            if ((lck->lck_rw_want_write || lck->lck_rw_want_upgrade) &&
                ((lck->lck_rw_shared_count == 0) || lck->lck_rw_priv_excl)) {

                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_START,
                    trace_lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, 0, 0);

                lck->lck_r_waiting = TRUE;

                thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockRead);
                res = assert_wait(RW_LOCK_READER_EVENT(lck),
                    THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
                lck_interlock_unlock(lck, istate);

                if (res == THREAD_WAITING) {
                    res = thread_block(THREAD_CONTINUE_NULL);
                    slept++;
                }
                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_END,
                    trace_lck, res, slept, 0, 0);
            } else {
                lck->lck_rw_shared_count++;
                lck_interlock_unlock(lck, istate);
                break;
            }
        }
    }

#if CONFIG_DTRACE
    if (dtrace_ls_enabled == TRUE) {
        if (slept == 0) {
            LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_SPIN, lck, mach_absolute_time() - wait_interval, 0);
        } else {
            LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_BLOCK, lck,
                mach_absolute_time() - wait_interval, 0,
                (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
        }
    }
    LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lck, 0);
#endif
}
/*
 * Routine: lck_rw_lock_exclusive
 */
void
lck_rw_lock_exclusive(lck_rw_t *lock)
{
    current_thread()->rwlock_count++;
    if (atomic_test_and_set32(&lock->data,
        (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK),
        LCK_RW_WANT_EXCL, memory_order_acquire_smp, FALSE)) {
#if CONFIG_DTRACE
        LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
#endif /* CONFIG_DTRACE */
    } else
        lck_rw_lock_exclusive_gen(lock);
}
/*
 * Routine: lck_rw_lock_shared_to_exclusive
 *
 * False returned upon failure, in this case the shared lock is dropped.
 */
boolean_t
lck_rw_lock_shared_to_exclusive(lck_rw_t *lock)
{
    uint32_t data, prev;

    for ( ; ; ) {
        data = atomic_exchange_begin32(&lock->data, &prev, memory_order_acquire_smp);
        if (data & LCK_RW_INTERLOCK) {
            atomic_exchange_abort();
            lck_rw_interlock_spin(lock);
            continue;
        }
        if (data & LCK_RW_WANT_UPGRADE) {
            data -= LCK_RW_SHARED_READER;
            if ((data & LCK_RW_SHARED_MASK) == 0)       /* we were the last reader */
                data &= ~(LCK_RW_W_WAITING);            /* so clear the wait indicator */
            if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_acquire_smp))
                return lck_rw_lock_shared_to_exclusive_failure(lock, prev);
        } else {
            data |= LCK_RW_WANT_UPGRADE;                /* ask for WANT_UPGRADE */
            data -= LCK_RW_SHARED_READER;               /* and shed our read count */
            if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_acquire_smp))
                break;
        }
        cpu_pause();
    }
    /* we now own the WANT_UPGRADE */
    if (data & LCK_RW_SHARED_MASK)      /* check to see if all of the readers are drained */
        lck_rw_lock_shared_to_exclusive_success(lock);  /* if not, we need to go wait */
#if CONFIG_DTRACE
    LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lock, 0);
#endif
    return TRUE;
}
/*
 * Routine: lck_rw_lock_shared_to_exclusive_failure
 *
 * assembly fast path code has already dropped our read
 * count and determined that someone else owns 'lck_rw_want_upgrade'
 * if 'lck_rw_shared_count' == 0, its also already dropped 'lck_w_waiting'
 * all we need to do here is determine if a wakeup is needed
 */
static boolean_t
lck_rw_lock_shared_to_exclusive_failure(
    lck_rw_t    *lck,
    uint32_t    prior_lock_state)
{
    lck_rw_t    *fake_lck;
    thread_t    thread = current_thread();
    uint32_t    rwlock_count;

    /* Check if dropping the lock means that we need to unpromote */
    rwlock_count = thread->rwlock_count--;

    if (rwlock_count == 0) {
        panic("rw lock count underflow for thread %p", thread);
    }
    fake_lck = (lck_rw_t *)&prior_lock_state;

    if (fake_lck->lck_w_waiting && fake_lck->lck_rw_shared_count == 1) {
        /*
         * Someone else has requested upgrade.
         * Since we've released the read lock, wake
         * him up if he's blocked waiting
         */
        thread_wakeup(RW_LOCK_WRITER_EVENT(lck));
    }

    if ((rwlock_count == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
        /* sched_flags checked without lock, but will be rechecked while clearing */
        lck_rw_clear_promotion(thread, unslide_for_kdebug(lck));
    }

    KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_NONE,
        VM_KERNEL_UNSLIDE_OR_PERM(lck), lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0);

    return FALSE;
}
/*
 * Routine: lck_rw_lock_shared_to_exclusive_success
 *
 * assembly fast path code has already dropped our read
 * count and successfully acquired 'lck_rw_want_upgrade'
 * we just need to wait for the rest of the readers to drain
 * and then we can return as the exclusive holder of this lock
 */
static boolean_t
lck_rw_lock_shared_to_exclusive_success(lck_rw_t *lck)
{
    __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(lck);
    uint64_t deadline = 0;
    int slept = 0;
    int still_shared = 0;
    wait_result_t res;
    boolean_t istate = -1;

#if CONFIG_DTRACE
    uint64_t wait_interval = 0;
    int readers_at_sleep = 0;
    boolean_t dtrace_ls_initialized = FALSE;
    boolean_t dtrace_rwl_shared_to_excl_spin, dtrace_rwl_shared_to_excl_block, dtrace_ls_enabled = FALSE;
#endif

    while (lck->lck_rw_shared_count != 0) {

#if CONFIG_DTRACE
        if (dtrace_ls_initialized == FALSE) {
            dtrace_ls_initialized = TRUE;
            dtrace_rwl_shared_to_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN] != 0);
            dtrace_rwl_shared_to_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK] != 0);
            dtrace_ls_enabled = dtrace_rwl_shared_to_excl_spin || dtrace_rwl_shared_to_excl_block;
            if (dtrace_ls_enabled) {
                /*
                 * Either sleeping or spinning is happening,
                 * start a timing of our delay interval now.
                 */
                readers_at_sleep = lck->lck_rw_shared_count;
                wait_interval = mach_absolute_time();
            }
        }
#endif
        if (istate == -1)
            istate = ml_get_interrupts_enabled();

        deadline = lck_rw_deadline_for_spin(lck);

        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_START,
            trace_lck, lck->lck_rw_shared_count, 0, 0, 0);

        while ((still_shared = lck->lck_rw_shared_count) && mach_absolute_time() < deadline)
            lck_rw_lock_pause(istate);

        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_END,
            trace_lck, lck->lck_rw_shared_count, 0, 0, 0);

        if ( !still_shared)
            break;
        /*
         * if we get here, the deadline has expired w/o
         * the rw_shared_count having drained to 0
         * check to see if we're allowed to do a thread_block
         */
        if (lck->lck_rw_can_sleep) {

            istate = lck_interlock_lock(lck);

            if (lck->lck_rw_shared_count != 0) {
                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_START,
                    trace_lck, lck->lck_rw_shared_count, 0, 0, 0);

                lck->lck_w_waiting = TRUE;

                thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockUpgrade);
                res = assert_wait(RW_LOCK_WRITER_EVENT(lck),
                    THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
                lck_interlock_unlock(lck, istate);

                if (res == THREAD_WAITING) {
                    res = thread_block(THREAD_CONTINUE_NULL);
                    slept++;
                }
                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_END,
                    trace_lck, res, slept, 0, 0);
            } else {
                lck_interlock_unlock(lck, istate);
                break;
            }
        }
    }
#if CONFIG_DTRACE
    /*
     * We infer whether we took the sleep/spin path above by checking readers_at_sleep.
     */
    if (dtrace_ls_enabled == TRUE) {
        if (slept == 0) {
            LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN, lck, mach_absolute_time() - wait_interval, 0);
        } else {
            LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK, lck,
                mach_absolute_time() - wait_interval, 1,
                (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
        }
    }
    LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lck, 1);
#endif
    return TRUE;
}
/*
 * Routine: lck_rw_lock_exclusive_to_shared
 */
void
lck_rw_lock_exclusive_to_shared(lck_rw_t *lock)
{
    uint32_t data, prev;

    for ( ; ; ) {
        data = atomic_exchange_begin32(&lock->data, &prev, memory_order_release_smp);
        if (data & LCK_RW_INTERLOCK) {
            atomic_exchange_abort();
            lck_rw_interlock_spin(lock);        /* wait for interlock to clear */
            continue;
        }
        data += LCK_RW_SHARED_READER;
        if (data & LCK_RW_WANT_UPGRADE)
            data &= ~(LCK_RW_WANT_UPGRADE);
        else
            data &= ~(LCK_RW_WANT_EXCL);
        if (!((prev & LCK_RW_W_WAITING) && (prev & LCK_RW_PRIV_EXCL)))
            data &= ~(LCK_RW_W_WAITING);
        if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_release_smp))
            break;
        cpu_pause();
    }
    return lck_rw_lock_exclusive_to_shared_gen(lock, prev);
}
/*
 * Routine: lck_rw_lock_exclusive_to_shared_gen
 *
 * assembly fast path has already dropped
 * our exclusive state and bumped lck_rw_shared_count
 * all we need to do here is determine if anyone
 * needs to be awakened.
 */
static void
lck_rw_lock_exclusive_to_shared_gen(
    lck_rw_t    *lck,
    uint32_t    prior_lock_state)
{
    __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(lck);
    lck_rw_t    *fake_lck;

    fake_lck = (lck_rw_t *)&prior_lock_state;

    KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_START,
        trace_lck, fake_lck->lck_rw_want_write, fake_lck->lck_rw_want_upgrade, 0, 0);

    /*
     * don't wake up anyone waiting to take the lock exclusively
     * since we hold a read count... when the read count drops to 0,
     * the writers will be woken.
     *
     * wake up any waiting readers if we don't have any writers waiting,
     * or the lock is NOT marked as rw_priv_excl (writers have privilege)
     */
    if (!(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting) && fake_lck->lck_r_waiting)
        thread_wakeup(RW_LOCK_READER_EVENT(lck));

    KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_END,
        trace_lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, lck->lck_rw_shared_count, 0);

#if CONFIG_DTRACE
    LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_TO_SHARED_DOWNGRADE, lck, 0);
#endif
}
/*
 * Routine: lck_rw_try_lock
 */
boolean_t
lck_rw_try_lock(lck_rw_t *lck, lck_rw_type_t lck_rw_type)
{
    if (lck_rw_type == LCK_RW_TYPE_SHARED)
        return (lck_rw_try_lock_shared(lck));
    else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
        return (lck_rw_try_lock_exclusive(lck));
    else
        panic("lck_rw_try_lock(): Invalid rw lock type: %x\n", lck_rw_type);
    return FALSE;
}
/*
 * Routine: lck_rw_try_lock_shared
 */
boolean_t
lck_rw_try_lock_shared(lck_rw_t *lock)
{
    uint32_t data, prev;

    for ( ; ; ) {
        data = atomic_exchange_begin32(&lock->data, &prev, memory_order_acquire_smp);
        if (data & LCK_RW_INTERLOCK) {
            atomic_exchange_abort();
            lck_rw_interlock_spin(lock);
            continue;
        }
        if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) {
            atomic_exchange_abort();
            return FALSE;                       /* lock is busy */
        }
        data += LCK_RW_SHARED_READER;           /* Increment reader refcount */
        if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_acquire_smp))
            break;
        cpu_pause();
    }
    current_thread()->rwlock_count++;
    /* There is a 3 instr window where preemption may not notice rwlock_count after cmpxchg */
#if CONFIG_DTRACE
    LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED);
#endif /* CONFIG_DTRACE */
    return TRUE;
}
/*
 * Routine: lck_rw_try_lock_exclusive
 */
boolean_t
lck_rw_try_lock_exclusive(lck_rw_t *lock)
{
    uint32_t data, prev;

    for ( ; ; ) {
        data = atomic_exchange_begin32(&lock->data, &prev, memory_order_acquire_smp);
        if (data & LCK_RW_INTERLOCK) {
            atomic_exchange_abort();
            lck_rw_interlock_spin(lock);
            continue;
        }
        if (data & (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) {
            atomic_exchange_abort();
            return FALSE;                       /* can't get it */
        }
        data |= LCK_RW_WANT_EXCL;
        if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_acquire_smp))
            break;
        cpu_pause();
    }
    current_thread()->rwlock_count++;
#if CONFIG_DTRACE
    LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
#endif /* CONFIG_DTRACE */
    return TRUE;
}
void
lck_rw_assert(lck_rw_t *lck, unsigned int type)
{
    switch (type) {
    case LCK_RW_ASSERT_SHARED:
        if (lck->lck_rw_shared_count != 0) {
            return;
        }
        break;
    case LCK_RW_ASSERT_EXCLUSIVE:
        if ((lck->lck_rw_want_write ||
            lck->lck_rw_want_upgrade) &&
            lck->lck_rw_shared_count == 0) {
            return;
        }
        break;
    case LCK_RW_ASSERT_HELD:
        if (lck->lck_rw_want_write ||
            lck->lck_rw_want_upgrade ||
            lck->lck_rw_shared_count != 0) {
            return;
        }
        break;
    case LCK_RW_ASSERT_NOTHELD:
        if (!(lck->lck_rw_want_write ||
            lck->lck_rw_want_upgrade ||
            lck->lck_rw_shared_count != 0)) {
            return;
        }
        break;
    default:
        break;
    }

    panic("rw lock (%p)%s held (mode=%u), first word %08x\n", lck, (type == LCK_RW_ASSERT_NOTHELD ? "" : " not"), type, *(uint32_t *)lck);
}
/* On return to userspace, this routine is called if the rwlock_count is somehow imbalanced */
void
lck_rw_clear_promotions_x86(thread_t thread)
{
#if MACH_LDEBUG
    /* It's fatal to leave a RW lock locked and return to userspace */
    panic("%u rw lock(s) held on return to userspace for thread %p", thread->rwlock_count, thread);
#else
    /* Paper over the issue */
    thread->rwlock_count = 0;
    lck_rw_clear_promotion(thread, 0);
#endif
}
boolean_t
lck_rw_lock_yield_shared(lck_rw_t *lck, boolean_t force_yield)
{
    lck_rw_assert(lck, LCK_RW_ASSERT_SHARED);

    if (lck->lck_rw_want_write || lck->lck_rw_want_upgrade || force_yield) {
        lck_rw_unlock_shared(lck);
        lck_rw_lock_shared(lck);
        return TRUE;
    }

    return FALSE;
}
/*
 * Routine: kdp_lck_rw_lock_is_acquired_exclusive
 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
 */
boolean_t
kdp_lck_rw_lock_is_acquired_exclusive(lck_rw_t *lck) {
    if (not_in_kdp) {
        panic("panic: rw lock exclusive check done outside of kernel debugger");
    }
    return ((lck->lck_rw_want_upgrade || lck->lck_rw_want_write) && (lck->lck_rw_shared_count == 0)) ? TRUE : FALSE;
}
/*
 * Slow path routines for lck_mtx locking and unlocking functions.
 *
 * These functions were previously implemented in x86 assembly,
 * and some optimizations are in place in this C code to obtain compiled code
 * as performant and compact as the assembly version.
 *
 * To avoid inlining these functions on the fast path, all functions directly
 * called by the fast paths are marked __attribute__((noinline)).  They are also
 * all implemented in such a way that the fast path can tail call into them.  This
 * way the return address does not need to be pushed on the caller stack and
 * stack optimization can happen on the caller.
 *
 * Slow path code is structured so that there are no calls to functions that will
 * return in the context of the caller function, i.e. every function called is
 * either a tail-call function or an inline function.  The tail-call functions take
 * fewer than six arguments, so that they can be passed in registers and do not
 * need to be pushed on the stack.  This allows the compiler to avoid creating a
 * stack frame for these functions.
 *
 * __improbable and __probable are used to compile the slow path code in such a way
 * that the fast path case is a sequence of instructions with as few jumps as
 * possible, to make this case the most optimized even when falling through the
 * slow path.
 */

/*
 * Intel lock invariants:
 *
 * lck_mtx_waiters: contains the count of threads currently in the mutex waitqueue
 *
 * The lock owner is promoted to the max priority of all its waiters only if it
 * was a lower priority when it acquired or was an owner when a waiter waited.
 * Max priority is capped at MAXPRI_PROMOTE.
 *
 * The last waiter will not be promoted as it is woken up, but the last
 * lock owner may not have been the last thread to have been woken up depending on the
 * luck of the draw.  Therefore a last-owner may still have the promoted-on-wakeup
 * flag set.
 *
 * TODO: Figure out an algorithm for stopping a lock holder which is already at the right
 * priority from dropping priority in the future without having to take thread lock.
 */
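
/*
 * Illustrative sketch (not part of the original file): the calling
 * convention described above, shown on hypothetical helpers.  The fast
 * path stays inlinable and tail calls a noinline slow path, so no stack
 * frame is needed for the slow call on the uncontended path.
 */
#if 0
static void example_mtx_lock_slow(lck_mtx_t *lock) __attribute__((noinline));

static void
example_mtx_lock_slow(lck_mtx_t *lock)
{
    /* contended case: spin/block logic would live here */
    (void)lock;
}

static inline void
example_mtx_lock_fast(lck_mtx_t *lock, uint32_t *state)
{
    if (__probable(lck_mtx_interlock_try_lock(lock, state))) {
        return;                         /* uncontended: straight-line code */
    }
    example_mtx_lock_slow(lock);        /* tail call into the slow path */
}
#endif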
#if defined(__x86_64__)
extern zone_t lck_mtx_zone;
#endif

/*
 * Routine: lck_mtx_alloc_init
 */
lck_mtx_t *
lck_mtx_alloc_init(lck_grp_t *grp, lck_attr_t *attr)
{
    lck_mtx_t *lck;
#if defined(__x86_64__)
    if ((lck = (lck_mtx_t *)zalloc(lck_mtx_zone)) != 0)
        lck_mtx_init(lck, grp, attr);
#else
    if ((lck = (lck_mtx_t *)kalloc(sizeof(lck_mtx_t))) != 0)
        lck_mtx_init(lck, grp, attr);
#endif
    return lck;
}

/*
 * Routine: lck_mtx_free
 */
void
lck_mtx_free(lck_mtx_t *lck, lck_grp_t *grp)
{
    lck_mtx_destroy(lck, grp);
#if defined(__x86_64__)
    zfree(lck_mtx_zone, lck);
#else
    kfree(lck, sizeof(lck_mtx_t));
#endif
}
/*
 * Routine: lck_mtx_ext_init
 */
void
lck_mtx_ext_init(
    lck_mtx_ext_t   *lck,
    lck_grp_t       *grp,
    lck_attr_t      *attr)
{
    bzero((void *)lck, sizeof(lck_mtx_ext_t));

    if ((attr->lck_attr_val) & LCK_ATTR_DEBUG) {
        lck->lck_mtx_deb.type = MUTEX_TAG;
        lck->lck_mtx_attr |= LCK_MTX_ATTR_DEBUG;
    }

    lck->lck_mtx_grp = grp;

    if (grp->lck_grp_attr & LCK_GRP_ATTR_STAT)
        lck->lck_mtx_attr |= LCK_MTX_ATTR_STAT;

    lck->lck_mtx.lck_mtx_is_ext = 1;
    lck->lck_mtx.lck_mtx_pad32 = 0xFFFFFFFF;
}
/*
 * Routine: lck_mtx_init
 */
void
lck_mtx_init(
    lck_mtx_t   *lck,
    lck_grp_t   *grp,
    lck_attr_t  *attr)
{
    lck_mtx_ext_t   *lck_ext;
    lck_attr_t      *lck_attr;

    if (attr != LCK_ATTR_NULL)
        lck_attr = attr;
    else
        lck_attr = &LockDefaultLckAttr;

    if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
        if ((lck_ext = (lck_mtx_ext_t *)kalloc(sizeof(lck_mtx_ext_t))) != 0) {
            lck_mtx_ext_init(lck_ext, grp, lck_attr);
            lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
            lck->lck_mtx_ptr = lck_ext;
        }
    } else {
        lck->lck_mtx_owner = 0;
        lck->lck_mtx_state = 0;
    }
    lck->lck_mtx_pad32 = 0xFFFFFFFF;
    lck_grp_reference(grp);
    lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
}
/*
 * Routine: lck_mtx_init_ext
 */
void
lck_mtx_init_ext(
    lck_mtx_t       *lck,
    lck_mtx_ext_t   *lck_ext,
    lck_grp_t       *grp,
    lck_attr_t      *attr)
{
    lck_attr_t *lck_attr;

    if (attr != LCK_ATTR_NULL)
        lck_attr = attr;
    else
        lck_attr = &LockDefaultLckAttr;

    if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
        lck_mtx_ext_init(lck_ext, grp, lck_attr);
        lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
        lck->lck_mtx_ptr = lck_ext;
    } else {
        lck->lck_mtx_owner = 0;
        lck->lck_mtx_state = 0;
    }
    lck->lck_mtx_pad32 = 0xFFFFFFFF;

    lck_grp_reference(grp);
    lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
}
static void
lck_mtx_lock_mark_destroyed(
    lck_mtx_t   *mutex,
    boolean_t   indirect)
{
    uint32_t state;

    if (indirect) {
        /* convert to destroyed state */
        ordered_store_mtx_state_release(mutex, LCK_MTX_TAG_DESTROYED);
        return;
    }

    state = ordered_load_mtx_state(mutex);
    lck_mtx_interlock_lock(mutex, &state);

    ordered_store_mtx_state_release(mutex, LCK_MTX_TAG_DESTROYED);

    enable_preemption();
}
/*
 * Routine: lck_mtx_destroy
 */
void
lck_mtx_destroy(lck_mtx_t *lck, lck_grp_t *grp)
{
    boolean_t indirect;

    if (lck->lck_mtx_tag == LCK_MTX_TAG_DESTROYED)
        return;
    lck_mtx_assert(lck, LCK_MTX_ASSERT_NOTOWNED);

    indirect = (lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT);

    lck_mtx_lock_mark_destroyed(lck, indirect);

    if (indirect)
        kfree(lck->lck_mtx_ptr, sizeof(lck_mtx_ext_t));
    lck_grp_lckcnt_decr(grp, LCK_TYPE_MTX);
    lck_grp_deallocate(grp);
}
#if DEVELOPMENT | DEBUG
__attribute__((noinline))
static void
lck_mtx_owner_check_panic(lck_mtx_t *lock)
{
    thread_t owner = (thread_t)lock->lck_mtx_owner;
    panic("Mutex unlock attempted from non-owner thread. Owner=%p lock=%p", owner, lock);
}
#endif

__attribute__((always_inline))
static boolean_t
get_indirect_mutex(
    lck_mtx_t   **lock,
    uint32_t    *state)
{
    *lock = &((*lock)->lck_mtx_ptr->lck_mtx);
    *state = ordered_load_mtx_state(*lock);
    return TRUE;
}
/*
 * Routine: lck_mtx_unlock_slow
 *
 * Unlocks a mutex held by current thread.
 *
 * It will wake up waiters if necessary.
 *
 * Interlock can be held.
 */
__attribute__((noinline))
void
lck_mtx_unlock_slow(lck_mtx_t *lock)
{
    thread_t    thread;
    uint32_t    state, prev;
    boolean_t   indirect = FALSE;

    state = ordered_load_mtx_state(lock);

    /* Is this an indirect mutex? */
    if (__improbable(state == LCK_MTX_TAG_INDIRECT)) {
        indirect = get_indirect_mutex(&lock, &state);
    }

    thread = current_thread();

#if DEVELOPMENT | DEBUG
    thread_t owner = (thread_t)lock->lck_mtx_owner;
    if (__improbable(owner != thread))
        lck_mtx_owner_check_panic(lock);
#endif

    /* check if it is held as a spinlock */
    if (__improbable((state & LCK_MTX_MLOCKED_MSK) == 0))
        goto unlock;

    lck_mtx_interlock_lock_clear_flags(lock, LCK_MTX_MLOCKED_MSK, &state);

unlock:
    /* preemption disabled, interlock held and mutex not held */

    /* clear owner */
    ordered_store_mtx_owner(lock, 0);
    /* keep original state in prev for later evaluation */
    prev = state;

    if (__improbable(state & LCK_MTX_WAITERS_MSK)) {
#if MACH_LDEBUG
        thread->mutex_count--;
#endif
        return lck_mtx_unlock_wakeup_tail(lock, state, indirect);
    }

    /* release interlock, promotion and clear spin flag */
    state &= (~(LCK_MTX_ILOCKED_MSK | LCK_MTX_SPIN_MSK));
    ordered_store_mtx_state_release(lock, state);       /* since I own the interlock, I don't need an atomic update */

#if MACH_LDEBUG
    /* perform lock statistics after drop to prevent delay */
    thread->mutex_count--;              /* lock statistic */
#endif /* MACH_LDEBUG */

    /* re-enable preemption */
    lck_mtx_unlock_finish_inline(lock, FALSE);
}
#define LCK_MTX_LCK_WAIT_CODE           0x20
#define LCK_MTX_LCK_WAKEUP_CODE         0x21
#define LCK_MTX_LCK_SPIN_CODE           0x22
#define LCK_MTX_LCK_ACQUIRE_CODE        0x23
#define LCK_MTX_LCK_DEMOTE_CODE         0x24
/*
 * Routine: lck_mtx_unlock_wakeup_tail
 *
 * Invoked on unlock when there is contention, i.e. the assembly
 * routine sees that mutex->lck_mtx_waiters != 0.
 *
 * Neither the mutex nor the interlock is held.
 *
 * Note that this routine might not be called if there are pending
 * waiters which have previously been woken up, and they didn't
 * end up boosting the old owner.
 *
 * The assembly routine previously did the following to the mutex
 * (after saving the state in prior_lock_state):
 *      decremented lck_mtx_waiters if nonzero
 *
 * This function needs to be called as a tail call
 * to optimize the compiled code.
 */
__attribute__((noinline))
static void
lck_mtx_unlock_wakeup_tail(
    lck_mtx_t *mutex,
    uint32_t state,
    boolean_t indirect)
{
    struct turnstile *ts;

    __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(mutex);
    kern_return_t did_wake;

    KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_START,
        trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);

    ts = turnstile_prepare((uintptr_t)mutex, NULL, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);

    if (mutex->lck_mtx_waiters > 1) {
        /* WAITQ_PROMOTE_ON_WAKE will call turnstile_update_inheritor on the woken-up thread */
        did_wake = waitq_wakeup64_one(&ts->ts_waitq, CAST_EVENT64_T(LCK_MTX_EVENT(mutex)), THREAD_AWAKENED, WAITQ_PROMOTE_ON_WAKE);
    } else {
        did_wake = waitq_wakeup64_one(&ts->ts_waitq, CAST_EVENT64_T(LCK_MTX_EVENT(mutex)), THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
        turnstile_update_inheritor(ts, NULL, TURNSTILE_IMMEDIATE_UPDATE);
    }
    assert(did_wake == KERN_SUCCESS);

    turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
    turnstile_complete((uintptr_t)mutex, NULL, NULL, TURNSTILE_KERNEL_MUTEX);

    state -= LCK_MTX_WAITER;
    state &= (~(LCK_MTX_SPIN_MSK | LCK_MTX_ILOCKED_MSK));
    ordered_store_mtx_state_release(mutex, state);

    assert(current_thread()->turnstile != NULL);

    turnstile_cleanup();

    KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_END,
        trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);

    lck_mtx_unlock_finish_inline(mutex, indirect);
}
/*
 * Routine: lck_mtx_lock_acquire_x86
 *
 * Invoked on acquiring the mutex when there is contention
 * (i.e. the assembly routine sees that mutex->lck_mtx_waiters != 0).
 *
 * mutex is owned... interlock is held... preemption is disabled
 */
__attribute__((always_inline))
static void
lck_mtx_lock_acquire_inline(
    lck_mtx_t *mutex,
    struct turnstile *ts)
{
    __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(mutex);

    KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_ACQUIRE_CODE) | DBG_FUNC_START,
        trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);

    thread_t thread = (thread_t)mutex->lck_mtx_owner; /* faster than current_thread() */
    assert(thread->waiting_for_mutex == NULL);

    if (mutex->lck_mtx_waiters > 0) {
        if (ts == NULL) {
            ts = turnstile_prepare((uintptr_t)mutex, NULL, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
        }

        turnstile_update_inheritor(ts, thread, (TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD));
        turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
    }

    if (ts != NULL) {
        turnstile_complete((uintptr_t)mutex, NULL, NULL, TURNSTILE_KERNEL_MUTEX);
    }

    assert(current_thread()->turnstile != NULL);

    KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_ACQUIRE_CODE) | DBG_FUNC_END,
        trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);
}
void
lck_mtx_lock_acquire_x86(
    lck_mtx_t *mutex)
{
    return lck_mtx_lock_acquire_inline(mutex, NULL);
}
/*
 * Tail call helpers for lock functions that perform
 * lck_mtx_lock_acquire followed by the caller's finish routine,
 * to optimize the caller's compiled code.
 */

__attribute__((noinline))
static void
lck_mtx_lock_acquire_tail(
    lck_mtx_t *mutex,
    boolean_t indirect,
    struct turnstile *ts)
{
    lck_mtx_lock_acquire_inline(mutex, ts);
    lck_mtx_lock_finish_inline_with_cleanup(mutex, ordered_load_mtx_state(mutex), indirect);
}
__attribute__((noinline))
static boolean_t
lck_mtx_try_lock_acquire_tail(
    lck_mtx_t *mutex)
{
    lck_mtx_lock_acquire_inline(mutex, NULL);
    lck_mtx_try_lock_finish_inline(mutex, ordered_load_mtx_state(mutex));

    return TRUE;
}
__attribute__((noinline))
static void
lck_mtx_convert_spin_acquire_tail(
    lck_mtx_t *mutex)
{
    lck_mtx_lock_acquire_inline(mutex, NULL);
    lck_mtx_convert_spin_finish_inline(mutex, ordered_load_mtx_state(mutex));
}
void
lck_mtx_ilk_unlock(
    lck_mtx_t *mutex)
{
    lck_mtx_ilk_unlock_inline(mutex, ordered_load_mtx_state(mutex));
}
static void
lck_mtx_interlock_lock_set_and_clear_flags(
    lck_mtx_t *mutex,
    uint32_t xor_flags,
    uint32_t and_flags,
    uint32_t *new_state)
{
    uint32_t state, prev;
    state = *new_state;

    for ( ; ; ) {
        /* have to wait for interlock to clear */
        while (__improbable(state & (LCK_MTX_ILOCKED_MSK | xor_flags))) {
            cpu_pause();
            state = ordered_load_mtx_state(mutex);
        }
        prev = state;                                   /* prev contains snapshot for exchange */
        state |= LCK_MTX_ILOCKED_MSK | xor_flags;       /* pick up interlock */
        state &= ~and_flags;                            /* clear flags */

        disable_preemption();
        if (os_atomic_cmpxchg(&mutex->lck_mtx_state, prev, state, acquire))
            break;
        enable_preemption();
        cpu_pause();
        state = ordered_load_mtx_state(mutex);
    }
    *new_state = state;
}
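/*
 * Illustrative sketch (not compiled) of the acquire pattern used above:
 * snapshot the word, reject states where the busy bit is already set, and
 * publish the new state with a single compare-and-swap so concurrent
 * updaters cannot interleave. example_try_set_bit and its parameters are
 * hypothetical; os_atomic_cmpxchg is the same primitive used above.
 */
#if 0
static boolean_t
example_try_set_bit(uint32_t *word, uint32_t busy_bit)
{
    uint32_t prev = *word;

    if (prev & busy_bit)
        return FALSE;                   /* someone else holds the bit */

    /* acquire ordering: later reads cannot move before the CAS */
    return os_atomic_cmpxchg(word, prev, prev | busy_bit, acquire);
}
#endif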
static void
lck_mtx_interlock_lock_clear_flags(
    lck_mtx_t *mutex,
    uint32_t and_flags,
    uint32_t *new_state)
{
    return lck_mtx_interlock_lock_set_and_clear_flags(mutex, 0, and_flags, new_state);
}

static void
lck_mtx_interlock_lock(
    lck_mtx_t *mutex,
    uint32_t *new_state)
{
    return lck_mtx_interlock_lock_set_and_clear_flags(mutex, 0, 0, new_state);
}
static boolean_t
lck_mtx_interlock_try_lock_set_flags(
    lck_mtx_t *mutex,
    uint32_t or_flags,
    uint32_t *new_state)
{
    uint32_t state, prev;
    state = *new_state;

    /* have to wait for interlock to clear */
    if (state & (LCK_MTX_ILOCKED_MSK | or_flags)) {
        return FALSE;
    }
    prev = state;                                       /* prev contains snapshot for exchange */
    state |= LCK_MTX_ILOCKED_MSK | or_flags;            /* pick up interlock */
    disable_preemption();
    if (os_atomic_cmpxchg(&mutex->lck_mtx_state, prev, state, acquire)) {
        *new_state = state;
        return TRUE;
    }

    enable_preemption();
    return FALSE;
}
static boolean_t
lck_mtx_interlock_try_lock(
    lck_mtx_t *mutex,
    uint32_t *new_state)
{
    return lck_mtx_interlock_try_lock_set_flags(mutex, 0, new_state);
}
static boolean_t
lck_mtx_interlock_try_lock_disable_interrupts(
    lck_mtx_t *mutex,
    boolean_t *istate)
{
    uint32_t state;

    *istate = ml_set_interrupts_enabled(FALSE);
    state = ordered_load_mtx_state(mutex);

    if (lck_mtx_interlock_try_lock(mutex, &state)) {
        return TRUE;
    }

    ml_set_interrupts_enabled(*istate);
    return FALSE;
}
static void
lck_mtx_interlock_unlock_enable_interrupts(
    lck_mtx_t *mutex,
    boolean_t istate)
{
    lck_mtx_ilk_unlock(mutex);
    ml_set_interrupts_enabled(istate);
}
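/*
 * Illustrative sketch (not compiled) of how the two helpers above are meant
 * to be paired: peek at owner state only if the interlock can be taken with
 * interrupts masked, and always restore the saved interrupt state on the way
 * out. example_peek_owner is a hypothetical caller; the spin-wait loop in
 * lck_mtx_lock_spinwait_x86 below follows the same shape.
 */
#if 0
static thread_t
example_peek_owner(lck_mtx_t *mutex)
{
    boolean_t istate;
    thread_t owner = NULL;

    if (lck_mtx_interlock_try_lock_disable_interrupts(mutex, &istate)) {
        owner = (thread_t)mutex->lck_mtx_owner;
        lck_mtx_interlock_unlock_enable_interrupts(mutex, istate);
    }
    return owner;
}
#endif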
__attribute__((noinline))
static void
lck_mtx_lock_contended(
    lck_mtx_t *lock,
    boolean_t indirect,
    boolean_t *first_miss)
{
    lck_mtx_spinwait_ret_type_t ret;
    uint32_t state;
    thread_t thread;
    struct turnstile *ts = NULL;

try_again:

    if (indirect) {
        lck_grp_mtx_update_miss((struct _lck_mtx_ext_ *)lock, first_miss);
    }

    ret = lck_mtx_lock_spinwait_x86(lock);
    state = ordered_load_mtx_state(lock);
    switch (ret) {
    case LCK_MTX_SPINWAIT_NO_SPIN:
        /*
         * owner not on core, lck_mtx_lock_spinwait_x86 didn't even
         * try to spin.
         * interlock not held
         */
        if (indirect) {
            lck_grp_mtx_update_direct_wait((struct _lck_mtx_ext_ *)lock);
        }

        /* just fall through case LCK_MTX_SPINWAIT_SPUN */
    case LCK_MTX_SPINWAIT_SPUN:
        /*
         * mutex not acquired but lck_mtx_lock_spinwait_x86 tried to spin
         * interlock not held
         */
        lck_mtx_interlock_lock(lock, &state);
        assert(state & LCK_MTX_ILOCKED_MSK);

        if (state & LCK_MTX_MLOCKED_MSK) {
            if (indirect) {
                lck_grp_mtx_update_wait((struct _lck_mtx_ext_ *)lock, first_miss);
            }
            lck_mtx_lock_wait_x86(lock, &ts);
            /*
             * interlock is not held here.
             */
            goto try_again;
        } else {
            /* grab the mutex */
            state |= LCK_MTX_MLOCKED_MSK;
            ordered_store_mtx_state_release(lock, state);
            thread = current_thread();
            ordered_store_mtx_owner(lock, (uintptr_t)thread);
#if MACH_LDEBUG
            thread->mutex_count++;
#endif /* MACH_LDEBUG */
        }
        break;
    case LCK_MTX_SPINWAIT_ACQUIRED:
        /*
         * mutex has been acquired by lck_mtx_lock_spinwait_x86
         * interlock is held and preemption disabled
         * owner is set and mutex marked as locked
         * statistics updated too
         */
        break;
    default:
        panic("lck_mtx_lock_spinwait_x86 returned %d for mutex %p\n", ret, lock);
    }

    /*
     * interlock is already acquired here
     */

    /* mutex has been acquired */
    thread = (thread_t)lock->lck_mtx_owner;
    if (state & LCK_MTX_WAITERS_MSK) {
        /*
         * lck_mtx_lock_acquire_tail will call
         * turnstile_complete.
         */
        return lck_mtx_lock_acquire_tail(lock, indirect, ts);
    }

    if (ts != NULL) {
        turnstile_complete((uintptr_t)lock, NULL, NULL, TURNSTILE_KERNEL_MUTEX);
    }

    assert(current_thread()->turnstile != NULL);

    /* release the interlock */
    lck_mtx_lock_finish_inline_with_cleanup(lock, ordered_load_mtx_state(lock), indirect);
}
/*
 * Helper noinline functions for calling
 * panic to optimize compiled code.
 */

__attribute__((noinline)) __abortlike
static void
lck_mtx_destroyed(
    lck_mtx_t *lock)
{
    panic("trying to interlock destroyed mutex (%p)", lock);
}

__attribute__((noinline))
static boolean_t
lck_mtx_try_destroyed(
    lck_mtx_t *lock)
{
    panic("trying to interlock destroyed mutex (%p)", lock);
    return FALSE;
}
__attribute__((always_inline))
static boolean_t
lck_mtx_lock_wait_interlock_to_clear(
    lck_mtx_t *lock,
    uint32_t *new_state)
{
    uint32_t state;

    for ( ; ; ) {
        cpu_pause();
        state = ordered_load_mtx_state(lock);
        if (!(state & (LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK))) {
            *new_state = state;
            return TRUE;
        }
        if (state & LCK_MTX_MLOCKED_MSK) {
            /* if it is held as mutex, just fail */
            return FALSE;
        }
    }
}
__attribute__((always_inline))
static boolean_t
lck_mtx_try_lock_wait_interlock_to_clear(
    lck_mtx_t *lock,
    uint32_t *new_state)
{
    uint32_t state;

    for ( ; ; ) {
        cpu_pause();
        state = ordered_load_mtx_state(lock);
        if (state & (LCK_MTX_MLOCKED_MSK | LCK_MTX_SPIN_MSK)) {
            /* if it is held as mutex or spin, just fail */
            return FALSE;
        }
        if (!(state & LCK_MTX_ILOCKED_MSK)) {
            *new_state = state;
            return TRUE;
        }
    }
}
/*
 * Routine: lck_mtx_lock_slow
 *
 * Locks a mutex for the current thread.
 * If the lock is contended this function might sleep.
 *
 * Called with interlock not held.
 */
__attribute__((noinline))
void
lck_mtx_lock_slow(
    lck_mtx_t *lock)
{
    boolean_t indirect = FALSE;
    uint32_t state;
    boolean_t first_miss = FALSE;

    state = ordered_load_mtx_state(lock);

    /* is the interlock or mutex held */
    if (__improbable(state & ((LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK)))) {
        /*
         * Note: both LCK_MTX_TAG_DESTROYED and LCK_MTX_TAG_INDIRECT
         * have LCK_MTX_ILOCKED_MSK and LCK_MTX_MLOCKED_MSK
         * set in state (state == lck_mtx_tag)
         */

        /* is the mutex already held and not indirect */
        if (__improbable(!(state & LCK_MTX_ILOCKED_MSK))) {
            /* no, must have been the mutex */
            return lck_mtx_lock_contended(lock, indirect, &first_miss);
        }

        /* check to see if it is marked destroyed */
        if (__improbable(state == LCK_MTX_TAG_DESTROYED)) {
            lck_mtx_destroyed(lock);
        }

        /* Is this an indirect mutex? */
        if (__improbable(state == LCK_MTX_TAG_INDIRECT)) {
            indirect = get_indirect_mutex(&lock, &state);

            lck_grp_mtx_update_held((struct _lck_mtx_ext_ *)lock);

            if (state & LCK_MTX_SPIN_MSK) {
                /* M_SPIN_MSK was set, so M_ILOCKED_MSK must also be present */
                assert(state & LCK_MTX_ILOCKED_MSK);
                lck_grp_mtx_update_miss((struct _lck_mtx_ext_ *)lock, &first_miss);
            }
        }

        if (!lck_mtx_lock_wait_interlock_to_clear(lock, &state)) {
            return lck_mtx_lock_contended(lock, indirect, &first_miss);
        }
    }

    /* no - can't be INDIRECT, DESTROYED or locked */
    while (__improbable(!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_MLOCKED_MSK, &state))) {
        if (!lck_mtx_lock_wait_interlock_to_clear(lock, &state)) {
            return lck_mtx_lock_contended(lock, indirect, &first_miss);
        }
    }

    /* lock and interlock acquired */

    thread_t thread = current_thread();
    /* record owner of mutex */
    ordered_store_mtx_owner(lock, (uintptr_t)thread);

#if MACH_LDEBUG
    thread->mutex_count++; /* lock statistic */
#endif

    /*
     * Check if there are waiters to
     * inherit their priority.
     */
    if (__improbable(state & LCK_MTX_WAITERS_MSK)) {
        return lck_mtx_lock_acquire_tail(lock, indirect, NULL);
    }

    /* release the interlock */
    lck_mtx_lock_finish_inline(lock, ordered_load_mtx_state(lock), indirect);
}
__attribute__((noinline))
boolean_t
lck_mtx_try_lock_slow(
    lck_mtx_t *lock)
{
    boolean_t indirect = FALSE;
    uint32_t state;
    boolean_t first_miss = FALSE;

    state = ordered_load_mtx_state(lock);

    /* is the interlock or mutex held */
    if (__improbable(state & ((LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK)))) {
        /*
         * Note: both LCK_MTX_TAG_DESTROYED and LCK_MTX_TAG_INDIRECT
         * have LCK_MTX_ILOCKED_MSK and LCK_MTX_MLOCKED_MSK
         * set in state (state == lck_mtx_tag)
         */

        /* is the mutex already held and not indirect */
        if (__improbable(!(state & LCK_MTX_ILOCKED_MSK))) {
            return FALSE;
        }

        /* check to see if it is marked destroyed */
        if (__improbable(state == LCK_MTX_TAG_DESTROYED)) {
            lck_mtx_try_destroyed(lock);
        }

        /* Is this an indirect mutex? */
        if (__improbable(state == LCK_MTX_TAG_INDIRECT)) {
            indirect = get_indirect_mutex(&lock, &state);

            lck_grp_mtx_update_held((struct _lck_mtx_ext_ *)lock);
        }

        if (!lck_mtx_try_lock_wait_interlock_to_clear(lock, &state)) {
            if (indirect)
                lck_grp_mtx_update_miss((struct _lck_mtx_ext_ *)lock, &first_miss);
            return FALSE;
        }
    }

    /* no - can't be INDIRECT, DESTROYED or locked */
    while (__improbable(!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_MLOCKED_MSK, &state))) {
        if (!lck_mtx_try_lock_wait_interlock_to_clear(lock, &state)) {
            if (indirect)
                lck_grp_mtx_update_miss((struct _lck_mtx_ext_ *)lock, &first_miss);
            return FALSE;
        }
    }

    /* lock and interlock acquired */

    thread_t thread = current_thread();
    /* record owner of mutex */
    ordered_store_mtx_owner(lock, (uintptr_t)thread);

#if MACH_LDEBUG
    thread->mutex_count++; /* lock statistic */
#endif

    /*
     * Check if there are waiters to
     * inherit their priority.
     */
    if (__improbable(state & LCK_MTX_WAITERS_MSK)) {
        return lck_mtx_try_lock_acquire_tail(lock);
    }

    /* release the interlock */
    lck_mtx_try_lock_finish_inline(lock, ordered_load_mtx_state(lock));

    return TRUE;
}
__attribute__((noinline))
void
lck_mtx_lock_spin_slow(
    lck_mtx_t *lock)
{
    boolean_t indirect = FALSE;
    uint32_t state;
    boolean_t first_miss = FALSE;

    state = ordered_load_mtx_state(lock);

    /* is the interlock or mutex held */
    if (__improbable(state & ((LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK)))) {
        /*
         * Note: both LCK_MTX_TAG_DESTROYED and LCK_MTX_TAG_INDIRECT
         * have LCK_MTX_ILOCKED_MSK and LCK_MTX_MLOCKED_MSK
         * set in state (state == lck_mtx_tag)
         */

        /* is the mutex already held and not indirect */
        if (__improbable(!(state & LCK_MTX_ILOCKED_MSK))) {
            /* no, must have been the mutex */
            return lck_mtx_lock_contended(lock, indirect, &first_miss);
        }

        /* check to see if it is marked destroyed */
        if (__improbable(state == LCK_MTX_TAG_DESTROYED)) {
            lck_mtx_destroyed(lock);
        }

        /* Is this an indirect mutex? */
        if (__improbable(state == LCK_MTX_TAG_INDIRECT)) {
            indirect = get_indirect_mutex(&lock, &state);

            lck_grp_mtx_update_held((struct _lck_mtx_ext_ *)lock);

            if (state & LCK_MTX_SPIN_MSK) {
                /* M_SPIN_MSK was set, so M_ILOCKED_MSK must also be present */
                assert(state & LCK_MTX_ILOCKED_MSK);
                lck_grp_mtx_update_miss((struct _lck_mtx_ext_ *)lock, &first_miss);
            }
        }

        if (!lck_mtx_lock_wait_interlock_to_clear(lock, &state)) {
            return lck_mtx_lock_contended(lock, indirect, &first_miss);
        }
    }

    /* no - can't be INDIRECT, DESTROYED or locked */
    while (__improbable(!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_SPIN_MSK, &state))) {
        if (!lck_mtx_lock_wait_interlock_to_clear(lock, &state)) {
            return lck_mtx_lock_contended(lock, indirect, &first_miss);
        }
    }

    /* lock as spinlock and interlock acquired */

    thread_t thread = current_thread();
    /* record owner of mutex */
    ordered_store_mtx_owner(lock, (uintptr_t)thread);

#if MACH_LDEBUG
    thread->mutex_count++; /* lock statistic */
#endif

#if CONFIG_DTRACE
    LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN_ACQUIRE, lock, 0);
#endif
    /* return with the interlock held and preemption disabled */
    return;
}
__attribute__((noinline))
boolean_t
lck_mtx_try_lock_spin_slow(
    lck_mtx_t *lock)
{
    boolean_t indirect = FALSE;
    uint32_t state;
    boolean_t first_miss = FALSE;

    state = ordered_load_mtx_state(lock);

    /* is the interlock or mutex held */
    if (__improbable(state & ((LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK)))) {
        /*
         * Note: both LCK_MTX_TAG_DESTROYED and LCK_MTX_TAG_INDIRECT
         * have LCK_MTX_ILOCKED_MSK and LCK_MTX_MLOCKED_MSK
         * set in state (state == lck_mtx_tag)
         */

        /* is the mutex already held and not indirect */
        if (__improbable(!(state & LCK_MTX_ILOCKED_MSK))) {
            return FALSE;
        }

        /* check to see if it is marked destroyed */
        if (__improbable(state == LCK_MTX_TAG_DESTROYED)) {
            lck_mtx_try_destroyed(lock);
        }

        /* Is this an indirect mutex? */
        if (__improbable(state == LCK_MTX_TAG_INDIRECT)) {
            indirect = get_indirect_mutex(&lock, &state);

            lck_grp_mtx_update_held((struct _lck_mtx_ext_ *)lock);
        }

        if (!lck_mtx_try_lock_wait_interlock_to_clear(lock, &state)) {
            if (indirect)
                lck_grp_mtx_update_miss((struct _lck_mtx_ext_ *)lock, &first_miss);
            return FALSE;
        }
    }

    /* no - can't be INDIRECT, DESTROYED or locked */
    while (__improbable(!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_SPIN_MSK, &state))) {
        if (!lck_mtx_try_lock_wait_interlock_to_clear(lock, &state)) {
            if (indirect)
                lck_grp_mtx_update_miss((struct _lck_mtx_ext_ *)lock, &first_miss);
            return FALSE;
        }
    }

    /* lock and interlock acquired */

    thread_t thread = current_thread();
    /* record owner of mutex */
    ordered_store_mtx_owner(lock, (uintptr_t)thread);

#if MACH_LDEBUG
    thread->mutex_count++; /* lock statistic */
#endif

#if CONFIG_DTRACE
    LOCKSTAT_RECORD(LS_LCK_MTX_TRY_SPIN_LOCK_ACQUIRE, lock, 0);
#endif
    return TRUE;
}
__attribute__((noinline))
void
lck_mtx_convert_spin(
    lck_mtx_t *lock)
{
    uint32_t state;

    state = ordered_load_mtx_state(lock);

    /* Is this an indirect mutex? */
    if (__improbable(state == LCK_MTX_TAG_INDIRECT)) {
        /* If so, take indirection */
        get_indirect_mutex(&lock, &state);
    }

    assertf((thread_t)lock->lck_mtx_owner == current_thread(), "lock %p not owned by thread %p (current owner %p)", lock, current_thread(), (thread_t)lock->lck_mtx_owner);

    if (__improbable(state & LCK_MTX_MLOCKED_MSK)) {
        /* already owned as a mutex, just return */
        return;
    }

    assert(get_preemption_level() > 0);
    assert(state & LCK_MTX_ILOCKED_MSK);
    assert(state & LCK_MTX_SPIN_MSK);

    /*
     * Check if there are waiters to
     * inherit their priority.
     */
    if (__improbable(state & LCK_MTX_WAITERS_MSK)) {
        return lck_mtx_convert_spin_acquire_tail(lock);
    }

    lck_mtx_convert_spin_finish_inline(lock, ordered_load_mtx_state(lock));
}
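/*
 * Illustrative sketch (not compiled) of the spin-to-full conversion that
 * lck_mtx_convert_spin() supports: take the lock in spin mode for a short
 * critical section, then convert to a full mutex before doing anything
 * that may block. example_update is a hypothetical caller.
 */
#if 0
static void
example_update(lck_mtx_t *lck)
{
    lck_mtx_lock_spin(lck);        /* cheap: interlock + spin mode */
    /* ... short, non-blocking updates ... */

    lck_mtx_convert_spin(lck);     /* now a full mutex; blocking is allowed */
    /* ... work that may sleep while the lock is held ... */

    lck_mtx_unlock(lck);
}
#endif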
static inline boolean_t
lck_mtx_lock_grab_mutex(
    lck_mtx_t *lock)
{
    uint32_t state;

    state = ordered_load_mtx_state(lock);

    if (!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_MLOCKED_MSK, &state)) {
        return FALSE;
    }

    /* lock and interlock acquired */

    thread_t thread = current_thread();
    /* record owner of mutex */
    ordered_store_mtx_owner(lock, (uintptr_t)thread);

#if MACH_LDEBUG
    thread->mutex_count++; /* lock statistic */
#endif
    return TRUE;
}
__attribute__((noinline))
void
lck_mtx_assert(
    lck_mtx_t *lock,
    unsigned int type)
{
    thread_t thread, owner;
    uint32_t state;

    thread = current_thread();
    state = ordered_load_mtx_state(lock);

    if (state == LCK_MTX_TAG_INDIRECT) {
        get_indirect_mutex(&lock, &state);
    }

    owner = (thread_t)lock->lck_mtx_owner;

    if (type == LCK_MTX_ASSERT_OWNED) {
        if (owner != thread || !(state & (LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK)))
            panic("mutex (%p) not owned\n", lock);
    } else {
        assert(type == LCK_MTX_ASSERT_NOTOWNED);
        if (owner == thread)
            panic("mutex (%p) owned\n", lock);
    }
}
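/*
 * Illustrative sketch (not compiled): lck_mtx_assert() is usually reached
 * through the LCK_MTX_ASSERT() macro so callers can document their locking
 * contract at function entry. example_requires_lock is a hypothetical caller.
 */
#if 0
static void
example_requires_lock(lck_mtx_t *lck)
{
    LCK_MTX_ASSERT(lck, LCK_MTX_ASSERT_OWNED);   /* caller must hold lck */
    /* ... manipulate state protected by lck ... */
}
#endif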
/*
 * Routine: lck_mtx_lock_spinwait_x86
 *
 * Invoked trying to acquire a mutex when there is contention but
 * the holder is running on another processor. We spin for up to a maximum
 * time waiting for the lock to be released.
 *
 * Called with the interlock unlocked.
 * returns LCK_MTX_SPINWAIT_ACQUIRED if mutex acquired
 * returns LCK_MTX_SPINWAIT_SPUN if we spun
 * returns LCK_MTX_SPINWAIT_NO_SPIN if we didn't spin due to the holder not running
 */
__attribute__((noinline))
lck_mtx_spinwait_ret_type_t
lck_mtx_lock_spinwait_x86(
    lck_mtx_t *mutex)
{
    __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(mutex);
    thread_t holder;
    uint64_t overall_deadline;
    uint64_t check_owner_deadline;
    uint64_t cur_time;
    lck_mtx_spinwait_ret_type_t retval = LCK_MTX_SPINWAIT_SPUN;

    KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_START,
        trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner), mutex->lck_mtx_waiters, 0, 0);

    cur_time = mach_absolute_time();
    overall_deadline = cur_time + MutexSpin;
    check_owner_deadline = cur_time;

    /*
     * Spin while:
     *   - mutex is locked, and
     *   - it is locked as a spin lock, and
     *   - owner is running on another processor, and
     *   - owner (processor) is not idling, and
     *   - we haven't spun for long enough.
     */
    do {
        if (__probable(lck_mtx_lock_grab_mutex(mutex))) {
            retval = LCK_MTX_SPINWAIT_ACQUIRED;
            break;
        }
        cur_time = mach_absolute_time();

        if (cur_time >= overall_deadline)
            break;

        if (cur_time >= check_owner_deadline && mutex->lck_mtx_owner) {
            boolean_t istate;

            /*
             * We will repeatedly peek at the state of the lock while spinning,
             * and we will acquire the interlock to do so.
             * The thread that will unlock the mutex will also need to acquire
             * the interlock, and we want to avoid slowing it down.
             * To avoid taking an interrupt while holding the interlock
             * (which would increase the time we hold it), we try to
             * acquire the interlock with interrupts disabled.
             * This is safe because it is a try-lock: if we cannot acquire
             * the interlock we re-enable interrupts and fail, so it is
             * fine to call it even if the interlock was already held.
             */
            if (lck_mtx_interlock_try_lock_disable_interrupts(mutex, &istate)) {
                if ((holder = (thread_t)mutex->lck_mtx_owner) != NULL) {
                    if (!(holder->machine.specFlags & OnProc) ||
                        (holder->state & TH_IDLE)) {
                        lck_mtx_interlock_unlock_enable_interrupts(mutex, istate);

                        retval = LCK_MTX_SPINWAIT_NO_SPIN;
                        break;
                    }
                }
                lck_mtx_interlock_unlock_enable_interrupts(mutex, istate);

                check_owner_deadline = cur_time + (MutexSpin / 4);
            }
        }
        cpu_pause();
    } while (TRUE);

#if CONFIG_DTRACE
    /*
     * We've already kept a count via overall_deadline of how long we spun.
     * If dtrace is active, then we compute backwards to decide how
     * long we spun.
     *
     * Note that we record a different probe id depending on whether
     * this is a direct or indirect mutex. This allows us to
     * penalize only lock groups that have debug/stats enabled
     * with dtrace processing if desired.
     */
    if (__probable(mutex->lck_mtx_is_ext == 0)) {
        LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN, mutex,
            mach_absolute_time() - (overall_deadline - MutexSpin));
    } else {
        LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_SPIN, mutex,
            mach_absolute_time() - (overall_deadline - MutexSpin));
    }
    /* The lockstat acquire event is recorded by the assembly code beneath us. */
#endif

    KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_END,
        trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner), mutex->lck_mtx_waiters, retval, 0);

    return retval;
}
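/*
 * Illustrative sketch (not compiled) of the return-value contract documented
 * above: callers such as lck_mtx_lock_contended() dispatch on the
 * lck_mtx_spinwait_ret_type_t result. example_lock_path is a hypothetical caller.
 */
#if 0
static void
example_lock_path(lck_mtx_t *mutex)
{
    switch (lck_mtx_lock_spinwait_x86(mutex)) {
    case LCK_MTX_SPINWAIT_ACQUIRED:
        /* lock owned; interlock held and preemption disabled */
        break;
    case LCK_MTX_SPINWAIT_NO_SPIN:
    case LCK_MTX_SPINWAIT_SPUN:
        /* not acquired; take the interlock and block in lck_mtx_lock_wait_x86() */
        break;
    }
}
#endif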
/*
 * Routine: lck_mtx_lock_wait_x86
 *
 * Invoked in order to wait on contention.
 *
 * Called with the interlock locked and
 * preemption disabled...
 * returns it unlocked and with preemption enabled
 *
 * lck_mtx_waiters is 1:1 with a wakeup needing to occur.
 * A runnable waiter can exist between wait and acquire
 * without a waiters count being set.
 * This allows us to never make a spurious wakeup call.
 *
 * This avoids taking the thread lock if the owning thread is the same priority.
 * This optimizes the case of same-priority threads contending on a lock.
 * However, that allows the owning thread to drop in priority while holding the lock,
 * because there is no state the priority change can inspect that says
 * the targeted thread holds a contended mutex.
 *
 * One possible solution: priority changes could look for some atomic tag
 * on the thread saying 'holding contended lock', and then set up a promotion.
 * Needs a story for dropping that promotion - the last contended unlock
 * has to notice that this has happened.
 */
__attribute__((noinline))
void
lck_mtx_lock_wait_x86(
    lck_mtx_t *mutex,
    struct turnstile **ts)
{
    thread_t self = current_thread();

#if CONFIG_DTRACE
    uint64_t sleep_start = 0;

    if (lockstat_probemap[LS_LCK_MTX_LOCK_BLOCK] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_BLOCK]) {
        sleep_start = mach_absolute_time();
    }
#endif
    __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(mutex);

    KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START,
        trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner),
        mutex->lck_mtx_waiters, 0, 0);

    assert(self->waiting_for_mutex == NULL);
    self->waiting_for_mutex = mutex;
    mutex->lck_mtx_waiters++;

    thread_t holder = (thread_t)mutex->lck_mtx_owner;
    assert(holder != NULL);

    /*
     * lck_mtx_lock_wait_x86 might be called in a loop. Call turnstile_prepare
     * just once and reuse the same turnstile while looping; the matching
     * turnstile_complete will be called by lck_mtx_lock_contended when the
     * lock is finally acquired.
     */
    if (*ts == NULL) {
        *ts = turnstile_prepare((uintptr_t)mutex, NULL, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
    }

    struct turnstile *turnstile = *ts;
    thread_set_pending_block_hint(self, kThreadWaitKernelMutex);
    turnstile_update_inheritor(turnstile, holder, (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));

    waitq_assert_wait64(&turnstile->ts_waitq, CAST_EVENT64_T(LCK_MTX_EVENT(mutex)), THREAD_UNINT | THREAD_WAIT_NOREPORT_USER, TIMEOUT_WAIT_FOREVER);

    lck_mtx_ilk_unlock(mutex);

    turnstile_update_inheritor_complete(turnstile, TURNSTILE_INTERLOCK_NOT_HELD);

    thread_block(THREAD_CONTINUE_NULL);

    self->waiting_for_mutex = NULL;

    KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_END,
        trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner),
        mutex->lck_mtx_waiters, 0, 0);

#if CONFIG_DTRACE
    /*
     * Record the Dtrace lockstat probe for blocking, block time
     * measured from when we were entered.
     */
    if (sleep_start) {
        if (mutex->lck_mtx_is_ext == 0) {
            LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_BLOCK, mutex,
                mach_absolute_time() - sleep_start);
        } else {
            LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_BLOCK, mutex,
                mach_absolute_time() - sleep_start);
        }
    }
#endif
}
/*
 * Routine: kdp_lck_mtx_lock_spin_is_acquired
 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
 * Returns: TRUE if lock is acquired.
 */
boolean_t
kdp_lck_mtx_lock_spin_is_acquired(lck_mtx_t *lck)
{
    if (not_in_kdp) {
        panic("panic: kdp_lck_mtx_lock_spin_is_acquired called outside of kernel debugger");
    }

    if (lck->lck_mtx_ilocked || lck->lck_mtx_mlocked) {
        return TRUE;
    }

    return FALSE;
}
void
kdp_lck_mtx_find_owner(__unused struct waitq *waitq, event64_t event, thread_waitinfo_t *waitinfo)
{
    lck_mtx_t *mutex = LCK_EVENT_TO_MUTEX(event);
    waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(mutex);
    thread_t holder = (thread_t)mutex->lck_mtx_owner;
    waitinfo->owner = thread_tid(holder);
}
void
kdp_rwlck_find_owner(__unused struct waitq *waitq, event64_t event, thread_waitinfo_t *waitinfo)
{
    lck_rw_t *rwlck = NULL;
    switch (waitinfo->wait_type) {
    case kThreadWaitKernelRWLockRead:
        rwlck = READ_EVENT_TO_RWLOCK(event);
        break;
    case kThreadWaitKernelRWLockWrite:
    case kThreadWaitKernelRWLockUpgrade:
        rwlck = WRITE_EVENT_TO_RWLOCK(event);
        break;
    default:
        panic("%s was called with an invalid blocking type", __FUNCTION__);
        break;
    }
    waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(rwlck);
    waitinfo->owner = 0;