/*
 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *    Author: Avadis Tevanian, Jr., Michael Wayne Young
 *
 *    Locking primitives implementation
 */
#define ATOMIC_PRIVATE 1
#define LOCK_PRIVATE 1

#include <mach_ldebug.h>

#include <kern/lock_stat.h>
#include <kern/locks.h>
#include <kern/kalloc.h>
#include <kern/misc_protos.h>
#include <kern/thread.h>
#include <kern/processor.h>
#include <kern/cpu_data.h>
#include <kern/cpu_number.h>
#include <kern/sched_prim.h>
#include <kern/debug.h>

#include <i386/machine_routines.h>    /* machine_timeout_suspended() */
#include <machine/atomic.h>
#include <machine/machine_cpu.h>
#include <sys/kdebug.h>
#include <i386/locks_i386_inlines.h>
#if CONFIG_DTRACE
#define DTRACE_RW_SHARED    0x0    //reader
#define DTRACE_RW_EXCL      0x1    //writer
#define DTRACE_NO_FLAG      0x0    //not applicable
#endif /* CONFIG_DTRACE */

#define LCK_RW_LCK_EXCLUSIVE_CODE       0x100
#define LCK_RW_LCK_EXCLUSIVE1_CODE      0x101
#define LCK_RW_LCK_SHARED_CODE          0x102
#define LCK_RW_LCK_SH_TO_EX_CODE        0x103
#define LCK_RW_LCK_SH_TO_EX1_CODE       0x104
#define LCK_RW_LCK_EX_TO_SH_CODE        0x105

#define LCK_RW_LCK_EX_WRITER_SPIN_CODE  0x106
#define LCK_RW_LCK_EX_WRITER_WAIT_CODE  0x107
#define LCK_RW_LCK_EX_READER_SPIN_CODE  0x108
#define LCK_RW_LCK_EX_READER_WAIT_CODE  0x109
#define LCK_RW_LCK_SHARED_SPIN_CODE     0x110
#define LCK_RW_LCK_SHARED_WAIT_CODE     0x111
#define LCK_RW_LCK_SH_TO_EX_SPIN_CODE   0x112
#define LCK_RW_LCK_SH_TO_EX_WAIT_CODE   0x113

#define ANY_LOCK_DEBUG  (USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG)
unsigned int LcksOpts = 0;

#if DEVELOPMENT || DEBUG
unsigned int LckDisablePreemptCheck = 0;
#endif

#if USLOCK_DEBUG
/*
 *    Perform simple lock checks.
 */
int uslock_check = 1;
int max_lock_loops = 100000000;
decl_simple_lock_data(extern, printf_lock)
decl_simple_lock_data(extern, panic_lock)
#endif /* USLOCK_DEBUG */

extern unsigned int not_in_kdp;

/*
 *    We often want to know the addresses of the callers
 *    of the various lock routines.  However, this information
 *    is only used for debugging and statistics.
 */
#define INVALID_PC      ((void *) VM_MAX_KERNEL_ADDRESS)
#define INVALID_THREAD  ((void *) VM_MAX_KERNEL_ADDRESS)
#if ANY_LOCK_DEBUG
#define OBTAIN_PC(pc)   ((pc) = GET_RETURN_PC())
#define DECL_PC(pc)     pc_t pc;
#else /* ANY_LOCK_DEBUG */
#ifdef lint
/*
 *    Eliminate lint complaints about unused local pc variables.
 */
#define OBTAIN_PC(pc)   ++pc
#else /* lint */
#define OBTAIN_PC(pc)
#endif /* lint */
#endif /* USLOCK_DEBUG */
/*
 * atomic exchange API is a low level abstraction of the operations
 * to atomically read, modify, and write a pointer.  This abstraction works
 * for both Intel and ARMv8.1 compare and exchange atomic instructions as
 * well as the ARM exclusive instructions.
 *
 * atomic_exchange_begin() - begin exchange and retrieve current value
 * atomic_exchange_complete() - conclude an exchange
 * atomic_exchange_abort() - cancel an exchange started with atomic_exchange_begin()
 */
atomic_exchange_begin32(uint32_t *target, uint32_t *previous, enum memory_order ord)
{
    uint32_t    val;

    (void)ord;            // Memory order not used
    val = __c11_atomic_load((_Atomic uint32_t *)target, memory_order_relaxed);
    *previous = val;
    return val;
}

atomic_exchange_complete32(uint32_t *target, uint32_t previous, uint32_t newval, enum memory_order ord)
{
    return __c11_atomic_compare_exchange_strong((_Atomic uint32_t *)target, &previous, newval, ord, memory_order_relaxed);
}

atomic_exchange_abort(void)
{
}

atomic_test_and_set32(uint32_t *target, uint32_t test_mask, uint32_t set_mask, enum memory_order ord, boolean_t wait)
{
    uint32_t    value, prev;

    value = atomic_exchange_begin32(target, &prev, ord);
    if (value & test_mask) {
        atomic_exchange_abort();
        return FALSE;
    }
    value |= set_mask;
    if (atomic_exchange_complete32(target, prev, value, ord)) {
        return TRUE;
    }
}
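
/*
 * Illustrative sketch (not part of the build): how a caller typically drives
 * the exchange API above as a compare-and-swap retry loop.  The helper name
 * example_set_flag32() and its flag argument are hypothetical and exist only
 * for illustration; the real users are the lck_rw_* fast paths below
 * (e.g. lck_rw_grab_want()).
 */
#if 0
static boolean_t
example_set_flag32(uint32_t *word, uint32_t flag)
{
    uint32_t    data, prev;

    for (;;) {
        data = atomic_exchange_begin32(word, &prev, memory_order_relaxed);
        if (data & flag) {
            atomic_exchange_abort();    /* bit already set: give up the exchange */
            return FALSE;
        }
        data |= flag;
        if (atomic_exchange_complete32(word, prev, data, memory_order_relaxed)) {
            return TRUE;                /* CAS succeeded */
        }
        /* CAS failed because the word changed underneath us: retry */
    }
}
#endif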
/*
 *    Portable lock package implementation of usimple_locks.
 */

#if USLOCK_DEBUG
#define USLDBG(stmt)    stmt
void        usld_lock_init(usimple_lock_t, unsigned short);
void        usld_lock_pre(usimple_lock_t, pc_t);
void        usld_lock_post(usimple_lock_t, pc_t);
void        usld_unlock(usimple_lock_t, pc_t);
void        usld_lock_try_pre(usimple_lock_t, pc_t);
void        usld_lock_try_post(usimple_lock_t, pc_t);
int         usld_lock_common_checks(usimple_lock_t, char *);
#else /* USLOCK_DEBUG */
#define USLDBG(stmt)
#endif /* USLOCK_DEBUG */

/*
 * Forward definitions
 */
static void lck_rw_lock_shared_gen(lck_rw_t *lck);
static void lck_rw_lock_exclusive_gen(lck_rw_t *lck);
static boolean_t lck_rw_lock_shared_to_exclusive_success(lck_rw_t *lck);
static boolean_t lck_rw_lock_shared_to_exclusive_failure(lck_rw_t *lck, uint32_t prior_lock_state);
static void lck_rw_lock_exclusive_to_shared_gen(lck_rw_t *lck, uint32_t prior_lock_state);
static lck_rw_type_t lck_rw_done_gen(lck_rw_t *lck, uint32_t prior_lock_state);
void lck_rw_clear_promotions_x86(thread_t thread);
static boolean_t lck_rw_held_read_or_upgrade(lck_rw_t *lock);
static boolean_t lck_rw_grab_want(lck_rw_t *lock);
static boolean_t lck_rw_grab_shared(lck_rw_t *lock);
static void lck_mtx_unlock_wakeup_tail(lck_mtx_t *mutex, int prior_lock_state, boolean_t indirect);
static void lck_mtx_interlock_lock(lck_mtx_t *mutex, uint32_t *new_state);
static void lck_mtx_interlock_lock_clear_flags(lck_mtx_t *mutex, uint32_t and_flags, uint32_t *new_state);
static int lck_mtx_interlock_try_lock(lck_mtx_t *mutex, uint32_t *new_state);
static int lck_mtx_interlock_try_lock_set_flags(lck_mtx_t *mutex, uint32_t or_flags, uint32_t *new_state);
static boolean_t lck_mtx_lock_wait_interlock_to_clear(lck_mtx_t *lock, uint32_t *new_state);
static boolean_t lck_mtx_try_lock_wait_interlock_to_clear(lck_mtx_t *lock, uint32_t *new_state);
/*
 *    Routine:    lck_spin_alloc_init
 */
    if ((lck = (lck_spin_t *)kalloc(sizeof(lck_spin_t))) != 0) {
        lck_spin_init(lck, grp, attr);
    }

/*
 *    Routine:    lck_spin_free
 */
    lck_spin_destroy(lck, grp);
    kfree(lck, sizeof(lck_spin_t));

/*
 *    Routine:    lck_spin_init
 */
    __unused lck_attr_t    *attr)
{
    usimple_lock_init((usimple_lock_t) lck, 0);
    lck_grp_reference(grp);
    lck_grp_lckcnt_incr(grp, LCK_TYPE_SPIN);
}

/*
 *    Routine:    lck_spin_destroy
 */
    if (lck->interlock == LCK_SPIN_TAG_DESTROYED) {
        return;
    }
    lck->interlock = LCK_SPIN_TAG_DESTROYED;
    lck_grp_lckcnt_decr(grp, LCK_TYPE_SPIN);
    lck_grp_deallocate(grp);

/*
 *    Routine:    lck_spin_lock
 */
    usimple_lock((usimple_lock_t) lck, grp);

    usimple_lock((usimple_lock_t) lck, NULL);

/*
 *    Routine:    lck_spin_unlock
 */
    usimple_unlock((usimple_lock_t) lck);

lck_spin_try_lock_grp(
    lck_spin_t    *lck,
    lck_grp_t     *grp)
{
    boolean_t lrval = (boolean_t)usimple_lock_try((usimple_lock_t) lck, grp);
#if DEVELOPMENT || DEBUG
#endif
    return lrval;
}

/*
 *    Routine:    lck_spin_try_lock
 */
    boolean_t lrval = (boolean_t)usimple_lock_try((usimple_lock_t) lck, LCK_GRP_NULL);
#if DEVELOPMENT || DEBUG
#endif
    return lrval;

/*
 *    Routine:    lck_spin_assert
 */
lck_spin_assert(lck_spin_t *lock, unsigned int type)
{
    thread_t thread, holder;

    if (__improbable(type != LCK_ASSERT_OWNED && type != LCK_ASSERT_NOTOWNED)) {
        panic("lck_spin_assert(): invalid arg (%u)", type);
    }
    state = lock->interlock;
    holder = (thread_t)state;
    thread = current_thread();
    if (type == LCK_ASSERT_OWNED) {
        if (__improbable(holder == THREAD_NULL)) {
            panic("Lock not owned %p = %lx", lock, state);
        }
        if (__improbable(holder != thread)) {
            panic("Lock not owned by current thread %p = %lx", lock, state);
        }
    } else if (type == LCK_ASSERT_NOTOWNED) {
        if (__improbable(holder != THREAD_NULL)) {
            if (holder == thread) {
                panic("Lock owned by current thread %p = %lx", lock, state);
            }
            panic("Lock %p owned by thread %p", lock, holder);
        }
    }
}

/*
 *    Routine: kdp_lck_spin_is_acquired
 *    NOT SAFE: To be used only by kernel debugger to avoid deadlock.
 *    Returns: TRUE if lock is acquired.
 */
kdp_lck_spin_is_acquired(lck_spin_t *lck)
{
    if (not_in_kdp) {
        panic("panic: spinlock acquired check done outside of kernel debugger");
    }
    return (lck->interlock != 0)? TRUE : FALSE;
}
/*
 *    Initialize a usimple_lock.
 *
 *    No change in preemption state.
 */
    __unused unsigned short    tag)
{
#ifndef MACHINE_SIMPLE_LOCK
    USLDBG(usld_lock_init(l, tag));
    hw_lock_init(&l->interlock);
#else
    simple_lock_init((simple_lock_t)l, tag);
#endif
}

volatile uint32_t spinlock_owner_cpu = ~0;
volatile usimple_lock_t spinlock_timed_out;

spinlock_timeout_NMI(uintptr_t thread_addr)
{
    uint32_t i;

    for (i = 0; i < real_ncpus; i++) {
        if ((cpu_data_ptr[i] != NULL) && ((uintptr_t)cpu_data_ptr[i]->cpu_active_thread == thread_addr)) {
            spinlock_owner_cpu = i;
            if ((uint32_t) cpu_number() != i) {
                /* Cause NMI and panic on the owner's cpu */
                NMIPI_panic(cpu_to_cpumask(i), SPINLOCK_TIMEOUT);
            }
            break;
        }
    }

    return spinlock_owner_cpu;
}
/*
 *    Acquire a usimple_lock.
 *
 *    Returns with preemption disabled.  Note
 *    that the hw_lock routines are responsible for
 *    maintaining preemption state.
 */
    LCK_GRP_ARG(lck_grp_t *grp))
{
#ifndef MACHINE_SIMPLE_LOCK
    USLDBG(usld_lock_pre(l, pc));

    if (__improbable(hw_lock_to(&l->interlock, LockTimeOutTSC, grp) == 0)) {
        boolean_t uslock_acquired = FALSE;
        while (machine_timeout_suspended()) {
            if ((uslock_acquired = hw_lock_to(&l->interlock, LockTimeOutTSC, grp))) {
                break;
            }
        }

        if (uslock_acquired == FALSE) {
            uintptr_t lowner = (uintptr_t)l->interlock.lock_data;
            spinlock_timed_out = l;
            lock_cpu = spinlock_timeout_NMI(lowner);
            panic("Spinlock acquisition timed out: lock=%p, lock owner thread=0x%lx, current_thread: %p, lock owner active on CPU 0x%x, current owner: 0x%lx, time: %llu",
                l, lowner, current_thread(), lock_cpu, (uintptr_t)l->interlock.lock_data, mach_absolute_time());
        }
    }
#if DEVELOPMENT || DEBUG
#endif
    USLDBG(usld_lock_post(l, pc));
#else
    simple_lock((simple_lock_t)l, grp);
#endif
    LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE, l, 0, (uintptr_t)LCK_GRP_PROBEARG(grp));
/*
 *    Release a usimple_lock.
 *
 *    Returns with preemption enabled.  Note
 *    that the hw_lock routines are responsible for
 *    maintaining preemption state.
 */
#ifndef MACHINE_SIMPLE_LOCK
    USLDBG(usld_unlock(l, pc));
#if DEVELOPMENT || DEBUG
#endif
    hw_lock_unlock(&l->interlock);
#else
    simple_unlock_rwmb((simple_lock_t)l);
#endif
/*
 *    Conditionally acquire a usimple_lock.
 *
 *    On success, returns with preemption disabled.
 *    On failure, returns with preemption in the same state
 *    as when first invoked.  Note that the hw_lock routines
 *    are responsible for maintaining preemption state.
 *
 *    XXX No stats are gathered on a miss; I preserved this
 *    behavior from the original assembly-language code, but
 *    doesn't it make sense to log misses?  XXX
 */
#ifndef MACHINE_SIMPLE_LOCK
    unsigned int    success;

    USLDBG(usld_lock_try_pre(l, pc));
    if ((success = hw_lock_try(&l->interlock, grp))) {
#if DEVELOPMENT || DEBUG
#endif
        USLDBG(usld_lock_try_post(l, pc));
    }
    return success;
#else
    return simple_lock_try((simple_lock_t)l, grp);
#endif
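
/*
 * Illustrative sketch (not part of the build): the usual try/back-off pattern
 * a caller of the try variant follows.  The function example_try_update() and
 * the lock example_lock are hypothetical and only show the calling convention;
 * on success the lock is held with preemption disabled until usimple_unlock().
 */
#if 0
decl_simple_lock_data(static, example_lock)

static boolean_t
example_try_update(void)
{
    if (!usimple_lock_try(&example_lock, LCK_GRP_NULL)) {
        return FALSE;        /* contended: caller retries later instead of spinning */
    }
    /* ... short critical section ... */
    usimple_unlock(&example_lock);
    return TRUE;
}
#endif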
/*
 * Acquire a usimple_lock while polling for pending TLB flushes
 * and spinning on a lock.
 */
void
usimple_lock_try_lock_loop(usimple_lock_t l, lck_grp_t *grp)
{
    boolean_t istate = ml_get_interrupts_enabled();

    while (!simple_lock_try(l, grp)) {
        if (!istate) {
            handle_pending_TLB_flushes();
        }
        cpu_pause();
    }
}
/*
 *    States of a usimple_lock.  The default when initializing
 *    a usimple_lock is setting it up for debug checking.
 */
#define USLOCK_CHECKED      0x0001          /* lock is being checked */
#define USLOCK_TAKEN        0x0002          /* lock has been taken */
#define USLOCK_INIT         0xBAA0          /* lock has been initialized */
#define USLOCK_INITIALIZED  (USLOCK_INIT|USLOCK_CHECKED)
#define USLOCK_CHECKING(l)  (uslock_check && \
                             ((l)->debug.state & USLOCK_CHECKED))
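
/*
 * Illustrative sketch (not part of the build): how the state bits above are
 * meant to compose.  A freshly initialized lock carries USLOCK_INITIALIZED
 * (i.e. USLOCK_INIT|USLOCK_CHECKED) when uslock_check is enabled, and
 * USLOCK_TAKEN is OR'ed in while the lock is held.  The helper below is
 * hypothetical; the real consumers are the usld_* routines that follow.
 */
#if 0
static boolean_t
example_usl_state_ok(usimple_lock_t l, boolean_t expect_taken)
{
    unsigned int state = l->debug.state;

    if ((state & ~USLOCK_TAKEN) != USLOCK_INITIALIZED) {
        return FALSE;                           /* lost its init/checked bits */
    }
    return ((state & USLOCK_TAKEN) != 0) == expect_taken;
}
#endif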
/*
 *    Trace activities of a particularly interesting lock.
 */
void    usl_trace(usimple_lock_t, int, pc_t, const char *);

/*
 *    Initialize the debugging information contained
 *    in a usimple_lock.
 */
    __unused unsigned short    tag)
{
    if (l == USIMPLE_LOCK_NULL) {
        panic("lock initialization: null lock pointer");
    }
    l->lock_type = USLOCK_TAG;
    l->debug.state = uslock_check ? USLOCK_INITIALIZED : 0;
    l->debug.lock_cpu = l->debug.unlock_cpu = 0;
    l->debug.lock_pc = l->debug.unlock_pc = INVALID_PC;
    l->debug.lock_thread = l->debug.unlock_thread = INVALID_THREAD;
    l->debug.duration[0] = l->debug.duration[1] = 0;
    l->debug.unlock_cpu = l->debug.unlock_cpu = 0;
    l->debug.unlock_pc = l->debug.unlock_pc = INVALID_PC;
    l->debug.unlock_thread = l->debug.unlock_thread = INVALID_THREAD;
}
/*
 *    These checks apply to all usimple_locks, not just
 *    those with USLOCK_CHECKED turned on.
 */
int
usld_lock_common_checks(
    usimple_lock_t    l,
    char              *caller)
{
    if (l == USIMPLE_LOCK_NULL) {
        panic("%s: null lock pointer", caller);
    }
    if (l->lock_type != USLOCK_TAG) {
        panic("%s: %p is not a usimple lock, 0x%x", caller, l, l->lock_type);
    }
    if (!(l->debug.state & USLOCK_INIT)) {
        panic("%s: %p is not an initialized lock, 0x%x", caller, l, l->debug.state);
    }
    return USLOCK_CHECKING(l);
}
/*
 *    Debug checks on a usimple_lock just before attempting
 *    to acquire it.
 */
    char    caller[] = "usimple_lock";

    if (!usld_lock_common_checks(l, caller)) {
        return;
    }

    /*
     *    Note that we have a weird case where we are getting a lock when we are
     *    in the process of putting the system to sleep. We are running with no
     *    current threads, therefore we can't tell if we are trying to retake a lock
     *    we have or someone on the other processor has it.  Therefore we just
     *    ignore this test if the locking thread is 0.
     */
    if ((l->debug.state & USLOCK_TAKEN) && l->debug.lock_thread &&
        l->debug.lock_thread == (void *) current_thread()) {
        printf("%s: lock %p already locked (at %p) by",
            caller, l, l->debug.lock_pc);
        printf(" current thread %p (new attempt at pc %p)\n",
            l->debug.lock_thread, pc);
    }
    mp_disable_preemption();
    usl_trace(l, cpu_number(), pc, caller);
    mp_enable_preemption();

/*
 *    Debug checks on a usimple_lock just after acquiring it.
 *
 *    Pre-emption has been disabled at this point,
 *    so we are safe in using cpu_number.
 */
    char    caller[] = "successful usimple_lock";

    if (!usld_lock_common_checks(l, caller)) {
        return;
    }

    if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED)) {
        panic("%s: lock %p became uninitialized",
            caller, l);
    }
    if ((l->debug.state & USLOCK_TAKEN)) {
        panic("%s: lock 0x%p became TAKEN by someone else",
            caller, l);
    }

    mycpu = cpu_number();
    l->debug.lock_thread = (void *)current_thread();
    l->debug.state |= USLOCK_TAKEN;
    l->debug.lock_pc = pc;
    l->debug.lock_cpu = mycpu;

    usl_trace(l, mycpu, pc, caller);
/*
 *    Debug checks on a usimple_lock just before
 *    releasing it.  Note that the caller has not
 *    yet released the hardware lock.
 *
 *    Preemption is still disabled, so there's
 *    no problem using cpu_number.
 */
    char    caller[] = "usimple_unlock";

    if (!usld_lock_common_checks(l, caller)) {
        return;
    }

    mycpu = cpu_number();

    if (!(l->debug.state & USLOCK_TAKEN)) {
        panic("%s: lock 0x%p hasn't been taken",
            caller, l);
    }
    if (l->debug.lock_thread != (void *) current_thread()) {
        panic("%s: unlocking lock 0x%p, owned by thread %p",
            caller, l, l->debug.lock_thread);
    }
    if (l->debug.lock_cpu != mycpu) {
        printf("%s: unlocking lock 0x%p on cpu 0x%x",
            caller, l, mycpu);
        printf(" (acquired on cpu 0x%x)\n", l->debug.lock_cpu);
    }
    usl_trace(l, mycpu, pc, caller);

    l->debug.unlock_thread = l->debug.lock_thread;
    l->debug.lock_thread = INVALID_PC;
    l->debug.state &= ~USLOCK_TAKEN;
    l->debug.unlock_pc = pc;
    l->debug.unlock_cpu = mycpu;
/*
 *    Debug checks on a usimple_lock just before
 *    attempting to acquire it.
 *
 *    Preemption isn't guaranteed to be disabled.
 */
    char    caller[] = "usimple_lock_try";

    if (!usld_lock_common_checks(l, caller)) {
        return;
    }
    mp_disable_preemption();
    usl_trace(l, cpu_number(), pc, caller);
    mp_enable_preemption();

/*
 *    Debug checks on a usimple_lock just after
 *    successfully attempting to acquire it.
 *
 *    Preemption has been disabled by the
 *    lock acquisition attempt, so it's safe
 *    to use cpu_number.
 */
    char    caller[] = "successful usimple_lock_try";

    if (!usld_lock_common_checks(l, caller)) {
        return;
    }

    if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED)) {
        panic("%s: lock 0x%p became uninitialized",
            caller, l);
    }
    if ((l->debug.state & USLOCK_TAKEN)) {
        panic("%s: lock 0x%p became TAKEN by someone else",
            caller, l);
    }

    mycpu = cpu_number();
    l->debug.lock_thread = (void *) current_thread();
    l->debug.state |= USLOCK_TAKEN;
    l->debug.lock_pc = pc;
    l->debug.lock_cpu = mycpu;

    usl_trace(l, mycpu, pc, caller);
/*
 *    For very special cases, set traced_lock to point to a
 *    specific lock of interest.  The result is a series of
 *    XPRs showing lock operations on that lock.  The lock_seq
 *    value is used to show the order of those operations.
 */
usimple_lock_t  traced_lock;
unsigned int    lock_seq;

    const char *    op_name)
{
    if (traced_lock == l) {
        XPR(XPR_SLOCK,
            "seq %d, cpu %d, %s @ %x\n",
            (uintptr_t) lock_seq, (uintptr_t) mycpu,
            (uintptr_t) op_name, (uintptr_t) pc, 0);
        lock_seq++;
    }
}

#endif /* USLOCK_DEBUG */
/*
 *    Routine:    lck_rw_alloc_init
 */
    if ((lck = (lck_rw_t *)kalloc(sizeof(lck_rw_t))) != 0) {
        bzero(lck, sizeof(lck_rw_t));
        lck_rw_init(lck, grp, attr);
    }

/*
 *    Routine:    lck_rw_free
 */
    lck_rw_destroy(lck, grp);
    kfree(lck, sizeof(lck_rw_t));

/*
 *    Routine:    lck_rw_init
 */
    lck_attr_t    *lck_attr = (attr != LCK_ATTR_NULL) ?
        attr : &LockDefaultLckAttr;

    hw_lock_byte_init(&lck->lck_rw_interlock);
    lck->lck_rw_want_write = FALSE;
    lck->lck_rw_want_upgrade = FALSE;
    lck->lck_rw_shared_count = 0;
    lck->lck_rw_can_sleep = TRUE;
    lck->lck_r_waiting = lck->lck_w_waiting = 0;
    lck->lck_rw_priv_excl = ((lck_attr->lck_attr_val &
        LCK_ATTR_RW_SHARED_PRIORITY) == 0);

    lck_grp_reference(grp);
    lck_grp_lckcnt_incr(grp, LCK_TYPE_RW);

/*
 *    Routine:    lck_rw_destroy
 */
    if (lck->lck_rw_tag == LCK_RW_TAG_DESTROYED) {
        return;
    }
    lck_rw_assert(lck, LCK_RW_ASSERT_NOTHELD);
    lck->lck_rw_tag = LCK_RW_TAG_DESTROYED;
    lck_grp_lckcnt_decr(grp, LCK_TYPE_RW);
    lck_grp_deallocate(grp);
/*
 *    Sleep locks.  These use the same data structure and algorithm
 *    as the spin locks, but the process sleeps while it is waiting
 *    for the lock.  These work on uniprocessor systems.
 */

#define DECREMENTER_TIMEOUT 1000000

/*
 * We disable interrupts while holding the RW interlock to prevent an
 * interrupt from exacerbating hold time.
 * Hence, local helper functions lck_interlock_lock()/lck_interlock_unlock().
 */
static inline boolean_t
lck_interlock_lock(lck_rw_t *lck)
{
    boolean_t    istate;

    istate = ml_set_interrupts_enabled(FALSE);
    hw_lock_byte_lock(&lck->lck_rw_interlock);
    return istate;
}

static inline void
lck_interlock_unlock(lck_rw_t *lck, boolean_t istate)
{
    hw_lock_byte_unlock(&lck->lck_rw_interlock);
    ml_set_interrupts_enabled(istate);
}
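
/*
 * Illustrative sketch (not part of the build): the idiom the slow paths below
 * use around these helpers.  Interrupts stay off only for the few instructions
 * spent examining or mutating the lock word, then the saved state is restored.
 * The surrounding function is hypothetical; the helpers and field names are
 * the real ones defined in this file.
 */
#if 0
static void
example_interlock_user(lck_rw_t *lck)
{
    boolean_t istate;

    istate = lck_interlock_lock(lck);    /* interrupts disabled + interlock held */
    /* ... examine/update lck_rw_* fields atomically w.r.t. other CPUs ... */
    lck_interlock_unlock(lck, istate);   /* drop interlock, restore interrupt state */
}
#endif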
/*
 * This inline is used when busy-waiting for an rw lock.
 * If interrupts were disabled when the lock primitive was called,
 * we poll the IPI handler for pending tlb flushes.
 * XXX This is a hack to avoid deadlocking on the pmap_system_lock.
 */
static inline void
lck_rw_lock_pause(boolean_t interrupts_enabled)
{
    if (!interrupts_enabled) {
        handle_pending_TLB_flushes();
    }
    cpu_pause();
}

static inline boolean_t
lck_rw_held_read_or_upgrade(lck_rw_t *lock)
{
    if (ordered_load(&lock->data) & (LCK_RW_SHARED_MASK | LCK_RW_INTERLOCK | LCK_RW_WANT_UPGRADE)) {
        return TRUE;
    }
    return FALSE;
}

/*
 * compute the deadline to spin against when
 * waiting for a change of state on a lck_rw_t
 */
static inline uint64_t
lck_rw_deadline_for_spin(lck_rw_t *lck)
{
    if (lck->lck_rw_can_sleep) {
        if (lck->lck_r_waiting || lck->lck_w_waiting || lck->lck_rw_shared_count > machine_info.max_cpus) {
            /*
             * there are already threads waiting on this lock... this
             * implies that they have spun beyond their deadlines waiting for
             * the desired state to show up so we will not bother spinning at this time...
             *   or
             * the current number of threads sharing this lock exceeds our capacity to run them
             * concurrently and since all states we're going to spin for require the rw_shared_count
             * to be at 0, we'll not bother spinning since the latency for this to happen is
             * unpredictable...
             */
            return mach_absolute_time();
        }
        return mach_absolute_time() + MutexSpin;
    }
    return mach_absolute_time() + (1LL * 1000000000LL);
}
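
/*
 * Illustrative sketch (not part of the build): how the spin deadline bounds the
 * busy-wait loops below before they fall back to blocking.  The helper name
 * example_spin_for_state() and its predicate argument are hypothetical;
 * lck_rw_lock_exclusive_gen() below shows the real pattern.
 */
#if 0
static boolean_t
example_spin_for_state(lck_rw_t *lck, boolean_t (*state_is_ready)(lck_rw_t *))
{
    uint64_t  deadline = lck_rw_deadline_for_spin(lck);
    boolean_t istate = ml_get_interrupts_enabled();

    while (!state_is_ready(lck) && mach_absolute_time() < deadline) {
        lck_rw_lock_pause(istate);      /* poll TLB-flush IPIs if interrupts are off */
    }
    return state_is_ready(lck);         /* FALSE => caller should block instead of spinning more */
}
#endif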
/*
 * Spin while interlock is held.
 */
static inline void
lck_rw_interlock_spin(lck_rw_t *lock)
{
    while (ordered_load(&lock->data) & LCK_RW_INTERLOCK) {
        cpu_pause();
    }
}

static boolean_t
lck_rw_grab_want(lck_rw_t *lock)
{
    uint32_t    data, prev;

    for (;;) {
        data = atomic_exchange_begin32(&lock->data, &prev, memory_order_relaxed);
        if ((data & LCK_RW_INTERLOCK) == 0) {
            break;
        }
        atomic_exchange_abort();
        lck_rw_interlock_spin(lock);
    }
    if (data & LCK_RW_WANT_WRITE) {
        atomic_exchange_abort();
        return FALSE;
    }
    data |= LCK_RW_WANT_WRITE;
    return atomic_exchange_complete32(&lock->data, prev, data, memory_order_relaxed);
}

static boolean_t
lck_rw_grab_shared(lck_rw_t *lock)
{
    uint32_t    data, prev;

    for (;;) {
        data = atomic_exchange_begin32(&lock->data, &prev, memory_order_acquire_smp);
        if ((data & LCK_RW_INTERLOCK) == 0) {
            break;
        }
        atomic_exchange_abort();
        lck_rw_interlock_spin(lock);
    }
    if (data & (LCK_RW_WANT_WRITE | LCK_RW_WANT_UPGRADE)) {
        if (((data & LCK_RW_SHARED_MASK) == 0) || (data & LCK_RW_PRIV_EXCL)) {
            atomic_exchange_abort();
            return FALSE;
        }
    }
    data += LCK_RW_SHARED_READER;
    return atomic_exchange_complete32(&lock->data, prev, data, memory_order_acquire_smp);
}
/*
 *    Routine:    lck_rw_lock_exclusive
 */
static void
lck_rw_lock_exclusive_gen(
    lck_rw_t    *lck)
{
    __kdebug_only uintptr_t    trace_lck = unslide_for_kdebug(lck);
    uint64_t          deadline = 0;
    int               gotlock = 0;
    int               slept = 0;
    int               lockheld = 0;
    wait_result_t     res = 0;
    boolean_t         istate = -1;

#if CONFIG_DTRACE
    boolean_t dtrace_ls_initialized = FALSE;
    boolean_t dtrace_rwl_excl_spin, dtrace_rwl_excl_block, dtrace_ls_enabled = FALSE;
    uint64_t wait_interval = 0;
    int readers_at_sleep = 0;
#endif

    /*
     *    Try to acquire the lck_rw_want_write bit.
     */
    while (!lck_rw_grab_want(lck)) {
#if CONFIG_DTRACE
        if (dtrace_ls_initialized == FALSE) {
            dtrace_ls_initialized = TRUE;
            dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
            dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
            dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
            if (dtrace_ls_enabled) {
                /*
                 * Either sleeping or spinning is happening,
                 * start a timing of our delay interval now.
                 */
                readers_at_sleep = lck->lck_rw_shared_count;
                wait_interval = mach_absolute_time();
            }
        }
#endif
        istate = ml_get_interrupts_enabled();

        deadline = lck_rw_deadline_for_spin(lck);

        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);

        while (((gotlock = lck_rw_grab_want(lck)) == 0) && mach_absolute_time() < deadline) {
            lck_rw_lock_pause(istate);
        }

        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_END, trace_lck, 0, 0, gotlock, 0);

        if (gotlock) {
            break;
        }
        /*
         * if we get here, the deadline has expired w/o us
         * being able to grab the lock exclusively
         * check to see if we're allowed to do a thread_block
         */
        if (lck->lck_rw_can_sleep) {
            istate = lck_interlock_lock(lck);

            if (lck->lck_rw_want_write) {
                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);

                lck->lck_w_waiting = TRUE;

                thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite);
                res = assert_wait(RW_LOCK_WRITER_EVENT(lck),
                    THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
                lck_interlock_unlock(lck, istate);

                if (res == THREAD_WAITING) {
                    res = thread_block(THREAD_CONTINUE_NULL);
                    slept++;
                }
                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_END, trace_lck, res, slept, 0, 0);
            } else {
                lck->lck_rw_want_write = TRUE;
                lck_interlock_unlock(lck, istate);
                break;
            }
        }
    }
    /*
     * Wait for readers (and upgrades) to finish...
     * the test for these conditions must be done simultaneously with
     * a check of the interlock not being held since
     * the rw_shared_count will drop to 0 first and then want_upgrade
     * will be set to 1 in the shared_to_exclusive scenario... those
     * adjustments are done behind the interlock and represent an
     * atomic change in state and must be considered as such
     * however, once we see the read count at 0, the want_upgrade not set
     * and the interlock not held, we are safe to proceed
     */
    while (lck_rw_held_read_or_upgrade(lck)) {
#if CONFIG_DTRACE
        /*
         * Either sleeping or spinning is happening, start
         * a timing of our delay interval now.  If we set it
         * to -1 we don't have accurate data so we cannot later
         * decide to record a dtrace spin or sleep event.
         */
        if (dtrace_ls_initialized == FALSE) {
            dtrace_ls_initialized = TRUE;
            dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
            dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
            dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
            if (dtrace_ls_enabled) {
                /*
                 * Either sleeping or spinning is happening,
                 * start a timing of our delay interval now.
                 */
                readers_at_sleep = lck->lck_rw_shared_count;
                wait_interval = mach_absolute_time();
            }
        }
#endif
        istate = ml_get_interrupts_enabled();

        deadline = lck_rw_deadline_for_spin(lck);

        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);

        while ((lockheld = lck_rw_held_read_or_upgrade(lck)) && mach_absolute_time() < deadline) {
            lck_rw_lock_pause(istate);
        }

        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_END, trace_lck, 0, 0, lockheld, 0);

        if (!lockheld) {
            break;
        }
        /*
         * if we get here, the deadline has expired w/o us
         * being able to grab the lock exclusively
         * check to see if we're allowed to do a thread_block
         */
        if (lck->lck_rw_can_sleep) {
            istate = lck_interlock_lock(lck);

            if (lck->lck_rw_shared_count != 0 || lck->lck_rw_want_upgrade) {
                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);

                lck->lck_w_waiting = TRUE;

                thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite);
                res = assert_wait(RW_LOCK_WRITER_EVENT(lck),
                    THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
                lck_interlock_unlock(lck, istate);

                if (res == THREAD_WAITING) {
                    res = thread_block(THREAD_CONTINUE_NULL);
                    slept++;
                }
                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_END, trace_lck, res, slept, 0, 0);
            } else {
                lck_interlock_unlock(lck, istate);
                /*
                 * must own the lock now, since we checked for
                 * readers or upgrade owner behind the interlock
                 * no need for a call to 'lck_rw_held_read_or_upgrade'
                 */
                break;
            }
        }
    }
#if CONFIG_DTRACE
    /*
     * Decide what latencies we suffered that are Dtrace events.
     * If we have set wait_interval, then we either spun or slept.
     * At least we get out from under the interlock before we record
     * which is the best we can do here to minimize the impact
     * of the tracing.
     * If we have set wait_interval to -1, then dtrace was not enabled when we
     * started sleeping/spinning so we don't record this event.
     */
    if (dtrace_ls_enabled == TRUE) {
        if (slept == 0) {
            LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_SPIN, lck,
                mach_absolute_time() - wait_interval, 1);
        } else {
            /*
             * For the blocking case, we also record if when we blocked
             * it was held for read or write, and how many readers.
             * Notice that above we recorded this before we dropped
             * the interlock so the count is accurate.
             */
            LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_BLOCK, lck,
                mach_absolute_time() - wait_interval, 1,
                (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
        }
    }
    LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lck, 1);
#endif /* CONFIG_DTRACE */
}
/*
 *    Routine:    lck_rw_done
 */
lck_rw_done(lck_rw_t *lock)
{
    uint32_t    data, prev;

    for (;;) {
        data = atomic_exchange_begin32(&lock->data, &prev, memory_order_release_smp);
        if (data & LCK_RW_INTERLOCK) {                  /* wait for interlock to clear */
            atomic_exchange_abort();
            lck_rw_interlock_spin(lock);
            continue;
        }
        if (data & LCK_RW_SHARED_MASK) {
            data -= LCK_RW_SHARED_READER;
            if ((data & LCK_RW_SHARED_MASK) == 0) {     /* if reader count has now gone to 0, check for waiters */
                goto check_waiters;
            }
        } else {                                        /* if reader count == 0, must be exclusive lock */
            if (data & LCK_RW_WANT_UPGRADE) {
                data &= ~(LCK_RW_WANT_UPGRADE);
            } else {
                if (data & LCK_RW_WANT_WRITE) {
                    data &= ~(LCK_RW_WANT_EXCL);
                } else {                                /* lock is not 'owned', panic */
                    panic("Releasing non-exclusive RW lock without a reader refcount!");
                }
            }
check_waiters:
            if (prev & LCK_RW_W_WAITING) {
                data &= ~(LCK_RW_W_WAITING);
                if ((prev & LCK_RW_PRIV_EXCL) == 0) {
                    data &= ~(LCK_RW_R_WAITING);
                }
            } else {
                data &= ~(LCK_RW_R_WAITING);
            }
        }
        if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_release_smp)) {
            break;
        }
        cpu_pause();
    }
    return lck_rw_done_gen(lock, prev);
}
/*
 *    Routine:    lck_rw_done_gen
 *
 *    called from lck_rw_done()
 *    prior_lock_state is the value in the 1st
 *    word of the lock at the time of a successful
 *    atomic compare and exchange with the new value...
 *    it represents the state of the lock before we
 *    decremented the rw_shared_count or cleared either
 *    rw_want_upgrade or rw_want_write and
 *    the lck_x_waiting bits... since the wrapper
 *    routine has already changed the state atomically,
 *    we just need to decide if we should
 *    wake up anyone and what value to return... we do
 *    this by examining the state of the lock before
 *    we changed it
 */
static lck_rw_type_t
lck_rw_done_gen(
    lck_rw_t    *lck,
    uint32_t    prior_lock_state)
{
    lck_rw_t       *fake_lck;
    lck_rw_type_t  lock_type;
    thread_t       thread;
    uint32_t       rwlock_count;

    thread = current_thread();
    rwlock_count = thread->rwlock_count--;
    fake_lck = (lck_rw_t *)&prior_lock_state;

    if (lck->lck_rw_can_sleep) {
        /*
         * prior_lock state is a snapshot of the 1st word of the
         * lock in question... we'll fake up a pointer to it
         * and carefully not access anything beyond whats defined
         * in the first word of a lck_rw_t
         */
        if (fake_lck->lck_rw_shared_count <= 1) {
            if (fake_lck->lck_w_waiting) {
                thread_wakeup(RW_LOCK_WRITER_EVENT(lck));
            }

            if (!(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting) && fake_lck->lck_r_waiting) {
                thread_wakeup(RW_LOCK_READER_EVENT(lck));
            }
        }

        if (rwlock_count == 0) {
            panic("rw lock count underflow for thread %p", thread);
        }

        /* Check if dropping the lock means that we need to unpromote */

        if ((rwlock_count == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
            /* sched_flags checked without lock, but will be rechecked while clearing */
            lck_rw_clear_promotion(thread, unslide_for_kdebug(lck));
        }
    }
    if (fake_lck->lck_rw_shared_count) {
        lock_type = LCK_RW_TYPE_SHARED;
    } else {
        lock_type = LCK_RW_TYPE_EXCLUSIVE;
    }

#if CONFIG_DTRACE
    LOCKSTAT_RECORD(LS_LCK_RW_DONE_RELEASE, lck, lock_type == LCK_RW_TYPE_SHARED ? 0 : 1);
#endif

    return lock_type;
}
/*
 *    Routine:    lck_rw_unlock
 */
    lck_rw_type_t    lck_rw_type)
{
    if (lck_rw_type == LCK_RW_TYPE_SHARED) {
        lck_rw_unlock_shared(lck);
    } else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) {
        lck_rw_unlock_exclusive(lck);
    } else {
        panic("lck_rw_unlock(): Invalid RW lock type: %d\n", lck_rw_type);
    }
}

/*
 *    Routine:    lck_rw_unlock_shared
 */
void
lck_rw_unlock_shared(
    lck_rw_t    *lck)
{
    lck_rw_type_t    ret;

    assertf(lck->lck_rw_shared_count > 0, "lck %p has shared_count=0x%x", lck, lck->lck_rw_shared_count);
    ret = lck_rw_done(lck);

    if (ret != LCK_RW_TYPE_SHARED) {
        panic("lck_rw_unlock_shared(): lock %p held in mode: %d\n", lck, ret);
    }
}

/*
 *    Routine:    lck_rw_unlock_exclusive
 */
void
lck_rw_unlock_exclusive(
    lck_rw_t    *lck)
{
    lck_rw_type_t    ret;

    ret = lck_rw_done(lck);

    if (ret != LCK_RW_TYPE_EXCLUSIVE) {
        panic("lck_rw_unlock_exclusive(): lock held in mode: %d\n", ret);
    }
}

/*
 *    Routine:    lck_rw_lock
 */
    lck_rw_type_t    lck_rw_type)
{
    if (lck_rw_type == LCK_RW_TYPE_SHARED) {
        lck_rw_lock_shared(lck);
    } else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) {
        lck_rw_lock_exclusive(lck);
    } else {
        panic("lck_rw_lock(): Invalid RW lock type: %x\n", lck_rw_type);
    }
}

/*
 *    Routine:    lck_rw_lock_shared
 */
void
lck_rw_lock_shared(lck_rw_t *lock)
{
    uint32_t    data, prev;

    current_thread()->rwlock_count++;
    for (;;) {
        data = atomic_exchange_begin32(&lock->data, &prev, memory_order_acquire_smp);
        if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK)) {
            atomic_exchange_abort();
            if (lock->lck_rw_can_sleep) {
                lck_rw_lock_shared_gen(lock);
                break;
            }
            cpu_pause();
            continue;
        }
        data += LCK_RW_SHARED_READER;
        if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_acquire_smp)) {
            break;
        }
        cpu_pause();
    }
#if CONFIG_DTRACE
    LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED);
#endif /* CONFIG_DTRACE */
}
/*
 *    Routine:    lck_rw_lock_shared_gen
 *    Function:
 *        assembly fast path code has determined that this lock
 *        is held exclusively... this is where we spin/block
 *        until we can acquire the lock in the shared mode
 */
static void
lck_rw_lock_shared_gen(
    lck_rw_t    *lck)
{
    __kdebug_only uintptr_t    trace_lck = unslide_for_kdebug(lck);
    uint64_t          deadline = 0;
    int               gotlock = 0;
    int               slept = 0;
    wait_result_t     res = 0;
    boolean_t         istate = -1;

#if CONFIG_DTRACE
    uint64_t wait_interval = 0;
    int readers_at_sleep = 0;
    boolean_t dtrace_ls_initialized = FALSE;
    boolean_t dtrace_rwl_shared_spin, dtrace_rwl_shared_block, dtrace_ls_enabled = FALSE;
#endif

    while (!lck_rw_grab_shared(lck)) {
#if CONFIG_DTRACE
        if (dtrace_ls_initialized == FALSE) {
            dtrace_ls_initialized = TRUE;
            dtrace_rwl_shared_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_SPIN] != 0);
            dtrace_rwl_shared_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_BLOCK] != 0);
            dtrace_ls_enabled = dtrace_rwl_shared_spin || dtrace_rwl_shared_block;
            if (dtrace_ls_enabled) {
                /*
                 * Either sleeping or spinning is happening,
                 * start a timing of our delay interval now.
                 */
                readers_at_sleep = lck->lck_rw_shared_count;
                wait_interval = mach_absolute_time();
            }
        }
#endif
        istate = ml_get_interrupts_enabled();

        deadline = lck_rw_deadline_for_spin(lck);

        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_START,
            trace_lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, 0, 0);

        while (((gotlock = lck_rw_grab_shared(lck)) == 0) && mach_absolute_time() < deadline) {
            lck_rw_lock_pause(istate);
        }

        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_END,
            trace_lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, gotlock, 0);

        if (gotlock) {
            break;
        }
        /*
         * if we get here, the deadline has expired w/o us
         * being able to grab the lock for read
         * check to see if we're allowed to do a thread_block
         */
        if (lck->lck_rw_can_sleep) {
            istate = lck_interlock_lock(lck);

            if ((lck->lck_rw_want_write || lck->lck_rw_want_upgrade) &&
                ((lck->lck_rw_shared_count == 0) || lck->lck_rw_priv_excl)) {
                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_START,
                    trace_lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, 0, 0);

                lck->lck_r_waiting = TRUE;

                thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockRead);
                res = assert_wait(RW_LOCK_READER_EVENT(lck),
                    THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
                lck_interlock_unlock(lck, istate);

                if (res == THREAD_WAITING) {
                    res = thread_block(THREAD_CONTINUE_NULL);
                    slept++;
                }
                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_END,
                    trace_lck, res, slept, 0, 0);
            } else {
                lck->lck_rw_shared_count++;
                lck_interlock_unlock(lck, istate);
                break;
            }
        }
    }

#if CONFIG_DTRACE
    if (dtrace_ls_enabled == TRUE) {
        if (slept == 0) {
            LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_SPIN, lck, mach_absolute_time() - wait_interval, 0);
        } else {
            LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_BLOCK, lck,
                mach_absolute_time() - wait_interval, 0,
                (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
        }
    }
    LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lck, 0);
#endif /* CONFIG_DTRACE */
}
/*
 *    Routine:    lck_rw_lock_exclusive
 */
void
lck_rw_lock_exclusive(lck_rw_t *lock)
{
    current_thread()->rwlock_count++;
    if (atomic_test_and_set32(&lock->data,
        (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK),
        LCK_RW_WANT_EXCL, memory_order_acquire_smp, FALSE)) {
#if CONFIG_DTRACE
        LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
#endif /* CONFIG_DTRACE */
    } else {
        lck_rw_lock_exclusive_gen(lock);
    }
}

/*
 *    Routine:    lck_rw_lock_shared_to_exclusive
 */
boolean_t
lck_rw_lock_shared_to_exclusive(lck_rw_t *lock)
{
    uint32_t    data, prev;

    for (;;) {
        data = atomic_exchange_begin32(&lock->data, &prev, memory_order_acquire_smp);
        if (data & LCK_RW_INTERLOCK) {
            atomic_exchange_abort();
            lck_rw_interlock_spin(lock);
            continue;
        }
        if (data & LCK_RW_WANT_UPGRADE) {
            data -= LCK_RW_SHARED_READER;
            if ((data & LCK_RW_SHARED_MASK) == 0) {     /* we were the last reader */
                data &= ~(LCK_RW_W_WAITING);            /* so clear the wait indicator */
            }
            if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_acquire_smp)) {
                return lck_rw_lock_shared_to_exclusive_failure(lock, prev);
            }
        } else {
            data |= LCK_RW_WANT_UPGRADE;        /* ask for WANT_UPGRADE */
            data -= LCK_RW_SHARED_READER;       /* and shed our read count */
            if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_acquire_smp)) {
                break;
            }
        }
        cpu_pause();
    }
    /* we now own the WANT_UPGRADE */
    if (data & LCK_RW_SHARED_MASK) {            /* check to see if all of the readers are drained */
        lck_rw_lock_shared_to_exclusive_success(lock);    /* if not, we need to go wait */
    }
#if CONFIG_DTRACE
    LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lock, 0);
#endif
    return TRUE;
}
/*
 *    Routine:    lck_rw_lock_shared_to_exclusive_failure
 *    Function:
 *        assembly fast path code has already dropped our read
 *        count and determined that someone else owns 'lck_rw_want_upgrade'
 *        if 'lck_rw_shared_count' == 0, its also already dropped 'lck_w_waiting'
 *        all we need to do here is determine if a wakeup is needed
 */
static boolean_t
lck_rw_lock_shared_to_exclusive_failure(
    lck_rw_t    *lck,
    uint32_t    prior_lock_state)
{
    lck_rw_t    *fake_lck;
    thread_t    thread = current_thread();
    uint32_t    rwlock_count;

    /* Check if dropping the lock means that we need to unpromote */
    rwlock_count = thread->rwlock_count--;

    if (rwlock_count == 0) {
        panic("rw lock count underflow for thread %p", thread);
    }

    fake_lck = (lck_rw_t *)&prior_lock_state;

    if (fake_lck->lck_w_waiting && fake_lck->lck_rw_shared_count == 1) {
        /*
         *    Someone else has requested upgrade.
         *    Since we've released the read lock, wake
         *    him up if he's blocked waiting
         */
        thread_wakeup(RW_LOCK_WRITER_EVENT(lck));
    }

    if ((rwlock_count == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
        /* sched_flags checked without lock, but will be rechecked while clearing */
        lck_rw_clear_promotion(thread, unslide_for_kdebug(lck));
    }

    KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_NONE,
        VM_KERNEL_UNSLIDE_OR_PERM(lck), lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0);

    return FALSE;
}
/*
 *    Routine:    lck_rw_lock_shared_to_exclusive_success
 *    Function:
 *        assembly fast path code has already dropped our read
 *        count and successfully acquired 'lck_rw_want_upgrade'
 *        we just need to wait for the rest of the readers to drain
 *        and then we can return as the exclusive holder of this lock
 */
static boolean_t
lck_rw_lock_shared_to_exclusive_success(
    lck_rw_t    *lck)
{
    __kdebug_only uintptr_t    trace_lck = unslide_for_kdebug(lck);
    uint64_t          deadline = 0;
    int               slept = 0;
    int               still_shared = 0;
    wait_result_t     res;
    boolean_t         istate = -1;

#if CONFIG_DTRACE
    uint64_t wait_interval = 0;
    int readers_at_sleep = 0;
    boolean_t dtrace_ls_initialized = FALSE;
    boolean_t dtrace_rwl_shared_to_excl_spin, dtrace_rwl_shared_to_excl_block, dtrace_ls_enabled = FALSE;
#endif

    while (lck->lck_rw_shared_count != 0) {
#if CONFIG_DTRACE
        if (dtrace_ls_initialized == FALSE) {
            dtrace_ls_initialized = TRUE;
            dtrace_rwl_shared_to_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN] != 0);
            dtrace_rwl_shared_to_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK] != 0);
            dtrace_ls_enabled = dtrace_rwl_shared_to_excl_spin || dtrace_rwl_shared_to_excl_block;
            if (dtrace_ls_enabled) {
                /*
                 * Either sleeping or spinning is happening,
                 * start a timing of our delay interval now.
                 */
                readers_at_sleep = lck->lck_rw_shared_count;
                wait_interval = mach_absolute_time();
            }
        }
#endif
        istate = ml_get_interrupts_enabled();

        deadline = lck_rw_deadline_for_spin(lck);

        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_START,
            trace_lck, lck->lck_rw_shared_count, 0, 0, 0);

        while ((still_shared = lck->lck_rw_shared_count) && mach_absolute_time() < deadline) {
            lck_rw_lock_pause(istate);
        }

        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_END,
            trace_lck, lck->lck_rw_shared_count, 0, 0, 0);

        if (!still_shared) {
            break;
        }
        /*
         * if we get here, the deadline has expired w/o
         * the rw_shared_count having drained to 0
         * check to see if we're allowed to do a thread_block
         */
        if (lck->lck_rw_can_sleep) {
            istate = lck_interlock_lock(lck);

            if (lck->lck_rw_shared_count != 0) {
                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_START,
                    trace_lck, lck->lck_rw_shared_count, 0, 0, 0);

                lck->lck_w_waiting = TRUE;

                thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockUpgrade);
                res = assert_wait(RW_LOCK_WRITER_EVENT(lck),
                    THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
                lck_interlock_unlock(lck, istate);

                if (res == THREAD_WAITING) {
                    res = thread_block(THREAD_CONTINUE_NULL);
                    slept++;
                }
                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_END,
                    trace_lck, res, slept, 0, 0);
            } else {
                lck_interlock_unlock(lck, istate);
                break;
            }
        }
    }
#if CONFIG_DTRACE
    /*
     * We infer whether we took the sleep/spin path above by checking readers_at_sleep.
     */
    if (dtrace_ls_enabled == TRUE) {
        if (slept == 0) {
            LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN, lck, mach_absolute_time() - wait_interval, 0);
        } else {
            LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK, lck,
                mach_absolute_time() - wait_interval, 1,
                (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
        }
    }
    LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lck, 1);
#endif
    return TRUE;
}
/*
 *    Routine:    lck_rw_lock_exclusive_to_shared
 */
void
lck_rw_lock_exclusive_to_shared(lck_rw_t *lock)
{
    uint32_t    data, prev;

    for (;;) {
        data = atomic_exchange_begin32(&lock->data, &prev, memory_order_release_smp);
        if (data & LCK_RW_INTERLOCK) {
            atomic_exchange_abort();
            lck_rw_interlock_spin(lock);    /* wait for interlock to clear */
            continue;
        }
        data += LCK_RW_SHARED_READER;
        if (data & LCK_RW_WANT_UPGRADE) {
            data &= ~(LCK_RW_WANT_UPGRADE);
        } else {
            data &= ~(LCK_RW_WANT_EXCL);
        }
        if (!((prev & LCK_RW_W_WAITING) && (prev & LCK_RW_PRIV_EXCL))) {
            data &= ~(LCK_RW_W_WAITING);
        }
        if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_release_smp)) {
            break;
        }
        cpu_pause();
    }
    return lck_rw_lock_exclusive_to_shared_gen(lock, prev);
}

/*
 *    Routine:    lck_rw_lock_exclusive_to_shared_gen
 *    Function:
 *        assembly fast path has already dropped
 *        our exclusive state and bumped lck_rw_shared_count
 *        all we need to do here is determine if anyone
 *        needs to be awakened.
 */
static void
lck_rw_lock_exclusive_to_shared_gen(
    lck_rw_t    *lck,
    uint32_t    prior_lock_state)
{
    __kdebug_only uintptr_t    trace_lck = unslide_for_kdebug(lck);
    lck_rw_t    *fake_lck;

    fake_lck = (lck_rw_t *)&prior_lock_state;

    KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_START,
        trace_lck, fake_lck->lck_rw_want_write, fake_lck->lck_rw_want_upgrade, 0, 0);

    /*
     * don't wake up anyone waiting to take the lock exclusively
     * since we hold a read count... when the read count drops to 0,
     * the writers will be woken.
     *
     * wake up any waiting readers if we don't have any writers waiting,
     * or the lock is NOT marked as rw_priv_excl (writers have privilege)
     */
    if (!(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting) && fake_lck->lck_r_waiting) {
        thread_wakeup(RW_LOCK_READER_EVENT(lck));
    }

    KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_END,
        trace_lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, lck->lck_rw_shared_count, 0);

#if CONFIG_DTRACE
    LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_TO_SHARED_DOWNGRADE, lck, 0);
#endif
}
/*
 *    Routine:    lck_rw_try_lock
 */
    lck_rw_type_t    lck_rw_type)
{
    if (lck_rw_type == LCK_RW_TYPE_SHARED) {
        return lck_rw_try_lock_shared(lck);
    } else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) {
        return lck_rw_try_lock_exclusive(lck);
    } else {
        panic("lck_rw_try_lock(): Invalid rw lock type: %x\n", lck_rw_type);
    }
    return FALSE;
}

/*
 *    Routine:    lck_rw_try_lock_shared
 */
boolean_t
lck_rw_try_lock_shared(lck_rw_t *lock)
{
    uint32_t    data, prev;

    for (;;) {
        data = atomic_exchange_begin32(&lock->data, &prev, memory_order_acquire_smp);
        if (data & LCK_RW_INTERLOCK) {
            atomic_exchange_abort();
            lck_rw_interlock_spin(lock);
            continue;
        }
        if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) {
            atomic_exchange_abort();
            return FALSE;                       /* lock is busy */
        }
        data += LCK_RW_SHARED_READER;           /* Increment reader refcount */
        if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_acquire_smp)) {
            break;
        }
        cpu_pause();
    }
    current_thread()->rwlock_count++;
    /* There is a 3 instr window where preemption may not notice rwlock_count after cmpxchg */
#if CONFIG_DTRACE
    LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED);
#endif /* CONFIG_DTRACE */
    return TRUE;
}

/*
 *    Routine:    lck_rw_try_lock_exclusive
 */
boolean_t
lck_rw_try_lock_exclusive(lck_rw_t *lock)
{
    uint32_t    data, prev;

    for (;;) {
        data = atomic_exchange_begin32(&lock->data, &prev, memory_order_acquire_smp);
        if (data & LCK_RW_INTERLOCK) {
            atomic_exchange_abort();
            lck_rw_interlock_spin(lock);
            continue;
        }
        if (data & (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) {
            atomic_exchange_abort();
            return FALSE;                       /* can't get it */
        }
        data |= LCK_RW_WANT_EXCL;
        if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_acquire_smp)) {
            break;
        }
        cpu_pause();
    }
    current_thread()->rwlock_count++;
#if CONFIG_DTRACE
    LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
#endif /* CONFIG_DTRACE */
    return TRUE;
}
    switch (type) {
    case LCK_RW_ASSERT_SHARED:
        if (lck->lck_rw_shared_count != 0) {
            return;
        }
        break;
    case LCK_RW_ASSERT_EXCLUSIVE:
        if ((lck->lck_rw_want_write ||
             lck->lck_rw_want_upgrade) &&
            lck->lck_rw_shared_count == 0) {
            return;
        }
        break;
    case LCK_RW_ASSERT_HELD:
        if (lck->lck_rw_want_write ||
            lck->lck_rw_want_upgrade ||
            lck->lck_rw_shared_count != 0) {
            return;
        }
        break;
    case LCK_RW_ASSERT_NOTHELD:
        if (!(lck->lck_rw_want_write ||
              lck->lck_rw_want_upgrade ||
              lck->lck_rw_shared_count != 0)) {
            return;
        }
        break;
    default:
        break;
    }

    panic("rw lock (%p)%s held (mode=%u), first word %08x\n", lck, (type == LCK_RW_ASSERT_NOTHELD ? "" : " not"), type, *(uint32_t *)lck);

/* On return to userspace, this routine is called if the rwlock_count is somehow imbalanced */
void
lck_rw_clear_promotions_x86(thread_t thread)
{
#if MACH_LDEBUG
    /* It's fatal to leave a RW lock locked and return to userspace */
    panic("%u rw lock(s) held on return to userspace for thread %p", thread->rwlock_count, thread);
#else
    /* Paper over the issue */
    thread->rwlock_count = 0;
    lck_rw_clear_promotion(thread, 0);
#endif
}

boolean_t
lck_rw_lock_yield_shared(lck_rw_t *lck, boolean_t force_yield)
{
    lck_rw_assert(lck, LCK_RW_ASSERT_SHARED);

    if (lck->lck_rw_want_write || lck->lck_rw_want_upgrade || force_yield) {
        lck_rw_unlock_shared(lck);
        lck_rw_lock_shared(lck);
        return TRUE;
    }

    return FALSE;
}

/*
 *    Routine: kdp_lck_rw_lock_is_acquired_exclusive
 *    NOT SAFE: To be used only by kernel debugger to avoid deadlock.
 */
kdp_lck_rw_lock_is_acquired_exclusive(lck_rw_t *lck)
{
    if (not_in_kdp) {
        panic("panic: rw lock exclusive check done outside of kernel debugger");
    }
    return ((lck->lck_rw_want_upgrade || lck->lck_rw_want_write) && (lck->lck_rw_shared_count == 0)) ? TRUE : FALSE;
}
/*
 * Slow path routines for lck_mtx locking and unlocking functions.
 *
 * These functions were previously implemented in x86 assembly,
 * and some optimizations are in place in this C code to obtain code
 * as performant and compact as the assembly version.
 *
 * To avoid inlining these functions on the fast path, every function directly called by
 * the fast paths is marked __attribute__((noinline)).  They are also all implemented
 * in such a way that the fast path can tail-call into them.  That way the return address
 * does not need to be pushed on the caller's stack and stack optimization can happen in the caller.
 *
 * Slow path code is structured so that there are no calls to functions that return
 * in the context of the caller function, i.e. every function called is either a tail-call
 * function or an inline function.  The tail-call functions take fewer than six arguments,
 * so they can be passed in registers and do not need to be pushed on the stack.
 * This allows the compiler to avoid creating a stack frame for these functions.
 *
 * __improbable and __probable are used to compile the slow path code in such a way
 * that the fast path case needs as few jumps as possible,
 * making that case the most optimized even when falling through to the slow path.
 */
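
/*
 * Illustrative sketch (not part of the build): the fast-path/slow-path shape
 * described above.  example_lock_fast_path() and example_lock_slow_path() are
 * hypothetical names; the real pairs are the lck_mtx_* inline fast paths and
 * the __attribute__((noinline)) slow paths in this file.  The sketch only sets
 * the locked bit and omits owner tracking and statistics.
 */
#if 0
__attribute__((noinline))
static void
example_lock_slow_path(lck_mtx_t *lock)
{
    /* contended work: spin, set waiter bits, block, ... */
}

static inline void
example_lock_fast_path(lck_mtx_t *lock)
{
    uint32_t prev, state;

    state = atomic_exchange_begin32(&lock->lck_mtx_state, &prev, memory_order_acquire_smp);
    if (__improbable(state & (LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK))) {
        atomic_exchange_abort();
        return example_lock_slow_path(lock);   /* tail call: no extra frame in this caller */
    }
    state |= LCK_MTX_MLOCKED_MSK;
    if (__probable(atomic_exchange_complete32(&lock->lck_mtx_state, prev, state, memory_order_acquire_smp))) {
        return;                                /* uncontended acquire succeeded */
    }
    return example_lock_slow_path(lock);       /* lost the race: take the slow path */
}
#endif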
/*
 * Intel lock invariants:
 *
 * lck_mtx_waiters: contains the count of threads currently in the mutex waitqueue
 * lck_mtx_pri: contains the max priority of all waiters during a contention period
 *      not cleared on last unlock, but stomped over on next first contention
 * lck_mtx_promoted: set when the current lock owner has been promoted
 *      cleared when lock owner unlocks, set on acquire or wait.
 *
 * The lock owner is promoted to the max priority of all its waiters only if it
 * was a lower priority when it acquired or was an owner when a waiter waited.
 * Max priority is capped at MAXPRI_PROMOTE.
 *
 * The last waiter will not be promoted as it is woken up, but the last
 * lock owner may not have been the last thread to have been woken up depending on the
 * luck of the draw.  Therefore a last-owner may still have the promoted-on-wakeup
 * flag set.
 *
 * TODO: Figure out an algorithm for stopping a lock holder which is already at the right
 * priority from dropping priority in the future without having to take thread lock
 * on acquire.
 */
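
/*
 * Illustrative sketch (not part of the build): the invariants above restated
 * as checks.  example_assert_mtx_invariants() is hypothetical and exists only
 * to express the rules in code form against the real lck_mtx_t fields.
 */
#if 0
static void
example_assert_mtx_invariants(lck_mtx_t *mutex)
{
    /* a promotion can only be outstanding while someone owns the lock */
    if (mutex->lck_mtx_promoted) {
        assert(mutex->lck_mtx_owner != 0);
    }
    /* lck_mtx_pri is only meaningful while there are waiters: it is not
     * cleared on the last unlock, only overwritten on the next first
     * contention, so do not trust it when lck_mtx_waiters == 0 */
    if (mutex->lck_mtx_waiters != 0) {
        /* any promotion target is capped at MAXPRI_PROMOTE */
        assert(mutex->lck_mtx_pri <= MAXPRI_PROMOTE);
    }
}
#endif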
#if MACH_LDEBUG
extern zone_t lck_mtx_zone;
#endif

/*
 *    Routine:    lck_mtx_alloc_init
 */
#if MACH_LDEBUG
    if ((lck = (lck_mtx_t *)zalloc(lck_mtx_zone)) != 0) {
        lck_mtx_init(lck, grp, attr);
    }
#else
    if ((lck = (lck_mtx_t *)kalloc(sizeof(lck_mtx_t))) != 0) {
        lck_mtx_init(lck, grp, attr);
    }
#endif

/*
 *    Routine:    lck_mtx_free
 */
    lck_mtx_destroy(lck, grp);
#if MACH_LDEBUG
    zfree(lck_mtx_zone, lck);
#else
    kfree(lck, sizeof(lck_mtx_t));
#endif
/*
 *    Routine:    lck_mtx_ext_init
 */
    bzero((void *)lck, sizeof(lck_mtx_ext_t));

    if ((attr->lck_attr_val) & LCK_ATTR_DEBUG) {
        lck->lck_mtx_deb.type = MUTEX_TAG;
        lck->lck_mtx_attr |= LCK_MTX_ATTR_DEBUG;
    }

    lck->lck_mtx_grp = grp;

    if (grp->lck_grp_attr & LCK_GRP_ATTR_STAT) {
        lck->lck_mtx_attr |= LCK_MTX_ATTR_STAT;
    }

    lck->lck_mtx.lck_mtx_is_ext = 1;
    lck->lck_mtx.lck_mtx_pad32 = 0xFFFFFFFF;

/*
 *    Routine:    lck_mtx_init
 */
    lck_mtx_ext_t    *lck_ext;
    lck_attr_t       *lck_attr;

    if (attr != LCK_ATTR_NULL) {
        lck_attr = attr;
    } else {
        lck_attr = &LockDefaultLckAttr;
    }

    if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
        if ((lck_ext = (lck_mtx_ext_t *)kalloc(sizeof(lck_mtx_ext_t))) != 0) {
            lck_mtx_ext_init(lck_ext, grp, lck_attr);
            lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
            lck->lck_mtx_ptr = lck_ext;
        }
    } else {
        lck->lck_mtx_owner = 0;
        lck->lck_mtx_state = 0;
    }
    lck->lck_mtx_pad32 = 0xFFFFFFFF;
    lck_grp_reference(grp);
    lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
/*
 *    Routine:    lck_mtx_init_ext
 */
    lck_mtx_ext_t    *lck_ext,
    lck_attr_t       *lck_attr;

    if (attr != LCK_ATTR_NULL) {
        lck_attr = attr;
    } else {
        lck_attr = &LockDefaultLckAttr;
    }

    if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
        lck_mtx_ext_init(lck_ext, grp, lck_attr);
        lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
        lck->lck_mtx_ptr = lck_ext;
    } else {
        lck->lck_mtx_owner = 0;
        lck->lck_mtx_state = 0;
    }
    lck->lck_mtx_pad32 = 0xFFFFFFFF;

    lck_grp_reference(grp);
    lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);

static void
lck_mtx_lock_mark_destroyed(
    lck_mtx_t    *mutex,
    boolean_t    indirect)
{
    uint32_t state;

    if (indirect) {
        /* convert to destroyed state */
        ordered_store_mtx_state_release(mutex, LCK_MTX_TAG_DESTROYED);
        return;
    }

    state = ordered_load_mtx_state(mutex);
    lck_mtx_interlock_lock(mutex, &state);

    ordered_store_mtx_state_release(mutex, LCK_MTX_TAG_DESTROYED);

    enable_preemption();
}

/*
 *    Routine:    lck_mtx_destroy
 */
    if (lck->lck_mtx_tag == LCK_MTX_TAG_DESTROYED) {
        return;
    }
    lck_mtx_assert(lck, LCK_MTX_ASSERT_NOTOWNED);

    indirect = (lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT);

    lck_mtx_lock_mark_destroyed(lck, indirect);

    if (indirect) {
        kfree(lck->lck_mtx_ptr, sizeof(lck_mtx_ext_t));
    }
    lck_grp_lckcnt_decr(grp, LCK_TYPE_MTX);
    lck_grp_deallocate(grp);
#if DEVELOPMENT | DEBUG
__attribute__((noinline))
static void
lck_mtx_owner_check_panic(
    lck_mtx_t    *lock)
{
    thread_t owner = (thread_t)lock->lck_mtx_owner;
    panic("Mutex unlock attempted from non-owner thread. Owner=%p lock=%p", owner, lock);
}
#endif

__attribute__((always_inline))
static boolean_t
get_indirect_mutex(
    lck_mtx_t    **lock,
    uint32_t     *state)
{
    *lock = &((*lock)->lck_mtx_ptr->lck_mtx);
    *state = ordered_load_mtx_state(*lock);
    return TRUE;
}

/*
 * Routine:    lck_mtx_unlock_slow
 *
 * Unlocks a mutex held by current thread.
 *
 * It will wake up waiters if necessary and
 * drop promotions.
 *
 * Interlock can be held.
 */
__attribute__((noinline))
void
lck_mtx_unlock_slow(
    lck_mtx_t    *lock)
{
    thread_t     thread;
    uint32_t     state, prev;
    boolean_t    indirect = FALSE;

    state = ordered_load_mtx_state(lock);

    /* Is this an indirect mutex? */
    if (__improbable(state == LCK_MTX_TAG_INDIRECT)) {
        indirect = get_indirect_mutex(&lock, &state);
    }

    thread = current_thread();

#if DEVELOPMENT | DEBUG
    thread_t owner = (thread_t)lock->lck_mtx_owner;
    if (__improbable(owner != thread)) {
        return lck_mtx_owner_check_panic(lock);
    }
#endif

    /* check if it is held as a spinlock */
    if (__improbable((state & LCK_MTX_MLOCKED_MSK) == 0)) {
        goto unlock;
    }

    lck_mtx_interlock_lock_clear_flags(lock, LCK_MTX_MLOCKED_MSK, &state);

unlock:
    /* preemption disabled, interlock held and mutex not held */

    /* clear owner */
    ordered_store_mtx_owner(lock, 0);
    /* keep original state in prev for later evaluation */
    prev = state;
    /* release interlock, promotion and clear spin flag */
    state &= (~(LCK_MTX_ILOCKED_MSK | LCK_MTX_SPIN_MSK | LCK_MTX_PROMOTED_MSK));
    if ((state & LCK_MTX_WAITERS_MSK)) {
        state -= LCK_MTX_WAITER;        /* decrement waiter count */
    }
    ordered_store_mtx_state_release(lock, state);    /* since I own the interlock, I don't need an atomic update */

#if MACH_LDEBUG
    /* perform lock statistics after drop to prevent delay */
    thread->mutex_count--;              /* lock statistic */
#endif /* MACH_LDEBUG */

    /* check if there are waiters to wake up or priority to drop */
    if ((prev & (LCK_MTX_PROMOTED_MSK | LCK_MTX_WAITERS_MSK))) {
        return lck_mtx_unlock_wakeup_tail(lock, prev, indirect);
    }

    /* re-enable preemption */
    lck_mtx_unlock_finish_inline(lock, FALSE);
}
2412 #define LCK_MTX_LCK_WAIT_CODE 0x20
2413 #define LCK_MTX_LCK_WAKEUP_CODE 0x21
2414 #define LCK_MTX_LCK_SPIN_CODE 0x22
2415 #define LCK_MTX_LCK_ACQUIRE_CODE 0x23
2416 #define LCK_MTX_LCK_DEMOTE_CODE 0x24
2419 * Routine: lck_mtx_unlock_wakeup_tail
2421 * Invoked on unlock when there is
2422 * contention, i.e. the assembly routine sees
2423 * that mutex->lck_mtx_waiters != 0 or
2424 * that mutex->lck_mtx_promoted != 0
2426 * neither the mutex nor the interlock is held
2428 * Note that this routine might not be called if there are pending
2429 * waiters which have previously been woken up, and they didn't
2430 * end up boosting the old owner.
2432 * assembly routine previously did the following to mutex:
2433 * (after saving the state in prior_lock_state)
2434 * cleared lck_mtx_promoted
2435 * decremented lck_mtx_waiters if nonzero
2437 * This function needs to be called as a tail call
2438 * to optimize the compiled code.
2440 __attribute__((noinline))
2442 lck_mtx_unlock_wakeup_tail(
2444     int prior_lock_state,
2447     __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(mutex);
2451 * prior_lock_state is a snapshot of the 2nd word of the
2452 * lock in question... we'll fake up a lock with the bits
2453 * copied into place and carefully not access anything
2454 * beyond what's defined in the second word of a lck_mtx_t
2456     fake_lck.lck_mtx_state = prior_lock_state;
2458     KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_START,
2459         trace_lck, fake_lck.lck_mtx_promoted, fake_lck.lck_mtx_waiters, fake_lck.lck_mtx_pri, 0);
2461     if (__probable(fake_lck.lck_mtx_waiters)) {
2462         kern_return_t did_wake;
2464         if (fake_lck.lck_mtx_waiters > 1) {
2465             did_wake = thread_wakeup_one_with_pri(LCK_MTX_EVENT(mutex), fake_lck.lck_mtx_pri);
2467             did_wake = thread_wakeup_one(LCK_MTX_EVENT(mutex));
2470 * The waiters count always precisely matches the number of threads on the waitqueue.
2471 * i.e. we should never see did_wake == KERN_NOT_WAITING.
2473         assert(did_wake == KERN_SUCCESS);
2476     /* If lck_mtx_promoted was set, then the owner (this thread) definitely holds a promotion */
2477     if (__improbable(fake_lck.lck_mtx_promoted)) {
2478         thread_t thread = current_thread();
2480         spl_t s = splsched();
2481         thread_lock(thread);
2483         KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_DEMOTE_CODE) | DBG_FUNC_NONE,
2484             thread_tid(thread), thread->promotions, thread->sched_flags & TH_SFLAG_PROMOTED, 0, 0);
2485         assert(thread->was_promoted_on_wakeup == 0);
2486         assert(thread->promotions > 0);
2488         assert_promotions_invariant(thread);
2490         if (--thread->promotions == 0) {
2491             sched_thread_unpromote(thread, trace_lck);
2494         assert_promotions_invariant(thread);
2496         thread_unlock(thread);
2500     KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_END,
2501         trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);
2503     lck_mtx_unlock_finish_inline(mutex, indirect);
2507 * Routine: lck_mtx_lock_acquire_x86
2509 * Invoked on acquiring the mutex when there is
2510 * contention (i.e. the assembly routine sees that
2511 * mutex->lck_mtx_waiters != 0 or
2512 * thread->was_promoted_on_wakeup != 0)...
2514 * mutex is owned... interlock is held... preemption is disabled
2516 __attribute__((always_inline))
2518 lck_mtx_lock_acquire_inline(
2521     __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(mutex);
2524     KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_ACQUIRE_CODE) | DBG_FUNC_START,
2525         trace_lck, thread->was_promoted_on_wakeup, mutex->lck_mtx_waiters, mutex->lck_mtx_pri, 0);
2527     if (mutex->lck_mtx_waiters) {
2528         priority = mutex->lck_mtx_pri;
2530         priority = 0;   /* not worth resetting lck_mtx_pri here, it will be reset by next waiter */
2532     /* the priority must have been set correctly by wait */
2533     assert(priority <= MAXPRI_PROMOTE);
2534     assert(priority == 0 || priority >= BASEPRI_DEFAULT);
2536     /* if the mutex wasn't owned, then the owner wasn't promoted */
2537     assert(mutex->lck_mtx_promoted == 0);
2539     thread_t thread = (thread_t)mutex->lck_mtx_owner;  /* faster than current_thread() */
2541     if (thread->sched_pri < priority || thread->was_promoted_on_wakeup) {
2542         spl_t s = splsched();
2543         thread_lock(thread);
2545         if (thread->was_promoted_on_wakeup) {
2546             assert(thread->promotions > 0);
2549         /* Intel only promotes if priority goes up */
2550         if (thread->sched_pri < priority && thread->promotion_priority < priority) {
2551             /* Remember that I need to drop this promotion on unlock */
2552             mutex->lck_mtx_promoted = 1;
2554             if (thread->promotions++ == 0) {
2555                 /* This is the first promotion for the owner */
2556                 sched_thread_promote_to_pri(thread, priority, trace_lck);
2559 * Holder was previously promoted due to a different mutex,
2560 * raise to match this one.
2561 * Or, this thread was promoted on wakeup but someone else
2562 * later contended on mutex at higher priority before we got here
2564                 sched_thread_update_promotion_to_pri(thread, priority, trace_lck);
2568         if (thread->was_promoted_on_wakeup) {
2569             thread->was_promoted_on_wakeup = 0;
2570             if (--thread->promotions == 0) {
2571                 sched_thread_unpromote(thread, trace_lck);
2575         thread_unlock(thread);
2578     KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_ACQUIRE_CODE) | DBG_FUNC_END,
2579         trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);
2583 lck_mtx_lock_acquire_x86(
2586     return lck_mtx_lock_acquire_inline(mutex);
2590 * Tail call helpers for lock functions that perform
2591 * lck_mtx_lock_acquire followed by the caller's finish routine, to optimize
2592 * the caller's compiled code.
2595 __attribute__((noinline))
2597 lck_mtx_lock_acquire_tail(
2601     lck_mtx_lock_acquire_inline(mutex);
2602     lck_mtx_lock_finish_inline(mutex, ordered_load_mtx_state(mutex), indirect);
2605 __attribute__((noinline))
2607 lck_mtx_try_lock_acquire_tail(
2610     lck_mtx_lock_acquire_inline(mutex);
2611     lck_mtx_try_lock_finish_inline(mutex, ordered_load_mtx_state(mutex));
2616 __attribute__((noinline))
2618 lck_mtx_convert_spin_acquire_tail(
2621     lck_mtx_lock_acquire_inline(mutex);
2622     lck_mtx_convert_spin_finish_inline(mutex, ordered_load_mtx_state(mutex));
2629     lck_mtx_ilk_unlock_inline(mutex, ordered_load_mtx_state(mutex));
2634 lck_mtx_interlock_lock_set_and_clear_flags(
2638     uint32_t *new_state)
2640     uint32_t state, prev;
2644     /* have to wait for interlock to clear */
2645     while (__improbable(state & (LCK_MTX_ILOCKED_MSK | xor_flags))) {
2647         state = ordered_load_mtx_state(mutex);
2649     prev = state;                               /* prev contains snapshot for exchange */
2650     state |= LCK_MTX_ILOCKED_MSK | xor_flags;   /* pick up interlock */
2651     state &= ~and_flags;                        /* clear flags */
2653     disable_preemption();
2654     if (atomic_compare_exchange32(&mutex->lck_mtx_state, prev, state, memory_order_acquire_smp, FALSE)) {
2657     enable_preemption();
2659     state = ordered_load_mtx_state(mutex);
2666 lck_mtx_interlock_lock_clear_flags(
2669     uint32_t *new_state)
2671     return lck_mtx_interlock_lock_set_and_clear_flags(mutex, 0, and_flags, new_state);
2675 lck_mtx_interlock_lock(
2677     uint32_t *new_state)
2679     return lck_mtx_interlock_lock_set_and_clear_flags(mutex, 0, 0, new_state);
2683 lck_mtx_interlock_try_lock_set_flags(
2686     uint32_t *new_state)
2688     uint32_t state, prev;
2691     /* have to wait for interlock to clear */
2692     if (state & (LCK_MTX_ILOCKED_MSK | or_flags)) {
2695     prev = state;                               /* prev contains snapshot for exchange */
2696     state |= LCK_MTX_ILOCKED_MSK | or_flags;    /* pick up interlock */
2697     disable_preemption();
2698     if (atomic_compare_exchange32(&mutex->lck_mtx_state, prev, state, memory_order_acquire_smp, FALSE)) {
2703     enable_preemption();
2708 lck_mtx_interlock_try_lock(
2710     uint32_t *new_state)
2712     return lck_mtx_interlock_try_lock_set_flags(mutex, 0, new_state);
2716 lck_mtx_interlock_try_lock_disable_interrupts(
2722     *istate = ml_set_interrupts_enabled(FALSE);
2723     state = ordered_load_mtx_state(mutex);
2725     if (lck_mtx_interlock_try_lock(mutex, &state)) {
2728     ml_set_interrupts_enabled(*istate);
2734 lck_mtx_interlock_unlock_enable_interrupts(
2738     lck_mtx_ilk_unlock(mutex);
2739     ml_set_interrupts_enabled(istate);
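/*
 * Sketch of how the two helpers above are intended to be paired; the real
 * caller is lck_mtx_lock_spinwait_x86() further down in this file.
 *
 *	boolean_t istate;
 *	if (lck_mtx_interlock_try_lock_disable_interrupts(mutex, &istate)) {
 *		... briefly inspect lck_mtx_owner while interrupts are off ...
 *		lck_mtx_interlock_unlock_enable_interrupts(mutex, istate);
 *	}
 */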
2742 __attribute__((noinline))
2744 lck_mtx_lock_contended(
2747     boolean_t *first_miss)
2749     lck_mtx_spinwait_ret_type_t ret;
2756         lck_grp_mtx_update_miss((struct _lck_mtx_ext_ *)lock, first_miss);
2759     ret = lck_mtx_lock_spinwait_x86(lock);
2760     state = ordered_load_mtx_state(lock);
2762     case LCK_MTX_SPINWAIT_NO_SPIN:
2764 * owner not on core, lck_mtx_lock_spinwait_x86 didn't even
2768             lck_grp_mtx_update_direct_wait((struct _lck_mtx_ext_ *)lock);
2771         /* just fall through to the LCK_MTX_SPINWAIT_SPUN case */
2772     case LCK_MTX_SPINWAIT_SPUN:
2774 * mutex not acquired but lck_mtx_lock_spinwait_x86 tried to spin
2775 * interlock not held
2777         lck_mtx_interlock_lock(lock, &state);
2778         assert(state & LCK_MTX_ILOCKED_MSK);
2780         if (state & LCK_MTX_MLOCKED_MSK) {
2782                 lck_grp_mtx_update_wait((struct _lck_mtx_ext_ *)lock, first_miss);
2784             lck_mtx_lock_wait_x86(lock);
2786 * interlock is not held here.
2790             /* grab the mutex */
2791             state |= LCK_MTX_MLOCKED_MSK;
2792             ordered_store_mtx_state_release(lock, state);
2793             thread = current_thread();
2794             ordered_store_mtx_owner(lock, (uintptr_t)thread);
2797             thread->mutex_count++;
2799 #endif  /* MACH_LDEBUG */
2803     case LCK_MTX_SPINWAIT_ACQUIRED:
2805 * mutex has been acquired by lck_mtx_lock_spinwait_x86
2806 * interlock is held and preemption disabled
2807 * owner is set and mutex marked as locked
2808 * statistics updated too
2812         panic("lck_mtx_lock_spinwait_x86 returned %d for mutex %p\n", ret, lock);
2816 * interlock is already acquired here
2819     /* mutex has been acquired */
2820     thread = (thread_t)lock->lck_mtx_owner;
2821     if (state & LCK_MTX_WAITERS_MSK || thread->was_promoted_on_wakeup) {
2822         return lck_mtx_lock_acquire_tail(lock, indirect);
2825     /* release the interlock */
2826     lck_mtx_lock_finish_inline(lock, ordered_load_mtx_state(lock), indirect);
2830 * Helper noinline functions that call panic(), kept out of line
2831 * to keep the callers' compiled code small.
2834 __attribute__((noinline))
2839     panic("trying to interlock destroyed mutex (%p)", lock);
2842 __attribute__((noinline))
2844 lck_mtx_try_destroyed(
2847     panic("trying to interlock destroyed mutex (%p)", lock);
2851 __attribute__((always_inline))
2853 lck_mtx_lock_wait_interlock_to_clear(
2855     uint32_t* new_state)
2861         state = ordered_load_mtx_state(lock);
2862         if (!(state & (LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK))) {
2866         if (state & LCK_MTX_MLOCKED_MSK) {
2867             /* if it is held as mutex, just fail */
2873 __attribute__((always_inline))
2875 lck_mtx_try_lock_wait_interlock_to_clear(
2877     uint32_t* new_state)
2883         state = ordered_load_mtx_state(lock);
2884         if (state & (LCK_MTX_MLOCKED_MSK | LCK_MTX_SPIN_MSK)) {
2885             /* if it is held as mutex or spin, just fail */
2888         if (!(state & LCK_MTX_ILOCKED_MSK)) {
2896 * Routine: lck_mtx_lock_slow
2898 * Locks a mutex for current thread.
2899 * If the lock is contended this function might
2902 * Called with interlock not held.
2904 __attribute__((noinline))
2909     boolean_t indirect = FALSE;
2913     state = ordered_load_mtx_state(lock);
2915     /* is the interlock or mutex held */
2916     if (__improbable(state & ((LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK)))) {
2918 * Note: both LCK_MTX_TAG_DESTROYED and LCK_MTX_TAG_INDIRECT
2919 * have LCK_MTX_ILOCKED_MSK and LCK_MTX_MLOCKED_MSK
2920 * set in state (state == lck_mtx_tag)
2924         /* is the mutex already held and not indirect */
2925         if (__improbable(!(state & LCK_MTX_ILOCKED_MSK))) {
2926             /* no, must have been the mutex */
2927             return lck_mtx_lock_contended(lock, indirect, &first_miss);
2930         /* check to see if it is marked destroyed */
2931         if (__improbable(state == LCK_MTX_TAG_DESTROYED)) {
2932             return lck_mtx_destroyed(lock);
2935         /* Is this an indirect mutex? */
2936         if (__improbable(state == LCK_MTX_TAG_INDIRECT)) {
2937             indirect = get_indirect_mutex(&lock, &state);
2940             lck_grp_mtx_update_held((struct _lck_mtx_ext_ *)lock);
2942             if (state & LCK_MTX_SPIN_MSK) {
2943                 /* M_SPIN_MSK was set, so M_ILOCKED_MSK must also be present */
2944                 assert(state & LCK_MTX_ILOCKED_MSK);
2945                 lck_grp_mtx_update_miss((struct _lck_mtx_ext_ *)lock, &first_miss);
2949         if (!lck_mtx_lock_wait_interlock_to_clear(lock, &state)) {
2950             return lck_mtx_lock_contended(lock, indirect, &first_miss);
2954     /* no - can't be INDIRECT, DESTROYED or locked */
2955     while (__improbable(!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_MLOCKED_MSK, &state))) {
2956         if (!lck_mtx_lock_wait_interlock_to_clear(lock, &state)) {
2957             return lck_mtx_lock_contended(lock, indirect, &first_miss);
2961     /* lock and interlock acquired */
2963     thread_t thread = current_thread();
2964     /* record owner of mutex */
2965     ordered_store_mtx_owner(lock, (uintptr_t)thread);
2969     thread->mutex_count++;  /* lock statistic */
2973 * Check if there are waiters whose
2974 * priority we should inherit.
2976     if (__improbable(state & LCK_MTX_WAITERS_MSK)) {
2977         return lck_mtx_lock_acquire_tail(lock, indirect);
2980     /* release the interlock */
2981     lck_mtx_lock_finish_inline(lock, ordered_load_mtx_state(lock), indirect);
2986 __attribute__((noinline))
2988 lck_mtx_try_lock_slow(
2991     boolean_t indirect = FALSE;
2995     state = ordered_load_mtx_state(lock);
2997     /* is the interlock or mutex held */
2998     if (__improbable(state & ((LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK)))) {
3000 * Note: both LCK_MTX_TAG_DESTROYED and LCK_MTX_TAG_INDIRECT
3001 * have LCK_MTX_ILOCKED_MSK and LCK_MTX_MLOCKED_MSK
3002 * set in state (state == lck_mtx_tag)
3005         /* is the mutex already held and not indirect */
3006         if (__improbable(!(state & LCK_MTX_ILOCKED_MSK))) {
3010         /* check to see if it is marked destroyed */
3011         if (__improbable(state == LCK_MTX_TAG_DESTROYED)) {
3012             return lck_mtx_try_destroyed(lock);
3015         /* Is this an indirect mutex? */
3016         if (__improbable(state == LCK_MTX_TAG_INDIRECT)) {
3017             indirect = get_indirect_mutex(&lock, &state);
3020             lck_grp_mtx_update_held((struct _lck_mtx_ext_ *)lock);
3023         if (!lck_mtx_try_lock_wait_interlock_to_clear(lock, &state)) {
3025                 lck_grp_mtx_update_miss((struct _lck_mtx_ext_ *)lock, &first_miss);
3031     /* no - can't be INDIRECT, DESTROYED or locked */
3032     while (__improbable(!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_MLOCKED_MSK, &state))) {
3033         if (!lck_mtx_try_lock_wait_interlock_to_clear(lock, &state)) {
3035                 lck_grp_mtx_update_miss((struct _lck_mtx_ext_ *)lock, &first_miss);
3041     /* lock and interlock acquired */
3043     thread_t thread = current_thread();
3044     /* record owner of mutex */
3045     ordered_store_mtx_owner(lock, (uintptr_t)thread);
3049     thread->mutex_count++;  /* lock statistic */
3053 * Check if there are waiters whose
3054 * priority we should inherit.
3056     if (__improbable(state & LCK_MTX_WAITERS_MSK)) {
3057         return lck_mtx_try_lock_acquire_tail(lock);
3060     /* release the interlock */
3061     lck_mtx_try_lock_finish_inline(lock, ordered_load_mtx_state(lock));
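/*
 * Illustrative caller-side sketch (assumption: the exported lck_mtx_try_lock()
 * fast path falls back to lck_mtx_try_lock_slow() above when the lock is busy):
 *
 *	if (lck_mtx_try_lock(m)) {
 *		... protected work ...
 *		lck_mtx_unlock(m);
 *	} else {
 *		... lock not acquired, take the fallback path ...
 *	}
 */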
3066 __attribute__((noinline))
3068 lck_mtx_lock_spin_slow(
3071     boolean_t indirect = FALSE;
3075     state = ordered_load_mtx_state(lock);
3077     /* is the interlock or mutex held */
3078     if (__improbable(state & ((LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK)))) {
3080 * Note: both LCK_MTX_TAG_DESTROYED and LCK_MTX_TAG_INDIRECT
3081 * have LCK_MTX_ILOCKED_MSK and LCK_MTX_MLOCKED_MSK
3082 * set in state (state == lck_mtx_tag)
3086         /* is the mutex already held and not indirect */
3087         if (__improbable(!(state & LCK_MTX_ILOCKED_MSK))) {
3088             /* no, must have been the mutex */
3089             return lck_mtx_lock_contended(lock, indirect, &first_miss);
3092         /* check to see if it is marked destroyed */
3093         if (__improbable(state == LCK_MTX_TAG_DESTROYED)) {
3094             return lck_mtx_destroyed(lock);
3097         /* Is this an indirect mutex? */
3098         if (__improbable(state == LCK_MTX_TAG_INDIRECT)) {
3099             indirect = get_indirect_mutex(&lock, &state);
3102             lck_grp_mtx_update_held((struct _lck_mtx_ext_ *)lock);
3104             if (state & LCK_MTX_SPIN_MSK) {
3105                 /* M_SPIN_MSK was set, so M_ILOCKED_MSK must also be present */
3106                 assert(state & LCK_MTX_ILOCKED_MSK);
3107                 lck_grp_mtx_update_miss((struct _lck_mtx_ext_ *)lock, &first_miss);
3111         if (!lck_mtx_lock_wait_interlock_to_clear(lock, &state)) {
3112             return lck_mtx_lock_contended(lock, indirect, &first_miss);
3116     /* no - can't be INDIRECT, DESTROYED or locked */
3117     while (__improbable(!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_SPIN_MSK, &state))) {
3118         if (!lck_mtx_lock_wait_interlock_to_clear(lock, &state)) {
3119             return lck_mtx_lock_contended(lock, indirect, &first_miss);
3123     /* lock as spinlock and interlock acquired */
3125     thread_t thread = current_thread();
3126     /* record owner of mutex */
3127     ordered_store_mtx_owner(lock, (uintptr_t)thread);
3131     thread->mutex_count++;  /* lock statistic */
3136     LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN_ACQUIRE, lock, 0);
3138     /* return with the interlock held and preemption disabled */
3142 __attribute__((noinline))
3144 lck_mtx_try_lock_spin_slow(
3147     boolean_t indirect = FALSE;
3151     state = ordered_load_mtx_state(lock);
3153     /* is the interlock or mutex held */
3154     if (__improbable(state & ((LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK)))) {
3156 * Note: both LCK_MTX_TAG_DESTROYED and LCK_MTX_TAG_INDIRECT
3157 * have LCK_MTX_ILOCKED_MSK and LCK_MTX_MLOCKED_MSK
3158 * set in state (state == lck_mtx_tag)
3161         /* is the mutex already held and not indirect */
3162         if (__improbable(!(state & LCK_MTX_ILOCKED_MSK))) {
3166         /* check to see if it is marked destroyed */
3167         if (__improbable(state == LCK_MTX_TAG_DESTROYED)) {
3168             return lck_mtx_try_destroyed(lock);
3171         /* Is this an indirect mutex? */
3172         if (__improbable(state == LCK_MTX_TAG_INDIRECT)) {
3173             indirect = get_indirect_mutex(&lock, &state);
3176             lck_grp_mtx_update_held((struct _lck_mtx_ext_ *)lock);
3179         if (!lck_mtx_try_lock_wait_interlock_to_clear(lock, &state)) {
3181                 lck_grp_mtx_update_miss((struct _lck_mtx_ext_ *)lock, &first_miss);
3187     /* no - can't be INDIRECT, DESTROYED or locked */
3188     while (__improbable(!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_SPIN_MSK, &state))) {
3189         if (!lck_mtx_try_lock_wait_interlock_to_clear(lock, &state)) {
3191                 lck_grp_mtx_update_miss((struct _lck_mtx_ext_ *)lock, &first_miss);
3197     /* lock and interlock acquired */
3199     thread_t thread = current_thread();
3200     /* record owner of mutex */
3201     ordered_store_mtx_owner(lock, (uintptr_t)thread);
3205     thread->mutex_count++;  /* lock statistic */
3210     LOCKSTAT_RECORD(LS_LCK_MTX_TRY_SPIN_LOCK_ACQUIRE, lock, 0);
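/*
 * Illustrative caller-side sketch: the spin variants return with the interlock
 * held and preemption disabled, so the critical section must stay short and
 * non-blocking; lck_mtx_unlock() (or a convert, see below) drops that state.
 *
 *	lck_mtx_lock_spin(m);
 *	... short, non-blocking critical section ...
 *	lck_mtx_unlock(m);
 */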
3215 __attribute__((noinline))
3217 lck_mtx_convert_spin(
3222     state = ordered_load_mtx_state(lock);
3224     /* Is this an indirect mutex? */
3225     if (__improbable(state == LCK_MTX_TAG_INDIRECT)) {
3226         /* If so, take indirection */
3227         get_indirect_mutex(&lock, &state);
3230     assertf((thread_t)lock->lck_mtx_owner == current_thread(), "lock %p not owned by thread %p (current owner %p)", lock, current_thread(), (thread_t)lock->lck_mtx_owner);
3232     if (__improbable(state & LCK_MTX_MLOCKED_MSK)) {
3233         /* already owned as a mutex, just return */
3237     assert(get_preemption_level() > 0);
3238     assert(state & LCK_MTX_ILOCKED_MSK);
3239     assert(state & LCK_MTX_SPIN_MSK);
3242 * Check if there are waiters whose
3243 * priority we should inherit.
3245     if (__improbable(state & LCK_MTX_WAITERS_MSK)) {
3246         return lck_mtx_convert_spin_acquire_tail(lock);
3249     lck_mtx_convert_spin_finish_inline(lock, ordered_load_mtx_state(lock));
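/*
 * Sketch of the spin-then-convert pattern this routine supports (hypothetical
 * caller code; "must_block_or_allocate" is an assumed condition). The convert
 * upgrades a spin-mode hold to a full mutex hold before the caller does
 * anything that may block.
 *
 *	lck_mtx_lock_spin(m);
 *	if (must_block_or_allocate) {
 *		lck_mtx_convert_spin(m);
 *	}
 *	lck_mtx_unlock(m);
 */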
3254 static inline boolean_t
3255 lck_mtx_lock_grab_mutex(
3260     state = ordered_load_mtx_state(lock);
3262     if (!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_MLOCKED_MSK, &state)) {
3266     /* lock and interlock acquired */
3268     thread_t thread = current_thread();
3269     /* record owner of mutex */
3270     ordered_store_mtx_owner(lock, (uintptr_t)thread);
3274     thread->mutex_count++;  /* lock statistic */
3280 __attribute__((noinline))
3286     thread_t thread, owner;
3289     thread = current_thread();
3290     state = ordered_load_mtx_state(lock);
3292     if (state == LCK_MTX_TAG_INDIRECT) {
3293         get_indirect_mutex(&lock, &state);
3296     owner = (thread_t)lock->lck_mtx_owner;
3298     if (type == LCK_MTX_ASSERT_OWNED) {
3299         if (owner != thread || !(state & (LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK))) {
3300             panic("mutex (%p) not owned\n", lock);
3303         assert(type == LCK_MTX_ASSERT_NOTOWNED);
3304         if (owner == thread) {
3305             panic("mutex (%p) owned\n", lock);
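/*
 * Illustrative usage sketch: callers typically assert ownership at the top of
 * functions that require the lock to be held (or not held) on entry.
 *
 *	lck_mtx_assert(m, LCK_MTX_ASSERT_OWNED);     panics if not held by this thread
 *	lck_mtx_assert(m, LCK_MTX_ASSERT_NOTOWNED);  panics if held by this thread
 */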
3311 * Routine: lck_mtx_lock_spinwait_x86
3313 * Invoked trying to acquire a mutex when there is contention but
3314 * the holder is running on another processor. We spin for up to a maximum
3315 * time waiting for the lock to be released.
3317 * Called with the interlock unlocked.
3318 * returns LCK_MTX_SPINWAIT_ACQUIRED if mutex acquired
3319 * returns LCK_MTX_SPINWAIT_SPUN if we spun
3320 * returns LCK_MTX_SPINWAIT_NO_SPIN if we didn't spin due to the holder not running
3322 __attribute__((noinline))
3323 lck_mtx_spinwait_ret_type_t
3324 lck_mtx_lock_spinwait_x86(
3327     __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(mutex);
3329     uint64_t overall_deadline;
3330     uint64_t check_owner_deadline;
3332     lck_mtx_spinwait_ret_type_t retval = LCK_MTX_SPINWAIT_SPUN;
3335     KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_START,
3336         trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner), mutex->lck_mtx_waiters, 0, 0);
3338     cur_time = mach_absolute_time();
3339     overall_deadline = cur_time + MutexSpin;
3340     check_owner_deadline = cur_time;
3344 * - mutex is locked, and
3345 * - it's locked as a spin lock, and
3346 * - owner is running on another processor, and
3347 * - owner (processor) is not idling, and
3348 * - we haven't spun for long enough.
3351         if (__probable(lck_mtx_lock_grab_mutex(mutex))) {
3352             retval = LCK_MTX_SPINWAIT_ACQUIRED;
3355         cur_time = mach_absolute_time();
3357         if (cur_time >= overall_deadline) {
3361         if (cur_time >= check_owner_deadline && mutex->lck_mtx_owner) {
3365 * We will repeatedly peek at the state of the lock while spinning,
3366 * and we will acquire the interlock to do so.
3367 * The thread that will unlock the mutex will also need to acquire
3368 * the interlock, and we want to avoid slowing it down.
3369 * To avoid taking an interrupt while holding the interlock,
3370 * and so increasing the time we hold it, we
3371 * will try to acquire the interlock with interrupts disabled.
3372 * This is safe because it is a "try_lock": if we can't acquire
3373 * the interlock we re-enable the interrupts and fail, so it is
3374 * ok to call it even if the interlock was already held.
3376             if (lck_mtx_interlock_try_lock_disable_interrupts(mutex, &istate)) {
3377                 if ((holder = (thread_t) mutex->lck_mtx_owner) != NULL) {
3378                     if (!(holder->machine.specFlags & OnProc) ||
3379                         (holder->state & TH_IDLE)) {
3380                         lck_mtx_interlock_unlock_enable_interrupts(mutex, istate);
3382                         if (loopcount == 0) {
3383                             retval = LCK_MTX_SPINWAIT_NO_SPIN;
3388                 lck_mtx_interlock_unlock_enable_interrupts(mutex, istate);
3390                 check_owner_deadline = cur_time + (MutexSpin / 4);
3400 * We've already kept a count via overall_deadline of how long we spun.
3401 * If dtrace is active, then we compute backwards to decide how
3404 * Note that we record a different probe id depending on whether
3405 * this is a direct or indirect mutex. This allows us to
3406 * penalize only lock groups that have debug/stats enabled
3407 * with dtrace processing if desired.
3409     if (__probable(mutex->lck_mtx_is_ext == 0)) {
3410         LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN, mutex,
3411             mach_absolute_time() - (overall_deadline - MutexSpin));
3413         LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_SPIN, mutex,
3414             mach_absolute_time() - (overall_deadline - MutexSpin));
3416     /* The lockstat acquire event is recorded by the assembly code beneath us. */
3419     KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_END,
3420         trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner), mutex->lck_mtx_waiters, retval, 0);
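/*
 * Worked sketch of the deadlines used above (values illustrative only):
 * overall_deadline = cur_time + MutexSpin bounds the whole spin, while
 * check_owner_deadline is re-armed to cur_time + MutexSpin / 4 after each
 * owner check, so owner checks are rate-limited to roughly one every
 * MutexSpin / 4. Converting a wall-clock budget (say 100us) into the
 * absolute-time units MutexSpin uses would look like:
 *
 *	uint64_t spin_abs;
 *	nanoseconds_to_absolutetime(100 * NSEC_PER_USEC, &spin_abs);
 */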
3428 * Routine: lck_mtx_lock_wait_x86
3430 * Invoked in order to wait on contention.
3432 * Called with the interlock locked and
3433 * preemption disabled...
3434 * returns it unlocked and with preemption enabled
3436 * lck_mtx_waiters is 1:1 with a wakeup needing to occur.
3437 * A runnable waiter can exist between wait and acquire
3438 * without a waiters count being set.
3439 * This allows us to never make a spurious wakeup call.
3442 * This avoids taking the thread lock if the owning thread is the same priority.
3443 * This optimizes the case of same-priority threads contending on a lock.
3444 * However, that allows the owning thread to drop in priority while holding the lock,
3445 * because there is no state the priority change can observe
3446 * which says that the targeted thread holds a contended mutex.
3448 * One possible solution: priority changes could look for some atomic tag
3449 * on the thread saying 'holding contended lock', and then set up a promotion.
3450 * Needs a story for dropping that promotion - the last contended unlock
3451 * has to notice that this has happened.
3453 __attribute__((noinline))
3455 lck_mtx_lock_wait_x86(
3459     uint64_t sleep_start = 0;
3461     if (lockstat_probemap[LS_LCK_MTX_LOCK_BLOCK] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_BLOCK]) {
3462         sleep_start = mach_absolute_time();
3465     thread_t self = current_thread();
3466     assert(self->waiting_for_mutex == NULL);
3468     self->waiting_for_mutex = mutex;
3470     __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(mutex);
3472     KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START,
3473         trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner),
3474         mutex->lck_mtx_waiters, mutex->lck_mtx_pri, 0);
3476     integer_t waiter_pri = self->sched_pri;
3477     waiter_pri = MAX(waiter_pri, self->base_pri);
3478     waiter_pri = MAX(waiter_pri, BASEPRI_DEFAULT);
3479     waiter_pri = MIN(waiter_pri, MAXPRI_PROMOTE);
3481     assert(mutex->lck_mtx_pri <= MAXPRI_PROMOTE);
3483     /* Re-initialize lck_mtx_pri if this is the first contention */
3484     if (mutex->lck_mtx_waiters == 0 || mutex->lck_mtx_pri <= waiter_pri) {
3485         mutex->lck_mtx_pri = waiter_pri;
3488     thread_t holder = (thread_t)mutex->lck_mtx_owner;
3490     assert(holder != NULL);
3493 * Intel only causes a promotion when priority needs to change,
3494 * reducing thread lock holds but leaving us vulnerable to the holder
3495 * dropping priority.
3497     if (holder->sched_pri < mutex->lck_mtx_pri) {
3498         int promote_pri = mutex->lck_mtx_pri;
3500         spl_t s = splsched();
3501         thread_lock(holder);
3503         /* Check again in case sched_pri changed */
3504         if (holder->sched_pri < promote_pri && holder->promotion_priority < promote_pri) {
3505             if (mutex->lck_mtx_promoted == 0) {
3506                 /* This is the first promotion for this mutex */
3507                 mutex->lck_mtx_promoted = 1;
3509                 if (holder->promotions++ == 0) {
3510                     /* This is the first promotion for holder */
3511                     sched_thread_promote_to_pri(holder, promote_pri, trace_lck);
3514 * Holder was previously promoted due to a different mutex,
3515 * check if it needs to raise to match this one
3517                     sched_thread_update_promotion_to_pri(holder, promote_pri,
3522 * Holder was previously promoted due to this mutex,
3523 * check if the pri needs to go up
3525                 sched_thread_update_promotion_to_pri(holder, promote_pri, trace_lck);
3529         thread_unlock(holder);
3533     mutex->lck_mtx_waiters++;
3535     thread_set_pending_block_hint(self, kThreadWaitKernelMutex);
3536     assert_wait(LCK_MTX_EVENT(mutex), THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
3538     lck_mtx_ilk_unlock(mutex);
3540     thread_block(THREAD_CONTINUE_NULL);
3542     self->waiting_for_mutex = NULL;
3544     KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_END,
3545         trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner),
3546         mutex->lck_mtx_waiters, mutex->lck_mtx_pri, 0);
3550 * Record the Dtrace lockstat probe for blocking, block time
3551 * measured from when we were entered.
3554     if (mutex->lck_mtx_is_ext == 0) {
3555         LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_BLOCK, mutex,
3556             mach_absolute_time() - sleep_start);
3558         LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_BLOCK, mutex,
3559             mach_absolute_time() - sleep_start);
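/*
 * Worked example of the waiter_pri clamp above (numbers hypothetical): a waiter
 * with sched_pri 20 and base_pri 40 contributes MAX(MAX(20, 40), BASEPRI_DEFAULT),
 * clamped to at most MAXPRI_PROMOTE. lck_mtx_pri keeps the highest such value
 * across the current set of waiters, and that is the priority the holder is
 * promoted to.
 */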
3566 * Routine: kdp_lck_mtx_lock_spin_is_acquired
3567 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
3568 * Returns: TRUE if lock is acquired.
3571 kdp_lck_mtx_lock_spin_is_acquired(lck_mtx_t *lck)
3574     panic("kdp_lck_mtx_lock_spin_is_acquired called outside of kernel debugger");
3577     if (lck->lck_mtx_ilocked || lck->lck_mtx_mlocked) {
3585 kdp_lck_mtx_find_owner(__unused struct waitq * waitq, event64_t event, thread_waitinfo_t * waitinfo)
3587     lck_mtx_t * mutex = LCK_EVENT_TO_MUTEX(event);
3588     waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(mutex);
3589     thread_t holder = (thread_t)mutex->lck_mtx_owner;
3590     waitinfo->owner = thread_tid(holder);
3594 kdp_rwlck_find_owner(__unused struct waitq * waitq, event64_t event, thread_waitinfo_t * waitinfo)
3596     lck_rw_t *rwlck = NULL;
3597     switch (waitinfo->wait_type) {
3598     case kThreadWaitKernelRWLockRead:
3599         rwlck = READ_EVENT_TO_RWLOCK(event);
3601     case kThreadWaitKernelRWLockWrite:
3602     case kThreadWaitKernelRWLockUpgrade:
3603         rwlck = WRITE_EVENT_TO_RWLOCK(event);
3606         panic("%s was called with an invalid blocking type", __FUNCTION__);
3609     waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(rwlck);
3610     waitinfo->owner = 0;