2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
46 * Carnegie Mellon requests users of this software to return to
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
58 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Locking primitives implementation
64 #define ATOMIC_PRIVATE 1
65 #define LOCK_PRIVATE 1
67 #include <mach_ldebug.h>
69 #include <kern/locks.h>
70 #include <kern/kalloc.h>
71 #include <kern/misc_protos.h>
72 #include <kern/thread.h>
73 #include <kern/processor.h>
74 #include <kern/cpu_data.h>
75 #include <kern/cpu_number.h>
76 #include <kern/sched_prim.h>
78 #include <kern/debug.h>
81 #include <i386/machine_routines.h> /* machine_timeout_suspended() */
82 #include <machine/atomic.h>
83 #include <machine/machine_cpu.h>
85 #include <machine/atomic.h>
86 #include <sys/kdebug.h>
87 #include <i386/locks_i386_inlines.h>
90 * We need only enough declarations from the BSD-side to be able to
91 * test if our probe is active, and to call __dtrace_probe(). Setting
92 * NEED_DTRACE_DEFS gets a local copy of those definitions pulled in.
95 #define NEED_DTRACE_DEFS
96 #include <../bsd/sys/lockstat.h>
98 #define DTRACE_RW_SHARED 0x0 //reader
99 #define DTRACE_RW_EXCL 0x1 //writer
100 #define DTRACE_NO_FLAG 0x0 //not applicable
104 #define LCK_RW_LCK_EXCLUSIVE_CODE 0x100
105 #define LCK_RW_LCK_EXCLUSIVE1_CODE 0x101
106 #define LCK_RW_LCK_SHARED_CODE 0x102
107 #define LCK_RW_LCK_SH_TO_EX_CODE 0x103
108 #define LCK_RW_LCK_SH_TO_EX1_CODE 0x104
109 #define LCK_RW_LCK_EX_TO_SH_CODE 0x105
111 #define LCK_RW_LCK_EX_WRITER_SPIN_CODE 0x106
112 #define LCK_RW_LCK_EX_WRITER_WAIT_CODE 0x107
113 #define LCK_RW_LCK_EX_READER_SPIN_CODE 0x108
114 #define LCK_RW_LCK_EX_READER_WAIT_CODE 0x109
115 #define LCK_RW_LCK_SHARED_SPIN_CODE 0x110
116 #define LCK_RW_LCK_SHARED_WAIT_CODE 0x111
117 #define LCK_RW_LCK_SH_TO_EX_SPIN_CODE 0x112
118 #define LCK_RW_LCK_SH_TO_EX_WAIT_CODE 0x113
121 #define ANY_LOCK_DEBUG (USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG)
123 unsigned int LcksOpts
=0;
125 #if DEVELOPMENT || DEBUG
126 unsigned int LckDisablePreemptCheck
= 0;
133 * Perform simple lock checks.
135 int uslock_check
= 1;
136 int max_lock_loops
= 100000000;
137 decl_simple_lock_data(extern , printf_lock
)
138 decl_simple_lock_data(extern , panic_lock
)
139 #endif /* USLOCK_DEBUG */
141 extern unsigned int not_in_kdp
;
144 * We often want to know the addresses of the callers
145 * of the various lock routines. However, this information
146 * is only used for debugging and statistics.
149 #define INVALID_PC ((void *) VM_MAX_KERNEL_ADDRESS)
150 #define INVALID_THREAD ((void *) VM_MAX_KERNEL_ADDRESS)
152 #define OBTAIN_PC(pc) ((pc) = GET_RETURN_PC())
153 #define DECL_PC(pc) pc_t pc;
154 #else /* ANY_LOCK_DEBUG */
158 * Eliminate lint complaints about unused local pc variables.
160 #define OBTAIN_PC(pc) ++pc
162 #define OBTAIN_PC(pc)
164 #endif /* USLOCK_DEBUG */
167 * atomic exchange API is a low level abstraction of the operations
168 * to atomically read, modify, and write a pointer. This abstraction works
169 * for both Intel and ARMv8.1 compare and exchange atomic instructions as
170 * well as the ARM exclusive instructions.
172 * atomic_exchange_begin() - begin exchange and retrieve current value
173 * atomic_exchange_complete() - conclude an exchange
174 * atomic_exchange_abort() - cancel an exchange started with atomic_exchange_begin()
177 atomic_exchange_begin32(uint32_t *target
, uint32_t *previous
, enum memory_order ord
)
181 (void)ord
; // Memory order not used
182 val
= __c11_atomic_load((_Atomic
uint32_t *)target
, memory_order_relaxed
);
188 atomic_exchange_complete32(uint32_t *target
, uint32_t previous
, uint32_t newval
, enum memory_order ord
)
190 return __c11_atomic_compare_exchange_strong((_Atomic
uint32_t *)target
, &previous
, newval
, ord
, memory_order_relaxed
);
/*
 * atomic_exchange_abort
 *
 * Cancel an exchange started with atomic_exchange_begin32().
 * This implementation has nothing to undo, so the body is empty.
 *
 * NOTE(review): `static void` is reconstructed -- confirm upstream.
 */
static void
atomic_exchange_abort(void) { }
197 atomic_test_and_set32(uint32_t *target
, uint32_t test_mask
, uint32_t set_mask
, enum memory_order ord
, boolean_t wait
)
199 uint32_t value
, prev
;
202 value
= atomic_exchange_begin32(target
, &prev
, ord
);
203 if (value
& test_mask
) {
207 atomic_exchange_abort();
211 if (atomic_exchange_complete32(target
, prev
, value
, ord
))
217 * Portable lock package implementation of usimple_locks.
221 #define USLDBG(stmt) stmt
222 void usld_lock_init(usimple_lock_t
, unsigned short);
223 void usld_lock_pre(usimple_lock_t
, pc_t
);
224 void usld_lock_post(usimple_lock_t
, pc_t
);
225 void usld_unlock(usimple_lock_t
, pc_t
);
226 void usld_lock_try_pre(usimple_lock_t
, pc_t
);
227 void usld_lock_try_post(usimple_lock_t
, pc_t
);
228 int usld_lock_common_checks(usimple_lock_t
, char *);
229 #else /* USLOCK_DEBUG */
231 #endif /* USLOCK_DEBUG */
234 * Forward definitions
237 static void lck_rw_lock_shared_gen(lck_rw_t
*lck
);
238 static void lck_rw_lock_exclusive_gen(lck_rw_t
*lck
);
239 static boolean_t
lck_rw_lock_shared_to_exclusive_success(lck_rw_t
*lck
);
240 static boolean_t
lck_rw_lock_shared_to_exclusive_failure(lck_rw_t
*lck
, uint32_t prior_lock_state
);
241 static void lck_rw_lock_exclusive_to_shared_gen(lck_rw_t
*lck
, uint32_t prior_lock_state
);
242 static lck_rw_type_t
lck_rw_done_gen(lck_rw_t
*lck
, uint32_t prior_lock_state
);
243 void lck_rw_clear_promotions_x86(thread_t thread
);
244 static boolean_t
lck_rw_held_read_or_upgrade(lck_rw_t
*lock
);
245 static boolean_t
lck_rw_grab_want(lck_rw_t
*lock
);
246 static boolean_t
lck_rw_grab_shared(lck_rw_t
*lock
);
247 static void lck_mtx_unlock_wakeup_tail(lck_mtx_t
*mutex
, int prior_lock_state
, boolean_t indirect
);
248 static void lck_mtx_interlock_lock(lck_mtx_t
*mutex
, uint32_t *new_state
);
249 static void lck_mtx_interlock_lock_clear_flags(lck_mtx_t
*mutex
, uint32_t and_flags
, uint32_t *new_state
);
250 static int lck_mtx_interlock_try_lock(lck_mtx_t
*mutex
, uint32_t *new_state
);
251 static int lck_mtx_interlock_try_lock_set_flags(lck_mtx_t
*mutex
, uint32_t or_flags
, uint32_t *new_state
);
252 static boolean_t
lck_mtx_lock_wait_interlock_to_clear(lck_mtx_t
*lock
, uint32_t *new_state
);
253 static boolean_t
lck_mtx_try_lock_wait_interlock_to_clear(lck_mtx_t
*lock
, uint32_t *new_state
);
257 * Routine: lck_spin_alloc_init
266 if ((lck
= (lck_spin_t
*)kalloc(sizeof(lck_spin_t
))) != 0)
267 lck_spin_init(lck
, grp
, attr
);
273 * Routine: lck_spin_free
280 lck_spin_destroy(lck
, grp
);
281 kfree(lck
, sizeof(lck_spin_t
));
285 * Routine: lck_spin_init
291 __unused lck_attr_t
*attr
)
293 usimple_lock_init((usimple_lock_t
) lck
, 0);
294 lck_grp_reference(grp
);
295 lck_grp_lckcnt_incr(grp
, LCK_TYPE_SPIN
);
299 * Routine: lck_spin_destroy
306 if (lck
->interlock
== LCK_SPIN_TAG_DESTROYED
)
308 lck
->interlock
= LCK_SPIN_TAG_DESTROYED
;
309 lck_grp_lckcnt_decr(grp
, LCK_TYPE_SPIN
);
310 lck_grp_deallocate(grp
);
315 * Routine: lck_spin_lock
321 usimple_lock((usimple_lock_t
) lck
);
325 * Routine: lck_spin_unlock
331 usimple_unlock((usimple_lock_t
) lck
);
336 * Routine: lck_spin_try_lock
342 boolean_t lrval
= (boolean_t
)usimple_lock_try((usimple_lock_t
) lck
);
343 #if DEVELOPMENT || DEBUG
352 * Routine: lck_spin_assert
355 lck_spin_assert(lck_spin_t
*lock
, unsigned int type
)
357 thread_t thread
, holder
;
360 if (__improbable(type
!= LCK_ASSERT_OWNED
&& type
!= LCK_ASSERT_NOTOWNED
)) {
361 panic("lck_spin_assert(): invalid arg (%u)", type
);
364 state
= lock
->interlock
;
365 holder
= (thread_t
)state
;
366 thread
= current_thread();
367 if (type
== LCK_ASSERT_OWNED
) {
368 if (__improbable(holder
== THREAD_NULL
)) {
369 panic("Lock not owned %p = %lx", lock
, state
);
371 if (__improbable(holder
!= thread
)) {
372 panic("Lock not owned by current thread %p = %lx", lock
, state
);
374 } else if (type
== LCK_ASSERT_NOTOWNED
) {
375 if (__improbable(holder
!= THREAD_NULL
)) {
376 if (holder
== thread
) {
377 panic("Lock owned by current thread %p = %lx", lock
, state
);
379 panic("Lock %p owned by thread %p", lock
, holder
);
386 * Routine: kdp_lck_spin_is_acquired
387 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
388 * Returns: TRUE if lock is acquired.
391 kdp_lck_spin_is_acquired(lck_spin_t
*lck
) {
393 panic("panic: spinlock acquired check done outside of kernel debugger");
395 return (lck
->interlock
!= 0)? TRUE
: FALSE
;
399 * Initialize a usimple_lock.
401 * No change in preemption state.
406 __unused
unsigned short tag
)
408 #ifndef MACHINE_SIMPLE_LOCK
409 USLDBG(usld_lock_init(l
, tag
));
410 hw_lock_init(&l
->interlock
);
412 simple_lock_init((simple_lock_t
)l
,tag
);
416 volatile uint32_t spinlock_owner_cpu
= ~0;
417 volatile usimple_lock_t spinlock_timed_out
;
419 uint32_t spinlock_timeout_NMI(uintptr_t thread_addr
) {
422 for (i
= 0; i
< real_ncpus
; i
++) {
423 if ((cpu_data_ptr
[i
] != NULL
) && ((uintptr_t)cpu_data_ptr
[i
]->cpu_active_thread
== thread_addr
)) {
424 spinlock_owner_cpu
= i
;
425 if ((uint32_t) cpu_number() != i
) {
426 /* Cause NMI and panic on the owner's cpu */
427 NMIPI_panic(cpu_to_cpumask(i
), SPINLOCK_TIMEOUT
);
433 return spinlock_owner_cpu
;
437 * Acquire a usimple_lock.
439 * Returns with preemption disabled. Note
440 * that the hw_lock routines are responsible for
441 * maintaining preemption state.
447 #ifndef MACHINE_SIMPLE_LOCK
451 USLDBG(usld_lock_pre(l
, pc
));
453 if(__improbable(hw_lock_to(&l
->interlock
, LockTimeOutTSC
) == 0)) {
454 boolean_t uslock_acquired
= FALSE
;
455 while (machine_timeout_suspended()) {
457 if ((uslock_acquired
= hw_lock_to(&l
->interlock
, LockTimeOutTSC
)))
461 if (uslock_acquired
== FALSE
) {
463 uintptr_t lowner
= (uintptr_t)l
->interlock
.lock_data
;
464 spinlock_timed_out
= l
;
465 lock_cpu
= spinlock_timeout_NMI(lowner
);
466 panic("Spinlock acquisition timed out: lock=%p, lock owner thread=0x%lx, current_thread: %p, lock owner active on CPU 0x%x, current owner: 0x%lx, time: %llu",
467 l
, lowner
, current_thread(), lock_cpu
, (uintptr_t)l
->interlock
.lock_data
, mach_absolute_time());
470 #if DEVELOPMENT || DEBUG
474 USLDBG(usld_lock_post(l
, pc
));
476 simple_lock((simple_lock_t
)l
);
479 LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE
, l
, 0);
485 * Release a usimple_lock.
487 * Returns with preemption enabled. Note
488 * that the hw_lock routines are responsible for
489 * maintaining preemption state.
495 #ifndef MACHINE_SIMPLE_LOCK
499 USLDBG(usld_unlock(l
, pc
));
500 #if DEVELOPMENT || DEBUG
503 hw_lock_unlock(&l
->interlock
);
505 simple_unlock_rwmb((simple_lock_t
)l
);
511 * Conditionally acquire a usimple_lock.
513 * On success, returns with preemption disabled.
514 * On failure, returns with preemption in the same state
515 * as when first invoked. Note that the hw_lock routines
516 * are responsible for maintaining preemption state.
518 * XXX No stats are gathered on a miss; I preserved this
519 * behavior from the original assembly-language code, but
520 * doesn't it make sense to log misses? XXX
526 #ifndef MACHINE_SIMPLE_LOCK
527 unsigned int success
;
531 USLDBG(usld_lock_try_pre(l
, pc
));
532 if ((success
= hw_lock_try(&l
->interlock
))) {
533 #if DEVELOPMENT || DEBUG
536 USLDBG(usld_lock_try_post(l
, pc
));
540 return(simple_lock_try((simple_lock_t
)l
));
545 * Acquire a usimple_lock while polling for pending TLB flushes
546 * and spinning on a lock.
550 usimple_lock_try_lock_loop(usimple_lock_t l
)
552 boolean_t istate
= ml_get_interrupts_enabled();
553 while (!simple_lock_try((l
))) {
555 handle_pending_TLB_flushes();
562 * States of a usimple_lock. The default when initializing
563 * a usimple_lock is setting it up for debug checking.
565 #define USLOCK_CHECKED 0x0001 /* lock is being checked */
566 #define USLOCK_TAKEN 0x0002 /* lock has been taken */
567 #define USLOCK_INIT 0xBAA0 /* lock has been initialized */
568 #define USLOCK_INITIALIZED (USLOCK_INIT|USLOCK_CHECKED)
569 #define USLOCK_CHECKING(l) (uslock_check && \
570 ((l)->debug.state & USLOCK_CHECKED))
573 * Trace activities of a particularly interesting lock.
575 void usl_trace(usimple_lock_t
, int, pc_t
, const char *);
579 * Initialize the debugging information contained
585 __unused
unsigned short tag
)
587 if (l
== USIMPLE_LOCK_NULL
)
588 panic("lock initialization: null lock pointer");
589 l
->lock_type
= USLOCK_TAG
;
590 l
->debug
.state
= uslock_check
? USLOCK_INITIALIZED
: 0;
591 l
->debug
.lock_cpu
= l
->debug
.unlock_cpu
= 0;
592 l
->debug
.lock_pc
= l
->debug
.unlock_pc
= INVALID_PC
;
593 l
->debug
.lock_thread
= l
->debug
.unlock_thread
= INVALID_THREAD
;
594 l
->debug
.duration
[0] = l
->debug
.duration
[1] = 0;
595 l
->debug
.unlock_cpu
= l
->debug
.unlock_cpu
= 0;
596 l
->debug
.unlock_pc
= l
->debug
.unlock_pc
= INVALID_PC
;
597 l
->debug
.unlock_thread
= l
->debug
.unlock_thread
= INVALID_THREAD
;
602 * These checks apply to all usimple_locks, not just
603 * those with USLOCK_CHECKED turned on.
606 usld_lock_common_checks(
610 if (l
== USIMPLE_LOCK_NULL
)
611 panic("%s: null lock pointer", caller
);
612 if (l
->lock_type
!= USLOCK_TAG
)
613 panic("%s: %p is not a usimple lock, 0x%x", caller
, l
, l
->lock_type
);
614 if (!(l
->debug
.state
& USLOCK_INIT
))
615 panic("%s: %p is not an initialized lock, 0x%x", caller
, l
, l
->debug
.state
);
616 return USLOCK_CHECKING(l
);
621 * Debug checks on a usimple_lock just before attempting
630 char caller
[] = "usimple_lock";
633 if (!usld_lock_common_checks(l
, caller
))
637 * Note that we have a weird case where we are getting a lock when we are
638 * in the process of putting the system to sleep. We are running with no
639 * current threads, therefore we can't tell if we are trying to retake a lock
640 * we have or someone on the other processor has it. Therefore we just
641 * ignore this test if the locking thread is 0.
644 if ((l
->debug
.state
& USLOCK_TAKEN
) && l
->debug
.lock_thread
&&
645 l
->debug
.lock_thread
== (void *) current_thread()) {
646 printf("%s: lock %p already locked (at %p) by",
647 caller
, l
, l
->debug
.lock_pc
);
648 printf(" current thread %p (new attempt at pc %p)\n",
649 l
->debug
.lock_thread
, pc
);
652 mp_disable_preemption();
653 usl_trace(l
, cpu_number(), pc
, caller
);
654 mp_enable_preemption();
659 * Debug checks on a usimple_lock just after acquiring it.
661 * Pre-emption has been disabled at this point,
662 * so we are safe in using cpu_number.
670 char caller
[] = "successful usimple_lock";
673 if (!usld_lock_common_checks(l
, caller
))
676 if (!((l
->debug
.state
& ~USLOCK_TAKEN
) == USLOCK_INITIALIZED
))
677 panic("%s: lock %p became uninitialized",
679 if ((l
->debug
.state
& USLOCK_TAKEN
))
680 panic("%s: lock 0x%p became TAKEN by someone else",
683 mycpu
= cpu_number();
684 l
->debug
.lock_thread
= (void *)current_thread();
685 l
->debug
.state
|= USLOCK_TAKEN
;
686 l
->debug
.lock_pc
= pc
;
687 l
->debug
.lock_cpu
= mycpu
;
689 usl_trace(l
, mycpu
, pc
, caller
);
694 * Debug checks on a usimple_lock just before
695 * releasing it. Note that the caller has not
696 * yet released the hardware lock.
698 * Preemption is still disabled, so there's
699 * no problem using cpu_number.
707 char caller
[] = "usimple_unlock";
710 if (!usld_lock_common_checks(l
, caller
))
713 mycpu
= cpu_number();
715 if (!(l
->debug
.state
& USLOCK_TAKEN
))
716 panic("%s: lock 0x%p hasn't been taken",
718 if (l
->debug
.lock_thread
!= (void *) current_thread())
719 panic("%s: unlocking lock 0x%p, owned by thread %p",
720 caller
, l
, l
->debug
.lock_thread
);
721 if (l
->debug
.lock_cpu
!= mycpu
) {
722 printf("%s: unlocking lock 0x%p on cpu 0x%x",
724 printf(" (acquired on cpu 0x%x)\n", l
->debug
.lock_cpu
);
727 usl_trace(l
, mycpu
, pc
, caller
);
729 l
->debug
.unlock_thread
= l
->debug
.lock_thread
;
730 l
->debug
.lock_thread
= INVALID_PC
;
731 l
->debug
.state
&= ~USLOCK_TAKEN
;
732 l
->debug
.unlock_pc
= pc
;
733 l
->debug
.unlock_cpu
= mycpu
;
738 * Debug checks on a usimple_lock just before
739 * attempting to acquire it.
741 * Preemption isn't guaranteed to be disabled.
748 char caller
[] = "usimple_lock_try";
750 if (!usld_lock_common_checks(l
, caller
))
752 mp_disable_preemption();
753 usl_trace(l
, cpu_number(), pc
, caller
);
754 mp_enable_preemption();
759 * Debug checks on a usimple_lock just after
760 * successfully attempting to acquire it.
762 * Preemption has been disabled by the
763 * lock acquisition attempt, so it's safe
772 char caller
[] = "successful usimple_lock_try";
774 if (!usld_lock_common_checks(l
, caller
))
777 if (!((l
->debug
.state
& ~USLOCK_TAKEN
) == USLOCK_INITIALIZED
))
778 panic("%s: lock 0x%p became uninitialized",
780 if ((l
->debug
.state
& USLOCK_TAKEN
))
781 panic("%s: lock 0x%p became TAKEN by someone else",
784 mycpu
= cpu_number();
785 l
->debug
.lock_thread
= (void *) current_thread();
786 l
->debug
.state
|= USLOCK_TAKEN
;
787 l
->debug
.lock_pc
= pc
;
788 l
->debug
.lock_cpu
= mycpu
;
790 usl_trace(l
, mycpu
, pc
, caller
);
795 * For very special cases, set traced_lock to point to a
796 * specific lock of interest. The result is a series of
797 * XPRs showing lock operations on that lock. The lock_seq
798 * value is used to show the order of those operations.
800 usimple_lock_t traced_lock
;
801 unsigned int lock_seq
;
808 const char * op_name
)
810 if (traced_lock
== l
) {
812 "seq %d, cpu %d, %s @ %x\n",
813 (uintptr_t) lock_seq
, (uintptr_t) mycpu
,
814 (uintptr_t) op_name
, (uintptr_t) pc
, 0);
820 #endif /* USLOCK_DEBUG */
823 * Routine: lck_rw_alloc_init
831 if ((lck
= (lck_rw_t
*)kalloc(sizeof(lck_rw_t
))) != 0) {
832 bzero(lck
, sizeof(lck_rw_t
));
833 lck_rw_init(lck
, grp
, attr
);
840 * Routine: lck_rw_free
846 lck_rw_destroy(lck
, grp
);
847 kfree(lck
, sizeof(lck_rw_t
));
851 * Routine: lck_rw_init
859 lck_attr_t
*lck_attr
= (attr
!= LCK_ATTR_NULL
) ?
860 attr
: &LockDefaultLckAttr
;
862 hw_lock_byte_init(&lck
->lck_rw_interlock
);
863 lck
->lck_rw_want_write
= FALSE
;
864 lck
->lck_rw_want_upgrade
= FALSE
;
865 lck
->lck_rw_shared_count
= 0;
866 lck
->lck_rw_can_sleep
= TRUE
;
867 lck
->lck_r_waiting
= lck
->lck_w_waiting
= 0;
869 lck
->lck_rw_priv_excl
= ((lck_attr
->lck_attr_val
&
870 LCK_ATTR_RW_SHARED_PRIORITY
) == 0);
872 lck_grp_reference(grp
);
873 lck_grp_lckcnt_incr(grp
, LCK_TYPE_RW
);
877 * Routine: lck_rw_destroy
884 if (lck
->lck_rw_tag
== LCK_RW_TAG_DESTROYED
)
887 lck_rw_assert(lck
, LCK_RW_ASSERT_NOTHELD
);
889 lck
->lck_rw_tag
= LCK_RW_TAG_DESTROYED
;
890 lck_grp_lckcnt_decr(grp
, LCK_TYPE_RW
);
891 lck_grp_deallocate(grp
);
896 * Sleep locks. These use the same data structure and algorithm
897 * as the spin locks, but the process sleeps while it is waiting
898 * for the lock. These work on uniprocessor systems.
901 #define DECREMENTER_TIMEOUT 1000000
904 * We disable interrupts while holding the RW interlock to prevent an
905 * interrupt from exacerbating hold time.
906 * Hence, local helper functions lck_interlock_lock()/lck_interlock_unlock().
908 static inline boolean_t
909 lck_interlock_lock(lck_rw_t
*lck
)
913 istate
= ml_set_interrupts_enabled(FALSE
);
914 hw_lock_byte_lock(&lck
->lck_rw_interlock
);
919 lck_interlock_unlock(lck_rw_t
*lck
, boolean_t istate
)
921 hw_lock_byte_unlock(&lck
->lck_rw_interlock
);
922 ml_set_interrupts_enabled(istate
);
926 * This inline is used when busy-waiting for an rw lock.
927 * If interrupts were disabled when the lock primitive was called,
928 * we poll the IPI handler for pending tlb flushes.
929 * XXX This is a hack to avoid deadlocking on the pmap_system_lock.
932 lck_rw_lock_pause(boolean_t interrupts_enabled
)
934 if (!interrupts_enabled
)
935 handle_pending_TLB_flushes();
939 static inline boolean_t
940 lck_rw_held_read_or_upgrade(lck_rw_t
*lock
)
942 if (ordered_load(&lock
->data
) & (LCK_RW_SHARED_MASK
| LCK_RW_INTERLOCK
| LCK_RW_WANT_UPGRADE
))
948 * compute the deadline to spin against when
949 * waiting for a change of state on a lck_rw_t
951 static inline uint64_t
952 lck_rw_deadline_for_spin(lck_rw_t
*lck
)
954 if (lck
->lck_rw_can_sleep
) {
955 if (lck
->lck_r_waiting
|| lck
->lck_w_waiting
|| lck
->lck_rw_shared_count
> machine_info
.max_cpus
) {
957 * there are already threads waiting on this lock... this
958 * implies that they have spun beyond their deadlines waiting for
959 * the desired state to show up so we will not bother spinning at this time...
961 * the current number of threads sharing this lock exceeds our capacity to run them
962 * concurrently and since all states we're going to spin for require the rw_shared_count
963 * to be at 0, we'll not bother spinning since the latency for this to happen is
966 return (mach_absolute_time());
968 return (mach_absolute_time() + MutexSpin
);
970 return (mach_absolute_time() + (100000LL * 1000000000LL));
975 * Spin while interlock is held.
979 lck_rw_interlock_spin(lck_rw_t
*lock
)
981 while (ordered_load(&lock
->data
) & LCK_RW_INTERLOCK
) {
987 lck_rw_grab_want(lck_rw_t
*lock
)
992 data
= atomic_exchange_begin32(&lock
->data
, &prev
, memory_order_relaxed
);
993 if ((data
& LCK_RW_INTERLOCK
) == 0)
995 atomic_exchange_abort();
996 lck_rw_interlock_spin(lock
);
998 if (data
& LCK_RW_WANT_WRITE
) {
999 atomic_exchange_abort();
1002 data
|= LCK_RW_WANT_WRITE
;
1003 return atomic_exchange_complete32(&lock
->data
, prev
, data
, memory_order_relaxed
);
1007 lck_rw_grab_shared(lck_rw_t
*lock
)
1009 uint32_t data
, prev
;
1012 data
= atomic_exchange_begin32(&lock
->data
, &prev
, memory_order_acquire_smp
);
1013 if ((data
& LCK_RW_INTERLOCK
) == 0)
1015 atomic_exchange_abort();
1016 lck_rw_interlock_spin(lock
);
1018 if (data
& (LCK_RW_WANT_WRITE
| LCK_RW_WANT_UPGRADE
)) {
1019 if (((data
& LCK_RW_SHARED_MASK
) == 0) || (data
& LCK_RW_PRIV_EXCL
)) {
1020 atomic_exchange_abort();
1024 data
+= LCK_RW_SHARED_READER
;
1025 return atomic_exchange_complete32(&lock
->data
, prev
, data
, memory_order_acquire_smp
);
1029 * Routine: lck_rw_lock_exclusive
1032 lck_rw_lock_exclusive_gen(
1035 __kdebug_only
uintptr_t trace_lck
= unslide_for_kdebug(lck
);
1036 uint64_t deadline
= 0;
1040 wait_result_t res
= 0;
1041 boolean_t istate
= -1;
1044 boolean_t dtrace_ls_initialized
= FALSE
;
1045 boolean_t dtrace_rwl_excl_spin
, dtrace_rwl_excl_block
, dtrace_ls_enabled
= FALSE
;
1046 uint64_t wait_interval
= 0;
1047 int readers_at_sleep
= 0;
1051 * Try to acquire the lck_rw_want_write bit.
1053 while ( !lck_rw_grab_want(lck
)) {
1056 if (dtrace_ls_initialized
== FALSE
) {
1057 dtrace_ls_initialized
= TRUE
;
1058 dtrace_rwl_excl_spin
= (lockstat_probemap
[LS_LCK_RW_LOCK_EXCL_SPIN
] != 0);
1059 dtrace_rwl_excl_block
= (lockstat_probemap
[LS_LCK_RW_LOCK_EXCL_BLOCK
] != 0);
1060 dtrace_ls_enabled
= dtrace_rwl_excl_spin
|| dtrace_rwl_excl_block
;
1061 if (dtrace_ls_enabled
) {
1063 * Either sleeping or spinning is happening,
1064 * start a timing of our delay interval now.
1066 readers_at_sleep
= lck
->lck_rw_shared_count
;
1067 wait_interval
= mach_absolute_time();
1072 istate
= ml_get_interrupts_enabled();
1074 deadline
= lck_rw_deadline_for_spin(lck
);
1076 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_WRITER_SPIN_CODE
) | DBG_FUNC_START
, trace_lck
, 0, 0, 0, 0);
1078 while (((gotlock
= lck_rw_grab_want(lck
)) == 0) && mach_absolute_time() < deadline
)
1079 lck_rw_lock_pause(istate
);
1081 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_WRITER_SPIN_CODE
) | DBG_FUNC_END
, trace_lck
, 0, 0, gotlock
, 0);
1086 * if we get here, the deadline has expired w/o us
1087 * being able to grab the lock exclusively
1088 * check to see if we're allowed to do a thread_block
1090 if (lck
->lck_rw_can_sleep
) {
1092 istate
= lck_interlock_lock(lck
);
1094 if (lck
->lck_rw_want_write
) {
1096 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_WRITER_WAIT_CODE
) | DBG_FUNC_START
, trace_lck
, 0, 0, 0, 0);
1098 lck
->lck_w_waiting
= TRUE
;
1100 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite
);
1101 res
= assert_wait(RW_LOCK_WRITER_EVENT(lck
),
1102 THREAD_UNINT
| THREAD_WAIT_NOREPORT_USER
);
1103 lck_interlock_unlock(lck
, istate
);
1105 if (res
== THREAD_WAITING
) {
1106 res
= thread_block(THREAD_CONTINUE_NULL
);
1109 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_WRITER_WAIT_CODE
) | DBG_FUNC_END
, trace_lck
, res
, slept
, 0, 0);
1111 lck
->lck_rw_want_write
= TRUE
;
1112 lck_interlock_unlock(lck
, istate
);
1118 * Wait for readers (and upgrades) to finish...
1119 * the test for these conditions must be done simultaneously with
1120 * a check of the interlock not being held since
1121 * the rw_shared_count will drop to 0 first and then want_upgrade
1122 * will be set to 1 in the shared_to_exclusive scenario... those
1123 * adjustments are done behind the interlock and represent an
1124 * atomic change in state and must be considered as such
1125 * however, once we see the read count at 0, the want_upgrade not set
1126 * and the interlock not held, we are safe to proceed
1128 while (lck_rw_held_read_or_upgrade(lck
)) {
1132 * Either sleeping or spinning is happening, start
1133 * a timing of our delay interval now. If we set it
1134 * to -1 we don't have accurate data so we cannot later
1135 * decide to record a dtrace spin or sleep event.
1137 if (dtrace_ls_initialized
== FALSE
) {
1138 dtrace_ls_initialized
= TRUE
;
1139 dtrace_rwl_excl_spin
= (lockstat_probemap
[LS_LCK_RW_LOCK_EXCL_SPIN
] != 0);
1140 dtrace_rwl_excl_block
= (lockstat_probemap
[LS_LCK_RW_LOCK_EXCL_BLOCK
] != 0);
1141 dtrace_ls_enabled
= dtrace_rwl_excl_spin
|| dtrace_rwl_excl_block
;
1142 if (dtrace_ls_enabled
) {
1144 * Either sleeping or spinning is happening,
1145 * start a timing of our delay interval now.
1147 readers_at_sleep
= lck
->lck_rw_shared_count
;
1148 wait_interval
= mach_absolute_time();
1153 istate
= ml_get_interrupts_enabled();
1155 deadline
= lck_rw_deadline_for_spin(lck
);
1157 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_READER_SPIN_CODE
) | DBG_FUNC_START
, trace_lck
, 0, 0, 0, 0);
1159 while ((lockheld
= lck_rw_held_read_or_upgrade(lck
)) && mach_absolute_time() < deadline
)
1160 lck_rw_lock_pause(istate
);
1162 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_READER_SPIN_CODE
) | DBG_FUNC_END
, trace_lck
, 0, 0, lockheld
, 0);
1167 * if we get here, the deadline has expired w/o us
1168 * being able to grab the lock exclusively
1169 * check to see if we're allowed to do a thread_block
1171 if (lck
->lck_rw_can_sleep
) {
1173 istate
= lck_interlock_lock(lck
);
1175 if (lck
->lck_rw_shared_count
!= 0 || lck
->lck_rw_want_upgrade
) {
1176 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_READER_WAIT_CODE
) | DBG_FUNC_START
, trace_lck
, 0, 0, 0, 0);
1178 lck
->lck_w_waiting
= TRUE
;
1180 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite
);
1181 res
= assert_wait(RW_LOCK_WRITER_EVENT(lck
),
1182 THREAD_UNINT
| THREAD_WAIT_NOREPORT_USER
);
1183 lck_interlock_unlock(lck
, istate
);
1185 if (res
== THREAD_WAITING
) {
1186 res
= thread_block(THREAD_CONTINUE_NULL
);
1189 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_READER_WAIT_CODE
) | DBG_FUNC_END
, trace_lck
, res
, slept
, 0, 0);
1191 lck_interlock_unlock(lck
, istate
);
1193 * must own the lock now, since we checked for
1194 * readers or upgrade owner behind the interlock
1195 * no need for a call to 'lck_rw_held_read_or_upgrade'
1204 * Decide what latencies we suffered that are Dtrace events.
1205 * If we have set wait_interval, then we either spun or slept.
1206 * At least we get out from under the interlock before we record
1207 * which is the best we can do here to minimize the impact
1209 * If we have set wait_interval to -1, then dtrace was not enabled when we
1210 * started sleeping/spinning so we don't record this event.
1212 if (dtrace_ls_enabled
== TRUE
) {
1214 LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_EXCL_SPIN
, lck
,
1215 mach_absolute_time() - wait_interval
, 1);
1218 * For the blocking case, we also record whether the lock was
1219 * held for read or write when we blocked, and how many readers.
1220 * Notice that above we recorded this before we dropped
1221 * the interlock so the count is accurate.
1223 LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_EXCL_BLOCK
, lck
,
1224 mach_absolute_time() - wait_interval
, 1,
1225 (readers_at_sleep
== 0 ? 1 : 0), readers_at_sleep
);
1228 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE
, lck
, 1);
1233 * Routine: lck_rw_done
1236 lck_rw_type_t
lck_rw_done(lck_rw_t
*lock
)
1238 uint32_t data
, prev
;
1241 data
= atomic_exchange_begin32(&lock
->data
, &prev
, memory_order_release_smp
);
1242 if (data
& LCK_RW_INTERLOCK
) { /* wait for interlock to clear */
1243 atomic_exchange_abort();
1244 lck_rw_interlock_spin(lock
);
1247 if (data
& LCK_RW_SHARED_MASK
) {
1248 data
-= LCK_RW_SHARED_READER
;
1249 if ((data
& LCK_RW_SHARED_MASK
) == 0) /* if reader count has now gone to 0, check for waiters */
1251 } else { /* if reader count == 0, must be exclusive lock */
1252 if (data
& LCK_RW_WANT_UPGRADE
) {
1253 data
&= ~(LCK_RW_WANT_UPGRADE
);
1255 if (data
& LCK_RW_WANT_WRITE
)
1256 data
&= ~(LCK_RW_WANT_EXCL
);
1257 else /* lock is not 'owned', panic */
1258 panic("Releasing non-exclusive RW lock without a reader refcount!");
1261 if (prev
& LCK_RW_W_WAITING
) {
1262 data
&= ~(LCK_RW_W_WAITING
);
1263 if ((prev
& LCK_RW_PRIV_EXCL
) == 0)
1264 data
&= ~(LCK_RW_R_WAITING
);
1266 data
&= ~(LCK_RW_R_WAITING
);
1268 if (atomic_exchange_complete32(&lock
->data
, prev
, data
, memory_order_release_smp
))
1272 return lck_rw_done_gen(lock
, prev
);
1276 * Routine: lck_rw_done_gen
1278 * called from lck_rw_done()
1279 * prior_lock_state is the value in the 1st
1280 * word of the lock at the time of a successful
1281 * atomic compare and exchange with the new value...
1282 * it represents the state of the lock before we
1283 * decremented the rw_shared_count or cleared either
1284 * rw_want_upgrade or rw_want_write and
1285 * the lck_x_waiting bits... since the wrapper
1286 * routine has already changed the state atomically,
1287 * we just need to decide if we should
1288 * wake up anyone and what value to return... we do
1289 * this by examining the state of the lock before
1292 static lck_rw_type_t
1295 uint32_t prior_lock_state
)
1298 lck_rw_type_t lock_type
;
1300 uint32_t rwlock_count
;
1303 * prior_lock state is a snapshot of the 1st word of the
1304 * lock in question... we'll fake up a pointer to it
1305 * and carefully not access anything beyond whats defined
1306 * in the first word of a lck_rw_t
1308 fake_lck
= (lck_rw_t
*)&prior_lock_state
;
1310 if (fake_lck
->lck_rw_shared_count
<= 1) {
1311 if (fake_lck
->lck_w_waiting
)
1312 thread_wakeup(RW_LOCK_WRITER_EVENT(lck
));
1314 if (!(fake_lck
->lck_rw_priv_excl
&& fake_lck
->lck_w_waiting
) && fake_lck
->lck_r_waiting
)
1315 thread_wakeup(RW_LOCK_READER_EVENT(lck
));
1317 if (fake_lck
->lck_rw_shared_count
)
1318 lock_type
= LCK_RW_TYPE_SHARED
;
1320 lock_type
= LCK_RW_TYPE_EXCLUSIVE
;
1322 /* Check if dropping the lock means that we need to unpromote */
1323 thread
= current_thread();
1324 rwlock_count
= thread
->rwlock_count
--;
1326 if (rwlock_count
== 0) {
1327 panic("rw lock count underflow for thread %p", thread
);
1330 if ((rwlock_count
== 1 /* field now 0 */) && (thread
->sched_flags
& TH_SFLAG_RW_PROMOTED
)) {
1331 /* sched_flags checked without lock, but will be rechecked while clearing */
1332 lck_rw_clear_promotion(thread
, unslide_for_kdebug(lck
));
1336 LOCKSTAT_RECORD(LS_LCK_RW_DONE_RELEASE
, lck
, lock_type
== LCK_RW_TYPE_SHARED
? 0 : 1);
1344 * Routine: lck_rw_unlock
1349 lck_rw_type_t lck_rw_type
)
1351 if (lck_rw_type
== LCK_RW_TYPE_SHARED
)
1352 lck_rw_unlock_shared(lck
);
1353 else if (lck_rw_type
== LCK_RW_TYPE_EXCLUSIVE
)
1354 lck_rw_unlock_exclusive(lck
);
1356 panic("lck_rw_unlock(): Invalid RW lock type: %d\n", lck_rw_type
);
1361 * Routine: lck_rw_unlock_shared
1364 lck_rw_unlock_shared(
1369 assertf(lck
->lck_rw_shared_count
> 0, "lck %p has shared_count=0x%x", lck
, lck
->lck_rw_shared_count
);
1370 ret
= lck_rw_done(lck
);
1372 if (ret
!= LCK_RW_TYPE_SHARED
)
1373 panic("lck_rw_unlock_shared(): lock %p held in mode: %d\n", lck
, ret
);
1378 * Routine: lck_rw_unlock_exclusive
1381 lck_rw_unlock_exclusive(
1386 ret
= lck_rw_done(lck
);
1388 if (ret
!= LCK_RW_TYPE_EXCLUSIVE
)
1389 panic("lck_rw_unlock_exclusive(): lock held in mode: %d\n", ret
);
1394 * Routine: lck_rw_lock
1399 lck_rw_type_t lck_rw_type
)
1401 if (lck_rw_type
== LCK_RW_TYPE_SHARED
)
1402 lck_rw_lock_shared(lck
);
1403 else if (lck_rw_type
== LCK_RW_TYPE_EXCLUSIVE
)
1404 lck_rw_lock_exclusive(lck
);
1406 panic("lck_rw_lock(): Invalid RW lock type: %x\n", lck_rw_type
);
1410 * Routine: lck_rw_lock_shared
1413 lck_rw_lock_shared(lck_rw_t
*lock
)
1415 uint32_t data
, prev
;
1417 current_thread()->rwlock_count
++;
1419 data
= atomic_exchange_begin32(&lock
->data
, &prev
, memory_order_acquire_smp
);
1420 if (data
& (LCK_RW_WANT_EXCL
| LCK_RW_WANT_UPGRADE
| LCK_RW_INTERLOCK
)) {
1421 atomic_exchange_abort();
1422 lck_rw_lock_shared_gen(lock
);
1425 data
+= LCK_RW_SHARED_READER
;
1426 if (atomic_exchange_complete32(&lock
->data
, prev
, data
, memory_order_acquire_smp
))
1431 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE
, lock
, DTRACE_RW_SHARED
);
1432 #endif /* CONFIG_DTRACE */
1437 * Routine: lck_rw_lock_shared_gen
1439 * assembly fast path code has determined that this lock
1440 * is held exclusively... this is where we spin/block
1441 * until we can acquire the lock in the shared mode
1444 lck_rw_lock_shared_gen(
1447 __kdebug_only
uintptr_t trace_lck
= unslide_for_kdebug(lck
);
1448 uint64_t deadline
= 0;
1451 wait_result_t res
= 0;
1452 boolean_t istate
= -1;
1455 uint64_t wait_interval
= 0;
1456 int readers_at_sleep
= 0;
1457 boolean_t dtrace_ls_initialized
= FALSE
;
1458 boolean_t dtrace_rwl_shared_spin
, dtrace_rwl_shared_block
, dtrace_ls_enabled
= FALSE
;
1461 while ( !lck_rw_grab_shared(lck
)) {
1464 if (dtrace_ls_initialized
== FALSE
) {
1465 dtrace_ls_initialized
= TRUE
;
1466 dtrace_rwl_shared_spin
= (lockstat_probemap
[LS_LCK_RW_LOCK_SHARED_SPIN
] != 0);
1467 dtrace_rwl_shared_block
= (lockstat_probemap
[LS_LCK_RW_LOCK_SHARED_BLOCK
] != 0);
1468 dtrace_ls_enabled
= dtrace_rwl_shared_spin
|| dtrace_rwl_shared_block
;
1469 if (dtrace_ls_enabled
) {
1471 * Either sleeping or spinning is happening,
1472 * start a timing of our delay interval now.
1474 readers_at_sleep
= lck
->lck_rw_shared_count
;
1475 wait_interval
= mach_absolute_time();
1480 istate
= ml_get_interrupts_enabled();
1482 deadline
= lck_rw_deadline_for_spin(lck
);
1484 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_SHARED_SPIN_CODE
) | DBG_FUNC_START
,
1485 trace_lck
, lck
->lck_rw_want_write
, lck
->lck_rw_want_upgrade
, 0, 0);
1487 while (((gotlock
= lck_rw_grab_shared(lck
)) == 0) && mach_absolute_time() < deadline
)
1488 lck_rw_lock_pause(istate
);
1490 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_SHARED_SPIN_CODE
) | DBG_FUNC_END
,
1491 trace_lck
, lck
->lck_rw_want_write
, lck
->lck_rw_want_upgrade
, gotlock
, 0);
1496 * if we get here, the deadline has expired w/o us
1497 * being able to grab the lock for read
1498 * check to see if we're allowed to do a thread_block
1500 if (lck
->lck_rw_can_sleep
) {
1502 istate
= lck_interlock_lock(lck
);
1504 if ((lck
->lck_rw_want_write
|| lck
->lck_rw_want_upgrade
) &&
1505 ((lck
->lck_rw_shared_count
== 0) || lck
->lck_rw_priv_excl
)) {
1507 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_SHARED_WAIT_CODE
) | DBG_FUNC_START
,
1508 trace_lck
, lck
->lck_rw_want_write
, lck
->lck_rw_want_upgrade
, 0, 0);
1510 lck
->lck_r_waiting
= TRUE
;
1512 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockRead
);
1513 res
= assert_wait(RW_LOCK_READER_EVENT(lck
),
1514 THREAD_UNINT
| THREAD_WAIT_NOREPORT_USER
);
1515 lck_interlock_unlock(lck
, istate
);
1517 if (res
== THREAD_WAITING
) {
1518 res
= thread_block(THREAD_CONTINUE_NULL
);
1521 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_SHARED_WAIT_CODE
) | DBG_FUNC_END
,
1522 trace_lck
, res
, slept
, 0, 0);
1524 lck
->lck_rw_shared_count
++;
1525 lck_interlock_unlock(lck
, istate
);
1532 if (dtrace_ls_enabled
== TRUE
) {
1534 LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_SPIN
, lck
, mach_absolute_time() - wait_interval
, 0);
1536 LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_BLOCK
, lck
,
1537 mach_absolute_time() - wait_interval
, 0,
1538 (readers_at_sleep
== 0 ? 1 : 0), readers_at_sleep
);
1541 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE
, lck
, 0);
1547 * Routine: lck_rw_lock_exclusive
1551 lck_rw_lock_exclusive(lck_rw_t
*lock
)
1553 current_thread()->rwlock_count
++;
1554 if (atomic_test_and_set32(&lock
->data
,
1555 (LCK_RW_SHARED_MASK
| LCK_RW_WANT_EXCL
| LCK_RW_WANT_UPGRADE
| LCK_RW_INTERLOCK
),
1556 LCK_RW_WANT_EXCL
, memory_order_acquire_smp
, FALSE
)) {
1558 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE
, lock
, DTRACE_RW_EXCL
);
1559 #endif /* CONFIG_DTRACE */
1561 lck_rw_lock_exclusive_gen(lock
);
1566 * Routine: lck_rw_lock_shared_to_exclusive
1570 lck_rw_lock_shared_to_exclusive(lck_rw_t
*lock
)
1572 uint32_t data
, prev
;
1575 data
= atomic_exchange_begin32(&lock
->data
, &prev
, memory_order_acquire_smp
);
1576 if (data
& LCK_RW_INTERLOCK
) {
1577 atomic_exchange_abort();
1578 lck_rw_interlock_spin(lock
);
1581 if (data
& LCK_RW_WANT_UPGRADE
) {
1582 data
-= LCK_RW_SHARED_READER
;
1583 if ((data
& LCK_RW_SHARED_MASK
) == 0) /* we were the last reader */
1584 data
&= ~(LCK_RW_W_WAITING
); /* so clear the wait indicator */
1585 if (atomic_exchange_complete32(&lock
->data
, prev
, data
, memory_order_acquire_smp
))
1586 return lck_rw_lock_shared_to_exclusive_failure(lock
, prev
);
1588 data
|= LCK_RW_WANT_UPGRADE
; /* ask for WANT_UPGRADE */
1589 data
-= LCK_RW_SHARED_READER
; /* and shed our read count */
1590 if (atomic_exchange_complete32(&lock
->data
, prev
, data
, memory_order_acquire_smp
))
1595 /* we now own the WANT_UPGRADE */
1596 if (data
& LCK_RW_SHARED_MASK
) /* check to see if all of the readers are drained */
1597 lck_rw_lock_shared_to_exclusive_success(lock
); /* if not, we need to go wait */
1599 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE
, lock
, 0);
1606 * Routine: lck_rw_lock_shared_to_exclusive_failure
1608 * assembly fast path code has already dropped our read
1609 * count and determined that someone else owns 'lck_rw_want_upgrade'
1610 * if 'lck_rw_shared_count' == 0, it has also already dropped 'lck_w_waiting'
1611 * all we need to do here is determine if a wakeup is needed
1614 lck_rw_lock_shared_to_exclusive_failure(
1616 uint32_t prior_lock_state
)
1619 thread_t thread
= current_thread();
1620 uint32_t rwlock_count
;
1622 /* Check if dropping the lock means that we need to unpromote */
1623 rwlock_count
= thread
->rwlock_count
--;
1625 if (rwlock_count
== 0) {
1626 panic("rw lock count underflow for thread %p", thread
);
1629 fake_lck
= (lck_rw_t
*)&prior_lock_state
;
1631 if (fake_lck
->lck_w_waiting
&& fake_lck
->lck_rw_shared_count
== 1) {
1633 * Someone else has requested upgrade.
1634 * Since we've released the read lock, wake
1635 * him up if he's blocked waiting
1637 thread_wakeup(RW_LOCK_WRITER_EVENT(lck
));
1640 if ((rwlock_count
== 1 /* field now 0 */) && (thread
->sched_flags
& TH_SFLAG_RW_PROMOTED
)) {
1641 /* sched_flags checked without lock, but will be rechecked while clearing */
1642 lck_rw_clear_promotion(thread
, unslide_for_kdebug(lck
));
1645 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_SH_TO_EX_CODE
) | DBG_FUNC_NONE
,
1646 VM_KERNEL_UNSLIDE_OR_PERM(lck
), lck
->lck_rw_shared_count
, lck
->lck_rw_want_upgrade
, 0, 0);
1653 * Routine: lck_rw_lock_shared_to_exclusive_success
1655 * assembly fast path code has already dropped our read
1656 * count and successfully acquired 'lck_rw_want_upgrade'
1657 * we just need to wait for the rest of the readers to drain
1658 * and then we can return as the exclusive holder of this lock
1661 lck_rw_lock_shared_to_exclusive_success(
1664 __kdebug_only
uintptr_t trace_lck
= unslide_for_kdebug(lck
);
1665 uint64_t deadline
= 0;
1667 int still_shared
= 0;
1669 boolean_t istate
= -1;
1672 uint64_t wait_interval
= 0;
1673 int readers_at_sleep
= 0;
1674 boolean_t dtrace_ls_initialized
= FALSE
;
1675 boolean_t dtrace_rwl_shared_to_excl_spin
, dtrace_rwl_shared_to_excl_block
, dtrace_ls_enabled
= FALSE
;
1678 while (lck
->lck_rw_shared_count
!= 0) {
1681 if (dtrace_ls_initialized
== FALSE
) {
1682 dtrace_ls_initialized
= TRUE
;
1683 dtrace_rwl_shared_to_excl_spin
= (lockstat_probemap
[LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN
] != 0);
1684 dtrace_rwl_shared_to_excl_block
= (lockstat_probemap
[LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK
] != 0);
1685 dtrace_ls_enabled
= dtrace_rwl_shared_to_excl_spin
|| dtrace_rwl_shared_to_excl_block
;
1686 if (dtrace_ls_enabled
) {
1688 * Either sleeping or spinning is happening,
1689 * start a timing of our delay interval now.
1691 readers_at_sleep
= lck
->lck_rw_shared_count
;
1692 wait_interval
= mach_absolute_time();
1697 istate
= ml_get_interrupts_enabled();
1699 deadline
= lck_rw_deadline_for_spin(lck
);
1701 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_SH_TO_EX_SPIN_CODE
) | DBG_FUNC_START
,
1702 trace_lck
, lck
->lck_rw_shared_count
, 0, 0, 0);
1704 while ((still_shared
= lck
->lck_rw_shared_count
) && mach_absolute_time() < deadline
)
1705 lck_rw_lock_pause(istate
);
1707 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_SH_TO_EX_SPIN_CODE
) | DBG_FUNC_END
,
1708 trace_lck
, lck
->lck_rw_shared_count
, 0, 0, 0);
1713 * if we get here, the deadline has expired w/o
1714 * the rw_shared_count having drained to 0
1715 * check to see if we're allowed to do a thread_block
1717 if (lck
->lck_rw_can_sleep
) {
1719 istate
= lck_interlock_lock(lck
);
1721 if (lck
->lck_rw_shared_count
!= 0) {
1722 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_SH_TO_EX_WAIT_CODE
) | DBG_FUNC_START
,
1723 trace_lck
, lck
->lck_rw_shared_count
, 0, 0, 0);
1725 lck
->lck_w_waiting
= TRUE
;
1727 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockUpgrade
);
1728 res
= assert_wait(RW_LOCK_WRITER_EVENT(lck
),
1729 THREAD_UNINT
| THREAD_WAIT_NOREPORT_USER
);
1730 lck_interlock_unlock(lck
, istate
);
1732 if (res
== THREAD_WAITING
) {
1733 res
= thread_block(THREAD_CONTINUE_NULL
);
1736 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_SH_TO_EX_WAIT_CODE
) | DBG_FUNC_END
,
1737 trace_lck
, res
, slept
, 0, 0);
1739 lck_interlock_unlock(lck
, istate
);
1746 * We infer whether we took the sleep/spin path above by checking readers_at_sleep.
1748 if (dtrace_ls_enabled
== TRUE
) {
1750 LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN
, lck
, mach_absolute_time() - wait_interval
, 0);
1752 LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK
, lck
,
1753 mach_absolute_time() - wait_interval
, 1,
1754 (readers_at_sleep
== 0 ? 1 : 0), readers_at_sleep
);
1757 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE
, lck
, 1);
1763 * Routine: lck_rw_lock_exclusive_to_shared
1766 void lck_rw_lock_exclusive_to_shared(lck_rw_t
*lock
)
1768 uint32_t data
, prev
;
1771 data
= atomic_exchange_begin32(&lock
->data
, &prev
, memory_order_release_smp
);
1772 if (data
& LCK_RW_INTERLOCK
) {
1773 atomic_exchange_abort();
1774 lck_rw_interlock_spin(lock
); /* wait for interlock to clear */
1777 data
+= LCK_RW_SHARED_READER
;
1778 if (data
& LCK_RW_WANT_UPGRADE
)
1779 data
&= ~(LCK_RW_WANT_UPGRADE
);
1781 data
&= ~(LCK_RW_WANT_EXCL
);
1782 if (!((prev
& LCK_RW_W_WAITING
) && (prev
& LCK_RW_PRIV_EXCL
)))
1783 data
&= ~(LCK_RW_W_WAITING
);
1784 if (atomic_exchange_complete32(&lock
->data
, prev
, data
, memory_order_release_smp
))
1788 return lck_rw_lock_exclusive_to_shared_gen(lock
, prev
);
1793 * Routine: lck_rw_lock_exclusive_to_shared_gen
1795 * assembly fast path has already dropped
1796 * our exclusive state and bumped lck_rw_shared_count
1797 * all we need to do here is determine if anyone
1798 * needs to be awakened.
1801 lck_rw_lock_exclusive_to_shared_gen(
1803 uint32_t prior_lock_state
)
1805 __kdebug_only
uintptr_t trace_lck
= unslide_for_kdebug(lck
);
1808 fake_lck
= (lck_rw_t
*)&prior_lock_state
;
1810 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_TO_SH_CODE
) | DBG_FUNC_START
,
1811 trace_lck
, fake_lck
->lck_rw_want_write
, fake_lck
->lck_rw_want_upgrade
, 0, 0);
1814 * don't wake up anyone waiting to take the lock exclusively
1815 * since we hold a read count... when the read count drops to 0,
1816 * the writers will be woken.
1818 * wake up any waiting readers if we don't have any writers waiting,
1819 * or the lock is NOT marked as rw_priv_excl (writers have privilege)
1821 if (!(fake_lck
->lck_rw_priv_excl
&& fake_lck
->lck_w_waiting
) && fake_lck
->lck_r_waiting
)
1822 thread_wakeup(RW_LOCK_READER_EVENT(lck
));
1824 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_TO_SH_CODE
) | DBG_FUNC_END
,
1825 trace_lck
, lck
->lck_rw_want_write
, lck
->lck_rw_want_upgrade
, lck
->lck_rw_shared_count
, 0);
1828 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_TO_SHARED_DOWNGRADE
, lck
, 0);
1834 * Routine: lck_rw_try_lock
1839 lck_rw_type_t lck_rw_type
)
1841 if (lck_rw_type
== LCK_RW_TYPE_SHARED
)
1842 return(lck_rw_try_lock_shared(lck
));
1843 else if (lck_rw_type
== LCK_RW_TYPE_EXCLUSIVE
)
1844 return(lck_rw_try_lock_exclusive(lck
));
1846 panic("lck_rw_try_lock(): Invalid rw lock type: %x\n", lck_rw_type
);
1851 * Routine: lck_rw_try_lock_shared
1854 boolean_t
lck_rw_try_lock_shared(lck_rw_t
*lock
)
1856 uint32_t data
, prev
;
1859 data
= atomic_exchange_begin32(&lock
->data
, &prev
, memory_order_acquire_smp
);
1860 if (data
& LCK_RW_INTERLOCK
) {
1861 atomic_exchange_abort();
1862 lck_rw_interlock_spin(lock
);
1865 if (data
& (LCK_RW_WANT_EXCL
| LCK_RW_WANT_UPGRADE
)) {
1866 atomic_exchange_abort();
1867 return FALSE
; /* lock is busy */
1869 data
+= LCK_RW_SHARED_READER
; /* Increment reader refcount */
1870 if (atomic_exchange_complete32(&lock
->data
, prev
, data
, memory_order_acquire_smp
))
1874 current_thread()->rwlock_count
++;
1875 /* There is a 3 instr window where preemption may not notice rwlock_count after cmpxchg */
1877 LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE
, lock
, DTRACE_RW_SHARED
);
1878 #endif /* CONFIG_DTRACE */
1884 * Routine: lck_rw_try_lock_exclusive
1887 boolean_t
lck_rw_try_lock_exclusive(lck_rw_t
*lock
)
1889 uint32_t data
, prev
;
1892 data
= atomic_exchange_begin32(&lock
->data
, &prev
, memory_order_acquire_smp
);
1893 if (data
& LCK_RW_INTERLOCK
) {
1894 atomic_exchange_abort();
1895 lck_rw_interlock_spin(lock
);
1898 if (data
& (LCK_RW_SHARED_MASK
| LCK_RW_WANT_EXCL
| LCK_RW_WANT_UPGRADE
)) {
1899 atomic_exchange_abort();
1900 return FALSE
; /* can't get it */
1902 data
|= LCK_RW_WANT_EXCL
;
1903 if (atomic_exchange_complete32(&lock
->data
, prev
, data
, memory_order_acquire_smp
))
1908 current_thread()->rwlock_count
++;
1910 LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE
, lock
, DTRACE_RW_EXCL
);
1911 #endif /* CONFIG_DTRACE */
1922 case LCK_RW_ASSERT_SHARED
:
1923 if (lck
->lck_rw_shared_count
!= 0) {
1927 case LCK_RW_ASSERT_EXCLUSIVE
:
1928 if ((lck
->lck_rw_want_write
||
1929 lck
->lck_rw_want_upgrade
) &&
1930 lck
->lck_rw_shared_count
== 0) {
1934 case LCK_RW_ASSERT_HELD
:
1935 if (lck
->lck_rw_want_write
||
1936 lck
->lck_rw_want_upgrade
||
1937 lck
->lck_rw_shared_count
!= 0) {
1941 case LCK_RW_ASSERT_NOTHELD
:
1942 if (!(lck
->lck_rw_want_write
||
1943 lck
->lck_rw_want_upgrade
||
1944 lck
->lck_rw_shared_count
!= 0)) {
1952 panic("rw lock (%p)%s held (mode=%u), first word %08x\n", lck
, (type
== LCK_RW_ASSERT_NOTHELD
? "" : " not"), type
, *(uint32_t *)lck
);
1955 /* On return to userspace, this routine is called if the rwlock_count is somehow imbalanced */
1957 lck_rw_clear_promotions_x86(thread_t thread
)
1960 /* It's fatal to leave a RW lock locked and return to userspace */
1961 panic("%u rw lock(s) held on return to userspace for thread %p", thread
->rwlock_count
, thread
);
1963 /* Paper over the issue */
1964 thread
->rwlock_count
= 0;
1965 lck_rw_clear_promotion(thread
, 0);
1970 lck_rw_lock_yield_shared(lck_rw_t
*lck
, boolean_t force_yield
)
1972 lck_rw_assert(lck
, LCK_RW_ASSERT_SHARED
);
1974 if (lck
->lck_rw_want_write
|| lck
->lck_rw_want_upgrade
|| force_yield
) {
1975 lck_rw_unlock_shared(lck
);
1977 lck_rw_lock_shared(lck
);
1985 * Routine: kdp_lck_rw_lock_is_acquired_exclusive
1986 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
1989 kdp_lck_rw_lock_is_acquired_exclusive(lck_rw_t
*lck
) {
1991 panic("panic: rw lock exclusive check done outside of kernel debugger");
1993 return ((lck
->lck_rw_want_upgrade
|| lck
->lck_rw_want_write
) && (lck
->lck_rw_shared_count
== 0)) ? TRUE
: FALSE
;
1997 * Slow path routines for lck_mtx locking and unlocking functions.
1999 * These functions were previously implemented in x86 assembly,
2000 * and some optimizations are in place in this C code so that the compiled code
2001 * is as performant and compact as the assembly version.
2003 * To keep these functions from being inlined on the fast path, all functions directly called by
2004 * the fast paths are marked __attribute__((noinline)). They are also all implemented
2005 * in such a way that the fast path can tail call into them. That way the return address
2006 * does not need to be pushed on the caller's stack and stack optimization can happen in the caller.
2008 * Slow path code is structured in such a way that there are no calls to functions that will return
2009 * in the context of the caller function, i.e. all functions called are either tail call functions
2010 * or inline functions. The tail call functions take fewer than six arguments,
2011 * so that they can be passed in registers and do not need to be pushed on the stack.
2012 * This allows the compiler to not create a stack frame for the functions.
2014 * __improbable and __probable are used to compile the slow path code in such a way that
2015 * the fast path case will be a sequence of instructions with as few jumps as possible,
2016 * to make this case the most optimized even if falling through the slow path.
2020 * Intel lock invariants:
2022 * lck_mtx_waiters: contains the count of threads currently in the mutex waitqueue
2023 * lck_mtx_pri: contains the max priority of all waiters during a contention period
2024 * not cleared on last unlock, but stomped over on next first contention
2025 * lck_mtx_promoted: set when the current lock owner has been promoted
2026 * cleared when lock owner unlocks, set on acquire or wait.
2028 * The lock owner is promoted to the max priority of all its waiters only if it
2029 * was a lower priority when it acquired or was an owner when a waiter waited.
2030 * Max priority is capped at MAXPRI_PROMOTE.
2032 * The last waiter will not be promoted as it is woken up, but the last
2033 * lock owner may not have been the last thread to have been woken up depending on the
2034 * luck of the draw. Therefore a last-owner may still have the promoted-on-wakeup
2037 * TODO: Figure out an algorithm for stopping a lock holder which is already at the right
2038 * priority from dropping priority in the future without having to take thread lock
2043 extern zone_t lck_mtx_zone
;
2047 * N.B.: On x86, statistics are currently recorded for all indirect mutexes.
2048 * Also, only the acquire attempt count (GRP_MTX_STAT_UTIL) is maintained
2049 * as a 64-bit quantity (the new x86 specific statistics are also maintained
2050 * as 32-bit quantities).
2053 * Enable this preprocessor define to record the first miss alone
2054 * By default, we count every miss, hence multiple misses may be
2055 * recorded for a single lock acquire attempt via lck_mtx_lock
2057 #undef LOG_FIRST_MISS_ALONE
2060 * This preprocessor define controls whether the R-M-W update of the
2061 * per-group statistics elements are atomic (LOCK-prefixed)
2062 * Enabled by default.
2064 #define ATOMIC_STAT_UPDATES 1
2068 * Routine: lck_mtx_alloc_init
2077 if ((lck
= (lck_mtx_t
*)zalloc(lck_mtx_zone
)) != 0)
2078 lck_mtx_init(lck
, grp
, attr
);
2080 if ((lck
= (lck_mtx_t
*)kalloc(sizeof(lck_mtx_t
))) != 0)
2081 lck_mtx_init(lck
, grp
, attr
);
2087 * Routine: lck_mtx_free
2094 lck_mtx_destroy(lck
, grp
);
2096 zfree(lck_mtx_zone
, lck
);
2098 kfree(lck
, sizeof(lck_mtx_t
));
2103 * Routine: lck_mtx_ext_init
2111 bzero((void *)lck
, sizeof(lck_mtx_ext_t
));
2113 if ((attr
->lck_attr_val
) & LCK_ATTR_DEBUG
) {
2114 lck
->lck_mtx_deb
.type
= MUTEX_TAG
;
2115 lck
->lck_mtx_attr
|= LCK_MTX_ATTR_DEBUG
;
2118 lck
->lck_mtx_grp
= grp
;
2120 if (grp
->lck_grp_attr
& LCK_GRP_ATTR_STAT
)
2121 lck
->lck_mtx_attr
|= LCK_MTX_ATTR_STAT
;
2123 lck
->lck_mtx
.lck_mtx_is_ext
= 1;
2124 lck
->lck_mtx
.lck_mtx_pad32
= 0xFFFFFFFF;
2128 * Routine: lck_mtx_init
2136 lck_mtx_ext_t
*lck_ext
;
2137 lck_attr_t
*lck_attr
;
2139 if (attr
!= LCK_ATTR_NULL
)
2142 lck_attr
= &LockDefaultLckAttr
;
2144 if ((lck_attr
->lck_attr_val
) & LCK_ATTR_DEBUG
) {
2145 if ((lck_ext
= (lck_mtx_ext_t
*)kalloc(sizeof(lck_mtx_ext_t
))) != 0) {
2146 lck_mtx_ext_init(lck_ext
, grp
, lck_attr
);
2147 lck
->lck_mtx_tag
= LCK_MTX_TAG_INDIRECT
;
2148 lck
->lck_mtx_ptr
= lck_ext
;
2151 lck
->lck_mtx_owner
= 0;
2152 lck
->lck_mtx_state
= 0;
2154 lck
->lck_mtx_pad32
= 0xFFFFFFFF;
2155 lck_grp_reference(grp
);
2156 lck_grp_lckcnt_incr(grp
, LCK_TYPE_MTX
);
2160 * Routine: lck_mtx_init_ext
2165 lck_mtx_ext_t
*lck_ext
,
2169 lck_attr_t
*lck_attr
;
2171 if (attr
!= LCK_ATTR_NULL
)
2174 lck_attr
= &LockDefaultLckAttr
;
2176 if ((lck_attr
->lck_attr_val
) & LCK_ATTR_DEBUG
) {
2177 lck_mtx_ext_init(lck_ext
, grp
, lck_attr
);
2178 lck
->lck_mtx_tag
= LCK_MTX_TAG_INDIRECT
;
2179 lck
->lck_mtx_ptr
= lck_ext
;
2181 lck
->lck_mtx_owner
= 0;
2182 lck
->lck_mtx_state
= 0;
2184 lck
->lck_mtx_pad32
= 0xFFFFFFFF;
2186 lck_grp_reference(grp
);
2187 lck_grp_lckcnt_incr(grp
, LCK_TYPE_MTX
);
2191 lck_mtx_lock_mark_destroyed(
2198 /* convert to destroyed state */
2199 ordered_store_mtx_state_release(mutex
, LCK_MTX_TAG_DESTROYED
);
2203 state
= ordered_load_mtx_state(mutex
);
2204 lck_mtx_interlock_lock(mutex
, &state
);
2206 ordered_store_mtx_state_release(mutex
, LCK_MTX_TAG_DESTROYED
);
2208 enable_preemption();
2212 * Routine: lck_mtx_destroy
2221 if (lck
->lck_mtx_tag
== LCK_MTX_TAG_DESTROYED
)
2224 lck_mtx_assert(lck
, LCK_MTX_ASSERT_NOTOWNED
);
2226 indirect
= (lck
->lck_mtx_tag
== LCK_MTX_TAG_INDIRECT
);
2228 lck_mtx_lock_mark_destroyed(lck
, indirect
);
2231 kfree(lck
->lck_mtx_ptr
, sizeof(lck_mtx_ext_t
));
2232 lck_grp_lckcnt_decr(grp
, LCK_TYPE_MTX
);
2233 lck_grp_deallocate(grp
);
2238 #if DEVELOPMENT | DEBUG
2239 __attribute__((noinline
))
2241 lck_mtx_owner_check_panic(
2244 thread_t owner
= (thread_t
)lock
->lck_mtx_owner
;
2245 panic("Mutex unlock attempted from non-owner thread. Owner=%p lock=%p", owner
, lock
);
2249 __attribute__((always_inline
))
2255 *lock
= &((*lock
)->lck_mtx_ptr
->lck_mtx
);
2256 *state
= ordered_load_mtx_state(*lock
);
2261 * Routine: lck_mtx_unlock_slow
2263 * Unlocks a mutex held by current thread.
2265 * It will wake up waiters if necessary and
2268 * Interlock can be held.
2270 __attribute__((noinline
))
2272 lck_mtx_unlock_slow(
2276 uint32_t state
, prev
;
2277 boolean_t indirect
= FALSE
;
2279 state
= ordered_load_mtx_state(lock
);
2281 /* Is this an indirect mutex? */
2282 if (__improbable(state
== LCK_MTX_TAG_INDIRECT
)) {
2283 indirect
= get_indirect_mutex(&lock
, &state
);
2286 thread
= current_thread();
2288 #if DEVELOPMENT | DEBUG
2289 thread_t owner
= (thread_t
)lock
->lck_mtx_owner
;
2290 if(__improbable(owner
!= thread
))
2291 return lck_mtx_owner_check_panic(lock
);
2294 /* check if it is held as a spinlock */
2295 if (__improbable((state
& LCK_MTX_MLOCKED_MSK
) == 0))
2298 lck_mtx_interlock_lock_clear_flags(lock
, LCK_MTX_MLOCKED_MSK
, &state
);
2301 /* preemption disabled, interlock held and mutex not held */
2304 ordered_store_mtx_owner(lock
, 0);
2305 /* keep original state in prev for later evaluation */
2307 /* release interlock, promotion and clear spin flag */
2308 state
&= (~(LCK_MTX_ILOCKED_MSK
| LCK_MTX_SPIN_MSK
| LCK_MTX_PROMOTED_MSK
));
2309 if ((state
& LCK_MTX_WAITERS_MSK
))
2310 state
-= LCK_MTX_WAITER
; /* decrement waiter count */
2311 ordered_store_mtx_state_release(lock
, state
); /* since I own the interlock, I don't need an atomic update */
2314 /* perform lock statistics after drop to prevent delay */
2316 thread
->mutex_count
--; /* lock statistic */
2317 #endif /* MACH_LDEBUG */
2319 /* check if there are waiters to wake up or priority to drop */
2320 if ((prev
& (LCK_MTX_PROMOTED_MSK
| LCK_MTX_WAITERS_MSK
)))
2321 return lck_mtx_unlock_wakeup_tail(lock
, prev
, indirect
);
2323 /* re-enable preemption */
2324 lck_mtx_unlock_finish_inline(lock
, FALSE
);
2329 #define LCK_MTX_LCK_WAIT_CODE 0x20
2330 #define LCK_MTX_LCK_WAKEUP_CODE 0x21
2331 #define LCK_MTX_LCK_SPIN_CODE 0x22
2332 #define LCK_MTX_LCK_ACQUIRE_CODE 0x23
2333 #define LCK_MTX_LCK_DEMOTE_CODE 0x24
2336 * Routine: lck_mtx_unlock_wakeup_tail
2338 * Invoked on unlock when there is
2339 * contention, i.e. the assembly routine sees
2340 * that mutex->lck_mtx_waiters != 0 or
2341 * that mutex->lck_mtx_promoted != 0
2343 * neither the mutex or interlock is held
2345 * Note that this routine might not be called if there are pending
2346 * waiters which have previously been woken up, and they didn't
2347 * end up boosting the old owner.
2349 * assembly routine previously did the following to mutex:
2350 * (after saving the state in prior_lock_state)
2351 * cleared lck_mtx_promoted
2352 * decremented lck_mtx_waiters if nonzero
2354 * This function needs to be called as a tail call
2355 * to optimize the compiled code.
2357 __attribute__((noinline
))
2359 lck_mtx_unlock_wakeup_tail (
2361 int prior_lock_state
,
2364 __kdebug_only
uintptr_t trace_lck
= unslide_for_kdebug(mutex
);
2368 * prior_lock state is a snapshot of the 2nd word of the
2369 * lock in question... we'll fake up a lock with the bits
2370 * copied into place and carefully not access anything
2371 * beyond whats defined in the second word of a lck_mtx_t
2373 fake_lck
.lck_mtx_state
= prior_lock_state
;
2375 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_MTX_LCK_WAKEUP_CODE
) | DBG_FUNC_START
,
2376 trace_lck
, fake_lck
.lck_mtx_promoted
, fake_lck
.lck_mtx_waiters
, fake_lck
.lck_mtx_pri
, 0);
2378 if (__probable(fake_lck
.lck_mtx_waiters
)) {
2379 kern_return_t did_wake
;
2381 if (fake_lck
.lck_mtx_waiters
> 1)
2382 did_wake
= thread_wakeup_one_with_pri(LCK_MTX_EVENT(mutex
), fake_lck
.lck_mtx_pri
);
2384 did_wake
= thread_wakeup_one(LCK_MTX_EVENT(mutex
));
2386 * The waiters count always precisely matches the number of threads on the waitqueue.
2387 * i.e. we should never see did_wake == KERN_NOT_WAITING.
2389 assert(did_wake
== KERN_SUCCESS
);
2392 /* When lck_mtx_promoted was set, then I as the owner definitely have a promotion */
2393 if (__improbable(fake_lck
.lck_mtx_promoted
)) {
2394 thread_t thread
= current_thread();
2396 spl_t s
= splsched();
2397 thread_lock(thread
);
2399 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_MTX_LCK_DEMOTE_CODE
) | DBG_FUNC_NONE
,
2400 thread_tid(thread
), thread
->promotions
, thread
->sched_flags
& TH_SFLAG_PROMOTED
, 0, 0);
2401 assert(thread
->was_promoted_on_wakeup
== 0);
2402 assert(thread
->promotions
> 0);
2404 assert_promotions_invariant(thread
);
2406 if (--thread
->promotions
== 0)
2407 sched_thread_unpromote(thread
, trace_lck
);
2409 assert_promotions_invariant(thread
);
2411 thread_unlock(thread
);
2415 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_MTX_LCK_WAKEUP_CODE
) | DBG_FUNC_END
,
2416 trace_lck
, 0, mutex
->lck_mtx_waiters
, 0, 0);
2418 lck_mtx_unlock_finish_inline(mutex
, indirect
);
2422 * Routine: lck_mtx_lock_acquire_x86
2424 * Invoked on acquiring the mutex when there is
2425 * contention (i.e. the assembly routine sees that
2426 * that mutex->lck_mtx_waiters != 0 or
2427 * thread->was_promoted_on_wakeup != 0)...
2429 * mutex is owned... interlock is held... preemption is disabled
2431 __attribute__((always_inline
))
2433 lck_mtx_lock_acquire_inline(
2436 __kdebug_only
uintptr_t trace_lck
= unslide_for_kdebug(mutex
);
2439 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_MTX_LCK_ACQUIRE_CODE
) | DBG_FUNC_START
,
2440 trace_lck
, thread
->was_promoted_on_wakeup
, mutex
->lck_mtx_waiters
, mutex
->lck_mtx_pri
, 0);
2442 if (mutex
->lck_mtx_waiters
)
2443 priority
= mutex
->lck_mtx_pri
;
2445 priority
= 0; /* not worth resetting lck_mtx_pri here, it will be reset by next waiter */
2447 /* the priority must have been set correctly by wait */
2448 assert(priority
<= MAXPRI_PROMOTE
);
2449 assert(priority
== 0 || priority
>= BASEPRI_DEFAULT
);
2451 /* if the mutex wasn't owned, then the owner wasn't promoted */
2452 assert(mutex
->lck_mtx_promoted
== 0);
2454 thread_t thread
= (thread_t
)mutex
->lck_mtx_owner
; /* faster than current_thread() */
2456 if (thread
->sched_pri
< priority
|| thread
->was_promoted_on_wakeup
) {
2457 spl_t s
= splsched();
2458 thread_lock(thread
);
2460 if (thread
->was_promoted_on_wakeup
)
2461 assert(thread
->promotions
> 0);
2463 /* Intel only promotes if priority goes up */
2464 if (thread
->sched_pri
< priority
&& thread
->promotion_priority
< priority
) {
2465 /* Remember that I need to drop this promotion on unlock */
2466 mutex
->lck_mtx_promoted
= 1;
2468 if (thread
->promotions
++ == 0) {
2469 /* This is the first promotion for the owner */
2470 sched_thread_promote_to_pri(thread
, priority
, trace_lck
);
2473 * Holder was previously promoted due to a different mutex,
2474 * raise to match this one.
2475 * Or, this thread was promoted on wakeup but someone else
2476 * later contended on mutex at higher priority before we got here
2478 sched_thread_update_promotion_to_pri(thread
, priority
, trace_lck
);
2482 if (thread
->was_promoted_on_wakeup
) {
2483 thread
->was_promoted_on_wakeup
= 0;
2484 if (--thread
->promotions
== 0)
2485 sched_thread_unpromote(thread
, trace_lck
);
2488 thread_unlock(thread
);
2491 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_MTX_LCK_ACQUIRE_CODE
) | DBG_FUNC_END
,
2492 trace_lck
, 0, mutex
->lck_mtx_waiters
, 0, 0);
2496 lck_mtx_lock_acquire_x86(
2499 return lck_mtx_lock_acquire_inline(mutex
);
2503 * Tail call helpers for lock functions that perform
2504 * lck_mtx_lock_acquire followed by the caller's finish routine, to optimize
2505 * the caller's compiled code.
2508 __attribute__((noinline
))
2510 lck_mtx_lock_acquire_tail(
2514 lck_mtx_lock_acquire_inline(mutex
);
2515 lck_mtx_lock_finish_inline(mutex
, ordered_load_mtx_state(mutex
), indirect
);
2518 __attribute__((noinline
))
2520 lck_mtx_try_lock_acquire_tail(
2523 lck_mtx_lock_acquire_inline(mutex
);
2524 lck_mtx_try_lock_finish_inline(mutex
, ordered_load_mtx_state(mutex
));
2529 __attribute__((noinline
))
2531 lck_mtx_convert_spin_acquire_tail(
2534 lck_mtx_lock_acquire_inline(mutex
);
2535 lck_mtx_convert_spin_finish_inline(mutex
, ordered_load_mtx_state(mutex
));
2542 lck_mtx_ilk_unlock_inline(mutex
, ordered_load_mtx_state(mutex
));
2547 lck_mtx_interlock_lock_set_and_clear_flags(
2551 uint32_t *new_state
)
2553 uint32_t state
, prev
;
2557 /* have to wait for interlock to clear */
2558 while (__improbable(state
& (LCK_MTX_ILOCKED_MSK
| xor_flags
))) {
2560 state
= ordered_load_mtx_state(mutex
);
2562 prev
= state
; /* prev contains snapshot for exchange */
2563 state
|= LCK_MTX_ILOCKED_MSK
| xor_flags
; /* pick up interlock */
2564 state
&= ~and_flags
; /* clear flags */
2566 disable_preemption();
2567 if (atomic_compare_exchange32(&mutex
->lck_mtx_state
, prev
, state
, memory_order_acquire_smp
, FALSE
))
2569 enable_preemption();
2571 state
= ordered_load_mtx_state(mutex
);
2578 lck_mtx_interlock_lock_clear_flags(
2581 uint32_t *new_state
)
2583 return lck_mtx_interlock_lock_set_and_clear_flags(mutex
, 0, and_flags
, new_state
);
2587 lck_mtx_interlock_lock(
2589 uint32_t *new_state
)
2591 return lck_mtx_interlock_lock_set_and_clear_flags(mutex
, 0, 0, new_state
);
2595 lck_mtx_interlock_try_lock_set_flags(
2598 uint32_t *new_state
)
2600 uint32_t state
, prev
;
2603 /* have to wait for interlock to clear */
2604 if (state
& (LCK_MTX_ILOCKED_MSK
| or_flags
)) {
2607 prev
= state
; /* prev contains snapshot for exchange */
2608 state
|= LCK_MTX_ILOCKED_MSK
| or_flags
; /* pick up interlock */
2609 disable_preemption();
2610 if (atomic_compare_exchange32(&mutex
->lck_mtx_state
, prev
, state
, memory_order_acquire_smp
, FALSE
)) {
2615 enable_preemption();
2620 lck_mtx_interlock_try_lock(
2622 uint32_t *new_state
)
2624 return lck_mtx_interlock_try_lock_set_flags(mutex
, 0, new_state
);
2628 lck_mtx_interlock_try_lock_disable_interrupts(
2634 *istate
= ml_set_interrupts_enabled(FALSE
);
2635 state
= ordered_load_mtx_state(mutex
);
2637 if (lck_mtx_interlock_try_lock(mutex
, &state
)) {
2640 ml_set_interrupts_enabled(*istate
);
2646 lck_mtx_interlock_unlock_enable_interrupts(
2650 lck_mtx_ilk_unlock(mutex
);
2651 ml_set_interrupts_enabled(istate
);
/*
 * Bump one lock-group statistics counter by one.
 * Uses a relaxed atomic increment when ATOMIC_STAT_UPDATES is enabled,
 * a plain increment otherwise.
 */
static void __inline__
lck_mtx_inc_stats(
	uint64_t* stat)
{
#if ATOMIC_STAT_UPDATES
	os_atomic_inc(stat, relaxed);
#else
	(*stat)++;
#endif
}
2665 static void __inline__
2666 lck_mtx_update_miss(
2667 struct _lck_mtx_ext_
*lock
,
2670 #if LOG_FIRST_MISS_ALONE
2671 if ((*first_miss
& 1) == 0) {
2673 #pragma unused(first_miss)
2675 uint64_t* stat
= &lock
->lck_mtx_grp
->lck_grp_miss
;
2676 lck_mtx_inc_stats(stat
);
2678 #if LOG_FIRST_MISS_ALONE
2684 static void __inline__
2685 lck_mtx_update_direct_wait(
2686 struct _lck_mtx_ext_
*lock
)
2688 uint64_t* stat
= &lock
->lck_mtx_grp
->lck_grp_direct_wait
;
2689 lck_mtx_inc_stats(stat
);
2692 static void __inline__
2693 lck_mtx_update_wait(
2694 struct _lck_mtx_ext_
*lock
,
2697 #if LOG_FIRST_MISS_ALONE
2698 if ((*first_miss
& 2) == 0) {
2700 #pragma unused(first_miss)
2702 uint64_t* stat
= &lock
->lck_mtx_grp
->lck_grp_wait
;
2703 lck_mtx_inc_stats(stat
);
2705 #if LOG_FIRST_MISS_ALONE
2711 static void __inline__
2712 lck_mtx_update_util(
2713 struct _lck_mtx_ext_
*lock
)
2715 uint64_t* stat
= &lock
->lck_mtx_grp
->lck_grp_util
;
2716 lck_mtx_inc_stats(stat
);
2719 __attribute__((noinline
))
2721 lck_mtx_lock_contended(
2724 boolean_t
*first_miss
)
2726 lck_mtx_spinwait_ret_type_t ret
;
2733 lck_mtx_update_miss((struct _lck_mtx_ext_
*)lock
, first_miss
);
2736 ret
= lck_mtx_lock_spinwait_x86(lock
);
2737 state
= ordered_load_mtx_state(lock
);
2739 case LCK_MTX_SPINWAIT_NO_SPIN
:
2741 * owner not on core, lck_mtx_lock_spinwait_x86 didn't even
2745 lck_mtx_update_direct_wait((struct _lck_mtx_ext_
*)lock
);
2748 /* just fall through case LCK_MTX_SPINWAIT_SPUN */
2749 case LCK_MTX_SPINWAIT_SPUN
:
2751 * mutex not acquired but lck_mtx_lock_spinwait_x86 tried to spin
2752 * interlock not held
2754 lck_mtx_interlock_lock(lock
, &state
);
2755 assert(state
& LCK_MTX_ILOCKED_MSK
);
2757 if (state
& LCK_MTX_MLOCKED_MSK
) {
2759 lck_mtx_update_wait((struct _lck_mtx_ext_
*)lock
, first_miss
);
2761 lck_mtx_lock_wait_x86(lock
);
2763 * interlock is not held here.
2768 /* grab the mutex */
2769 state
|= LCK_MTX_MLOCKED_MSK
;
2770 ordered_store_mtx_state_release(lock
, state
);
2771 thread
= current_thread();
2772 ordered_store_mtx_owner(lock
, (uintptr_t)thread
);
2775 thread
->mutex_count
++;
2777 #endif /* MACH_LDEBUG */
2781 case LCK_MTX_SPINWAIT_ACQUIRED
:
2783 * mutex has been acquired by lck_mtx_lock_spinwait_x86
2784 * interlock is held and preemption disabled
2785 * owner is set and mutex marked as locked
2786 * statistics updated too
2790 panic("lck_mtx_lock_spinwait_x86 returned %d for mutex %p\n", ret
, lock
);
2794 * interlock is already acquired here
2797 /* mutex has been acquired */
2798 thread
= (thread_t
)lock
->lck_mtx_owner
;
2799 if (state
& LCK_MTX_WAITERS_MSK
|| thread
->was_promoted_on_wakeup
) {
2800 return lck_mtx_lock_acquire_tail(lock
, indirect
);
2803 /* release the interlock */
2804 lck_mtx_lock_finish_inline(lock
, ordered_load_mtx_state(lock
), indirect
);
2808 * Helper noinline functions for calling
2809 * panic to optimize compiled code.
2812 __attribute__((noinline
))
2817 panic("trying to interlock destroyed mutex (%p)", lock
);
2820 __attribute__((noinline
))
2822 lck_mtx_try_destroyed(
2825 panic("trying to interlock destroyed mutex (%p)", lock
);
2829 __attribute__((always_inline
))
2831 lck_mtx_lock_wait_interlock_to_clear(
2833 uint32_t* new_state
)
2839 state
= ordered_load_mtx_state(lock
);
2840 if (!(state
& (LCK_MTX_ILOCKED_MSK
| LCK_MTX_MLOCKED_MSK
))) {
2844 if (state
& LCK_MTX_MLOCKED_MSK
) {
2845 /* if it is held as mutex, just fail */
2851 __attribute__((always_inline
))
2853 lck_mtx_try_lock_wait_interlock_to_clear(
2855 uint32_t* new_state
)
2861 state
= ordered_load_mtx_state(lock
);
2862 if (state
& (LCK_MTX_MLOCKED_MSK
| LCK_MTX_SPIN_MSK
)) {
2863 /* if it is held as mutex or spin, just fail */
2866 if (!(state
& LCK_MTX_ILOCKED_MSK
)) {
2874 * Routine: lck_mtx_lock_slow
2876 * Locks a mutex for current thread.
2877 * If the lock is contended this function might
2880 * Called with interlock not held.
2882 __attribute__((noinline
))
2887 boolean_t indirect
= FALSE
;
2891 state
= ordered_load_mtx_state(lock
);
2893 /* is the interlock or mutex held */
2894 if (__improbable(state
& ((LCK_MTX_ILOCKED_MSK
| LCK_MTX_MLOCKED_MSK
)))) {
2896 * Note: both LCK_MTX_TAG_DESTROYED and LCK_MTX_TAG_INDIRECT
2897 * have LCK_MTX_ILOCKED_MSK and LCK_MTX_MLOCKED_MSK
2898 * set in state (state == lck_mtx_tag)
2902 /* is the mutex already held and not indirect */
2903 if (__improbable(!(state
& LCK_MTX_ILOCKED_MSK
))){
2904 /* no, must have been the mutex */
2905 return lck_mtx_lock_contended(lock
, indirect
, &first_miss
);
2908 /* check to see if it is marked destroyed */
2909 if (__improbable(state
== LCK_MTX_TAG_DESTROYED
)) {
2910 return lck_mtx_destroyed(lock
);
2913 /* Is this an indirect mutex? */
2914 if (__improbable(state
== LCK_MTX_TAG_INDIRECT
)) {
2915 indirect
= get_indirect_mutex(&lock
, &state
);
2918 lck_mtx_update_util((struct _lck_mtx_ext_
*)lock
);
2920 if (state
& LCK_MTX_SPIN_MSK
) {
2921 /* M_SPIN_MSK was set, so M_ILOCKED_MSK must also be present */
2922 assert(state
& LCK_MTX_ILOCKED_MSK
);
2923 lck_mtx_update_miss((struct _lck_mtx_ext_
*)lock
, &first_miss
);
2927 if (!lck_mtx_lock_wait_interlock_to_clear(lock
, &state
)) {
2928 return lck_mtx_lock_contended(lock
, indirect
, &first_miss
);
2932 /* no - can't be INDIRECT, DESTROYED or locked */
2933 while (__improbable(!lck_mtx_interlock_try_lock_set_flags(lock
, LCK_MTX_MLOCKED_MSK
, &state
))) {
2934 if (!lck_mtx_lock_wait_interlock_to_clear(lock
, &state
)) {
2935 return lck_mtx_lock_contended(lock
, indirect
, &first_miss
);
2939 /* lock and interlock acquired */
2941 thread_t thread
= current_thread();
2942 /* record owner of mutex */
2943 ordered_store_mtx_owner(lock
, (uintptr_t)thread
);
2947 thread
->mutex_count
++; /* lock statistic */
2951 * Check if there are waiters to
2952 * inherit their priority.
2954 if (__improbable(state
& LCK_MTX_WAITERS_MSK
)) {
2955 return lck_mtx_lock_acquire_tail(lock
, indirect
);
2958 /* release the interlock */
2959 lck_mtx_lock_finish_inline(lock
, ordered_load_mtx_state(lock
), indirect
);
2964 __attribute__((noinline
))
2966 lck_mtx_try_lock_slow(
2969 boolean_t indirect
= FALSE
;
2973 state
= ordered_load_mtx_state(lock
);
2975 /* is the interlock or mutex held */
2976 if (__improbable(state
& ((LCK_MTX_ILOCKED_MSK
| LCK_MTX_MLOCKED_MSK
)))) {
2978 * Note: both LCK_MTX_TAG_DESTROYED and LCK_MTX_TAG_INDIRECT
2979 * have LCK_MTX_ILOCKED_MSK and LCK_MTX_MLOCKED_MSK
2980 * set in state (state == lck_mtx_tag)
2983 /* is the mutex already held and not indirect */
2984 if (__improbable(!(state
& LCK_MTX_ILOCKED_MSK
))){
2988 /* check to see if it is marked destroyed */
2989 if (__improbable(state
== LCK_MTX_TAG_DESTROYED
)) {
2990 return lck_mtx_try_destroyed(lock
);
2993 /* Is this an indirect mutex? */
2994 if (__improbable(state
== LCK_MTX_TAG_INDIRECT
)) {
2995 indirect
= get_indirect_mutex(&lock
, &state
);
2998 lck_mtx_update_util((struct _lck_mtx_ext_
*)lock
);
3001 if (!lck_mtx_try_lock_wait_interlock_to_clear(lock
, &state
)) {
3003 lck_mtx_update_miss((struct _lck_mtx_ext_
*)lock
, &first_miss
);
3008 /* no - can't be INDIRECT, DESTROYED or locked */
3009 while (__improbable(!lck_mtx_interlock_try_lock_set_flags(lock
, LCK_MTX_MLOCKED_MSK
, &state
))) {
3010 if (!lck_mtx_try_lock_wait_interlock_to_clear(lock
, &state
)) {
3012 lck_mtx_update_miss((struct _lck_mtx_ext_
*)lock
, &first_miss
);
3017 /* lock and interlock acquired */
3019 thread_t thread
= current_thread();
3020 /* record owner of mutex */
3021 ordered_store_mtx_owner(lock
, (uintptr_t)thread
);
3025 thread
->mutex_count
++; /* lock statistic */
3029 * Check if there are waiters to
3030 * inherit their priority.
3032 if (__improbable(state
& LCK_MTX_WAITERS_MSK
)) {
3033 return lck_mtx_try_lock_acquire_tail(lock
);
3036 /* release the interlock */
3037 lck_mtx_try_lock_finish_inline(lock
, ordered_load_mtx_state(lock
));
3043 __attribute__((noinline
))
3045 lck_mtx_lock_spin_slow(
3048 boolean_t indirect
= FALSE
;
3052 state
= ordered_load_mtx_state(lock
);
3054 /* is the interlock or mutex held */
3055 if (__improbable(state
& ((LCK_MTX_ILOCKED_MSK
| LCK_MTX_MLOCKED_MSK
)))) {
3057 * Note: both LCK_MTX_TAG_DESTROYED and LCK_MTX_TAG_INDIRECT
3058 * have LCK_MTX_ILOCKED_MSK and LCK_MTX_MLOCKED_MSK
3059 * set in state (state == lck_mtx_tag)
3063 /* is the mutex already held and not indirect */
3064 if (__improbable(!(state
& LCK_MTX_ILOCKED_MSK
))){
3065 /* no, must have been the mutex */
3066 return lck_mtx_lock_contended(lock
, indirect
, &first_miss
);
3069 /* check to see if it is marked destroyed */
3070 if (__improbable(state
== LCK_MTX_TAG_DESTROYED
)) {
3071 return lck_mtx_destroyed(lock
);
3074 /* Is this an indirect mutex? */
3075 if (__improbable(state
== LCK_MTX_TAG_INDIRECT
)) {
3076 indirect
= get_indirect_mutex(&lock
, &state
);
3079 lck_mtx_update_util((struct _lck_mtx_ext_
*)lock
);
3081 if (state
& LCK_MTX_SPIN_MSK
) {
3082 /* M_SPIN_MSK was set, so M_ILOCKED_MSK must also be present */
3083 assert(state
& LCK_MTX_ILOCKED_MSK
);
3084 lck_mtx_update_miss((struct _lck_mtx_ext_
*)lock
, &first_miss
);
3088 if (!lck_mtx_lock_wait_interlock_to_clear(lock
, &state
)) {
3089 return lck_mtx_lock_contended(lock
, indirect
, &first_miss
);
3093 /* no - can't be INDIRECT, DESTROYED or locked */
3094 while (__improbable(!lck_mtx_interlock_try_lock_set_flags(lock
, LCK_MTX_SPIN_MSK
, &state
) )) {
3095 if (!lck_mtx_lock_wait_interlock_to_clear(lock
, &state
)) {
3096 return lck_mtx_lock_contended(lock
, indirect
, &first_miss
);
3100 /* lock as spinlock and interlock acquired */
3102 thread_t thread
= current_thread();
3103 /* record owner of mutex */
3104 ordered_store_mtx_owner(lock
, (uintptr_t)thread
);
3108 thread
->mutex_count
++; /* lock statistic */
3113 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN_ACQUIRE
, lock
, 0);
3115 /* return with the interlock held and preemption disabled */
3119 __attribute__((noinline
))
3121 lck_mtx_try_lock_spin_slow(
3124 boolean_t indirect
= FALSE
;
3128 state
= ordered_load_mtx_state(lock
);
3130 /* is the interlock or mutex held */
3131 if (__improbable(state
& ((LCK_MTX_ILOCKED_MSK
| LCK_MTX_MLOCKED_MSK
)))) {
3133 * Note: both LCK_MTX_TAG_DESTROYED and LCK_MTX_TAG_INDIRECT
3134 * have LCK_MTX_ILOCKED_MSK and LCK_MTX_MLOCKED_MSK
3135 * set in state (state == lck_mtx_tag)
3138 /* is the mutex already held and not indirect */
3139 if (__improbable(!(state
& LCK_MTX_ILOCKED_MSK
))){
3143 /* check to see if it is marked destroyed */
3144 if (__improbable(state
== LCK_MTX_TAG_DESTROYED
)) {
3145 return lck_mtx_try_destroyed(lock
);
3148 /* Is this an indirect mutex? */
3149 if (__improbable(state
== LCK_MTX_TAG_INDIRECT
)) {
3150 indirect
= get_indirect_mutex(&lock
, &state
);
3153 lck_mtx_update_util((struct _lck_mtx_ext_
*)lock
);
3156 if (!lck_mtx_try_lock_wait_interlock_to_clear(lock
, &state
)) {
3158 lck_mtx_update_miss((struct _lck_mtx_ext_
*)lock
, &first_miss
);
3163 /* no - can't be INDIRECT, DESTROYED or locked */
3164 while (__improbable(!lck_mtx_interlock_try_lock_set_flags(lock
, LCK_MTX_SPIN_MSK
, &state
))) {
3165 if (!lck_mtx_try_lock_wait_interlock_to_clear(lock
, &state
)) {
3167 lck_mtx_update_miss((struct _lck_mtx_ext_
*)lock
, &first_miss
);
3172 /* lock and interlock acquired */
3174 thread_t thread
= current_thread();
3175 /* record owner of mutex */
3176 ordered_store_mtx_owner(lock
, (uintptr_t)thread
);
3180 thread
->mutex_count
++; /* lock statistic */
3185 LOCKSTAT_RECORD(LS_LCK_MTX_TRY_SPIN_LOCK_ACQUIRE
, lock
, 0);
3191 __attribute__((noinline
))
3193 lck_mtx_convert_spin(
3198 state
= ordered_load_mtx_state(lock
);
3200 /* Is this an indirect mutex? */
3201 if (__improbable(state
== LCK_MTX_TAG_INDIRECT
)) {
3202 /* If so, take indirection */
3203 get_indirect_mutex(&lock
, &state
);
3206 assertf((thread_t
)lock
->lck_mtx_owner
== current_thread(), "lock %p not owned by thread %p (current owner %p)", lock
, current_thread(), (thread_t
)lock
->lck_mtx_owner
);
3208 if (__improbable(state
& LCK_MTX_MLOCKED_MSK
)) {
3209 /* already owned as a mutex, just return */
3213 assert(get_preemption_level() > 0);
3214 assert(state
& LCK_MTX_ILOCKED_MSK
);
3215 assert(state
& LCK_MTX_SPIN_MSK
);
3218 * Check if there are waiters to
3219 * inherit their priority.
3221 if (__improbable(state
& LCK_MTX_WAITERS_MSK
)) {
3222 return lck_mtx_convert_spin_acquire_tail(lock
);
3225 lck_mtx_convert_spin_finish_inline(lock
, ordered_load_mtx_state(lock
));
3230 static inline boolean_t
3231 lck_mtx_lock_grab_mutex(
3236 state
= ordered_load_mtx_state(lock
);
3238 if (!lck_mtx_interlock_try_lock_set_flags(lock
, LCK_MTX_MLOCKED_MSK
, &state
)) {
3242 /* lock and interlock acquired */
3244 thread_t thread
= current_thread();
3245 /* record owner of mutex */
3246 ordered_store_mtx_owner(lock
, (uintptr_t)thread
);
3250 thread
->mutex_count
++; /* lock statistic */
3256 __attribute__((noinline
))
3262 thread_t thread
, owner
;
3265 thread
= current_thread();
3266 state
= ordered_load_mtx_state(lock
);
3268 if (state
== LCK_MTX_TAG_INDIRECT
) {
3269 get_indirect_mutex(&lock
, &state
);
3272 owner
= (thread_t
)lock
->lck_mtx_owner
;
3274 if (type
== LCK_MTX_ASSERT_OWNED
) {
3275 if (owner
!= thread
|| !(state
& (LCK_MTX_ILOCKED_MSK
| LCK_MTX_MLOCKED_MSK
)))
3276 panic("mutex (%p) not owned\n", lock
);
3278 assert (type
== LCK_MTX_ASSERT_NOTOWNED
);
3279 if (owner
== thread
)
3280 panic("mutex (%p) owned\n", lock
);
3285 * Routine: lck_mtx_lock_spinwait_x86
3287 * Invoked trying to acquire a mutex when there is contention but
3288 * the holder is running on another processor. We spin for up to a maximum
3289 * time waiting for the lock to be released.
3291 * Called with the interlock unlocked.
3292 * returns LCK_MTX_SPINWAIT_ACQUIRED if mutex acquired
3293 * returns LCK_MTX_SPINWAIT_SPUN if we spun
3294 * returns LCK_MTX_SPINWAIT_NO_SPIN if we didn't spin due to the holder not running
3296 __attribute__((noinline
))
3297 lck_mtx_spinwait_ret_type_t
3298 lck_mtx_lock_spinwait_x86(
3301 __kdebug_only
uintptr_t trace_lck
= unslide_for_kdebug(mutex
);
3303 uint64_t overall_deadline
;
3304 uint64_t check_owner_deadline
;
3306 lck_mtx_spinwait_ret_type_t retval
= LCK_MTX_SPINWAIT_SPUN
;
3309 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_MTX_LCK_SPIN_CODE
) | DBG_FUNC_START
,
3310 trace_lck
, VM_KERNEL_UNSLIDE_OR_PERM(mutex
->lck_mtx_owner
), mutex
->lck_mtx_waiters
, 0, 0);
3312 cur_time
= mach_absolute_time();
3313 overall_deadline
= cur_time
+ MutexSpin
;
3314 check_owner_deadline
= cur_time
;
3318 * - mutex is locked, and
3319 * - its locked as a spin lock, and
3320 * - owner is running on another processor, and
3321 * - owner (processor) is not idling, and
3322 * - we haven't spun for long enough.
3325 if (__probable(lck_mtx_lock_grab_mutex(mutex
))) {
3326 retval
= LCK_MTX_SPINWAIT_ACQUIRED
;
3329 cur_time
= mach_absolute_time();
3331 if (cur_time
>= overall_deadline
)
3334 if (cur_time
>= check_owner_deadline
&& mutex
->lck_mtx_owner
) {
3338 * We will repeatedly peek at the state of the lock while spinning,
3339 * and we will acquire the interlock to do so.
3340 * The thread that will unlock the mutex will also need to acquire
3341 * the interlock, and we want to avoid to slow it down.
3342 * To avoid to get an interrupt while holding the interlock
3343 * and increase the time we are holding it, we
3344 * will try to acquire the interlock with interrupts disabled.
3345 * This is safe because it is a "try_lock", if we can't acquire
3346 * the interlock we re-enable the interrupts and fail, so it is
3347 * ok to call it even if the interlock was already held.
3349 if (lck_mtx_interlock_try_lock_disable_interrupts(mutex
, &istate
)) {
3351 if ((holder
= (thread_t
) mutex
->lck_mtx_owner
) != NULL
) {
3353 if ( !(holder
->machine
.specFlags
& OnProc
) ||
3354 (holder
->state
& TH_IDLE
)) {
3356 lck_mtx_interlock_unlock_enable_interrupts(mutex
, istate
);
3359 retval
= LCK_MTX_SPINWAIT_NO_SPIN
;
3363 lck_mtx_interlock_unlock_enable_interrupts(mutex
, istate
);
3365 check_owner_deadline
= cur_time
+ (MutexSpin
/ 4);
3376 * We've already kept a count via overall_deadline of how long we spun.
3377 * If dtrace is active, then we compute backwards to decide how
3380 * Note that we record a different probe id depending on whether
3381 * this is a direct or indirect mutex. This allows us to
3382 * penalize only lock groups that have debug/stats enabled
3383 * with dtrace processing if desired.
3385 if (__probable(mutex
->lck_mtx_is_ext
== 0)) {
3386 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN
, mutex
,
3387 mach_absolute_time() - (overall_deadline
- MutexSpin
));
3389 LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_SPIN
, mutex
,
3390 mach_absolute_time() - (overall_deadline
- MutexSpin
));
3392 /* The lockstat acquire event is recorded by the assembly code beneath us. */
3395 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_MTX_LCK_SPIN_CODE
) | DBG_FUNC_END
,
3396 trace_lck
, VM_KERNEL_UNSLIDE_OR_PERM(mutex
->lck_mtx_owner
), mutex
->lck_mtx_waiters
, retval
, 0);
3404 * Routine: lck_mtx_lock_wait_x86
3406 * Invoked in order to wait on contention.
3408 * Called with the interlock locked and
3409 * preemption disabled...
3410 * returns it unlocked and with preemption enabled
3412 * lck_mtx_waiters is 1:1 with a wakeup needing to occur.
3413 * A runnable waiter can exist between wait and acquire
3414 * without a waiters count being set.
3415 * This allows us to never make a spurious wakeup call.
3418 * This avoids taking the thread lock if the owning thread is the same priority.
3419 * This optimizes the case of same-priority threads contending on a lock.
3420 * However, that allows the owning thread to drop in priority while holding the lock,
3421 * because there is no state that the priority change can notice that
3422 * says that the targeted thread holds a contended mutex.
3424 * One possible solution: priority changes could look for some atomic tag
3425 * on the thread saying 'holding contended lock', and then set up a promotion.
3426 * Needs a story for dropping that promotion - the last contended unlock
3427 * has to notice that this has happened.
3429 __attribute__((noinline
))
3431 lck_mtx_lock_wait_x86 (
3435 uint64_t sleep_start
= 0;
3437 if (lockstat_probemap
[LS_LCK_MTX_LOCK_BLOCK
] || lockstat_probemap
[LS_LCK_MTX_EXT_LOCK_BLOCK
]) {
3438 sleep_start
= mach_absolute_time();
3441 thread_t self
= current_thread();
3442 assert(self
->waiting_for_mutex
== NULL
);
3444 self
->waiting_for_mutex
= mutex
;
3446 __kdebug_only
uintptr_t trace_lck
= unslide_for_kdebug(mutex
);
3448 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_MTX_LCK_WAIT_CODE
) | DBG_FUNC_START
,
3449 trace_lck
, VM_KERNEL_UNSLIDE_OR_PERM(mutex
->lck_mtx_owner
),
3450 mutex
->lck_mtx_waiters
, mutex
->lck_mtx_pri
, 0);
3452 integer_t waiter_pri
= self
->sched_pri
;
3453 waiter_pri
= MAX(waiter_pri
, self
->base_pri
);
3454 waiter_pri
= MAX(waiter_pri
, BASEPRI_DEFAULT
);
3455 waiter_pri
= MIN(waiter_pri
, MAXPRI_PROMOTE
);
3457 assert(mutex
->lck_mtx_pri
<= MAXPRI_PROMOTE
);
3459 /* Re-initialize lck_mtx_pri if this is the first contention */
3460 if (mutex
->lck_mtx_waiters
== 0 || mutex
->lck_mtx_pri
<= waiter_pri
)
3461 mutex
->lck_mtx_pri
= waiter_pri
;
3463 thread_t holder
= (thread_t
)mutex
->lck_mtx_owner
;
3465 assert(holder
!= NULL
);
3468 * Intel only causes a promotion when priority needs to change,
3469 * reducing thread lock holds but leaving us vulnerable to the holder
3470 * dropping priority.
3472 if (holder
->sched_pri
< mutex
->lck_mtx_pri
) {
3473 int promote_pri
= mutex
->lck_mtx_pri
;
3475 spl_t s
= splsched();
3476 thread_lock(holder
);
3478 /* Check again in case sched_pri changed */
3479 if (holder
->sched_pri
< promote_pri
&& holder
->promotion_priority
< promote_pri
) {
3480 if (mutex
->lck_mtx_promoted
== 0) {
3481 /* This is the first promotion for this mutex */
3482 mutex
->lck_mtx_promoted
= 1;
3484 if (holder
->promotions
++ == 0) {
3485 /* This is the first promotion for holder */
3486 sched_thread_promote_to_pri(holder
, promote_pri
, trace_lck
);
3489 * Holder was previously promoted due to a different mutex,
3490 * check if it needs to raise to match this one
3492 sched_thread_update_promotion_to_pri(holder
, promote_pri
,
3497 * Holder was previously promoted due to this mutex,
3498 * check if the pri needs to go up
3500 sched_thread_update_promotion_to_pri(holder
, promote_pri
, trace_lck
);
3504 thread_unlock(holder
);
3508 mutex
->lck_mtx_waiters
++;
3510 thread_set_pending_block_hint(self
, kThreadWaitKernelMutex
);
3511 assert_wait(LCK_MTX_EVENT(mutex
), THREAD_UNINT
| THREAD_WAIT_NOREPORT_USER
);
3513 lck_mtx_ilk_unlock(mutex
);
3515 thread_block(THREAD_CONTINUE_NULL
);
3517 self
->waiting_for_mutex
= NULL
;
3519 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_MTX_LCK_WAIT_CODE
) | DBG_FUNC_END
,
3520 trace_lck
, VM_KERNEL_UNSLIDE_OR_PERM(mutex
->lck_mtx_owner
),
3521 mutex
->lck_mtx_waiters
, mutex
->lck_mtx_pri
, 0);
3525 * Record the Dtrace lockstat probe for blocking, block time
3526 * measured from when we were entered.
3529 if (mutex
->lck_mtx_is_ext
== 0) {
3530 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_BLOCK
, mutex
,
3531 mach_absolute_time() - sleep_start
);
3533 LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_BLOCK
, mutex
,
3534 mach_absolute_time() - sleep_start
);
3541 * Routine: kdp_lck_mtx_lock_spin_is_acquired
3542 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
3543 * Returns: TRUE if lock is acquired.
3546 kdp_lck_mtx_lock_spin_is_acquired(lck_mtx_t
*lck
)
3549 panic("panic: kdp_lck_mtx_lock_spin_is_acquired called outside of kernel debugger");
3552 if (lck
->lck_mtx_ilocked
|| lck
->lck_mtx_mlocked
) {
3560 kdp_lck_mtx_find_owner(__unused
struct waitq
* waitq
, event64_t event
, thread_waitinfo_t
* waitinfo
)
3562 lck_mtx_t
* mutex
= LCK_EVENT_TO_MUTEX(event
);
3563 waitinfo
->context
= VM_KERNEL_UNSLIDE_OR_PERM(mutex
);
3564 thread_t holder
= (thread_t
)mutex
->lck_mtx_owner
;
3565 waitinfo
->owner
= thread_tid(holder
);
3569 kdp_rwlck_find_owner(__unused
struct waitq
* waitq
, event64_t event
, thread_waitinfo_t
* waitinfo
)
3571 lck_rw_t
*rwlck
= NULL
;
3572 switch(waitinfo
->wait_type
) {
3573 case kThreadWaitKernelRWLockRead
:
3574 rwlck
= READ_EVENT_TO_RWLOCK(event
);
3576 case kThreadWaitKernelRWLockWrite
:
3577 case kThreadWaitKernelRWLockUpgrade
:
3578 rwlck
= WRITE_EVENT_TO_RWLOCK(event
);
3581 panic("%s was called with an invalid blocking type", __FUNCTION__
);
3584 waitinfo
->context
= VM_KERNEL_UNSLIDE_OR_PERM(rwlck
);
3585 waitinfo
->owner
= 0;