/*
 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *
 *	Locking primitives implementation
 */
#include <mach_ldebug.h>

#include <kern/locks.h>
#include <kern/kalloc.h>
#include <kern/misc_protos.h>
#include <kern/thread.h>
#include <kern/processor.h>
#include <kern/cpu_data.h>
#include <kern/cpu_number.h>
#include <kern/sched_prim.h>
#include <kern/debug.h>

#include <i386/machine_routines.h>	/* machine_timeout_suspended() */
#include <machine/atomic.h>
#include <machine/machine_cpu.h>

#include <sys/kdebug.h>
#include <mach/branch_predicates.h>
/*
 * We need only enough declarations from the BSD-side to be able to
 * test if our probe is active, and to call __dtrace_probe().  Setting
 * NEED_DTRACE_DEFS gets a local copy of those definitions pulled in.
 */
#define NEED_DTRACE_DEFS
#include <../bsd/sys/lockstat.h>

#define DTRACE_RW_SHARED	0x0	/* reader */
#define DTRACE_RW_EXCL		0x1	/* writer */
#define DTRACE_NO_FLAG		0x0	/* not applicable */
#define	LCK_RW_LCK_EXCLUSIVE_CODE	0x100
#define	LCK_RW_LCK_EXCLUSIVE1_CODE	0x101
#define	LCK_RW_LCK_SHARED_CODE		0x102
#define	LCK_RW_LCK_SH_TO_EX_CODE	0x103
#define	LCK_RW_LCK_SH_TO_EX1_CODE	0x104
#define	LCK_RW_LCK_EX_TO_SH_CODE	0x105

#define	LCK_RW_LCK_EX_WRITER_SPIN_CODE	0x106
#define	LCK_RW_LCK_EX_WRITER_WAIT_CODE	0x107
#define	LCK_RW_LCK_EX_READER_SPIN_CODE	0x108
#define	LCK_RW_LCK_EX_READER_WAIT_CODE	0x109
#define	LCK_RW_LCK_SHARED_SPIN_CODE	0x110
#define	LCK_RW_LCK_SHARED_WAIT_CODE	0x111
#define	LCK_RW_LCK_SH_TO_EX_SPIN_CODE	0x112
#define	LCK_RW_LCK_SH_TO_EX_WAIT_CODE	0x113

#define	ANY_LOCK_DEBUG	(USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG)
unsigned int LcksOpts = 0;

#if DEVELOPMENT || DEBUG
unsigned int LckDisablePreemptCheck = 0;
#endif

#if	USLOCK_DEBUG
/*
 *	Perform simple lock checks.
 */
int	uslock_check = 1;
int	max_lock_loops	= 100000000;
decl_simple_lock_data(extern , printf_lock)
decl_simple_lock_data(extern , panic_lock)
#endif	/* USLOCK_DEBUG */

extern unsigned int not_in_kdp;
/*
 *	We often want to know the addresses of the callers
 *	of the various lock routines.  However, this information
 *	is only used for debugging and statistics.
 */
#define	INVALID_PC	((void *) VM_MAX_KERNEL_ADDRESS)
#define	INVALID_THREAD	((void *) VM_MAX_KERNEL_ADDRESS)

#if	ANY_LOCK_DEBUG
#define	OBTAIN_PC(pc)	((pc) = GET_RETURN_PC())
#define	DECL_PC(pc)	pc_t pc;
#else	/* ANY_LOCK_DEBUG */
#ifdef	lint
/*
 *	Eliminate lint complaints about unused local pc variables.
 */
#define	OBTAIN_PC(pc)	++pc
#else	/* lint */
#define	OBTAIN_PC(pc)
#endif	/* lint */
#endif	/* USLOCK_DEBUG */
// Enforce program order of loads and stores.
#define ordered_load(target) _Generic( (target),\
		uint32_t* : __c11_atomic_load((_Atomic uint32_t* )(target), memory_order_relaxed), \
		uintptr_t*: __c11_atomic_load((_Atomic uintptr_t*)(target), memory_order_relaxed) )
#define ordered_store(target, value) _Generic( (target),\
		uint32_t* : __c11_atomic_store((_Atomic uint32_t* )(target), (value), memory_order_relaxed), \
		uintptr_t*: __c11_atomic_store((_Atomic uintptr_t*)(target), (value), memory_order_relaxed) )
/*
 * atomic exchange API is a low level abstraction of the operations
 * to atomically read, modify, and write a pointer.  This abstraction works
 * for both Intel and ARMv8.1 compare and exchange atomic instructions as
 * well as the ARM exclusive instructions.
 *
 * atomic_exchange_begin() - begin exchange and retrieve current value
 * atomic_exchange_complete() - conclude an exchange
 * atomic_exchange_abort() - cancel an exchange started with atomic_exchange_begin()
 */
static uint32_t
atomic_exchange_begin32(uint32_t *target, uint32_t *previous, enum memory_order ord)
{
	uint32_t	val;

	(void)ord;			// Memory order not used
	val = __c11_atomic_load((_Atomic uint32_t *)target, memory_order_relaxed);
	*previous = val;
	return val;
}

static boolean_t
atomic_exchange_complete32(uint32_t *target, uint32_t previous, uint32_t newval, enum memory_order ord)
{
	return __c11_atomic_compare_exchange_strong((_Atomic uint32_t *)target, &previous, newval, ord, memory_order_relaxed);
}

static void
atomic_exchange_abort(void) { }

static boolean_t
atomic_test_and_set32(uint32_t *target, uint32_t test_mask, uint32_t set_mask, enum memory_order ord, boolean_t wait)
{
	uint32_t	value, prev;

	for ( ; ; ) {
		value = atomic_exchange_begin32(target, &prev, ord);
		if (value & test_mask) {
			if (wait)
				cpu_pause();			/* spin waiting for the bits to clear */
			else
				atomic_exchange_abort();	/* give up immediately */
			return FALSE;
		}
		value |= set_mask;
		if (atomic_exchange_complete32(target, prev, value, ord))
			return TRUE;
	}
}
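/*
 * Illustrative sketch only (not part of this lock package): the
 * begin/complete/abort triple is intended to be used as a compare-and-swap
 * retry loop, exactly as the lck_rw_grab_* routines below use it.  The
 * routine name example_set_flag32 is hypothetical.
 */
#if 0
static boolean_t
example_set_flag32(uint32_t *target, uint32_t flag)
{
	uint32_t	data, prev;

	for ( ; ; ) {
		data = atomic_exchange_begin32(target, &prev, memory_order_relaxed);
		if (data & flag) {
			atomic_exchange_abort();	/* flag already set, nothing to update */
			return FALSE;
		}
		data |= flag;
		if (atomic_exchange_complete32(target, prev, data, memory_order_relaxed))
			return TRUE;		/* no intervening writer, our update landed */
		/* *target changed underneath us... re-read and retry */
	}
}
#endif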
/*
 *	Portable lock package implementation of usimple_locks.
 */

#if	USLOCK_DEBUG
#define	USLDBG(stmt)	stmt
void		usld_lock_init(usimple_lock_t, unsigned short);
void		usld_lock_pre(usimple_lock_t, pc_t);
void		usld_lock_post(usimple_lock_t, pc_t);
void		usld_unlock(usimple_lock_t, pc_t);
void		usld_lock_try_pre(usimple_lock_t, pc_t);
void		usld_lock_try_post(usimple_lock_t, pc_t);
int		usld_lock_common_checks(usimple_lock_t, char *);
#else	/* USLOCK_DEBUG */
#define	USLDBG(stmt)
#endif	/* USLOCK_DEBUG */
/*
 * Forward definitions
 */

static void lck_rw_lock_shared_gen(lck_rw_t *lck);
static void lck_rw_lock_exclusive_gen(lck_rw_t *lck);
static boolean_t lck_rw_lock_shared_to_exclusive_success(lck_rw_t *lck);
static boolean_t lck_rw_lock_shared_to_exclusive_failure(lck_rw_t *lck, uint32_t prior_lock_state);
static void lck_rw_lock_exclusive_to_shared_gen(lck_rw_t *lck, uint32_t prior_lock_state);
static lck_rw_type_t lck_rw_done_gen(lck_rw_t *lck, uint32_t prior_lock_state);
void lck_rw_clear_promotions_x86(thread_t thread);
static boolean_t lck_rw_held_read_or_upgrade(lck_rw_t *lock);
static boolean_t lck_rw_grab_want(lck_rw_t *lock);
static boolean_t lck_rw_grab_shared(lck_rw_t *lock);
/*
 *      Routine:        lck_spin_alloc_init
 */
lck_spin_t *
lck_spin_alloc_init(
	lck_grp_t	*grp,
	lck_attr_t	*attr)
{
	lck_spin_t	*lck;

	if ((lck = (lck_spin_t *)kalloc(sizeof(lck_spin_t))) != 0)
		lck_spin_init(lck, grp, attr);

	return(lck);
}

/*
 *      Routine:        lck_spin_free
 */
void
lck_spin_free(
	lck_spin_t	*lck,
	lck_grp_t	*grp)
{
	lck_spin_destroy(lck, grp);
	kfree(lck, sizeof(lck_spin_t));
}

/*
 *      Routine:        lck_spin_init
 */
void
lck_spin_init(
	lck_spin_t	*lck,
	lck_grp_t	*grp,
	__unused lck_attr_t	*attr)
{
	usimple_lock_init((usimple_lock_t) lck, 0);
	lck_grp_reference(grp);
	lck_grp_lckcnt_incr(grp, LCK_TYPE_SPIN);
}

/*
 *      Routine:        lck_spin_destroy
 */
void
lck_spin_destroy(
	lck_spin_t	*lck,
	lck_grp_t	*grp)
{
	if (lck->interlock == LCK_SPIN_TAG_DESTROYED)
		return;
	lck->interlock = LCK_SPIN_TAG_DESTROYED;
	lck_grp_lckcnt_decr(grp, LCK_TYPE_SPIN);
	lck_grp_deallocate(grp);
}

/*
 *      Routine:        lck_spin_lock
 */
void
lck_spin_lock(
	lck_spin_t	*lck)
{
	usimple_lock((usimple_lock_t) lck);
}

/*
 *      Routine:        lck_spin_unlock
 */
void
lck_spin_unlock(
	lck_spin_t	*lck)
{
	usimple_unlock((usimple_lock_t) lck);
}

/*
 *      Routine:        lck_spin_try_lock
 */
boolean_t
lck_spin_try_lock(
	lck_spin_t	*lck)
{
	boolean_t lrval = (boolean_t)usimple_lock_try((usimple_lock_t) lck);
#if	DEVELOPMENT || DEBUG
	if (lrval) {
		pltrace(FALSE);
	}
#endif
	return(lrval);
}
/*
 *      Routine:        lck_spin_assert
 */
void
lck_spin_assert(lck_spin_t *lock, unsigned int type)
{
	thread_t	thread, holder;
	uintptr_t	state;

	if (__improbable(type != LCK_ASSERT_OWNED && type != LCK_ASSERT_NOTOWNED)) {
		panic("lck_spin_assert(): invalid arg (%u)", type);
	}

	state = lock->interlock;
	holder = (thread_t)state;
	thread = current_thread();

	if (type == LCK_ASSERT_OWNED) {
		if (__improbable(holder == THREAD_NULL)) {
			panic("Lock not owned %p = %lx", lock, state);
		}
		if (__improbable(holder != thread)) {
			panic("Lock not owned by current thread %p = %lx", lock, state);
		}
	} else if (type == LCK_ASSERT_NOTOWNED) {
		if (__improbable(holder != THREAD_NULL)) {
			if (holder == thread) {
				panic("Lock owned by current thread %p = %lx", lock, state);
			} else {
				panic("Lock %p owned by thread %p", lock, holder);
			}
		}
	}
}

/*
 *      Routine: kdp_lck_spin_is_acquired
 *      NOT SAFE: To be used only by kernel debugger to avoid deadlock.
 *      Returns: TRUE if lock is acquired.
 */
boolean_t
kdp_lck_spin_is_acquired(lck_spin_t *lck) {
	if (not_in_kdp) {
		panic("panic: spinlock acquired check done outside of kernel debugger");
	}
	return (lck->interlock != 0)? TRUE : FALSE;
}
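/*
 * Illustrative sketch only, assuming a caller-supplied lck_grp_t: the
 * typical life cycle of the lck_spin_* interfaces defined above.  The
 * names example_grp and example_data are hypothetical.
 */
#if 0
static void
example_spin_usage(lck_grp_t *example_grp)
{
	lck_spin_t	*lock;
	int		example_data = 0;

	lock = lck_spin_alloc_init(example_grp, LCK_ATTR_NULL);

	lck_spin_lock(lock);		/* returns with preemption disabled */
	example_data++;			/* short, non-blocking critical section */
	lck_spin_unlock(lock);		/* preemption re-enabled */

	lck_spin_free(lock, example_grp);
}
#endif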
397 * Initialize a usimple_lock.
399 * No change in preemption state.
404 __unused
unsigned short tag
)
406 #ifndef MACHINE_SIMPLE_LOCK
407 USLDBG(usld_lock_init(l
, tag
));
408 hw_lock_init(&l
->interlock
);
410 simple_lock_init((simple_lock_t
)l
,tag
);
414 volatile uint32_t spinlock_owner_cpu
= ~0;
415 volatile usimple_lock_t spinlock_timed_out
;
417 uint32_t spinlock_timeout_NMI(uintptr_t thread_addr
) {
420 for (i
= 0; i
< real_ncpus
; i
++) {
421 if ((cpu_data_ptr
[i
] != NULL
) && ((uintptr_t)cpu_data_ptr
[i
]->cpu_active_thread
== thread_addr
)) {
422 spinlock_owner_cpu
= i
;
423 if ((uint32_t) cpu_number() != i
) {
424 /* Cause NMI and panic on the owner's cpu */
425 NMIPI_panic(cpu_to_cpumask(i
), SPINLOCK_TIMEOUT
);
431 return spinlock_owner_cpu
;
435 * Acquire a usimple_lock.
437 * Returns with preemption disabled. Note
438 * that the hw_lock routines are responsible for
439 * maintaining preemption state.
445 #ifndef MACHINE_SIMPLE_LOCK
449 USLDBG(usld_lock_pre(l
, pc
));
451 if(__improbable(hw_lock_to(&l
->interlock
, LockTimeOutTSC
) == 0)) {
452 boolean_t uslock_acquired
= FALSE
;
453 while (machine_timeout_suspended()) {
455 if ((uslock_acquired
= hw_lock_to(&l
->interlock
, LockTimeOutTSC
)))
459 if (uslock_acquired
== FALSE
) {
461 uintptr_t lowner
= (uintptr_t)l
->interlock
.lock_data
;
462 spinlock_timed_out
= l
;
463 lock_cpu
= spinlock_timeout_NMI(lowner
);
464 panic("Spinlock acquisition timed out: lock=%p, lock owner thread=0x%lx, current_thread: %p, lock owner active on CPU 0x%x, current owner: 0x%lx, time: %llu",
465 l
, lowner
, current_thread(), lock_cpu
, (uintptr_t)l
->interlock
.lock_data
, mach_absolute_time());
468 #if DEVELOPMENT || DEBUG
472 USLDBG(usld_lock_post(l
, pc
));
474 simple_lock((simple_lock_t
)l
);
477 LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE
, l
, 0);
483 * Release a usimple_lock.
485 * Returns with preemption enabled. Note
486 * that the hw_lock routines are responsible for
487 * maintaining preemption state.
493 #ifndef MACHINE_SIMPLE_LOCK
497 USLDBG(usld_unlock(l
, pc
));
498 #if DEVELOPMENT || DEBUG
501 hw_lock_unlock(&l
->interlock
);
503 simple_unlock_rwmb((simple_lock_t
)l
);
509 * Conditionally acquire a usimple_lock.
511 * On success, returns with preemption disabled.
512 * On failure, returns with preemption in the same state
513 * as when first invoked. Note that the hw_lock routines
514 * are responsible for maintaining preemption state.
516 * XXX No stats are gathered on a miss; I preserved this
517 * behavior from the original assembly-language code, but
518 * doesn't it make sense to log misses? XXX
524 #ifndef MACHINE_SIMPLE_LOCK
525 unsigned int success
;
529 USLDBG(usld_lock_try_pre(l
, pc
));
530 if ((success
= hw_lock_try(&l
->interlock
))) {
531 #if DEVELOPMENT || DEBUG
534 USLDBG(usld_lock_try_post(l
, pc
));
538 return(simple_lock_try((simple_lock_t
)l
));
543 * Acquire a usimple_lock while polling for pending TLB flushes
544 * and spinning on a lock.
548 usimple_lock_try_lock_loop(usimple_lock_t l
)
550 boolean_t istate
= ml_get_interrupts_enabled();
551 while (!simple_lock_try((l
))) {
553 handle_pending_TLB_flushes();
/*
 *	States of a usimple_lock.  The default when initializing
 *	a usimple_lock is setting it up for debug checking.
 */
#define	USLOCK_CHECKED		0x0001		/* lock is being checked */
#define	USLOCK_TAKEN		0x0002		/* lock has been taken */
#define	USLOCK_INIT		0xBAA0		/* lock has been initialized */
#define	USLOCK_INITIALIZED	(USLOCK_INIT|USLOCK_CHECKED)
#define	USLOCK_CHECKING(l)	(uslock_check &&	\
				 ((l)->debug.state & USLOCK_CHECKED))

/*
 *	Trace activities of a particularly interesting lock.
 */
void	usl_trace(usimple_lock_t, int, pc_t, const char *);
577 * Initialize the debugging information contained
583 __unused
unsigned short tag
)
585 if (l
== USIMPLE_LOCK_NULL
)
586 panic("lock initialization: null lock pointer");
587 l
->lock_type
= USLOCK_TAG
;
588 l
->debug
.state
= uslock_check
? USLOCK_INITIALIZED
: 0;
589 l
->debug
.lock_cpu
= l
->debug
.unlock_cpu
= 0;
590 l
->debug
.lock_pc
= l
->debug
.unlock_pc
= INVALID_PC
;
591 l
->debug
.lock_thread
= l
->debug
.unlock_thread
= INVALID_THREAD
;
592 l
->debug
.duration
[0] = l
->debug
.duration
[1] = 0;
593 l
->debug
.unlock_cpu
= l
->debug
.unlock_cpu
= 0;
594 l
->debug
.unlock_pc
= l
->debug
.unlock_pc
= INVALID_PC
;
595 l
->debug
.unlock_thread
= l
->debug
.unlock_thread
= INVALID_THREAD
;
600 * These checks apply to all usimple_locks, not just
601 * those with USLOCK_CHECKED turned on.
604 usld_lock_common_checks(
608 if (l
== USIMPLE_LOCK_NULL
)
609 panic("%s: null lock pointer", caller
);
610 if (l
->lock_type
!= USLOCK_TAG
)
611 panic("%s: %p is not a usimple lock, 0x%x", caller
, l
, l
->lock_type
);
612 if (!(l
->debug
.state
& USLOCK_INIT
))
613 panic("%s: %p is not an initialized lock, 0x%x", caller
, l
, l
->debug
.state
);
614 return USLOCK_CHECKING(l
);
619 * Debug checks on a usimple_lock just before attempting
628 char caller
[] = "usimple_lock";
631 if (!usld_lock_common_checks(l
, caller
))
	 * Note that we have a weird case where we are getting a lock when we are
	 * in the process of putting the system to sleep. We are running with no
	 * current threads, therefore we can't tell if we are trying to retake a lock
	 * we have or someone on the other processor has it. Therefore we just
	 * ignore this test if the locking thread is 0.
642 if ((l
->debug
.state
& USLOCK_TAKEN
) && l
->debug
.lock_thread
&&
643 l
->debug
.lock_thread
== (void *) current_thread()) {
644 printf("%s: lock %p already locked (at %p) by",
645 caller
, l
, l
->debug
.lock_pc
);
646 printf(" current thread %p (new attempt at pc %p)\n",
647 l
->debug
.lock_thread
, pc
);
650 mp_disable_preemption();
651 usl_trace(l
, cpu_number(), pc
, caller
);
652 mp_enable_preemption();
657 * Debug checks on a usimple_lock just after acquiring it.
659 * Pre-emption has been disabled at this point,
660 * so we are safe in using cpu_number.
668 char caller
[] = "successful usimple_lock";
671 if (!usld_lock_common_checks(l
, caller
))
674 if (!((l
->debug
.state
& ~USLOCK_TAKEN
) == USLOCK_INITIALIZED
))
675 panic("%s: lock %p became uninitialized",
677 if ((l
->debug
.state
& USLOCK_TAKEN
))
678 panic("%s: lock 0x%p became TAKEN by someone else",
681 mycpu
= cpu_number();
682 l
->debug
.lock_thread
= (void *)current_thread();
683 l
->debug
.state
|= USLOCK_TAKEN
;
684 l
->debug
.lock_pc
= pc
;
685 l
->debug
.lock_cpu
= mycpu
;
687 usl_trace(l
, mycpu
, pc
, caller
);
692 * Debug checks on a usimple_lock just before
693 * releasing it. Note that the caller has not
694 * yet released the hardware lock.
696 * Preemption is still disabled, so there's
697 * no problem using cpu_number.
705 char caller
[] = "usimple_unlock";
708 if (!usld_lock_common_checks(l
, caller
))
711 mycpu
= cpu_number();
713 if (!(l
->debug
.state
& USLOCK_TAKEN
))
714 panic("%s: lock 0x%p hasn't been taken",
716 if (l
->debug
.lock_thread
!= (void *) current_thread())
717 panic("%s: unlocking lock 0x%p, owned by thread %p",
718 caller
, l
, l
->debug
.lock_thread
);
719 if (l
->debug
.lock_cpu
!= mycpu
) {
720 printf("%s: unlocking lock 0x%p on cpu 0x%x",
722 printf(" (acquired on cpu 0x%x)\n", l
->debug
.lock_cpu
);
725 usl_trace(l
, mycpu
, pc
, caller
);
727 l
->debug
.unlock_thread
= l
->debug
.lock_thread
;
728 l
->debug
.lock_thread
= INVALID_PC
;
729 l
->debug
.state
&= ~USLOCK_TAKEN
;
730 l
->debug
.unlock_pc
= pc
;
731 l
->debug
.unlock_cpu
= mycpu
;
736 * Debug checks on a usimple_lock just before
737 * attempting to acquire it.
739 * Preemption isn't guaranteed to be disabled.
746 char caller
[] = "usimple_lock_try";
748 if (!usld_lock_common_checks(l
, caller
))
750 mp_disable_preemption();
751 usl_trace(l
, cpu_number(), pc
, caller
);
752 mp_enable_preemption();
757 * Debug checks on a usimple_lock just after
758 * successfully attempting to acquire it.
760 * Preemption has been disabled by the
761 * lock acquisition attempt, so it's safe
770 char caller
[] = "successful usimple_lock_try";
772 if (!usld_lock_common_checks(l
, caller
))
775 if (!((l
->debug
.state
& ~USLOCK_TAKEN
) == USLOCK_INITIALIZED
))
776 panic("%s: lock 0x%p became uninitialized",
778 if ((l
->debug
.state
& USLOCK_TAKEN
))
779 panic("%s: lock 0x%p became TAKEN by someone else",
782 mycpu
= cpu_number();
783 l
->debug
.lock_thread
= (void *) current_thread();
784 l
->debug
.state
|= USLOCK_TAKEN
;
785 l
->debug
.lock_pc
= pc
;
786 l
->debug
.lock_cpu
= mycpu
;
788 usl_trace(l
, mycpu
, pc
, caller
);
793 * For very special cases, set traced_lock to point to a
794 * specific lock of interest. The result is a series of
795 * XPRs showing lock operations on that lock. The lock_seq
796 * value is used to show the order of those operations.
798 usimple_lock_t traced_lock
;
799 unsigned int lock_seq
;
806 const char * op_name
)
808 if (traced_lock
== l
) {
810 "seq %d, cpu %d, %s @ %x\n",
811 (uintptr_t) lock_seq
, (uintptr_t) mycpu
,
812 (uintptr_t) op_name
, (uintptr_t) pc
, 0);
818 #endif /* USLOCK_DEBUG */
821 * Routine: lck_rw_alloc_init
829 if ((lck
= (lck_rw_t
*)kalloc(sizeof(lck_rw_t
))) != 0) {
830 bzero(lck
, sizeof(lck_rw_t
));
831 lck_rw_init(lck
, grp
, attr
);
838 * Routine: lck_rw_free
844 lck_rw_destroy(lck
, grp
);
845 kfree(lck
, sizeof(lck_rw_t
));
849 * Routine: lck_rw_init
857 lck_attr_t
*lck_attr
= (attr
!= LCK_ATTR_NULL
) ?
858 attr
: &LockDefaultLckAttr
;
860 hw_lock_byte_init(&lck
->lck_rw_interlock
);
861 lck
->lck_rw_want_write
= FALSE
;
862 lck
->lck_rw_want_upgrade
= FALSE
;
863 lck
->lck_rw_shared_count
= 0;
864 lck
->lck_rw_can_sleep
= TRUE
;
865 lck
->lck_r_waiting
= lck
->lck_w_waiting
= 0;
867 lck
->lck_rw_priv_excl
= ((lck_attr
->lck_attr_val
&
868 LCK_ATTR_RW_SHARED_PRIORITY
) == 0);
870 lck_grp_reference(grp
);
871 lck_grp_lckcnt_incr(grp
, LCK_TYPE_RW
);
875 * Routine: lck_rw_destroy
882 if (lck
->lck_rw_tag
== LCK_RW_TAG_DESTROYED
)
885 lck_rw_assert(lck
, LCK_RW_ASSERT_NOTHELD
);
887 lck
->lck_rw_tag
= LCK_RW_TAG_DESTROYED
;
888 lck_grp_lckcnt_decr(grp
, LCK_TYPE_RW
);
889 lck_grp_deallocate(grp
);
894 * Sleep locks. These use the same data structure and algorithm
895 * as the spin locks, but the process sleeps while it is waiting
896 * for the lock. These work on uniprocessor systems.
899 #define DECREMENTER_TIMEOUT 1000000
902 * We disable interrupts while holding the RW interlock to prevent an
903 * interrupt from exacerbating hold time.
904 * Hence, local helper functions lck_interlock_lock()/lck_interlock_unlock().
906 static inline boolean_t
907 lck_interlock_lock(lck_rw_t
*lck
)
911 istate
= ml_set_interrupts_enabled(FALSE
);
912 hw_lock_byte_lock(&lck
->lck_rw_interlock
);
917 lck_interlock_unlock(lck_rw_t
*lck
, boolean_t istate
)
919 hw_lock_byte_unlock(&lck
->lck_rw_interlock
);
920 ml_set_interrupts_enabled(istate
);
924 * This inline is used when busy-waiting for an rw lock.
925 * If interrupts were disabled when the lock primitive was called,
926 * we poll the IPI handler for pending tlb flushes.
927 * XXX This is a hack to avoid deadlocking on the pmap_system_lock.
930 lck_rw_lock_pause(boolean_t interrupts_enabled
)
932 if (!interrupts_enabled
)
933 handle_pending_TLB_flushes();
937 static inline boolean_t
938 lck_rw_held_read_or_upgrade(lck_rw_t
*lock
)
940 if (ordered_load(&lock
->data
) & (LCK_RW_SHARED_MASK
| LCK_RW_INTERLOCK
| LCK_RW_WANT_UPGRADE
))
946 * compute the deadline to spin against when
947 * waiting for a change of state on a lck_rw_t
949 static inline uint64_t
950 lck_rw_deadline_for_spin(lck_rw_t
*lck
)
952 if (lck
->lck_rw_can_sleep
) {
953 if (lck
->lck_r_waiting
|| lck
->lck_w_waiting
|| lck
->lck_rw_shared_count
> machine_info
.max_cpus
) {
955 * there are already threads waiting on this lock... this
956 * implies that they have spun beyond their deadlines waiting for
957 * the desired state to show up so we will not bother spinning at this time...
959 * the current number of threads sharing this lock exceeds our capacity to run them
960 * concurrently and since all states we're going to spin for require the rw_shared_count
961 * to be at 0, we'll not bother spinning since the latency for this to happen is
964 return (mach_absolute_time());
966 return (mach_absolute_time() + MutexSpin
);
968 return (mach_absolute_time() + (100000LL * 1000000000LL));
973 * Spin while interlock is held.
977 lck_rw_interlock_spin(lck_rw_t
*lock
)
979 while (ordered_load(&lock
->data
) & LCK_RW_INTERLOCK
) {
985 lck_rw_grab_want(lck_rw_t
*lock
)
990 data
= atomic_exchange_begin32(&lock
->data
, &prev
, memory_order_relaxed
);
991 if ((data
& LCK_RW_INTERLOCK
) == 0)
993 atomic_exchange_abort();
994 lck_rw_interlock_spin(lock
);
996 if (data
& LCK_RW_WANT_WRITE
) {
997 atomic_exchange_abort();
1000 data
|= LCK_RW_WANT_WRITE
;
1001 return atomic_exchange_complete32(&lock
->data
, prev
, data
, memory_order_relaxed
);
1005 lck_rw_grab_shared(lck_rw_t
*lock
)
1007 uint32_t data
, prev
;
1010 data
= atomic_exchange_begin32(&lock
->data
, &prev
, memory_order_acquire_smp
);
1011 if ((data
& LCK_RW_INTERLOCK
) == 0)
1013 atomic_exchange_abort();
1014 lck_rw_interlock_spin(lock
);
1016 if (data
& (LCK_RW_WANT_WRITE
| LCK_RW_WANT_UPGRADE
)) {
1017 if (((data
& LCK_RW_SHARED_MASK
) == 0) || (data
& LCK_RW_PRIV_EXCL
)) {
1018 atomic_exchange_abort();
1022 data
+= LCK_RW_SHARED_READER
;
1023 return atomic_exchange_complete32(&lock
->data
, prev
, data
, memory_order_acquire_smp
);
1027 * Routine: lck_rw_lock_exclusive
1030 lck_rw_lock_exclusive_gen(
1033 __kdebug_only
uintptr_t trace_lck
= VM_KERNEL_UNSLIDE_OR_PERM(lck
);
1034 uint64_t deadline
= 0;
1038 wait_result_t res
= 0;
1039 boolean_t istate
= -1;
1042 boolean_t dtrace_ls_initialized
= FALSE
;
1043 boolean_t dtrace_rwl_excl_spin
, dtrace_rwl_excl_block
, dtrace_ls_enabled
= FALSE
;
1044 uint64_t wait_interval
= 0;
1045 int readers_at_sleep
= 0;
1049 * Try to acquire the lck_rw_want_write bit.
1051 while ( !lck_rw_grab_want(lck
)) {
1054 if (dtrace_ls_initialized
== FALSE
) {
1055 dtrace_ls_initialized
= TRUE
;
1056 dtrace_rwl_excl_spin
= (lockstat_probemap
[LS_LCK_RW_LOCK_EXCL_SPIN
] != 0);
1057 dtrace_rwl_excl_block
= (lockstat_probemap
[LS_LCK_RW_LOCK_EXCL_BLOCK
] != 0);
1058 dtrace_ls_enabled
= dtrace_rwl_excl_spin
|| dtrace_rwl_excl_block
;
1059 if (dtrace_ls_enabled
) {
1061 * Either sleeping or spinning is happening,
1062 * start a timing of our delay interval now.
1064 readers_at_sleep
= lck
->lck_rw_shared_count
;
1065 wait_interval
= mach_absolute_time();
1070 istate
= ml_get_interrupts_enabled();
1072 deadline
= lck_rw_deadline_for_spin(lck
);
1074 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_WRITER_SPIN_CODE
) | DBG_FUNC_START
, trace_lck
, 0, 0, 0, 0);
1076 while (((gotlock
= lck_rw_grab_want(lck
)) == 0) && mach_absolute_time() < deadline
)
1077 lck_rw_lock_pause(istate
);
1079 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_WRITER_SPIN_CODE
) | DBG_FUNC_END
, trace_lck
, 0, 0, gotlock
, 0);
1084 * if we get here, the deadline has expired w/o us
1085 * being able to grab the lock exclusively
1086 * check to see if we're allowed to do a thread_block
1088 if (lck
->lck_rw_can_sleep
) {
1090 istate
= lck_interlock_lock(lck
);
1092 if (lck
->lck_rw_want_write
) {
1094 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_WRITER_WAIT_CODE
) | DBG_FUNC_START
, trace_lck
, 0, 0, 0, 0);
1096 lck
->lck_w_waiting
= TRUE
;
1098 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite
);
1099 res
= assert_wait(RW_LOCK_WRITER_EVENT(lck
), THREAD_UNINT
);
1100 lck_interlock_unlock(lck
, istate
);
1102 if (res
== THREAD_WAITING
) {
1103 res
= thread_block(THREAD_CONTINUE_NULL
);
1106 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_WRITER_WAIT_CODE
) | DBG_FUNC_END
, trace_lck
, res
, slept
, 0, 0);
1108 lck
->lck_rw_want_write
= TRUE
;
1109 lck_interlock_unlock(lck
, istate
);
1115 * Wait for readers (and upgrades) to finish...
1116 * the test for these conditions must be done simultaneously with
1117 * a check of the interlock not being held since
1118 * the rw_shared_count will drop to 0 first and then want_upgrade
1119 * will be set to 1 in the shared_to_exclusive scenario... those
1120 * adjustments are done behind the interlock and represent an
1121 * atomic change in state and must be considered as such
1122 * however, once we see the read count at 0, the want_upgrade not set
1123 * and the interlock not held, we are safe to proceed
1125 while (lck_rw_held_read_or_upgrade(lck
)) {
1129 * Either sleeping or spinning is happening, start
1130 * a timing of our delay interval now. If we set it
1131 * to -1 we don't have accurate data so we cannot later
1132 * decide to record a dtrace spin or sleep event.
1134 if (dtrace_ls_initialized
== FALSE
) {
1135 dtrace_ls_initialized
= TRUE
;
1136 dtrace_rwl_excl_spin
= (lockstat_probemap
[LS_LCK_RW_LOCK_EXCL_SPIN
] != 0);
1137 dtrace_rwl_excl_block
= (lockstat_probemap
[LS_LCK_RW_LOCK_EXCL_BLOCK
] != 0);
1138 dtrace_ls_enabled
= dtrace_rwl_excl_spin
|| dtrace_rwl_excl_block
;
1139 if (dtrace_ls_enabled
) {
1141 * Either sleeping or spinning is happening,
1142 * start a timing of our delay interval now.
1144 readers_at_sleep
= lck
->lck_rw_shared_count
;
1145 wait_interval
= mach_absolute_time();
1150 istate
= ml_get_interrupts_enabled();
1152 deadline
= lck_rw_deadline_for_spin(lck
);
1154 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_READER_SPIN_CODE
) | DBG_FUNC_START
, trace_lck
, 0, 0, 0, 0);
1156 while ((lockheld
= lck_rw_held_read_or_upgrade(lck
)) && mach_absolute_time() < deadline
)
1157 lck_rw_lock_pause(istate
);
1159 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_READER_SPIN_CODE
) | DBG_FUNC_END
, trace_lck
, 0, 0, lockheld
, 0);
1164 * if we get here, the deadline has expired w/o us
1165 * being able to grab the lock exclusively
1166 * check to see if we're allowed to do a thread_block
1168 if (lck
->lck_rw_can_sleep
) {
1170 istate
= lck_interlock_lock(lck
);
1172 if (lck
->lck_rw_shared_count
!= 0 || lck
->lck_rw_want_upgrade
) {
1173 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_READER_WAIT_CODE
) | DBG_FUNC_START
, trace_lck
, 0, 0, 0, 0);
1175 lck
->lck_w_waiting
= TRUE
;
1177 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite
);
1178 res
= assert_wait(RW_LOCK_WRITER_EVENT(lck
), THREAD_UNINT
);
1179 lck_interlock_unlock(lck
, istate
);
1181 if (res
== THREAD_WAITING
) {
1182 res
= thread_block(THREAD_CONTINUE_NULL
);
1185 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_READER_WAIT_CODE
) | DBG_FUNC_END
, trace_lck
, res
, slept
, 0, 0);
1187 lck_interlock_unlock(lck
, istate
);
1189 * must own the lock now, since we checked for
1190 * readers or upgrade owner behind the interlock
1191 * no need for a call to 'lck_rw_held_read_or_upgrade'
1200 * Decide what latencies we suffered that are Dtrace events.
1201 * If we have set wait_interval, then we either spun or slept.
1202 * At least we get out from under the interlock before we record
1203 * which is the best we can do here to minimize the impact
1205 * If we have set wait_interval to -1, then dtrace was not enabled when we
1206 * started sleeping/spinning so we don't record this event.
1208 if (dtrace_ls_enabled
== TRUE
) {
1210 LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_EXCL_SPIN
, lck
,
1211 mach_absolute_time() - wait_interval
, 1);
1214 * For the blocking case, we also record if when we blocked
1215 * it was held for read or write, and how many readers.
1216 * Notice that above we recorded this before we dropped
1217 * the interlock so the count is accurate.
1219 LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_EXCL_BLOCK
, lck
,
1220 mach_absolute_time() - wait_interval
, 1,
1221 (readers_at_sleep
== 0 ? 1 : 0), readers_at_sleep
);
1224 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE
, lck
, 1);
1229 * Routine: lck_rw_done
1232 lck_rw_type_t
lck_rw_done(lck_rw_t
*lock
)
1234 uint32_t data
, prev
;
1237 data
= atomic_exchange_begin32(&lock
->data
, &prev
, memory_order_release_smp
);
1238 if (data
& LCK_RW_INTERLOCK
) { /* wait for interlock to clear */
1239 atomic_exchange_abort();
1240 lck_rw_interlock_spin(lock
);
1243 if (data
& LCK_RW_SHARED_MASK
) {
1244 data
-= LCK_RW_SHARED_READER
;
1245 if ((data
& LCK_RW_SHARED_MASK
) == 0) /* if reader count has now gone to 0, check for waiters */
1247 } else { /* if reader count == 0, must be exclusive lock */
1248 if (data
& LCK_RW_WANT_UPGRADE
) {
1249 data
&= ~(LCK_RW_WANT_UPGRADE
);
1251 if (data
& LCK_RW_WANT_WRITE
)
1252 data
&= ~(LCK_RW_WANT_EXCL
);
1253 else /* lock is not 'owned', panic */
1254 panic("Releasing non-exclusive RW lock without a reader refcount!");
1257 if (prev
& LCK_RW_W_WAITING
) {
1258 data
&= ~(LCK_RW_W_WAITING
);
1259 if ((prev
& LCK_RW_PRIV_EXCL
) == 0)
1260 data
&= ~(LCK_RW_R_WAITING
);
1262 data
&= ~(LCK_RW_R_WAITING
);
1264 if (atomic_exchange_complete32(&lock
->data
, prev
, data
, memory_order_release_smp
))
1268 return lck_rw_done_gen(lock
, prev
);
1272 * Routine: lck_rw_done_gen
1274 * called from lck_rw_done()
1275 * prior_lock_state is the value in the 1st
1276 * word of the lock at the time of a successful
1277 * atomic compare and exchange with the new value...
1278 * it represents the state of the lock before we
1279 * decremented the rw_shared_count or cleared either
1280 * rw_want_upgrade or rw_want_write and
1281 * the lck_x_waiting bits... since the wrapper
1282 * routine has already changed the state atomically,
1283 * we just need to decide if we should
1284 * wake up anyone and what value to return... we do
1285 * this by examining the state of the lock before
1288 static lck_rw_type_t
1291 uint32_t prior_lock_state
)
1294 lck_rw_type_t lock_type
;
1296 uint32_t rwlock_count
;
	 * prior_lock_state is a snapshot of the 1st word of the
	 * lock in question... we'll fake up a pointer to it
	 * and carefully not access anything beyond what's defined
	 * in the first word of a lck_rw_t
1304 fake_lck
= (lck_rw_t
*)&prior_lock_state
;
1306 if (fake_lck
->lck_rw_shared_count
<= 1) {
1307 if (fake_lck
->lck_w_waiting
)
1308 thread_wakeup(RW_LOCK_WRITER_EVENT(lck
));
1310 if (!(fake_lck
->lck_rw_priv_excl
&& fake_lck
->lck_w_waiting
) && fake_lck
->lck_r_waiting
)
1311 thread_wakeup(RW_LOCK_READER_EVENT(lck
));
1313 if (fake_lck
->lck_rw_shared_count
)
1314 lock_type
= LCK_RW_TYPE_SHARED
;
1316 lock_type
= LCK_RW_TYPE_EXCLUSIVE
;
1318 /* Check if dropping the lock means that we need to unpromote */
1319 thread
= current_thread();
1320 rwlock_count
= thread
->rwlock_count
--;
1322 if (rwlock_count
== 0) {
1323 panic("rw lock count underflow for thread %p", thread
);
1326 if ((rwlock_count
== 1 /* field now 0 */) && (thread
->sched_flags
& TH_SFLAG_RW_PROMOTED
)) {
1327 /* sched_flags checked without lock, but will be rechecked while clearing */
1328 lck_rw_clear_promotion(thread
);
1332 LOCKSTAT_RECORD(LS_LCK_RW_DONE_RELEASE
, lck
, lock_type
== LCK_RW_TYPE_SHARED
? 0 : 1);
1340 * Routine: lck_rw_unlock
1345 lck_rw_type_t lck_rw_type
)
1347 if (lck_rw_type
== LCK_RW_TYPE_SHARED
)
1348 lck_rw_unlock_shared(lck
);
1349 else if (lck_rw_type
== LCK_RW_TYPE_EXCLUSIVE
)
1350 lck_rw_unlock_exclusive(lck
);
1352 panic("lck_rw_unlock(): Invalid RW lock type: %d\n", lck_rw_type
);
1357 * Routine: lck_rw_unlock_shared
1360 lck_rw_unlock_shared(
1365 assertf(lck
->lck_rw_shared_count
> 0, "lck %p has shared_count=0x%x", lck
, lck
->lck_rw_shared_count
);
1366 ret
= lck_rw_done(lck
);
1368 if (ret
!= LCK_RW_TYPE_SHARED
)
1369 panic("lck_rw_unlock_shared(): lock %p held in mode: %d\n", lck
, ret
);
1374 * Routine: lck_rw_unlock_exclusive
1377 lck_rw_unlock_exclusive(
1382 ret
= lck_rw_done(lck
);
1384 if (ret
!= LCK_RW_TYPE_EXCLUSIVE
)
1385 panic("lck_rw_unlock_exclusive(): lock held in mode: %d\n", ret
);
1390 * Routine: lck_rw_lock
1395 lck_rw_type_t lck_rw_type
)
1397 if (lck_rw_type
== LCK_RW_TYPE_SHARED
)
1398 lck_rw_lock_shared(lck
);
1399 else if (lck_rw_type
== LCK_RW_TYPE_EXCLUSIVE
)
1400 lck_rw_lock_exclusive(lck
);
1402 panic("lck_rw_lock(): Invalid RW lock type: %x\n", lck_rw_type
);
1406 * Routine: lck_rw_lock_shared
1409 lck_rw_lock_shared(lck_rw_t
*lock
)
1411 uint32_t data
, prev
;
1413 current_thread()->rwlock_count
++;
1415 data
= atomic_exchange_begin32(&lock
->data
, &prev
, memory_order_acquire_smp
);
1416 if (data
& (LCK_RW_WANT_EXCL
| LCK_RW_WANT_UPGRADE
| LCK_RW_INTERLOCK
)) {
1417 atomic_exchange_abort();
1418 lck_rw_lock_shared_gen(lock
);
1421 data
+= LCK_RW_SHARED_READER
;
1422 if (atomic_exchange_complete32(&lock
->data
, prev
, data
, memory_order_acquire_smp
))
1427 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE
, lock
, DTRACE_RW_SHARED
);
1428 #endif /* CONFIG_DTRACE */
1433 * Routine: lck_rw_lock_shared_gen
1435 * assembly fast path code has determined that this lock
1436 * is held exclusively... this is where we spin/block
1437 * until we can acquire the lock in the shared mode
1440 lck_rw_lock_shared_gen(
1443 __kdebug_only
uintptr_t trace_lck
= VM_KERNEL_UNSLIDE_OR_PERM(lck
);
1444 uint64_t deadline
= 0;
1447 wait_result_t res
= 0;
1448 boolean_t istate
= -1;
1451 uint64_t wait_interval
= 0;
1452 int readers_at_sleep
= 0;
1453 boolean_t dtrace_ls_initialized
= FALSE
;
1454 boolean_t dtrace_rwl_shared_spin
, dtrace_rwl_shared_block
, dtrace_ls_enabled
= FALSE
;
1457 while ( !lck_rw_grab_shared(lck
)) {
1460 if (dtrace_ls_initialized
== FALSE
) {
1461 dtrace_ls_initialized
= TRUE
;
1462 dtrace_rwl_shared_spin
= (lockstat_probemap
[LS_LCK_RW_LOCK_SHARED_SPIN
] != 0);
1463 dtrace_rwl_shared_block
= (lockstat_probemap
[LS_LCK_RW_LOCK_SHARED_BLOCK
] != 0);
1464 dtrace_ls_enabled
= dtrace_rwl_shared_spin
|| dtrace_rwl_shared_block
;
1465 if (dtrace_ls_enabled
) {
1467 * Either sleeping or spinning is happening,
1468 * start a timing of our delay interval now.
1470 readers_at_sleep
= lck
->lck_rw_shared_count
;
1471 wait_interval
= mach_absolute_time();
1476 istate
= ml_get_interrupts_enabled();
1478 deadline
= lck_rw_deadline_for_spin(lck
);
1480 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_SHARED_SPIN_CODE
) | DBG_FUNC_START
,
1481 trace_lck
, lck
->lck_rw_want_write
, lck
->lck_rw_want_upgrade
, 0, 0);
1483 while (((gotlock
= lck_rw_grab_shared(lck
)) == 0) && mach_absolute_time() < deadline
)
1484 lck_rw_lock_pause(istate
);
1486 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_SHARED_SPIN_CODE
) | DBG_FUNC_END
,
1487 trace_lck
, lck
->lck_rw_want_write
, lck
->lck_rw_want_upgrade
, gotlock
, 0);
1492 * if we get here, the deadline has expired w/o us
1493 * being able to grab the lock for read
1494 * check to see if we're allowed to do a thread_block
1496 if (lck
->lck_rw_can_sleep
) {
1498 istate
= lck_interlock_lock(lck
);
1500 if ((lck
->lck_rw_want_write
|| lck
->lck_rw_want_upgrade
) &&
1501 ((lck
->lck_rw_shared_count
== 0) || lck
->lck_rw_priv_excl
)) {
1503 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_SHARED_WAIT_CODE
) | DBG_FUNC_START
,
1504 trace_lck
, lck
->lck_rw_want_write
, lck
->lck_rw_want_upgrade
, 0, 0);
1506 lck
->lck_r_waiting
= TRUE
;
1508 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockRead
);
1509 res
= assert_wait(RW_LOCK_READER_EVENT(lck
), THREAD_UNINT
);
1510 lck_interlock_unlock(lck
, istate
);
1512 if (res
== THREAD_WAITING
) {
1513 res
= thread_block(THREAD_CONTINUE_NULL
);
1516 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_SHARED_WAIT_CODE
) | DBG_FUNC_END
,
1517 trace_lck
, res
, slept
, 0, 0);
1519 lck
->lck_rw_shared_count
++;
1520 lck_interlock_unlock(lck
, istate
);
1527 if (dtrace_ls_enabled
== TRUE
) {
1529 LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_SPIN
, lck
, mach_absolute_time() - wait_interval
, 0);
1531 LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_BLOCK
, lck
,
1532 mach_absolute_time() - wait_interval
, 0,
1533 (readers_at_sleep
== 0 ? 1 : 0), readers_at_sleep
);
1536 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE
, lck
, 0);
1542 * Routine: lck_rw_lock_exclusive
1546 lck_rw_lock_exclusive(lck_rw_t
*lock
)
1548 current_thread()->rwlock_count
++;
1549 if (atomic_test_and_set32(&lock
->data
,
1550 (LCK_RW_SHARED_MASK
| LCK_RW_WANT_EXCL
| LCK_RW_WANT_UPGRADE
| LCK_RW_INTERLOCK
),
1551 LCK_RW_WANT_EXCL
, memory_order_acquire_smp
, FALSE
)) {
1553 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE
, lock
, DTRACE_RW_EXCL
);
1554 #endif /* CONFIG_DTRACE */
1556 lck_rw_lock_exclusive_gen(lock
);
1561 * Routine: lck_rw_lock_shared_to_exclusive
1565 lck_rw_lock_shared_to_exclusive(lck_rw_t
*lock
)
1567 uint32_t data
, prev
;
1570 data
= atomic_exchange_begin32(&lock
->data
, &prev
, memory_order_acquire_smp
);
1571 if (data
& LCK_RW_INTERLOCK
) {
1572 atomic_exchange_abort();
1573 lck_rw_interlock_spin(lock
);
1576 if (data
& LCK_RW_WANT_UPGRADE
) {
1577 data
-= LCK_RW_SHARED_READER
;
1578 if ((data
& LCK_RW_SHARED_MASK
) == 0) /* we were the last reader */
1579 data
&= ~(LCK_RW_W_WAITING
); /* so clear the wait indicator */
1580 if (atomic_exchange_complete32(&lock
->data
, prev
, data
, memory_order_acquire_smp
))
1581 return lck_rw_lock_shared_to_exclusive_failure(lock
, prev
);
1583 data
|= LCK_RW_WANT_UPGRADE
; /* ask for WANT_UPGRADE */
1584 data
-= LCK_RW_SHARED_READER
; /* and shed our read count */
1585 if (atomic_exchange_complete32(&lock
->data
, prev
, data
, memory_order_acquire_smp
))
1590 /* we now own the WANT_UPGRADE */
1591 if (data
& LCK_RW_SHARED_MASK
) /* check to see if all of the readers are drained */
1592 lck_rw_lock_shared_to_exclusive_success(lock
); /* if not, we need to go wait */
1594 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE
, lock
, 0);
1601 * Routine: lck_rw_lock_shared_to_exclusive_failure
1603 * assembly fast path code has already dropped our read
1604 * count and determined that someone else owns 'lck_rw_want_upgrade'
1605 * if 'lck_rw_shared_count' == 0, its also already dropped 'lck_w_waiting'
1606 * all we need to do here is determine if a wakeup is needed
1609 lck_rw_lock_shared_to_exclusive_failure(
1611 uint32_t prior_lock_state
)
1614 thread_t thread
= current_thread();
1615 uint32_t rwlock_count
;
1617 /* Check if dropping the lock means that we need to unpromote */
1618 rwlock_count
= thread
->rwlock_count
--;
1620 if (rwlock_count
== 0) {
1621 panic("rw lock count underflow for thread %p", thread
);
1624 fake_lck
= (lck_rw_t
*)&prior_lock_state
;
1626 if (fake_lck
->lck_w_waiting
&& fake_lck
->lck_rw_shared_count
== 1) {
1628 * Someone else has requested upgrade.
1629 * Since we've released the read lock, wake
1630 * him up if he's blocked waiting
1632 thread_wakeup(RW_LOCK_WRITER_EVENT(lck
));
1635 if ((rwlock_count
== 1 /* field now 0 */) && (thread
->sched_flags
& TH_SFLAG_RW_PROMOTED
)) {
1636 /* sched_flags checked without lock, but will be rechecked while clearing */
1637 lck_rw_clear_promotion(thread
);
1640 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_SH_TO_EX_CODE
) | DBG_FUNC_NONE
,
1641 VM_KERNEL_UNSLIDE_OR_PERM(lck
), lck
->lck_rw_shared_count
, lck
->lck_rw_want_upgrade
, 0, 0);
 *	Routine:	lck_rw_lock_shared_to_exclusive_success
 *		assembly fast path code has already dropped our read
 *		count and successfully acquired 'lck_rw_want_upgrade'
 *		we just need to wait for the rest of the readers to drain
 *		and then we can return as the exclusive holder of this lock
1656 lck_rw_lock_shared_to_exclusive_success(
1659 __kdebug_only
uintptr_t trace_lck
= VM_KERNEL_UNSLIDE_OR_PERM(lck
);
1660 uint64_t deadline
= 0;
1662 int still_shared
= 0;
1664 boolean_t istate
= -1;
1667 uint64_t wait_interval
= 0;
1668 int readers_at_sleep
= 0;
1669 boolean_t dtrace_ls_initialized
= FALSE
;
1670 boolean_t dtrace_rwl_shared_to_excl_spin
, dtrace_rwl_shared_to_excl_block
, dtrace_ls_enabled
= FALSE
;
1673 while (lck
->lck_rw_shared_count
!= 0) {
1676 if (dtrace_ls_initialized
== FALSE
) {
1677 dtrace_ls_initialized
= TRUE
;
1678 dtrace_rwl_shared_to_excl_spin
= (lockstat_probemap
[LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN
] != 0);
1679 dtrace_rwl_shared_to_excl_block
= (lockstat_probemap
[LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK
] != 0);
1680 dtrace_ls_enabled
= dtrace_rwl_shared_to_excl_spin
|| dtrace_rwl_shared_to_excl_block
;
1681 if (dtrace_ls_enabled
) {
1683 * Either sleeping or spinning is happening,
1684 * start a timing of our delay interval now.
1686 readers_at_sleep
= lck
->lck_rw_shared_count
;
1687 wait_interval
= mach_absolute_time();
1692 istate
= ml_get_interrupts_enabled();
1694 deadline
= lck_rw_deadline_for_spin(lck
);
1696 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_SH_TO_EX_SPIN_CODE
) | DBG_FUNC_START
,
1697 trace_lck
, lck
->lck_rw_shared_count
, 0, 0, 0);
1699 while ((still_shared
= lck
->lck_rw_shared_count
) && mach_absolute_time() < deadline
)
1700 lck_rw_lock_pause(istate
);
1702 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_SH_TO_EX_SPIN_CODE
) | DBG_FUNC_END
,
1703 trace_lck
, lck
->lck_rw_shared_count
, 0, 0, 0);
1708 * if we get here, the deadline has expired w/o
1709 * the rw_shared_count having drained to 0
1710 * check to see if we're allowed to do a thread_block
1712 if (lck
->lck_rw_can_sleep
) {
1714 istate
= lck_interlock_lock(lck
);
1716 if (lck
->lck_rw_shared_count
!= 0) {
1717 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_SH_TO_EX_WAIT_CODE
) | DBG_FUNC_START
,
1718 trace_lck
, lck
->lck_rw_shared_count
, 0, 0, 0);
1720 lck
->lck_w_waiting
= TRUE
;
1722 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockUpgrade
);
1723 res
= assert_wait(RW_LOCK_WRITER_EVENT(lck
), THREAD_UNINT
);
1724 lck_interlock_unlock(lck
, istate
);
1726 if (res
== THREAD_WAITING
) {
1727 res
= thread_block(THREAD_CONTINUE_NULL
);
1730 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_SH_TO_EX_WAIT_CODE
) | DBG_FUNC_END
,
1731 trace_lck
, res
, slept
, 0, 0);
1733 lck_interlock_unlock(lck
, istate
);
1740 * We infer whether we took the sleep/spin path above by checking readers_at_sleep.
1742 if (dtrace_ls_enabled
== TRUE
) {
1744 LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN
, lck
, mach_absolute_time() - wait_interval
, 0);
1746 LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK
, lck
,
1747 mach_absolute_time() - wait_interval
, 1,
1748 (readers_at_sleep
== 0 ? 1 : 0), readers_at_sleep
);
1751 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE
, lck
, 1);
1757 * Routine: lck_rw_lock_exclusive_to_shared
1760 void lck_rw_lock_exclusive_to_shared(lck_rw_t
*lock
)
1762 uint32_t data
, prev
;
1765 data
= atomic_exchange_begin32(&lock
->data
, &prev
, memory_order_release_smp
);
1766 if (data
& LCK_RW_INTERLOCK
) {
1767 atomic_exchange_abort();
1768 lck_rw_interlock_spin(lock
); /* wait for interlock to clear */
1771 data
+= LCK_RW_SHARED_READER
;
1772 if (data
& LCK_RW_WANT_UPGRADE
)
1773 data
&= ~(LCK_RW_WANT_UPGRADE
);
1775 data
&= ~(LCK_RW_WANT_EXCL
);
1776 if (!((prev
& LCK_RW_W_WAITING
) && (prev
& LCK_RW_PRIV_EXCL
)))
1777 data
&= ~(LCK_RW_W_WAITING
);
1778 if (atomic_exchange_complete32(&lock
->data
, prev
, data
, memory_order_release_smp
))
1782 return lck_rw_lock_exclusive_to_shared_gen(lock
, prev
);
1787 * Routine: lck_rw_lock_exclusive_to_shared_gen
1789 * assembly fast path has already dropped
1790 * our exclusive state and bumped lck_rw_shared_count
1791 * all we need to do here is determine if anyone
1792 * needs to be awakened.
1795 lck_rw_lock_exclusive_to_shared_gen(
1797 uint32_t prior_lock_state
)
1799 __kdebug_only
uintptr_t trace_lck
= VM_KERNEL_UNSLIDE_OR_PERM(lck
);
1802 fake_lck
= (lck_rw_t
*)&prior_lock_state
;
1804 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_TO_SH_CODE
) | DBG_FUNC_START
,
1805 trace_lck
, fake_lck
->lck_rw_want_write
, fake_lck
->lck_rw_want_upgrade
, 0, 0);
1808 * don't wake up anyone waiting to take the lock exclusively
1809 * since we hold a read count... when the read count drops to 0,
1810 * the writers will be woken.
1812 * wake up any waiting readers if we don't have any writers waiting,
1813 * or the lock is NOT marked as rw_priv_excl (writers have privilege)
1815 if (!(fake_lck
->lck_rw_priv_excl
&& fake_lck
->lck_w_waiting
) && fake_lck
->lck_r_waiting
)
1816 thread_wakeup(RW_LOCK_READER_EVENT(lck
));
1818 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_TO_SH_CODE
) | DBG_FUNC_END
,
1819 trace_lck
, lck
->lck_rw_want_write
, lck
->lck_rw_want_upgrade
, lck
->lck_rw_shared_count
, 0);
1822 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_TO_SHARED_DOWNGRADE
, lck
, 0);
1828 * Routine: lck_rw_try_lock
1833 lck_rw_type_t lck_rw_type
)
1835 if (lck_rw_type
== LCK_RW_TYPE_SHARED
)
1836 return(lck_rw_try_lock_shared(lck
));
1837 else if (lck_rw_type
== LCK_RW_TYPE_EXCLUSIVE
)
1838 return(lck_rw_try_lock_exclusive(lck
));
1840 panic("lck_rw_try_lock(): Invalid rw lock type: %x\n", lck_rw_type
);
1845 * Routine: lck_rw_try_lock_shared
1848 boolean_t
lck_rw_try_lock_shared(lck_rw_t
*lock
)
1850 uint32_t data
, prev
;
1853 data
= atomic_exchange_begin32(&lock
->data
, &prev
, memory_order_acquire_smp
);
1854 if (data
& LCK_RW_INTERLOCK
) {
1855 atomic_exchange_abort();
1856 lck_rw_interlock_spin(lock
);
1859 if (data
& (LCK_RW_WANT_EXCL
| LCK_RW_WANT_UPGRADE
)) {
1860 atomic_exchange_abort();
1861 return FALSE
; /* lock is busy */
1863 data
+= LCK_RW_SHARED_READER
; /* Increment reader refcount */
1864 if (atomic_exchange_complete32(&lock
->data
, prev
, data
, memory_order_acquire_smp
))
1868 current_thread()->rwlock_count
++;
1869 /* There is a 3 instr window where preemption may not notice rwlock_count after cmpxchg */
1871 LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE
, lock
, DTRACE_RW_SHARED
);
1872 #endif /* CONFIG_DTRACE */
1878 * Routine: lck_rw_try_lock_exclusive
1881 boolean_t
lck_rw_try_lock_exclusive(lck_rw_t
*lock
)
1883 uint32_t data
, prev
;
1886 data
= atomic_exchange_begin32(&lock
->data
, &prev
, memory_order_acquire_smp
);
1887 if (data
& LCK_RW_INTERLOCK
) {
1888 atomic_exchange_abort();
1889 lck_rw_interlock_spin(lock
);
1892 if (data
& (LCK_RW_SHARED_MASK
| LCK_RW_WANT_EXCL
| LCK_RW_WANT_UPGRADE
)) {
1893 atomic_exchange_abort();
1894 return FALSE
; /* can't get it */
1896 data
|= LCK_RW_WANT_EXCL
;
1897 if (atomic_exchange_complete32(&lock
->data
, prev
, data
, memory_order_acquire_smp
))
1902 current_thread()->rwlock_count
++;
1904 LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE
, lock
, DTRACE_RW_EXCL
);
1905 #endif /* CONFIG_DTRACE */
1916 case LCK_RW_ASSERT_SHARED
:
1917 if (lck
->lck_rw_shared_count
!= 0) {
1921 case LCK_RW_ASSERT_EXCLUSIVE
:
1922 if ((lck
->lck_rw_want_write
||
1923 lck
->lck_rw_want_upgrade
) &&
1924 lck
->lck_rw_shared_count
== 0) {
1928 case LCK_RW_ASSERT_HELD
:
1929 if (lck
->lck_rw_want_write
||
1930 lck
->lck_rw_want_upgrade
||
1931 lck
->lck_rw_shared_count
!= 0) {
1935 case LCK_RW_ASSERT_NOTHELD
:
1936 if (!(lck
->lck_rw_want_write
||
1937 lck
->lck_rw_want_upgrade
||
1938 lck
->lck_rw_shared_count
!= 0)) {
1946 panic("rw lock (%p)%s held (mode=%u), first word %08x\n", lck
, (type
== LCK_RW_ASSERT_NOTHELD
? "" : " not"), type
, *(uint32_t *)lck
);
1949 /* On return to userspace, this routine is called if the rwlock_count is somehow imbalanced */
1951 lck_rw_clear_promotions_x86(thread_t thread
)
1954 /* It's fatal to leave a RW lock locked and return to userspace */
1955 panic("%u rw lock(s) held on return to userspace for thread %p", thread
->rwlock_count
, thread
);
1957 /* Paper over the issue */
1958 thread
->rwlock_count
= 0;
1959 lck_rw_clear_promotion(thread
);
1964 lck_rw_lock_yield_shared(lck_rw_t
*lck
, boolean_t force_yield
)
1966 lck_rw_assert(lck
, LCK_RW_ASSERT_SHARED
);
1968 if (lck
->lck_rw_want_write
|| lck
->lck_rw_want_upgrade
|| force_yield
) {
1969 lck_rw_unlock_shared(lck
);
1971 lck_rw_lock_shared(lck
);
1979 * Routine: kdp_lck_rw_lock_is_acquired_exclusive
1980 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
1983 kdp_lck_rw_lock_is_acquired_exclusive(lck_rw_t
*lck
) {
1985 panic("panic: rw lock exclusive check done outside of kernel debugger");
1987 return ((lck
->lck_rw_want_upgrade
|| lck
->lck_rw_want_write
) && (lck
->lck_rw_shared_count
== 0)) ? TRUE
: FALSE
;
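/*
 * Illustrative sketch only, assuming a caller-supplied lck_grp_t: the
 * shared/exclusive/upgrade paths of the lck_rw_* interfaces implemented
 * above.  The names example_grp and example_rw_usage are hypothetical.
 */
#if 0
static void
example_rw_usage(lck_grp_t *example_grp)
{
	lck_rw_t	*lock;

	lock = lck_rw_alloc_init(example_grp, LCK_ATTR_NULL);

	lck_rw_lock_shared(lock);			/* many readers may hold this at once */
	if (lck_rw_lock_shared_to_exclusive(lock)) {
		/* upgrade succeeded... we now hold the lock exclusively */
		lck_rw_lock_exclusive_to_shared(lock);	/* downgrade back to shared */
	} else {
		/* upgrade failed... our shared hold was dropped, so reacquire it */
		lck_rw_lock_shared(lock);
	}
	(void) lck_rw_done(lock);			/* releases whichever mode is held */

	lck_rw_free(lock, example_grp);
}
#endif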
1992 extern zone_t lck_mtx_zone
;
1995 * Routine: lck_mtx_alloc_init
2004 if ((lck
= (lck_mtx_t
*)zalloc(lck_mtx_zone
)) != 0)
2005 lck_mtx_init(lck
, grp
, attr
);
2007 if ((lck
= (lck_mtx_t
*)kalloc(sizeof(lck_mtx_t
))) != 0)
2008 lck_mtx_init(lck
, grp
, attr
);
2014 * Routine: lck_mtx_free
2021 lck_mtx_destroy(lck
, grp
);
2023 zfree(lck_mtx_zone
, lck
);
2025 kfree(lck
, sizeof(lck_mtx_t
));
2030 * Routine: lck_mtx_ext_init
2038 bzero((void *)lck
, sizeof(lck_mtx_ext_t
));
2040 if ((attr
->lck_attr_val
) & LCK_ATTR_DEBUG
) {
2041 lck
->lck_mtx_deb
.type
= MUTEX_TAG
;
2042 lck
->lck_mtx_attr
|= LCK_MTX_ATTR_DEBUG
;
2045 lck
->lck_mtx_grp
= grp
;
2047 if (grp
->lck_grp_attr
& LCK_GRP_ATTR_STAT
)
2048 lck
->lck_mtx_attr
|= LCK_MTX_ATTR_STAT
;
2050 lck
->lck_mtx
.lck_mtx_is_ext
= 1;
2051 lck
->lck_mtx
.lck_mtx_pad32
= 0xFFFFFFFF;
2055 * Routine: lck_mtx_init
2063 lck_mtx_ext_t
*lck_ext
;
2064 lck_attr_t
*lck_attr
;
2066 if (attr
!= LCK_ATTR_NULL
)
2069 lck_attr
= &LockDefaultLckAttr
;
2071 if ((lck_attr
->lck_attr_val
) & LCK_ATTR_DEBUG
) {
2072 if ((lck_ext
= (lck_mtx_ext_t
*)kalloc(sizeof(lck_mtx_ext_t
))) != 0) {
2073 lck_mtx_ext_init(lck_ext
, grp
, lck_attr
);
2074 lck
->lck_mtx_tag
= LCK_MTX_TAG_INDIRECT
;
2075 lck
->lck_mtx_ptr
= lck_ext
;
2078 lck
->lck_mtx_owner
= 0;
2079 lck
->lck_mtx_state
= 0;
2081 lck
->lck_mtx_pad32
= 0xFFFFFFFF;
2082 lck_grp_reference(grp
);
2083 lck_grp_lckcnt_incr(grp
, LCK_TYPE_MTX
);
2087 * Routine: lck_mtx_init_ext
2092 lck_mtx_ext_t
*lck_ext
,
2096 lck_attr_t
*lck_attr
;
2098 if (attr
!= LCK_ATTR_NULL
)
2101 lck_attr
= &LockDefaultLckAttr
;
2103 if ((lck_attr
->lck_attr_val
) & LCK_ATTR_DEBUG
) {
2104 lck_mtx_ext_init(lck_ext
, grp
, lck_attr
);
2105 lck
->lck_mtx_tag
= LCK_MTX_TAG_INDIRECT
;
2106 lck
->lck_mtx_ptr
= lck_ext
;
2108 lck
->lck_mtx_owner
= 0;
2109 lck
->lck_mtx_state
= 0;
2111 lck
->lck_mtx_pad32
= 0xFFFFFFFF;
2113 lck_grp_reference(grp
);
2114 lck_grp_lckcnt_incr(grp
, LCK_TYPE_MTX
);
2118 * Routine: lck_mtx_destroy
2125 boolean_t lck_is_indirect
;
2127 if (lck
->lck_mtx_tag
== LCK_MTX_TAG_DESTROYED
)
2130 lck_mtx_assert(lck
, LCK_MTX_ASSERT_NOTOWNED
);
2132 lck_is_indirect
= (lck
->lck_mtx_tag
== LCK_MTX_TAG_INDIRECT
);
2134 lck_mtx_lock_mark_destroyed(lck
);
2136 if (lck_is_indirect
)
2137 kfree(lck
->lck_mtx_ptr
, sizeof(lck_mtx_ext_t
));
2138 lck_grp_lckcnt_decr(grp
, LCK_TYPE_MTX
);
2139 lck_grp_deallocate(grp
);
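/*
 * Illustrative sketch only, assuming a caller-supplied lck_grp_t: the
 * allocation/teardown pairing for the lck_mtx_* routines above.  The
 * names example_grp and example_mtx_usage are hypothetical; lck_mtx_lock
 * and lck_mtx_unlock themselves live in the fast-path code, not here.
 */
#if 0
static void
example_mtx_usage(lck_grp_t *example_grp)
{
	lck_mtx_t	*mutex;

	mutex = lck_mtx_alloc_init(example_grp, LCK_ATTR_NULL);

	lck_mtx_lock(mutex);		/* may block; holder can be priority-promoted by waiters */
	/* ... critical section that is allowed to sleep ... */
	lck_mtx_unlock(mutex);

	lck_mtx_free(mutex, example_grp);
}
#endif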
2144 #define LCK_MTX_LCK_WAIT_CODE 0x20
2145 #define LCK_MTX_LCK_WAKEUP_CODE 0x21
2146 #define LCK_MTX_LCK_SPIN_CODE 0x22
2147 #define LCK_MTX_LCK_ACQUIRE_CODE 0x23
2148 #define LCK_MTX_LCK_DEMOTE_CODE 0x24
 *	Routine: 	lck_mtx_unlock_wakeup_x86
 *
 *	Invoked on unlock when there is
 *	contention (i.e. the assembly routine sees that
 *	mutex->lck_mtx_waiters != 0 or
 *	mutex->lck_mtx_promoted != 0)...
 *
 *	neither the mutex nor the interlock is held
2162 lck_mtx_unlock_wakeup_x86 (
2164 int prior_lock_state
)
2166 __kdebug_only
uintptr_t trace_lck
= VM_KERNEL_UNSLIDE_OR_PERM(mutex
);
	 * prior_lock_state is a snapshot of the 2nd word of the
	 * lock in question... we'll fake up a lock with the bits
	 * copied into place and carefully not access anything
	 * beyond what's defined in the second word of a lck_mtx_t
2175 fake_lck
.lck_mtx_state
= prior_lock_state
;
2177 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_MTX_LCK_WAKEUP_CODE
) | DBG_FUNC_START
,
2178 trace_lck
, fake_lck
.lck_mtx_promoted
, fake_lck
.lck_mtx_waiters
, fake_lck
.lck_mtx_pri
, 0);
2180 if (__probable(fake_lck
.lck_mtx_waiters
)) {
2181 if (fake_lck
.lck_mtx_waiters
> 1)
2182 thread_wakeup_one_with_pri(LCK_MTX_EVENT(mutex
), fake_lck
.lck_mtx_pri
);
2184 thread_wakeup_one(LCK_MTX_EVENT(mutex
));
2187 if (__improbable(fake_lck
.lck_mtx_promoted
)) {
2188 thread_t thread
= current_thread();
2191 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_MTX_LCK_DEMOTE_CODE
) | DBG_FUNC_NONE
,
2192 thread_tid(thread
), thread
->promotions
, thread
->sched_flags
& TH_SFLAG_PROMOTED
, 0, 0);
2194 if (thread
->promotions
> 0) {
2195 spl_t s
= splsched();
2197 thread_lock(thread
);
2199 if (--thread
->promotions
== 0 && (thread
->sched_flags
& TH_SFLAG_PROMOTED
)) {
2201 thread
->sched_flags
&= ~TH_SFLAG_PROMOTED
;
2203 if (thread
->sched_flags
& TH_SFLAG_RW_PROMOTED
) {
2204 /* Thread still has a RW lock promotion */
2205 } else if (thread
->sched_flags
& TH_SFLAG_DEPRESSED_MASK
) {
2206 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED
, MACH_DEMOTE
) | DBG_FUNC_NONE
,
2207 thread
->sched_pri
, DEPRESSPRI
, 0, trace_lck
, 0);
2209 set_sched_pri(thread
, DEPRESSPRI
);
2212 if (thread
->base_pri
< thread
->sched_pri
) {
2213 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED
, MACH_DEMOTE
) | DBG_FUNC_NONE
,
2214 thread
->sched_pri
, thread
->base_pri
, 0, trace_lck
, 0);
2216 thread_recompute_sched_pri(thread
, FALSE
);
2220 thread_unlock(thread
);
2224 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_MTX_LCK_WAKEUP_CODE
) | DBG_FUNC_END
,
2225 trace_lck
, 0, mutex
->lck_mtx_waiters
, 0, 0);
 *	Routine: 	lck_mtx_lock_acquire_x86
 *
 *	Invoked on acquiring the mutex when there is
 *	contention (i.e. the assembly routine sees that
 *	mutex->lck_mtx_waiters != 0 or
 *	thread->was_promoted_on_wakeup != 0)...
 *
 *	mutex is owned...  interlock is held...  preemption is disabled
2240 lck_mtx_lock_acquire_x86(
2243 __kdebug_only
uintptr_t trace_lck
= VM_KERNEL_UNSLIDE_OR_PERM(mutex
);
2248 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_MTX_LCK_ACQUIRE_CODE
) | DBG_FUNC_START
,
2249 trace_lck
, thread
->was_promoted_on_wakeup
, mutex
->lck_mtx_waiters
, mutex
->lck_mtx_pri
, 0);
2251 if (mutex
->lck_mtx_waiters
)
2252 priority
= mutex
->lck_mtx_pri
;
	thread = (thread_t) mutex->lck_mtx_owner;	/* faster than current_thread() */
2258 if (thread
->sched_pri
< priority
|| thread
->was_promoted_on_wakeup
) {
2260 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED
, MACH_PROMOTE
) | DBG_FUNC_NONE
,
2261 thread
->sched_pri
, priority
, thread
->was_promoted_on_wakeup
, trace_lck
, 0);
2264 thread_lock(thread
);
2266 if (thread
->sched_pri
< priority
) {
2267 /* Do not promote past promotion ceiling */
2268 assert(priority
<= MAXPRI_PROMOTE
);
2269 set_sched_pri(thread
, priority
);
2271 if (mutex
->lck_mtx_promoted
== 0) {
2272 mutex
->lck_mtx_promoted
= 1;
2274 thread
->promotions
++;
2275 thread
->sched_flags
|= TH_SFLAG_PROMOTED
;
2277 thread
->was_promoted_on_wakeup
= 0;
2279 thread_unlock(thread
);
2282 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_MTX_LCK_ACQUIRE_CODE
) | DBG_FUNC_END
,
2283 trace_lck
, 0, mutex
->lck_mtx_waiters
, 0, 0);
2288 lck_mtx_interlock_try_lock(lck_mtx_t
*mutex
, boolean_t
*istate
)
2292 *istate
= ml_set_interrupts_enabled(FALSE
);
2293 retval
= lck_mtx_ilk_try_lock(mutex
);
2296 ml_set_interrupts_enabled(*istate
);
2302 lck_mtx_interlock_unlock(lck_mtx_t
*mutex
, boolean_t istate
)
2304 lck_mtx_ilk_unlock(mutex
);
2305 ml_set_interrupts_enabled(istate
);
2310 * Routine: lck_mtx_lock_spinwait_x86
2312 * Invoked trying to acquire a mutex when there is contention but
2313 * the holder is running on another processor. We spin for up to a maximum
2314 * time waiting for the lock to be released.
2316 * Called with the interlock unlocked.
2317 * returns 0 if mutex acquired
2318 * returns 1 if we spun
2319 * returns 2 if we didn't spin due to the holder not running
2322 lck_mtx_lock_spinwait_x86(
2325 __kdebug_only
uintptr_t trace_lck
= VM_KERNEL_UNSLIDE_OR_PERM(mutex
);
2327 uint64_t overall_deadline
;
2328 uint64_t check_owner_deadline
;
2333 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_MTX_LCK_SPIN_CODE
) | DBG_FUNC_START
,
2334 trace_lck
, VM_KERNEL_UNSLIDE_OR_PERM(mutex
->lck_mtx_owner
), mutex
->lck_mtx_waiters
, 0, 0);
2336 cur_time
= mach_absolute_time();
2337 overall_deadline
= cur_time
+ MutexSpin
;
2338 check_owner_deadline
= cur_time
;
2342 * - mutex is locked, and
2343 * - its locked as a spin lock, and
2344 * - owner is running on another processor, and
2345 * - owner (processor) is not idling, and
2346 * - we haven't spun for long enough.
2349 if (__probable(lck_mtx_lock_grab_mutex(mutex
))) {
2353 cur_time
= mach_absolute_time();
2355 if (cur_time
>= overall_deadline
)
2358 if (cur_time
>= check_owner_deadline
&& mutex
->lck_mtx_owner
) {
2361 if (lck_mtx_interlock_try_lock(mutex
, &istate
)) {
2363 if ((holder
= (thread_t
) mutex
->lck_mtx_owner
) != NULL
) {
2365 if ( !(holder
->machine
.specFlags
& OnProc
) ||
2366 (holder
->state
& TH_IDLE
)) {
2368 lck_mtx_interlock_unlock(mutex
, istate
);
2375 lck_mtx_interlock_unlock(mutex
, istate
);
2377 check_owner_deadline
= cur_time
+ (MutexSpin
/ 4);
2388 * We've already kept a count via overall_deadline of how long we spun.
2389 * If dtrace is active, then we compute backwards to decide how
2392 * Note that we record a different probe id depending on whether
2393 * this is a direct or indirect mutex. This allows us to
2394 * penalize only lock groups that have debug/stats enabled
2395 * with dtrace processing if desired.
2397 if (__probable(mutex
->lck_mtx_is_ext
== 0)) {
2398 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN
, mutex
,
2399 mach_absolute_time() - (overall_deadline
- MutexSpin
));
2401 LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_SPIN
, mutex
,
2402 mach_absolute_time() - (overall_deadline
- MutexSpin
));
2404 /* The lockstat acquire event is recorded by the assembly code beneath us. */
2407 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_MTX_LCK_SPIN_CODE
) | DBG_FUNC_END
,
2408 trace_lck
, VM_KERNEL_UNSLIDE_OR_PERM(mutex
->lck_mtx_owner
), mutex
->lck_mtx_waiters
, retval
, 0);
2416 * Routine: lck_mtx_lock_wait_x86
2418 * Invoked in order to wait on contention.
2420 * Called with the interlock locked and
2421 * preemption disabled...
2422 * returns it unlocked and with preemption enabled
2425 lck_mtx_lock_wait_x86 (
2428 __kdebug_only
uintptr_t trace_lck
= VM_KERNEL_UNSLIDE_OR_PERM(mutex
);
2429 thread_t self
= current_thread();
2434 uint64_t sleep_start
= 0;
2436 if (lockstat_probemap
[LS_LCK_MTX_LOCK_BLOCK
] || lockstat_probemap
[LS_LCK_MTX_EXT_LOCK_BLOCK
]) {
2437 sleep_start
= mach_absolute_time();
2440 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_MTX_LCK_WAIT_CODE
) | DBG_FUNC_START
,
2441 trace_lck
, VM_KERNEL_UNSLIDE_OR_PERM(mutex
->lck_mtx_owner
), mutex
->lck_mtx_waiters
, mutex
->lck_mtx_pri
, 0);
2443 priority
= self
->sched_pri
;
2445 if (priority
< self
->base_pri
)
2446 priority
= self
->base_pri
;
2447 if (priority
< BASEPRI_DEFAULT
)
2448 priority
= BASEPRI_DEFAULT
;
2450 /* Do not promote past promotion ceiling */
2451 priority
= MIN(priority
, MAXPRI_PROMOTE
);
2453 if (mutex
->lck_mtx_waiters
== 0 || priority
> mutex
->lck_mtx_pri
)
2454 mutex
->lck_mtx_pri
= priority
;
2455 mutex
->lck_mtx_waiters
++;
2457 if ( (holder
= (thread_t
)mutex
->lck_mtx_owner
) &&
2458 holder
->sched_pri
< mutex
->lck_mtx_pri
) {
2460 thread_lock(holder
);
2462 /* holder priority may have been bumped by another thread
2463 * before thread_lock was taken
2465 if (holder
->sched_pri
< mutex
->lck_mtx_pri
) {
2466 KERNEL_DEBUG_CONSTANT(
2467 MACHDBG_CODE(DBG_MACH_SCHED
, MACH_PROMOTE
) | DBG_FUNC_NONE
,
2468 holder
->sched_pri
, priority
, thread_tid(holder
), trace_lck
, 0);
2469 /* Assert that we're not altering the priority of a
2470 * thread above the MAXPRI_PROMOTE band
2472 assert(holder
->sched_pri
< MAXPRI_PROMOTE
);
2473 set_sched_pri(holder
, priority
);
2475 if (mutex
->lck_mtx_promoted
== 0) {
2476 holder
->promotions
++;
2477 holder
->sched_flags
|= TH_SFLAG_PROMOTED
;
2479 mutex
->lck_mtx_promoted
= 1;
2482 thread_unlock(holder
);
2485 thread_set_pending_block_hint(self
, kThreadWaitKernelMutex
);
2486 assert_wait(LCK_MTX_EVENT(mutex
), THREAD_UNINT
);
2488 lck_mtx_ilk_unlock(mutex
);
2490 thread_block(THREAD_CONTINUE_NULL
);
2492 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_MTX_LCK_WAIT_CODE
) | DBG_FUNC_END
,
2493 trace_lck
, VM_KERNEL_UNSLIDE_OR_PERM(mutex
->lck_mtx_owner
), mutex
->lck_mtx_waiters
, mutex
->lck_mtx_pri
, 0);
2497 * Record the Dtrace lockstat probe for blocking, block time
2498 * measured from when we were entered.
2501 if (mutex
->lck_mtx_is_ext
== 0) {
2502 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_BLOCK
, mutex
,
2503 mach_absolute_time() - sleep_start
);
2505 LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_BLOCK
, mutex
,
2506 mach_absolute_time() - sleep_start
);
2513 * Routine: kdp_lck_mtx_lock_spin_is_acquired
2514 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
2515 * Returns: TRUE if lock is acquired.
2518 kdp_lck_mtx_lock_spin_is_acquired(lck_mtx_t
*lck
)
2521 panic("panic: kdp_lck_mtx_lock_spin_is_acquired called outside of kernel debugger");
2524 if (lck
->lck_mtx_ilocked
|| lck
->lck_mtx_mlocked
) {
2532 kdp_lck_mtx_find_owner(__unused
struct waitq
* waitq
, event64_t event
, thread_waitinfo_t
* waitinfo
)
2534 lck_mtx_t
* mutex
= LCK_EVENT_TO_MUTEX(event
);
2535 waitinfo
->context
= VM_KERNEL_UNSLIDE_OR_PERM(mutex
);
2536 thread_t holder
= (thread_t
)mutex
->lck_mtx_owner
;
2537 waitinfo
->owner
= thread_tid(holder
);
2541 kdp_rwlck_find_owner(__unused
struct waitq
* waitq
, event64_t event
, thread_waitinfo_t
* waitinfo
)
2543 lck_rw_t
*rwlck
= NULL
;
2544 switch(waitinfo
->wait_type
) {
2545 case kThreadWaitKernelRWLockRead
:
2546 rwlck
= READ_EVENT_TO_RWLOCK(event
);
2548 case kThreadWaitKernelRWLockWrite
:
2549 case kThreadWaitKernelRWLockUpgrade
:
2550 rwlck
= WRITE_EVENT_TO_RWLOCK(event
);
2553 panic("%s was called with an invalid blocking type", __FUNCTION__
);
2556 waitinfo
->context
= VM_KERNEL_UNSLIDE_OR_PERM(rwlck
);
2557 waitinfo
->owner
= 0;