2 * Copyright (c) 2007-2017 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
32 * Mach Operating System Copyright (c) 1991,1990,1989,1988,1987 Carnegie
33 * Mellon University All Rights Reserved.
35 * Permission to use, copy, modify and distribute this software and its
36 * documentation is hereby granted, provided that both the copyright notice
37 * and this permission notice appear in all copies of the software,
38 * derivative works or modified versions, and any portions thereof, and that
39 * both notices appear in supporting documentation.
41 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.
42 * CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES
43 * WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 * Carnegie Mellon requests users of this software to return to
47 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
48 * School of Computer Science Carnegie Mellon University Pittsburgh PA
51 * any improvements or extensions that they make and grant Carnegie Mellon the
52 * rights to redistribute these changes.
56 * Author: Avadis Tevanian, Jr., Michael Wayne Young
59 * Locking primitives implementation
62 #define ATOMIC_PRIVATE 1
63 #define LOCK_PRIVATE 1
65 #include <mach_ldebug.h>
67 #include <kern/kalloc.h>
68 #include <kern/locks.h>
69 #include <kern/misc_protos.h>
70 #include <kern/thread.h>
71 #include <kern/processor.h>
72 #include <kern/sched_prim.h>
74 #include <kern/debug.h>
75 #include <kern/kcdata.h>
78 #include <arm/cpu_data_internal.h>
79 #include <arm/proc_reg.h>
81 #include <machine/atomic.h>
82 #include <machine/machine_cpu.h>
84 #include <sys/kdebug.h>
87 * We need only enough declarations from the BSD-side to be able to
88 * test if our probe is active, and to call __dtrace_probe(). Setting
89 * NEED_DTRACE_DEFS gets a local copy of those definitions pulled in.
92 #define NEED_DTRACE_DEFS
93 #include <../bsd/sys/lockstat.h>
95 #define DTRACE_RW_SHARED 0x0 //reader
96 #define DTRACE_RW_EXCL 0x1 //writer
97 #define DTRACE_NO_FLAG 0x0 //not applicable
99 #endif /* CONFIG_DTRACE */
/*
 * Trace event codes for read/write lock operations. In this file they are
 * combined with DBG_MACH_LOCKS through MACHDBG_CODE() and handed to
 * KERNEL_DEBUG() (e.g. LCK_RW_LCK_SH_TO_EX_CODE on the failed-upgrade path).
 */
101 #define LCK_RW_LCK_EXCLUSIVE_CODE 0x100
102 #define LCK_RW_LCK_EXCLUSIVE1_CODE 0x101
103 #define LCK_RW_LCK_SHARED_CODE 0x102
104 #define LCK_RW_LCK_SH_TO_EX_CODE 0x103
105 #define LCK_RW_LCK_SH_TO_EX1_CODE 0x104
106 #define LCK_RW_LCK_EX_TO_SH_CODE 0x105
109 #define ANY_LOCK_DEBUG (USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG)
111 // Panic in tests that check lock usage correctness
112 // These are undesirable when in a panic or a debugger is running.
113 #define LOCK_CORRECTNESS_PANIC() (kernel_debugger_entry_count == 0)
115 unsigned int LcksOpts
= 0;
117 #if CONFIG_DTRACE && __SMP__
118 extern uint64_t dtrace_spin_threshold
;
126 * Perform simple lock checks.
128 int uslock_check
= 1;
129 int max_lock_loops
= 100000000;
130 decl_simple_lock_data(extern, printf_lock
)
131 decl_simple_lock_data(extern, panic_lock
)
132 #endif /* USLOCK_DEBUG */
134 extern unsigned int not_in_kdp
;
137 * We often want to know the addresses of the callers
138 * of the various lock routines. However, this information
139 * is only used for debugging and statistics.
142 #define INVALID_PC ((void *) VM_MAX_KERNEL_ADDRESS)
143 #define INVALID_THREAD ((void *) VM_MAX_KERNEL_ADDRESS)
147 * Eliminate lint complaints about unused local pc variables.
149 #define OBTAIN_PC(pc,l) ++pc
151 #define OBTAIN_PC(pc,l)
156 * Portable lock package implementation of usimple_locks.
160 #define USLDBG(stmt) stmt
161 void usld_lock_init(usimple_lock_t
, unsigned short);
162 void usld_lock_pre(usimple_lock_t
, pc_t
);
163 void usld_lock_post(usimple_lock_t
, pc_t
);
164 void usld_unlock(usimple_lock_t
, pc_t
);
165 void usld_lock_try_pre(usimple_lock_t
, pc_t
);
166 void usld_lock_try_post(usimple_lock_t
, pc_t
);
167 int usld_lock_common_checks(usimple_lock_t
, const char *);
168 #else /* USLOCK_DEBUG */
170 #endif /* USLOCK_DEBUG */
173 * Owner thread pointer when lock held in spin mode
175 #define LCK_MTX_SPIN_TAG 0xfffffff0
/*
 * Interlock helpers built on the hw_lock_bit primitives.
 *  - interlock_lock/try/unlock operate on bit LCK_ILOCK_BIT of a mutex's
 *    lck_mtx_data word.
 *  - lck_rw_ilk_lock/unlock operate on bit LCK_RW_INTERLOCK_BIT of a
 *    read/write lock's lck_rw_tag word.
 * Each macro casts the address of the lock word to hw_lock_bit_t *.
 */
178 #define interlock_lock(lock) hw_lock_bit ((hw_lock_bit_t*)(&(lock)->lck_mtx_data), LCK_ILOCK_BIT)
179 #define interlock_try(lock) hw_lock_bit_try((hw_lock_bit_t*)(&(lock)->lck_mtx_data), LCK_ILOCK_BIT)
180 #define interlock_unlock(lock) hw_unlock_bit ((hw_lock_bit_t*)(&(lock)->lck_mtx_data), LCK_ILOCK_BIT)
181 #define lck_rw_ilk_lock(lock) hw_lock_bit ((hw_lock_bit_t*)(&(lock)->lck_rw_tag), LCK_RW_INTERLOCK_BIT)
182 #define lck_rw_ilk_unlock(lock) hw_unlock_bit((hw_lock_bit_t*)(&(lock)->lck_rw_tag), LCK_RW_INTERLOCK_BIT)
/*
 * Stand-alone thread fences (no associated memory access):
 *  - memory_barrier():       acquire+release fence
 *  - load_memory_barrier():  acquire fence
 *  - store_memory_barrier(): release fence
 * The memory_order_*_smp constants are defined outside this file.
 * NOTE(review): presumably they weaken to relaxed on non-SMP builds -- confirm.
 */
184 #define memory_barrier() __c11_atomic_thread_fence(memory_order_acq_rel_smp)
185 #define load_memory_barrier() __c11_atomic_thread_fence(memory_order_acquire_smp)
186 #define store_memory_barrier() __c11_atomic_thread_fence(memory_order_release_smp)
188 // Enforce program order of loads and stores.
// ordered_load/ordered_store: access *target as an _Atomic 'type' using
// memory_order_relaxed, i.e. a single atomic access with no fence implied.
189 #define ordered_load(target, type) \
190 __c11_atomic_load((_Atomic type *)(target), memory_order_relaxed)
191 #define ordered_store(target, type, value) \
192 __c11_atomic_store((_Atomic type *)(target), value, memory_order_relaxed)
// Typed wrappers, one load/store pair per kind of lock word:
//   _mtx      -> (lock)->lck_mtx_data  as uintptr_t
//   _rw       -> (lock)->lck_rw_data   as uint32_t
//   _rw_owner -> (lock)->lck_rw_owner  as thread_t
//   _hw       -> (lock)->lock_data     as uintptr_t
//   _bit      -> the pointed-to word itself, as uint32_t
194 #define ordered_load_mtx(lock) ordered_load(&(lock)->lck_mtx_data, uintptr_t)
195 #define ordered_store_mtx(lock, value) ordered_store(&(lock)->lck_mtx_data, uintptr_t, (value))
196 #define ordered_load_rw(lock) ordered_load(&(lock)->lck_rw_data, uint32_t)
197 #define ordered_store_rw(lock, value) ordered_store(&(lock)->lck_rw_data, uint32_t, (value))
198 #define ordered_load_rw_owner(lock) ordered_load(&(lock)->lck_rw_owner, thread_t)
199 #define ordered_store_rw_owner(lock, value) ordered_store(&(lock)->lck_rw_owner, thread_t, (value))
200 #define ordered_load_hw(lock) ordered_load(&(lock)->lock_data, uintptr_t)
201 #define ordered_store_hw(lock, value) ordered_store(&(lock)->lock_data, uintptr_t, (value))
202 #define ordered_load_bit(lock) ordered_load((lock), uint32_t)
203 #define ordered_store_bit(lock, value) ordered_store((lock), uint32_t, (value))
206 // Prevent the compiler from reordering memory operations around this
207 #define compiler_memory_fence() __asm__ volatile ("" ::: "memory")
// Timeout that hw_lock_bit() passes to hw_lock_bit_to(); the contended path
// adds it to ml_get_timebase() to form its spin deadline, and hw_lock_bit()
// panics if the acquisition does not succeed.
209 #define LOCK_PANIC_TIMEOUT 0xc00000
// Keeps a function out of line (used on the contended slow path).
210 #define NOINLINE __attribute__((noinline))
214 #define interrupts_disabled(mask) (mask & PSR_INTMASK)
216 #define interrupts_disabled(mask) (mask & DAIF_IRQF)
221 #define enable_fiq() __asm__ volatile ("cpsie f" ::: "memory");
222 #define enable_interrupts() __asm__ volatile ("cpsie if" ::: "memory");
226 * Forward declarations
229 static void lck_rw_lock_shared_gen(lck_rw_t
*lck
);
230 static void lck_rw_lock_exclusive_gen(lck_rw_t
*lck
);
231 static boolean_t
lck_rw_lock_shared_to_exclusive_success(lck_rw_t
*lck
);
232 static boolean_t
lck_rw_lock_shared_to_exclusive_failure(lck_rw_t
*lck
, uint32_t prior_lock_state
);
233 static void lck_rw_lock_exclusive_to_shared_gen(lck_rw_t
*lck
, uint32_t prior_lock_state
);
234 static lck_rw_type_t
lck_rw_done_gen(lck_rw_t
*lck
, uint32_t prior_lock_state
);
235 void lck_rw_clear_promotions_x86(thread_t thread
);
236 static boolean_t
lck_rw_grab(lck_rw_t
*lock
, int mode
, boolean_t wait
);
239 * atomic exchange API is a low level abstraction of the operations
240 * to atomically read, modify, and write a pointer. This abstraction works
241 * for both Intel and ARMv8.1 compare and exchange atomic instructions as
242 * well as the ARM exclusive instructions.
244 * atomic_exchange_begin() - begin exchange and retrieve current value
245 * atomic_exchange_complete() - conclude an exchange
246 * atomic_exchange_abort() - cancel an exchange started with atomic_exchange_begin()
249 atomic_exchange_begin32(uint32_t *target
, uint32_t *previous
, enum memory_order ord
)
253 val
= load_exclusive32(target
, ord
);
259 atomic_exchange_complete32(uint32_t *target
, uint32_t previous
, uint32_t newval
, enum memory_order ord
)
261 (void)previous
; // Previous not needed, monitor is held
262 return store_exclusive32(target
, newval
, ord
);
266 atomic_exchange_abort(void)
272 atomic_test_and_set32(uint32_t *target
, uint32_t test_mask
, uint32_t set_mask
, enum memory_order ord
, boolean_t wait
)
274 uint32_t value
, prev
;
277 value
= atomic_exchange_begin32(target
, &prev
, ord
);
278 if (value
& test_mask
) {
280 wait_for_event(); // Wait with monitor held
282 atomic_exchange_abort(); // Clear exclusive monitor
286 if (atomic_exchange_complete32(target
, prev
, value
, ord
))
291 void _disable_preemption(void)
293 thread_t thread
= current_thread();
296 count
= thread
->machine
.preemption_count
+ 1;
297 ordered_store(&thread
->machine
.preemption_count
, unsigned int, count
);
300 void _enable_preemption(void)
302 thread_t thread
= current_thread();
306 #define INTERRUPT_MASK PSR_IRQF
308 #define INTERRUPT_MASK DAIF_IRQF
311 count
= thread
->machine
.preemption_count
;
313 panic("Preemption count negative"); // Count will go negative when released
316 goto update_count
; // Preemption is still disabled, just update
317 state
= get_interrupts(); // Get interrupt state
318 if (state
& INTERRUPT_MASK
)
319 goto update_count
; // Interrupts are already masked, can't take AST here
321 disable_interrupts_noread(); // Disable interrupts
322 ordered_store(&thread
->machine
.preemption_count
, unsigned int, count
);
323 if (thread
->machine
.CpuDatap
->cpu_pending_ast
& AST_URGENT
) {
325 #if __ARM_USER_PROTECT__
326 uintptr_t up
= arm_user_protect_begin(thread
);
327 #endif // __ARM_USER_PROTECT__
330 ast_taken_kernel(); // Handle urgent AST
332 #if __ARM_USER_PROTECT__
333 arm_user_protect_end(thread
, up
, TRUE
);
334 #endif // __ARM_USER_PROTECT__
336 return; // Return early on arm only due to FIQ enabling
339 restore_interrupts(state
); // Enable interrupts
343 ordered_store(&thread
->machine
.preemption_count
, unsigned int, count
);
347 int get_preemption_level(void)
349 return current_thread()->machine
.preemption_count
;
352 /* Forward declarations for unexported functions that are used externally */
353 void hw_lock_bit(hw_lock_bit_t
*lock
, unsigned int bit
);
354 void hw_unlock_bit(hw_lock_bit_t
*lock
, unsigned int bit
);
358 hw_lock_bit_to_contended(hw_lock_bit_t
*lock
, uint32_t mask
, uint32_t timeout
);
362 hw_lock_bit_to(hw_lock_bit_t
*lock
, unsigned int bit
, uint32_t timeout
)
364 unsigned int success
= 0;
365 uint32_t mask
= (1 << bit
);
370 _disable_preemption();
372 if (__improbable(!atomic_test_and_set32(lock
, mask
, mask
, memory_order_acquire
, FALSE
)))
373 success
= hw_lock_bit_to_contended(lock
, mask
, timeout
);
378 state
= ordered_load_bit(lock
);
379 if (!(mask
& state
)) {
380 ordered_store_bit(lock
, state
| mask
);
387 LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE
, lock
, bit
);
394 static unsigned int NOINLINE
395 hw_lock_bit_to_contended(hw_lock_bit_t
*lock
, uint32_t mask
, uint32_t timeout
)
401 boolean_t dtrace_enabled
= lockstat_probemap
[LS_LCK_SPIN_LOCK_SPIN
] != 0;
402 if (__improbable(dtrace_enabled
))
403 begin
= mach_absolute_time();
406 for (i
= 0; i
< LOCK_SNOOP_SPINS
; i
++) {
407 // Always load-exclusive before wfe
408 // This grabs the monitor and wakes up on a release event
409 if (atomic_test_and_set32(lock
, mask
, mask
, memory_order_acquire
, TRUE
)) {
414 end
= ml_get_timebase() + timeout
;
415 else if (ml_get_timebase() >= end
)
421 if (__improbable(dtrace_enabled
)) {
422 uint64_t spintime
= mach_absolute_time() - begin
;
423 if (spintime
> dtrace_spin_threshold
)
424 LOCKSTAT_RECORD2(LS_LCK_SPIN_LOCK_SPIN
, lock
, spintime
, mask
);
432 hw_lock_bit(hw_lock_bit_t
*lock
, unsigned int bit
)
434 if (hw_lock_bit_to(lock
, bit
, LOCK_PANIC_TIMEOUT
))
437 panic("hw_lock_bit(): timed out (%p)", lock
);
439 panic("hw_lock_bit(): interlock held (%p)", lock
);
444 hw_lock_bit_try(hw_lock_bit_t
*lock
, unsigned int bit
)
447 uint32_t mask
= (1 << bit
);
451 boolean_t success
= FALSE
;
453 intmask
= disable_interrupts();
455 // TODO: consider weak (non-looping) atomic test-and-set
456 success
= atomic_test_and_set32(lock
, mask
, mask
, memory_order_acquire
, FALSE
);
458 state
= ordered_load_bit(lock
);
459 if (!(mask
& state
)) {
460 ordered_store_bit(lock
, state
| mask
);
465 disable_preemption();
466 restore_interrupts(intmask
);
470 LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE
, lock
, bit
);
477 * Routine: hw_unlock_bit
479 * Release spin-lock. The second parameter is the bit number to test and set.
480 * Decrement the preemption level.
483 hw_unlock_bit(hw_lock_bit_t
*lock
, unsigned int bit
)
485 uint32_t mask
= (1 << bit
);
491 __c11_atomic_fetch_and((_Atomic
uint32_t *)lock
, ~mask
, memory_order_release
);
494 state
= ordered_load_bit(lock
);
495 ordered_store_bit(lock
, state
& ~mask
);
498 LOCKSTAT_RECORD(LS_LCK_SPIN_UNLOCK_RELEASE
, lock
, bit
);
505 * Routine: lck_spin_alloc_init
514 if ((lck
= (lck_spin_t
*) kalloc(sizeof(lck_spin_t
))) != 0)
515 lck_spin_init(lck
, grp
, attr
);
521 * Routine: lck_spin_free
528 lck_spin_destroy(lck
, grp
);
529 kfree((void *) lck
, sizeof(lck_spin_t
));
533 * Routine: lck_spin_init
539 __unused lck_attr_t
* attr
)
541 hw_lock_init(&lck
->hwlock
);
542 lck
->type
= LCK_SPIN_TYPE
;
543 lck_grp_reference(grp
);
544 lck_grp_lckcnt_incr(grp
, LCK_TYPE_SPIN
);
545 store_memory_barrier();
549 * arm_usimple_lock is a lck_spin_t without a group or attributes
552 arm_usimple_lock_init(simple_lock_t lck
, __unused
unsigned short initial_value
)
554 lck
->type
= LCK_SPIN_TYPE
;
555 hw_lock_init(&lck
->hwlock
);
556 store_memory_barrier();
561 * Routine: lck_spin_lock
564 lck_spin_lock(lck_spin_t
*lock
)
566 #if DEVELOPMENT || DEBUG
567 if (lock
->type
!= LCK_SPIN_TYPE
)
568 panic("Invalid spinlock %p", lock
);
569 #endif // DEVELOPMENT || DEBUG
570 hw_lock_lock(&lock
->hwlock
);
574 * Routine: lck_spin_try_lock
577 lck_spin_try_lock(lck_spin_t
*lock
)
579 return hw_lock_try(&lock
->hwlock
);
583 * Routine: lck_spin_unlock
586 lck_spin_unlock(lck_spin_t
*lock
)
588 #if DEVELOPMENT || DEBUG
589 if ((LCK_MTX_STATE_TO_THREAD(lock
->lck_spin_data
) != current_thread()) && LOCK_CORRECTNESS_PANIC())
590 panic("Spinlock not owned by thread %p = %lx", lock
, lock
->lck_spin_data
);
591 if (lock
->type
!= LCK_SPIN_TYPE
)
592 panic("Invalid spinlock type %p", lock
);
593 #endif // DEVELOPMENT || DEBUG
594 hw_lock_unlock(&lock
->hwlock
);
598 * Routine: lck_spin_destroy
605 if (lck
->lck_spin_data
== LCK_SPIN_TAG_DESTROYED
)
607 lck
->lck_spin_data
= LCK_SPIN_TAG_DESTROYED
;
608 lck_grp_lckcnt_decr(grp
, LCK_TYPE_SPIN
);
609 lck_grp_deallocate(grp
);
613 * Routine: kdp_lck_spin_is_acquired
614 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
617 kdp_lck_spin_is_acquired(lck_spin_t
*lck
) {
619 panic("panic: spinlock acquired check done outside of kernel debugger");
621 return ((lck
->lck_spin_data
& ~LCK_SPIN_TAG_DESTROYED
) != 0) ? TRUE
:FALSE
;
625 * Initialize a usimple_lock.
627 * No change in preemption state.
634 #ifndef MACHINE_SIMPLE_LOCK
635 USLDBG(usld_lock_init(l
, tag
));
636 hw_lock_init(&l
->lck_spin_data
);
638 simple_lock_init((simple_lock_t
) l
, tag
);
644 * Acquire a usimple_lock.
646 * Returns with preemption disabled. Note
647 * that the hw_lock routines are responsible for
648 * maintaining preemption state.
654 #ifndef MACHINE_SIMPLE_LOCK
658 USLDBG(usld_lock_pre(l
, pc
));
660 if (!hw_lock_to(&l
->lck_spin_data
, LockTimeOut
)) /* Try to get the lock
662 panic("simple lock deadlock detection - l=%p, cpu=%d, ret=%p", &l
, cpu_number(), pc
);
664 USLDBG(usld_lock_post(l
, pc
));
666 simple_lock((simple_lock_t
) l
);
671 extern void sync(void);
674 * Release a usimple_lock.
676 * Returns with preemption enabled. Note
677 * that the hw_lock routines are responsible for
678 * maintaining preemption state.
684 #ifndef MACHINE_SIMPLE_LOCK
688 USLDBG(usld_unlock(l
, pc
));
690 hw_lock_unlock(&l
->lck_spin_data
);
692 simple_unlock((simple_lock_t
) l
);
698 * Conditionally acquire a usimple_lock.
700 * On success, returns with preemption disabled.
701 * On failure, returns with preemption in the same state
702 * as when first invoked. Note that the hw_lock routines
703 * are responsible for maintaining preemption state.
705 * XXX No stats are gathered on a miss; I preserved this
706 * behavior from the original assembly-language code, but
707 * doesn't it make sense to log misses? XXX
713 #ifndef MACHINE_SIMPLE_LOCK
715 unsigned int success
;
718 USLDBG(usld_lock_try_pre(l
, pc
));
719 if ((success
= hw_lock_try(&l
->lck_spin_data
))) {
720 USLDBG(usld_lock_try_post(l
, pc
));
724 return (simple_lock_try((simple_lock_t
) l
));
730 * States of a usimple_lock. The default when initializing
731 * a usimple_lock is setting it up for debug checking.
733 #define USLOCK_CHECKED 0x0001 /* lock is being checked */
734 #define USLOCK_TAKEN 0x0002 /* lock has been taken */
735 #define USLOCK_INIT 0xBAA0 /* lock has been initialized */
736 #define USLOCK_INITIALIZED (USLOCK_INIT|USLOCK_CHECKED)
737 #define USLOCK_CHECKING(l) (uslock_check && \
738 ((l)->debug.state & USLOCK_CHECKED))
741 * Trace activities of a particularly interesting lock.
743 void usl_trace(usimple_lock_t
, int, pc_t
, const char *);
747 * Initialize the debugging information contained
753 __unused
unsigned short tag
)
755 if (l
== USIMPLE_LOCK_NULL
)
756 panic("lock initialization: null lock pointer");
757 l
->lock_type
= USLOCK_TAG
;
758 l
->debug
.state
= uslock_check
? USLOCK_INITIALIZED
: 0;
759 l
->debug
.lock_cpu
= l
->debug
.unlock_cpu
= 0;
760 l
->debug
.lock_pc
= l
->debug
.unlock_pc
= INVALID_PC
;
761 l
->debug
.lock_thread
= l
->debug
.unlock_thread
= INVALID_THREAD
;
762 l
->debug
.duration
[0] = l
->debug
.duration
[1] = 0;
763 l
->debug
.unlock_cpu
= l
->debug
.unlock_cpu
= 0;
764 l
->debug
.unlock_pc
= l
->debug
.unlock_pc
= INVALID_PC
;
765 l
->debug
.unlock_thread
= l
->debug
.unlock_thread
= INVALID_THREAD
;
770 * These checks apply to all usimple_locks, not just
771 * those with USLOCK_CHECKED turned on.
774 usld_lock_common_checks(
778 if (l
== USIMPLE_LOCK_NULL
)
779 panic("%s: null lock pointer", caller
);
780 if (l
->lock_type
!= USLOCK_TAG
)
781 panic("%s: 0x%x is not a usimple lock", caller
, (integer_t
) l
);
782 if (!(l
->debug
.state
& USLOCK_INIT
))
783 panic("%s: 0x%x is not an initialized lock",
784 caller
, (integer_t
) l
);
785 return USLOCK_CHECKING(l
);
790 * Debug checks on a usimple_lock just before attempting
799 const char *caller
= "usimple_lock";
802 if (!usld_lock_common_checks(l
, caller
))
806 * Note that we have a weird case where we are getting a lock when we are
807 * in the process of putting the system to sleep. We are running with no
808 * current threads, therefore we can't tell if we are trying to retake a lock
809 * we have or someone on the other processor has it. Therefore we just
810 * ignore this test if the locking thread is 0.
813 if ((l
->debug
.state
& USLOCK_TAKEN
) && l
->debug
.lock_thread
&&
814 l
->debug
.lock_thread
== (void *) current_thread()) {
815 printf("%s: lock 0x%x already locked (at %p) by",
816 caller
, (integer_t
) l
, l
->debug
.lock_pc
);
817 printf(" current thread %p (new attempt at pc %p)\n",
818 l
->debug
.lock_thread
, pc
);
821 mp_disable_preemption();
822 usl_trace(l
, cpu_number(), pc
, caller
);
823 mp_enable_preemption();
828 * Debug checks on a usimple_lock just after acquiring it.
830 * Pre-emption has been disabled at this point,
831 * so we are safe in using cpu_number.
839 const char *caller
= "successful usimple_lock";
842 if (!usld_lock_common_checks(l
, caller
))
845 if (!((l
->debug
.state
& ~USLOCK_TAKEN
) == USLOCK_INITIALIZED
))
846 panic("%s: lock 0x%x became uninitialized",
847 caller
, (integer_t
) l
);
848 if ((l
->debug
.state
& USLOCK_TAKEN
))
849 panic("%s: lock 0x%x became TAKEN by someone else",
850 caller
, (integer_t
) l
);
852 mycpu
= cpu_number();
853 l
->debug
.lock_thread
= (void *) current_thread();
854 l
->debug
.state
|= USLOCK_TAKEN
;
855 l
->debug
.lock_pc
= pc
;
856 l
->debug
.lock_cpu
= mycpu
;
858 usl_trace(l
, mycpu
, pc
, caller
);
863 * Debug checks on a usimple_lock just before
864 * releasing it. Note that the caller has not
865 * yet released the hardware lock.
867 * Preemption is still disabled, so there's
868 * no problem using cpu_number.
876 const char *caller
= "usimple_unlock";
879 if (!usld_lock_common_checks(l
, caller
))
882 mycpu
= cpu_number();
884 if (!(l
->debug
.state
& USLOCK_TAKEN
))
885 panic("%s: lock 0x%x hasn't been taken",
886 caller
, (integer_t
) l
);
887 if (l
->debug
.lock_thread
!= (void *) current_thread())
888 panic("%s: unlocking lock 0x%x, owned by thread %p",
889 caller
, (integer_t
) l
, l
->debug
.lock_thread
);
890 if (l
->debug
.lock_cpu
!= mycpu
) {
891 printf("%s: unlocking lock 0x%x on cpu 0x%x",
892 caller
, (integer_t
) l
, mycpu
);
893 printf(" (acquired on cpu 0x%x)\n", l
->debug
.lock_cpu
);
896 usl_trace(l
, mycpu
, pc
, caller
);
898 l
->debug
.unlock_thread
= l
->debug
.lock_thread
;
899 l
->debug
.lock_thread
= INVALID_PC
;
900 l
->debug
.state
&= ~USLOCK_TAKEN
;
901 l
->debug
.unlock_pc
= pc
;
902 l
->debug
.unlock_cpu
= mycpu
;
907 * Debug checks on a usimple_lock just before
908 * attempting to acquire it.
910 * Preemption isn't guaranteed to be disabled.
917 const char *caller
= "usimple_lock_try";
919 if (!usld_lock_common_checks(l
, caller
))
921 mp_disable_preemption();
922 usl_trace(l
, cpu_number(), pc
, caller
);
923 mp_enable_preemption();
928 * Debug checks on a usimple_lock just after
929 * successfully attempting to acquire it.
931 * Preemption has been disabled by the
932 * lock acquisition attempt, so it's safe
941 const char *caller
= "successful usimple_lock_try";
943 if (!usld_lock_common_checks(l
, caller
))
946 if (!((l
->debug
.state
& ~USLOCK_TAKEN
) == USLOCK_INITIALIZED
))
947 panic("%s: lock 0x%x became uninitialized",
948 caller
, (integer_t
) l
);
949 if ((l
->debug
.state
& USLOCK_TAKEN
))
950 panic("%s: lock 0x%x became TAKEN by someone else",
951 caller
, (integer_t
) l
);
953 mycpu
= cpu_number();
954 l
->debug
.lock_thread
= (void *) current_thread();
955 l
->debug
.state
|= USLOCK_TAKEN
;
956 l
->debug
.lock_pc
= pc
;
957 l
->debug
.lock_cpu
= mycpu
;
959 usl_trace(l
, mycpu
, pc
, caller
);
964 * For very special cases, set traced_lock to point to a
965 * specific lock of interest. The result is a series of
966 * XPRs showing lock operations on that lock. The lock_seq
967 * value is used to show the order of those operations.
969 usimple_lock_t traced_lock
;
970 unsigned int lock_seq
;
979 if (traced_lock
== l
) {
981 "seq %d, cpu %d, %s @ %x\n",
982 (integer_t
) lock_seq
, (integer_t
) mycpu
,
983 (integer_t
) op_name
, (integer_t
) pc
, 0);
989 #endif /* USLOCK_DEBUG */
992 * The C portion of the shared/exclusive locks package.
996 * compute the deadline to spin against when
997 * waiting for a change of state on a lck_rw_t
1000 static inline uint64_t
1001 lck_rw_deadline_for_spin(lck_rw_t
*lck
)
1005 word
.data
= ordered_load_rw(lck
);
1006 if (word
.can_sleep
) {
1007 if (word
.r_waiting
|| word
.w_waiting
|| (word
.shared_count
> machine_info
.max_cpus
)) {
1009 * there are already threads waiting on this lock... this
1010 * implies that they have spun beyond their deadlines waiting for
1011 * the desired state to show up so we will not bother spinning at this time...
1013 * the current number of threads sharing this lock exceeds our capacity to run them
1014 * concurrently and since all states we're going to spin for require the rw_shared_count
1015 * to be at 0, we'll not bother spinning since the latency for this to happen is
1018 return (mach_absolute_time());
1020 return (mach_absolute_time() + MutexSpin
);
1022 return (mach_absolute_time() + (100000LL * 1000000000LL));
1027 lck_rw_drain_status(lck_rw_t
*lock
, uint32_t status_mask
, boolean_t wait __unused
)
1030 uint64_t deadline
= 0;
1034 deadline
= lck_rw_deadline_for_spin(lock
);
1037 data
= load_exclusive32(&lock
->lck_rw_data
, memory_order_acquire_smp
);
1038 if ((data
& status_mask
) == 0)
1044 if (!wait
|| (mach_absolute_time() >= deadline
))
1052 data
= ordered_load_rw(lock
);
1053 if ((data
& status_mask
) == 0)
1061 * Spin while interlock is held.
1064 lck_rw_interlock_spin(lck_rw_t
*lock
)
1070 data
= load_exclusive32(&lock
->lck_rw_data
, memory_order_relaxed
);
1071 if (data
& LCK_RW_INTERLOCK
)
1079 panic("lck_rw_interlock_spin(): Interlock locked %p %x", lock
, lock
->lck_rw_data
);
1084 * We disable interrupts while holding the RW interlock to prevent an
1085 * interrupt from exacerbating hold time.
1086 * Hence, local helper functions lck_interlock_lock()/lck_interlock_unlock().
1088 static inline boolean_t
1089 lck_interlock_lock(lck_rw_t
*lck
)
1093 istate
= ml_set_interrupts_enabled(FALSE
);
1094 lck_rw_ilk_lock(lck
);
1099 lck_interlock_unlock(lck_rw_t
*lck
, boolean_t istate
)
1101 lck_rw_ilk_unlock(lck
);
1102 ml_set_interrupts_enabled(istate
);
/*
 * 'mode' values for lck_rw_grab():
 *  - LCK_RW_GRAB_WANT:   try to set LCK_RW_WANT_EXCL when it is currently clear.
 *  - LCK_RW_GRAB_SHARED: try to add one LCK_RW_SHARED_READER, allowed when no
 *    WANT_EXCL/WANT_UPGRADE is set, or when readers already hold the lock and
 *    LCK_RW_PRIV_EXCL is clear.
 */
1106 #define LCK_RW_GRAB_WANT 0
1107 #define LCK_RW_GRAB_SHARED 1
1110 lck_rw_grab(lck_rw_t
*lock
, int mode
, boolean_t wait
)
1112 uint64_t deadline
= 0;
1113 uint32_t data
, prev
;
1118 deadline
= lck_rw_deadline_for_spin(lock
);
1120 wait
= FALSE
; // Don't spin on UP systems
1124 data
= atomic_exchange_begin32(&lock
->lck_rw_data
, &prev
, memory_order_acquire_smp
);
1125 if (data
& LCK_RW_INTERLOCK
) {
1126 atomic_exchange_abort();
1127 lck_rw_interlock_spin(lock
);
1131 if (mode
== LCK_RW_GRAB_WANT
) {
1132 if ((data
& LCK_RW_WANT_EXCL
) == 0) {
1133 data
|= LCK_RW_WANT_EXCL
;
1136 } else { // LCK_RW_GRAB_SHARED
1137 if (((data
& (LCK_RW_WANT_EXCL
| LCK_RW_WANT_UPGRADE
)) == 0) ||
1138 (((data
& LCK_RW_SHARED_MASK
)) && ((data
& LCK_RW_PRIV_EXCL
) == 0))) {
1139 data
+= LCK_RW_SHARED_READER
;
1144 if (atomic_exchange_complete32(&lock
->lck_rw_data
, prev
, data
, memory_order_acquire_smp
))
1147 if (wait
) // Non-waiting
1150 atomic_exchange_abort();
1151 if (!wait
|| (mach_absolute_time() >= deadline
))
1159 * Routine: lck_rw_alloc_init
1168 if ((lck
= (lck_rw_t
*)kalloc(sizeof(lck_rw_t
))) != 0)
1169 lck_rw_init(lck
, grp
, attr
);
1175 * Routine: lck_rw_free
1182 lck_rw_destroy(lck
, grp
);
1183 kfree(lck
, sizeof(lck_rw_t
));
1187 * Routine: lck_rw_init
1195 if (attr
== LCK_ATTR_NULL
)
1196 attr
= &LockDefaultLckAttr
;
1197 memset(lck
, 0, sizeof(lck_rw_t
));
1198 lck
->lck_rw_can_sleep
= TRUE
;
1199 if ((attr
->lck_attr_val
& LCK_ATTR_RW_SHARED_PRIORITY
) == 0)
1200 lck
->lck_rw_priv_excl
= TRUE
;
1202 lck_grp_reference(grp
);
1203 lck_grp_lckcnt_incr(grp
, LCK_TYPE_RW
);
1208 * Routine: lck_rw_destroy
1215 if (lck
->lck_rw_tag
== LCK_RW_TAG_DESTROYED
)
1218 lck_rw_assert(lck
, LCK_RW_ASSERT_NOTHELD
);
1220 lck
->lck_rw_tag
= LCK_RW_TAG_DESTROYED
;
1221 lck_grp_lckcnt_decr(grp
, LCK_TYPE_RW
);
1222 lck_grp_deallocate(grp
);
1227 * Routine: lck_rw_lock
1232 lck_rw_type_t lck_rw_type
)
1234 if (lck_rw_type
== LCK_RW_TYPE_SHARED
)
1235 lck_rw_lock_shared(lck
);
1236 else if (lck_rw_type
== LCK_RW_TYPE_EXCLUSIVE
)
1237 lck_rw_lock_exclusive(lck
);
1239 panic("lck_rw_lock(): Invalid RW lock type: %x", lck_rw_type
);
1243 * Routine: lck_rw_lock_exclusive
1246 lck_rw_lock_exclusive(lck_rw_t
*lock
)
1248 thread_t thread
= current_thread();
1250 thread
->rwlock_count
++;
1251 if (atomic_test_and_set32(&lock
->lck_rw_data
,
1252 (LCK_RW_SHARED_MASK
| LCK_RW_WANT_EXCL
| LCK_RW_WANT_UPGRADE
| LCK_RW_INTERLOCK
),
1253 LCK_RW_WANT_EXCL
, memory_order_acquire_smp
, FALSE
)) {
1255 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE
, lock
, DTRACE_RW_EXCL
);
1256 #endif /* CONFIG_DTRACE */
1258 lck_rw_lock_exclusive_gen(lock
);
1260 thread_t owner
= ordered_load_rw_owner(lock
);
1261 assertf(owner
== THREAD_NULL
, "state=0x%x, owner=%p", ordered_load_rw(lock
), owner
);
1263 ordered_store_rw_owner(lock
, thread
);
1267 * Routine: lck_rw_lock_shared
1270 lck_rw_lock_shared(lck_rw_t
*lock
)
1272 uint32_t data
, prev
;
1274 current_thread()->rwlock_count
++;
1276 data
= atomic_exchange_begin32(&lock
->lck_rw_data
, &prev
, memory_order_acquire_smp
);
1277 if (data
& (LCK_RW_WANT_EXCL
| LCK_RW_WANT_UPGRADE
| LCK_RW_INTERLOCK
)) {
1278 atomic_exchange_abort();
1279 lck_rw_lock_shared_gen(lock
);
1282 data
+= LCK_RW_SHARED_READER
;
1283 if (atomic_exchange_complete32(&lock
->lck_rw_data
, prev
, data
, memory_order_acquire_smp
))
1288 thread_t owner
= ordered_load_rw_owner(lock
);
1289 assertf(owner
== THREAD_NULL
, "state=0x%x, owner=%p", ordered_load_rw(lock
), owner
);
1292 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE
, lock
, DTRACE_RW_SHARED
);
1293 #endif /* CONFIG_DTRACE */
1298 * Routine: lck_rw_lock_shared_to_exclusive
1301 lck_rw_lock_shared_to_exclusive(lck_rw_t
*lock
)
1303 uint32_t data
, prev
;
1306 data
= atomic_exchange_begin32(&lock
->lck_rw_data
, &prev
, memory_order_acquire_smp
);
1307 if (data
& LCK_RW_INTERLOCK
) {
1308 atomic_exchange_abort();
1309 lck_rw_interlock_spin(lock
);
1312 if (data
& LCK_RW_WANT_UPGRADE
) {
1313 data
-= LCK_RW_SHARED_READER
;
1314 if ((data
& LCK_RW_SHARED_MASK
) == 0) /* we were the last reader */
1315 data
&= ~(LCK_RW_W_WAITING
); /* so clear the wait indicator */
1316 if (atomic_exchange_complete32(&lock
->lck_rw_data
, prev
, data
, memory_order_acquire_smp
))
1317 return lck_rw_lock_shared_to_exclusive_failure(lock
, prev
);
1319 data
|= LCK_RW_WANT_UPGRADE
; /* ask for WANT_UPGRADE */
1320 data
-= LCK_RW_SHARED_READER
; /* and shed our read count */
1321 if (atomic_exchange_complete32(&lock
->lck_rw_data
, prev
, data
, memory_order_acquire_smp
))
1326 /* we now own the WANT_UPGRADE */
1327 if (data
& LCK_RW_SHARED_MASK
) /* check to see if all of the readers are drained */
1328 lck_rw_lock_shared_to_exclusive_success(lock
); /* if not, we need to go wait */
1330 thread_t owner
= ordered_load_rw_owner(lock
);
1331 assertf(owner
== THREAD_NULL
, "state=0x%x, owner=%p", ordered_load_rw(lock
), owner
);
1333 ordered_store_rw_owner(lock
, current_thread());
1335 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE
, lock
, 0);
1336 #endif /* CONFIG_DTRACE */
1342 * Routine: lck_rw_lock_shared_to_exclusive_failure
1344 * Fast path code has already dropped our read
1345 * count and determined that someone else owns 'lck_rw_want_upgrade'
1346 * if 'lck_rw_shared_count' == 0, it has also already dropped 'lck_w_waiting'
1347 * all we need to do here is determine if a wakeup is needed
1350 lck_rw_lock_shared_to_exclusive_failure(
1352 uint32_t prior_lock_state
)
1354 thread_t thread
= current_thread();
1355 uint32_t rwlock_count
;
1357 /* Check if dropping the lock means that we need to unpromote */
1358 rwlock_count
= thread
->rwlock_count
--;
1360 if (rwlock_count
== 0) {
1361 panic("rw lock count underflow for thread %p", thread
);
1364 if ((prior_lock_state
& LCK_RW_W_WAITING
) &&
1365 ((prior_lock_state
& LCK_RW_SHARED_MASK
) == LCK_RW_SHARED_READER
)) {
1367 * Someone else has requested upgrade.
1368 * Since we've released the read lock, wake
1369 * him up if he's blocked waiting
1371 thread_wakeup(LCK_RW_WRITER_EVENT(lck
));
1374 if ((rwlock_count
== 1 /* field now 0 */) && (thread
->sched_flags
& TH_SFLAG_RW_PROMOTED
)) {
1375 /* sched_flags checked without lock, but will be rechecked while clearing */
1376 lck_rw_clear_promotion(thread
);
1379 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_SH_TO_EX_CODE
) | DBG_FUNC_NONE
,
1380 VM_KERNEL_UNSLIDE_OR_PERM(lck
), lck
->lck_rw_shared_count
, lck
->lck_rw_want_upgrade
, 0, 0);
1386 * Routine: lck_rw_lock_shared_to_exclusive_success
1388 * assembly fast path code has already dropped our read
1389 * count and successfully acquired 'lck_rw_want_upgrade'
1390 * we just need to wait for the rest of the readers to drain
1391 * and then we can return as the exclusive holder of this lock
1394 lck_rw_lock_shared_to_exclusive_success(
1397 __kdebug_only
uintptr_t trace_lck
= VM_KERNEL_UNSLIDE_OR_PERM(lock
);
1402 boolean_t not_shared
;
1405 uint64_t wait_interval
= 0;
1406 int readers_at_sleep
= 0;
1407 boolean_t dtrace_ls_initialized
= FALSE
;
1408 boolean_t dtrace_rwl_shared_to_excl_spin
, dtrace_rwl_shared_to_excl_block
, dtrace_ls_enabled
= FALSE
;
1411 while (!lck_rw_drain_status(lock
, LCK_RW_SHARED_MASK
, FALSE
)) {
1413 word
.data
= ordered_load_rw(lock
);
1415 if (dtrace_ls_initialized
== FALSE
) {
1416 dtrace_ls_initialized
= TRUE
;
1417 dtrace_rwl_shared_to_excl_spin
= (lockstat_probemap
[LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN
] != 0);
1418 dtrace_rwl_shared_to_excl_block
= (lockstat_probemap
[LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK
] != 0);
1419 dtrace_ls_enabled
= dtrace_rwl_shared_to_excl_spin
|| dtrace_rwl_shared_to_excl_block
;
1420 if (dtrace_ls_enabled
) {
1422 * Either sleeping or spinning is happening,
1423 * start a timing of our delay interval now.
1425 readers_at_sleep
= word
.shared_count
;
1426 wait_interval
= mach_absolute_time();
1431 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_SH_TO_EX_SPIN_CODE
) | DBG_FUNC_START
,
1432 trace_lck
, word
.shared_count
, 0, 0, 0);
1434 not_shared
= lck_rw_drain_status(lock
, LCK_RW_SHARED_MASK
, TRUE
);
1436 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_SH_TO_EX_SPIN_CODE
) | DBG_FUNC_END
,
1437 trace_lck
, lock
->lck_rw_shared_count
, 0, 0, 0);
1443 * if we get here, the spin deadline in lck_rw_wait_on_status()
1444 * has expired w/o the rw_shared_count having drained to 0
1445 * check to see if we're allowed to do a thread_block
1447 if (word
.can_sleep
) {
1449 istate
= lck_interlock_lock(lock
);
1451 word
.data
= ordered_load_rw(lock
);
1452 if (word
.shared_count
!= 0) {
1453 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_SH_TO_EX_WAIT_CODE
) | DBG_FUNC_START
,
1454 trace_lck
, word
.shared_count
, 0, 0, 0);
1457 ordered_store_rw(lock
, word
.data
);
1459 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockUpgrade
);
1460 res
= assert_wait(LCK_RW_WRITER_EVENT(lock
), THREAD_UNINT
);
1461 lck_interlock_unlock(lock
, istate
);
1463 if (res
== THREAD_WAITING
) {
1464 res
= thread_block(THREAD_CONTINUE_NULL
);
1467 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_SH_TO_EX_WAIT_CODE
) | DBG_FUNC_END
,
1468 trace_lck
, res
, slept
, 0, 0);
1470 lck_interlock_unlock(lock
, istate
);
1477 * We infer whether we took the sleep/spin path above by checking readers_at_sleep.
1479 if (dtrace_ls_enabled
== TRUE
) {
1481 LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN
, lock
, mach_absolute_time() - wait_interval
, 0);
1483 LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK
, lock
,
1484 mach_absolute_time() - wait_interval
, 1,
1485 (readers_at_sleep
== 0 ? 1 : 0), readers_at_sleep
);
1488 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE
, lock
, 1);
1495 * Routine: lck_rw_lock_exclusive_to_shared
1498 void lck_rw_lock_exclusive_to_shared(lck_rw_t
*lock
)
1500 uint32_t data
, prev
;
1502 assertf(lock
->lck_rw_owner
== current_thread(), "state=0x%x, owner=%p", lock
->lck_rw_data
, lock
->lck_rw_owner
);
1503 ordered_store_rw_owner(lock
, THREAD_NULL
);
1505 data
= atomic_exchange_begin32(&lock
->lck_rw_data
, &prev
, memory_order_release_smp
);
1506 if (data
& LCK_RW_INTERLOCK
) {
1508 atomic_exchange_abort();
1509 lck_rw_interlock_spin(lock
); /* wait for interlock to clear */
1512 panic("lck_rw_lock_exclusive_to_shared(): Interlock locked (%p): %x", lock
, data
);
1515 data
+= LCK_RW_SHARED_READER
;
1516 if (data
& LCK_RW_WANT_UPGRADE
)
1517 data
&= ~(LCK_RW_WANT_UPGRADE
);
1519 data
&= ~(LCK_RW_WANT_EXCL
);
1520 if (!((prev
& LCK_RW_W_WAITING
) && (prev
& LCK_RW_PRIV_EXCL
)))
1521 data
&= ~(LCK_RW_W_WAITING
);
1522 if (atomic_exchange_complete32(&lock
->lck_rw_data
, prev
, data
, memory_order_release_smp
))
1526 return lck_rw_lock_exclusive_to_shared_gen(lock
, prev
);
1530 * Routine: lck_rw_lock_exclusive_to_shared_gen
1532 * Fast path has already dropped
1533 * our exclusive state and bumped lck_rw_shared_count
1534 * all we need to do here is determine if anyone
1535 * needs to be awakened.
1538 lck_rw_lock_exclusive_to_shared_gen(
1540 uint32_t prior_lock_state
)
1542 __kdebug_only
uintptr_t trace_lck
= VM_KERNEL_UNSLIDE_OR_PERM(lck
);
1543 lck_rw_word_t fake_lck
;
1546 * prior_lock state is a snapshot of the 1st word of the
1547 * lock in question... we'll fake up a pointer to it
1548 * and carefully not access anything beyond whats defined
1549 * in the first word of a lck_rw_t
1551 fake_lck
.data
= prior_lock_state
;
1553 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_TO_SH_CODE
) | DBG_FUNC_START
,
1554 trace_lck
, fake_lck
->want_excl
, fake_lck
->want_upgrade
, 0, 0);
1557 * don't wake up anyone waiting to take the lock exclusively
1558 * since we hold a read count... when the read count drops to 0,
1559 * the writers will be woken.
1561 * wake up any waiting readers if we don't have any writers waiting,
1562 * or the lock is NOT marked as rw_priv_excl (writers have privilege)
1564 if (!(fake_lck
.priv_excl
&& fake_lck
.w_waiting
) && fake_lck
.r_waiting
)
1565 thread_wakeup(LCK_RW_READER_EVENT(lck
));
1567 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_TO_SH_CODE
) | DBG_FUNC_END
,
1568 trace_lck
, lck
->lck_rw_want_excl
, lck
->lck_rw_want_upgrade
, lck
->lck_rw_shared_count
, 0);
1571 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_TO_SHARED_DOWNGRADE
, lck
, 0);
1577 * Routine: lck_rw_try_lock
1582 lck_rw_type_t lck_rw_type
)
1584 if (lck_rw_type
== LCK_RW_TYPE_SHARED
)
1585 return lck_rw_try_lock_shared(lck
);
1586 else if (lck_rw_type
== LCK_RW_TYPE_EXCLUSIVE
)
1587 return lck_rw_try_lock_exclusive(lck
);
1589 panic("lck_rw_try_lock(): Invalid rw lock type: %x", lck_rw_type
);
1594 * Routine: lck_rw_try_lock_shared
1597 boolean_t
lck_rw_try_lock_shared(lck_rw_t
*lock
)
1599 uint32_t data
, prev
;
1602 data
= atomic_exchange_begin32(&lock
->lck_rw_data
, &prev
, memory_order_acquire_smp
);
1603 if (data
& LCK_RW_INTERLOCK
) {
1605 atomic_exchange_abort();
1606 lck_rw_interlock_spin(lock
);
1609 panic("lck_rw_try_lock_shared(): Interlock locked (%p): %x", lock
, data
);
1612 if (data
& (LCK_RW_WANT_EXCL
| LCK_RW_WANT_UPGRADE
)) {
1613 atomic_exchange_abort();
1614 return FALSE
; /* lock is busy */
1616 data
+= LCK_RW_SHARED_READER
; /* Increment reader refcount */
1617 if (atomic_exchange_complete32(&lock
->lck_rw_data
, prev
, data
, memory_order_acquire_smp
))
1622 thread_t owner
= ordered_load_rw_owner(lock
);
1623 assertf(owner
== THREAD_NULL
, "state=0x%x, owner=%p", ordered_load_rw(lock
), owner
);
1625 current_thread()->rwlock_count
++;
1627 LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE
, lock
, DTRACE_RW_SHARED
);
1628 #endif /* CONFIG_DTRACE */
1634 * Routine: lck_rw_try_lock_exclusive
1637 boolean_t
lck_rw_try_lock_exclusive(lck_rw_t
*lock
)
1639 uint32_t data
, prev
;
1643 data
= atomic_exchange_begin32(&lock
->lck_rw_data
, &prev
, memory_order_acquire_smp
);
1644 if (data
& LCK_RW_INTERLOCK
) {
1646 atomic_exchange_abort();
1647 lck_rw_interlock_spin(lock
);
1650 panic("lck_rw_try_lock_exclusive(): Interlock locked (%p): %x", lock
, data
);
1653 if (data
& (LCK_RW_SHARED_MASK
| LCK_RW_WANT_EXCL
| LCK_RW_WANT_UPGRADE
)) {
1654 atomic_exchange_abort();
1657 data
|= LCK_RW_WANT_EXCL
;
1658 if (atomic_exchange_complete32(&lock
->lck_rw_data
, prev
, data
, memory_order_acquire_smp
))
1662 thread
= current_thread();
1663 thread
->rwlock_count
++;
1665 thread_t owner
= ordered_load_rw_owner(lock
);
1666 assertf(owner
== THREAD_NULL
, "state=0x%x, owner=%p", ordered_load_rw(lock
), owner
);
1668 ordered_store_rw_owner(lock
, thread
);
1670 LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE
, lock
, DTRACE_RW_EXCL
);
1671 #endif /* CONFIG_DTRACE */
1677 * Routine: lck_rw_unlock
1682 lck_rw_type_t lck_rw_type
)
1684 if (lck_rw_type
== LCK_RW_TYPE_SHARED
)
1685 lck_rw_unlock_shared(lck
);
1686 else if (lck_rw_type
== LCK_RW_TYPE_EXCLUSIVE
)
1687 lck_rw_unlock_exclusive(lck
);
1689 panic("lck_rw_unlock(): Invalid RW lock type: %d", lck_rw_type
);
1694 * Routine: lck_rw_unlock_shared
1697 lck_rw_unlock_shared(
1702 assertf(lck
->lck_rw_owner
== THREAD_NULL
, "state=0x%x, owner=%p", lck
->lck_rw_data
, lck
->lck_rw_owner
);
1703 assertf(lck
->lck_rw_shared_count
> 0, "shared_count=0x%x", lck
->lck_rw_shared_count
);
1704 ret
= lck_rw_done(lck
);
1706 if (ret
!= LCK_RW_TYPE_SHARED
)
1707 panic("lck_rw_unlock_shared(): lock %p held in mode: %d", lck
, ret
);
1712 * Routine: lck_rw_unlock_exclusive
1715 lck_rw_unlock_exclusive(
1720 assertf(lck
->lck_rw_owner
== current_thread(), "state=0x%x, owner=%p", lck
->lck_rw_data
, lck
->lck_rw_owner
);
1721 ret
= lck_rw_done(lck
);
1723 if (ret
!= LCK_RW_TYPE_EXCLUSIVE
)
1724 panic("lck_rw_unlock_exclusive(): lock %p held in mode: %d", lck
, ret
);
1729 * Routine: lck_rw_lock_exclusive_gen
1732 lck_rw_lock_exclusive_gen(
1735 __kdebug_only
uintptr_t trace_lck
= VM_KERNEL_UNSLIDE_OR_PERM(lock
);
1738 boolean_t gotlock
= 0;
1739 boolean_t not_shared_or_upgrade
= 0;
1740 wait_result_t res
= 0;
1744 boolean_t dtrace_ls_initialized
= FALSE
;
1745 boolean_t dtrace_rwl_excl_spin
, dtrace_rwl_excl_block
, dtrace_ls_enabled
= FALSE
;
1746 uint64_t wait_interval
= 0;
1747 int readers_at_sleep
= 0;
1751 * Try to acquire the lck_rw_want_excl bit.
1753 while (!lck_rw_grab(lock
, LCK_RW_GRAB_WANT
, FALSE
)) {
1756 if (dtrace_ls_initialized
== FALSE
) {
1757 dtrace_ls_initialized
= TRUE
;
1758 dtrace_rwl_excl_spin
= (lockstat_probemap
[LS_LCK_RW_LOCK_EXCL_SPIN
] != 0);
1759 dtrace_rwl_excl_block
= (lockstat_probemap
[LS_LCK_RW_LOCK_EXCL_BLOCK
] != 0);
1760 dtrace_ls_enabled
= dtrace_rwl_excl_spin
|| dtrace_rwl_excl_block
;
1761 if (dtrace_ls_enabled
) {
1763 * Either sleeping or spinning is happening,
1764 * start a timing of our delay interval now.
1766 readers_at_sleep
= lock
->lck_rw_shared_count
;
1767 wait_interval
= mach_absolute_time();
1772 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_WRITER_SPIN_CODE
) | DBG_FUNC_START
, trace_lck
, 0, 0, 0, 0);
1774 gotlock
= lck_rw_grab(lock
, LCK_RW_GRAB_WANT
, TRUE
);
1776 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_WRITER_SPIN_CODE
) | DBG_FUNC_END
, trace_lck
, 0, 0, gotlock
, 0);
1781 * if we get here, the deadline has expired w/o us
1782 * being able to grab the lock exclusively
1783 * check to see if we're allowed to do a thread_block
1785 word
.data
= ordered_load_rw(lock
);
1786 if (word
.can_sleep
) {
1788 istate
= lck_interlock_lock(lock
);
1789 word
.data
= ordered_load_rw(lock
);
1791 if (word
.want_excl
) {
1793 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_WRITER_WAIT_CODE
) | DBG_FUNC_START
, trace_lck
, 0, 0, 0, 0);
1796 ordered_store_rw(lock
, word
.data
);
1798 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite
);
1799 res
= assert_wait(LCK_RW_WRITER_EVENT(lock
), THREAD_UNINT
);
1800 lck_interlock_unlock(lock
, istate
);
1802 if (res
== THREAD_WAITING
) {
1803 res
= thread_block(THREAD_CONTINUE_NULL
);
1806 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_WRITER_WAIT_CODE
) | DBG_FUNC_END
, trace_lck
, res
, slept
, 0, 0);
1809 ordered_store_rw(lock
, word
.data
);
1810 lck_interlock_unlock(lock
, istate
);
1816 * Wait for readers (and upgrades) to finish...
1818 while (!lck_rw_drain_status(lock
, LCK_RW_SHARED_MASK
| LCK_RW_WANT_UPGRADE
, FALSE
)) {
1822 * Either sleeping or spinning is happening, start
1823 * a timing of our delay interval now. If we set it
1824 * to -1 we don't have accurate data so we cannot later
1825 * decide to record a dtrace spin or sleep event.
1827 if (dtrace_ls_initialized
== FALSE
) {
1828 dtrace_ls_initialized
= TRUE
;
1829 dtrace_rwl_excl_spin
= (lockstat_probemap
[LS_LCK_RW_LOCK_EXCL_SPIN
] != 0);
1830 dtrace_rwl_excl_block
= (lockstat_probemap
[LS_LCK_RW_LOCK_EXCL_BLOCK
] != 0);
1831 dtrace_ls_enabled
= dtrace_rwl_excl_spin
|| dtrace_rwl_excl_block
;
1832 if (dtrace_ls_enabled
) {
1834 * Either sleeping or spinning is happening,
1835 * start a timing of our delay interval now.
1837 readers_at_sleep
= lock
->lck_rw_shared_count
;
1838 wait_interval
= mach_absolute_time();
1843 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_READER_SPIN_CODE
) | DBG_FUNC_START
, trace_lck
, 0, 0, 0, 0);
1845 not_shared_or_upgrade
= lck_rw_drain_status(lock
, LCK_RW_SHARED_MASK
| LCK_RW_WANT_UPGRADE
, TRUE
);
1847 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_READER_SPIN_CODE
) | DBG_FUNC_END
, trace_lck
, 0, 0, not_shared_or_upgrade
, 0);
1849 if (not_shared_or_upgrade
)
1852 * if we get here, the deadline has expired w/o us
1853 * being able to grab the lock exclusively
1854 * check to see if we're allowed to do a thread_block
1856 word
.data
= ordered_load_rw(lock
);
1857 if (word
.can_sleep
) {
1859 istate
= lck_interlock_lock(lock
);
1860 word
.data
= ordered_load_rw(lock
);
1862 if (word
.shared_count
!= 0 || word
.want_upgrade
) {
1863 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_READER_WAIT_CODE
) | DBG_FUNC_START
, trace_lck
, 0, 0, 0, 0);
1866 ordered_store_rw(lock
, word
.data
);
1868 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite
);
1869 res
= assert_wait(LCK_RW_WRITER_EVENT(lock
), THREAD_UNINT
);
1870 lck_interlock_unlock(lock
, istate
);
1872 if (res
== THREAD_WAITING
) {
1873 res
= thread_block(THREAD_CONTINUE_NULL
);
1876 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_EX_READER_WAIT_CODE
) | DBG_FUNC_END
, trace_lck
, res
, slept
, 0, 0);
1878 lck_interlock_unlock(lock
, istate
);
1880 * must own the lock now, since we checked for
1881 * readers or upgrade owner behind the interlock
1882 * no need for a call to 'lck_rw_drain_status'
1891 * Decide what latencies we suffered that are Dtrace events.
1892 * If we have set wait_interval, then we either spun or slept.
1893 * At least we get out from under the interlock before we record
1894 * which is the best we can do here to minimize the impact
1896 * If we have set wait_interval to -1, then dtrace was not enabled when we
1897 * started sleeping/spinning so we don't record this event.
1899 if (dtrace_ls_enabled
== TRUE
) {
1901 LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_EXCL_SPIN
, lock
,
1902 mach_absolute_time() - wait_interval
, 1);
1905 * For the blocking case, we also record if when we blocked
1906 * it was held for read or write, and how many readers.
1907 * Notice that above we recorded this before we dropped
1908 * the interlock so the count is accurate.
1910 LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_EXCL_BLOCK
, lock
,
1911 mach_absolute_time() - wait_interval
, 1,
1912 (readers_at_sleep
== 0 ? 1 : 0), readers_at_sleep
);
1915 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE
, lock
, 1);
1916 #endif /* CONFIG_DTRACE */
1920 * Routine: lck_rw_done
1923 lck_rw_type_t
lck_rw_done(lck_rw_t
*lock
)
1925 uint32_t data
, prev
;
1926 boolean_t once
= FALSE
;
1929 data
= atomic_exchange_begin32(&lock
->lck_rw_data
, &prev
, memory_order_release_smp
);
1930 if (data
& LCK_RW_INTERLOCK
) { /* wait for interlock to clear */
1932 atomic_exchange_abort();
1933 lck_rw_interlock_spin(lock
);
1936 panic("lck_rw_done(): Interlock locked (%p): %x", lock
, data
);
1939 if (data
& LCK_RW_SHARED_MASK
) { /* lock is held shared */
1940 assertf(lock
->lck_rw_owner
== THREAD_NULL
, "state=0x%x, owner=%p", lock
->lck_rw_data
, lock
->lck_rw_owner
);
1941 data
-= LCK_RW_SHARED_READER
;
1942 if ((data
& LCK_RW_SHARED_MASK
) == 0) /* if reader count has now gone to 0, check for waiters */
1944 } else { /* if reader count == 0, must be exclusive lock */
1945 if (data
& LCK_RW_WANT_UPGRADE
) {
1946 data
&= ~(LCK_RW_WANT_UPGRADE
);
1948 if (data
& LCK_RW_WANT_EXCL
)
1949 data
&= ~(LCK_RW_WANT_EXCL
);
1950 else /* lock is not 'owned', panic */
1951 panic("Releasing non-exclusive RW lock without a reader refcount!");
1954 // Only check for holder and clear it once
1955 assertf(lock
->lck_rw_owner
== current_thread(), "state=0x%x, owner=%p", lock
->lck_rw_data
, lock
->lck_rw_owner
);
1956 ordered_store_rw_owner(lock
, THREAD_NULL
);
1961 * test the original values to match what
1962 * lck_rw_done_gen is going to do to determine
1963 * which wakeups need to happen...
1965 * if !(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting)
1967 if (prev
& LCK_RW_W_WAITING
) {
1968 data
&= ~(LCK_RW_W_WAITING
);
1969 if ((prev
& LCK_RW_PRIV_EXCL
) == 0)
1970 data
&= ~(LCK_RW_R_WAITING
);
1972 data
&= ~(LCK_RW_R_WAITING
);
1974 if (atomic_exchange_complete32(&lock
->lck_rw_data
, prev
, data
, memory_order_release_smp
))
1978 return lck_rw_done_gen(lock
, prev
);
1982 * Routine: lck_rw_done_gen
1984 * called from the assembly language wrapper...
1985 * prior_lock_state is the value in the 1st
1986 * word of the lock at the time of a successful
1987 * atomic compare and exchange with the new value...
1988 * it represents the state of the lock before we
1989 * decremented the rw_shared_count or cleared either
1990 * rw_want_upgrade or rw_want_write and
1991 * the lck_x_waiting bits... since the wrapper
1992 * routine has already changed the state atomically,
1993 * we just need to decide if we should
1994 * wake up anyone and what value to return... we do
1995 * this by examining the state of the lock before
1998 static lck_rw_type_t
2001 uint32_t prior_lock_state
)
2003 lck_rw_word_t fake_lck
;
2004 lck_rw_type_t lock_type
;
2006 uint32_t rwlock_count
;
2009 * prior_lock state is a snapshot of the 1st word of the
2010 * lock in question... we'll fake up a pointer to it
2011 * and carefully not access anything beyond whats defined
2012 * in the first word of a lck_rw_t
2014 fake_lck
.data
= prior_lock_state
;
2016 if (fake_lck
.shared_count
<= 1) {
2017 if (fake_lck
.w_waiting
)
2018 thread_wakeup(LCK_RW_WRITER_EVENT(lck
));
2020 if (!(fake_lck
.priv_excl
&& fake_lck
.w_waiting
) && fake_lck
.r_waiting
)
2021 thread_wakeup(LCK_RW_READER_EVENT(lck
));
2023 if (fake_lck
.shared_count
)
2024 lock_type
= LCK_RW_TYPE_SHARED
;
2026 lock_type
= LCK_RW_TYPE_EXCLUSIVE
;
2028 /* Check if dropping the lock means that we need to unpromote */
2029 thread
= current_thread();
2030 rwlock_count
= thread
->rwlock_count
--;
2032 if (rwlock_count
== 0)
2033 panic("rw lock count underflow for thread %p", thread
);
2035 if ((rwlock_count
== 1 /* field now 0 */) && (thread
->sched_flags
& TH_SFLAG_RW_PROMOTED
)) {
2036 /* sched_flags checked without lock, but will be rechecked while clearing */
2037 lck_rw_clear_promotion(thread
);
2040 LOCKSTAT_RECORD(LS_LCK_RW_DONE_RELEASE
, lck
, lock_type
== LCK_RW_TYPE_SHARED
? 0 : 1);
2046 * Routine: lck_rw_lock_shared_gen
2048 * Fast path code has determined that this lock
2049 * is held exclusively... this is where we spin/block
2050 * until we can acquire the lock in the shared mode
2053 lck_rw_lock_shared_gen(
2056 __kdebug_only
uintptr_t trace_lck
= VM_KERNEL_UNSLIDE_OR_PERM(lck
);
2058 boolean_t gotlock
= 0;
2060 wait_result_t res
= 0;
2064 uint64_t wait_interval
= 0;
2065 int readers_at_sleep
= 0;
2066 boolean_t dtrace_ls_initialized
= FALSE
;
2067 boolean_t dtrace_rwl_shared_spin
, dtrace_rwl_shared_block
, dtrace_ls_enabled
= FALSE
;
2068 #endif /* CONFIG_DTRACE */
2070 while ( !lck_rw_grab(lck
, LCK_RW_GRAB_SHARED
, FALSE
)) {
2073 if (dtrace_ls_initialized
== FALSE
) {
2074 dtrace_ls_initialized
= TRUE
;
2075 dtrace_rwl_shared_spin
= (lockstat_probemap
[LS_LCK_RW_LOCK_SHARED_SPIN
] != 0);
2076 dtrace_rwl_shared_block
= (lockstat_probemap
[LS_LCK_RW_LOCK_SHARED_BLOCK
] != 0);
2077 dtrace_ls_enabled
= dtrace_rwl_shared_spin
|| dtrace_rwl_shared_block
;
2078 if (dtrace_ls_enabled
) {
2080 * Either sleeping or spinning is happening,
2081 * start a timing of our delay interval now.
2083 readers_at_sleep
= lck
->lck_rw_shared_count
;
2084 wait_interval
= mach_absolute_time();
2089 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_SHARED_SPIN_CODE
) | DBG_FUNC_START
,
2090 trace_lck
, lck
->lck_rw_want_excl
, lck
->lck_rw_want_upgrade
, 0, 0);
2092 gotlock
= lck_rw_grab(lck
, LCK_RW_GRAB_SHARED
, TRUE
);
2094 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_SHARED_SPIN_CODE
) | DBG_FUNC_END
,
2095 trace_lck
, lck
->lck_rw_want_excl
, lck
->lck_rw_want_upgrade
, gotlock
, 0);
2100 * if we get here, the deadline has expired w/o us
2101 * being able to grab the lock for read
2102 * check to see if we're allowed to do a thread_block
2104 if (lck
->lck_rw_can_sleep
) {
2106 istate
= lck_interlock_lock(lck
);
2108 word
.data
= ordered_load_rw(lck
);
2109 if ((word
.want_excl
|| word
.want_upgrade
) &&
2110 ((word
.shared_count
== 0) || word
.priv_excl
)) {
2112 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_SHARED_WAIT_CODE
) | DBG_FUNC_START
,
2113 trace_lck
, word
.want_excl
, word
.want_upgrade
, 0, 0);
2116 ordered_store_rw(lck
, word
.data
);
2118 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockRead
);
2119 res
= assert_wait(LCK_RW_READER_EVENT(lck
), THREAD_UNINT
);
2120 lck_interlock_unlock(lck
, istate
);
2122 if (res
== THREAD_WAITING
) {
2123 res
= thread_block(THREAD_CONTINUE_NULL
);
2126 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_RW_LCK_SHARED_WAIT_CODE
) | DBG_FUNC_END
,
2127 trace_lck
, res
, slept
, 0, 0);
2129 word
.shared_count
++;
2130 ordered_store_rw(lck
, word
.data
);
2131 lck_interlock_unlock(lck
, istate
);
2138 if (dtrace_ls_enabled
== TRUE
) {
2140 LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_SPIN
, lck
, mach_absolute_time() - wait_interval
, 0);
2142 LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_BLOCK
, lck
,
2143 mach_absolute_time() - wait_interval
, 0,
2144 (readers_at_sleep
== 0 ? 1 : 0), readers_at_sleep
);
2147 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE
, lck
, 0);
2148 #endif /* CONFIG_DTRACE */
2158 case LCK_RW_ASSERT_SHARED
:
2159 if ((lck
->lck_rw_shared_count
!= 0) &&
2160 (lck
->lck_rw_owner
== THREAD_NULL
)) {
2164 case LCK_RW_ASSERT_EXCLUSIVE
:
2165 if ((lck
->lck_rw_want_excl
|| lck
->lck_rw_want_upgrade
) &&
2166 (lck
->lck_rw_shared_count
== 0) &&
2167 (lck
->lck_rw_owner
== current_thread())) {
2171 case LCK_RW_ASSERT_HELD
:
2172 if (lck
->lck_rw_shared_count
!= 0)
2173 return; // Held shared
2174 if ((lck
->lck_rw_want_excl
|| lck
->lck_rw_want_upgrade
) &&
2175 (lck
->lck_rw_owner
== current_thread())) {
2176 return; // Held exclusive
2179 case LCK_RW_ASSERT_NOTHELD
:
2180 if ((lck
->lck_rw_shared_count
== 0) &&
2181 !(lck
->lck_rw_want_excl
|| lck
->lck_rw_want_upgrade
) &&
2182 (lck
->lck_rw_owner
== THREAD_NULL
)) {
2189 panic("rw lock (%p)%s held (mode=%u)", lck
, (type
== LCK_RW_ASSERT_NOTHELD
? "" : " not"), type
);
2194 * Routine: kdp_lck_rw_lock_is_acquired_exclusive
2195 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
2198 kdp_lck_rw_lock_is_acquired_exclusive(lck_rw_t
*lck
) {
2200 panic("panic: rw lock exclusive check done outside of kernel debugger");
2202 return ((lck
->lck_rw_want_upgrade
|| lck
->lck_rw_want_excl
) && (lck
->lck_rw_shared_count
== 0)) ? TRUE
: FALSE
;
2206 * The C portion of the mutex package. These routines are only invoked
2207 * if the optimized assembler routines can't do the work.
2211 * Forward declaration
2216 lck_mtx_ext_t
* lck
,
2221 * Routine: lck_mtx_alloc_init
2230 if ((lck
= (lck_mtx_t
*) kalloc(sizeof(lck_mtx_t
))) != 0)
2231 lck_mtx_init(lck
, grp
, attr
);
2237 * Routine: lck_mtx_free
2244 lck_mtx_destroy(lck
, grp
);
2245 kfree((void *) lck
, sizeof(lck_mtx_t
));
2249 * Routine: lck_mtx_init
2258 lck_mtx_ext_t
*lck_ext
;
2260 lck_attr_t
*lck_attr
;
2262 if (attr
!= LCK_ATTR_NULL
)
2265 lck_attr
= &LockDefaultLckAttr
;
2268 if ((lck_attr
->lck_attr_val
) & LCK_ATTR_DEBUG
) {
2269 if ((lck_ext
= (lck_mtx_ext_t
*) kalloc(sizeof(lck_mtx_ext_t
))) != 0) {
2270 lck_mtx_ext_init(lck_ext
, grp
, lck_attr
);
2271 lck
->lck_mtx_tag
= LCK_MTX_TAG_INDIRECT
;
2272 lck
->lck_mtx_ptr
= lck_ext
;
2273 lck
->lck_mtx_type
= LCK_MTX_TYPE
;
2278 lck
->lck_mtx_ptr
= NULL
; // Clear any padding in the union fields below
2279 lck
->lck_mtx_waiters
= 0;
2280 lck
->lck_mtx_pri
= 0;
2281 lck
->lck_mtx_type
= LCK_MTX_TYPE
;
2282 ordered_store_mtx(lck
, 0);
2284 lck_grp_reference(grp
);
2285 lck_grp_lckcnt_incr(grp
, LCK_TYPE_MTX
);
2289 * Routine: lck_mtx_init_ext
2294 lck_mtx_ext_t
* lck_ext
,
2298 lck_attr_t
*lck_attr
;
2300 if (attr
!= LCK_ATTR_NULL
)
2303 lck_attr
= &LockDefaultLckAttr
;
2305 if ((lck_attr
->lck_attr_val
) & LCK_ATTR_DEBUG
) {
2306 lck_mtx_ext_init(lck_ext
, grp
, lck_attr
);
2307 lck
->lck_mtx_tag
= LCK_MTX_TAG_INDIRECT
;
2308 lck
->lck_mtx_ptr
= lck_ext
;
2309 lck
->lck_mtx_type
= LCK_MTX_TYPE
;
2311 lck
->lck_mtx_waiters
= 0;
2312 lck
->lck_mtx_pri
= 0;
2313 lck
->lck_mtx_type
= LCK_MTX_TYPE
;
2314 ordered_store_mtx(lck
, 0);
2316 lck_grp_reference(grp
);
2317 lck_grp_lckcnt_incr(grp
, LCK_TYPE_MTX
);
2321 * Routine: lck_mtx_ext_init
2325 lck_mtx_ext_t
* lck
,
2329 bzero((void *) lck
, sizeof(lck_mtx_ext_t
));
2331 lck
->lck_mtx
.lck_mtx_type
= LCK_MTX_TYPE
;
2333 if ((attr
->lck_attr_val
) & LCK_ATTR_DEBUG
) {
2334 lck
->lck_mtx_deb
.type
= MUTEX_TAG
;
2335 lck
->lck_mtx_attr
|= LCK_MTX_ATTR_DEBUG
;
2337 lck
->lck_mtx_grp
= grp
;
2339 if (grp
->lck_grp_attr
& LCK_GRP_ATTR_STAT
)
2340 lck
->lck_mtx_attr
|= LCK_MTX_ATTR_STAT
;
2343 /* The slow versions */
2344 static void lck_mtx_lock_contended(lck_mtx_t
*lock
, thread_t thread
, boolean_t interlocked
);
2345 static boolean_t
lck_mtx_try_lock_contended(lck_mtx_t
*lock
, thread_t thread
);
2346 static void lck_mtx_unlock_contended(lck_mtx_t
*lock
, thread_t thread
, boolean_t interlocked
);
2349 * Routine: lck_mtx_verify
2351 * Verify if a mutex is valid
2354 lck_mtx_verify(lck_mtx_t
*lock
)
2356 if (lock
->lck_mtx_type
!= LCK_MTX_TYPE
)
2357 panic("Invalid mutex %p", lock
);
2358 #if DEVELOPMENT || DEBUG
2359 if (lock
->lck_mtx_tag
== LCK_MTX_TAG_DESTROYED
)
2360 panic("Mutex destroyed %p", lock
);
2361 #endif /* DEVELOPMENT || DEBUG */
2365 * Routine: lck_mtx_check_preemption
2367 * Verify preemption is enabled when attempting to acquire a mutex.
2371 lck_mtx_check_preemption(lck_mtx_t
*lock
)
2373 #if DEVELOPMENT || DEBUG
2374 int pl
= get_preemption_level();
2377 panic("Attempt to take mutex with preemption disabled. Lock=%p, level=%d", lock
, pl
);
2384 * Routine: lck_mtx_lock
2387 lck_mtx_lock(lck_mtx_t
*lock
)
2391 lck_mtx_verify(lock
);
2392 lck_mtx_check_preemption(lock
);
2393 thread
= current_thread();
2394 if (atomic_compare_exchange(&lock
->lck_mtx_data
, 0, LCK_MTX_THREAD_TO_STATE(thread
),
2395 memory_order_acquire_smp
, FALSE
)) {
2397 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE
, lock
, 0);
2398 #endif /* CONFIG_DTRACE */
2401 lck_mtx_lock_contended(lock
, thread
, FALSE
);
2405 This is the slow version of mutex locking.
2407 static void NOINLINE
2408 lck_mtx_lock_contended(lck_mtx_t
*lock
, thread_t thread
, boolean_t interlocked
)
2410 thread_t holding_thread
;
2415 goto interlock_held
;
2418 if (atomic_compare_exchange(&lock
->lck_mtx_data
, 0, LCK_MTX_THREAD_TO_STATE(thread
),
2419 memory_order_acquire_smp
, FALSE
))
2421 interlock_lock(lock
);
2423 state
= ordered_load_mtx(lock
);
2424 holding_thread
= LCK_MTX_STATE_TO_THREAD(state
);
2425 if (holding_thread
== NULL
)
2427 ordered_store_mtx(lock
, (state
| LCK_ILOCK
| ARM_LCK_WAITERS
)); // Set waiters bit and wait
2428 lck_mtx_lock_wait(lock
, holding_thread
);
2430 waiters
= lck_mtx_lock_acquire(lock
);
2431 state
= LCK_MTX_THREAD_TO_STATE(thread
);
2433 state
|= ARM_LCK_WAITERS
;
2435 state
|= LCK_ILOCK
; // Preserve interlock
2436 ordered_store_mtx(lock
, state
); // Set ownership
2437 interlock_unlock(lock
); // Release interlock, enable preemption
2439 ordered_store_mtx(lock
, state
); // Set ownership
2440 enable_preemption();
2442 load_memory_barrier();
2445 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE
, lock
, 0);
2446 #endif /* CONFIG_DTRACE */
2450 * Common code for mutex locking as spinlock
2453 lck_mtx_lock_spin_internal(lck_mtx_t
*lock
, boolean_t allow_held_as_mutex
)
2457 interlock_lock(lock
);
2458 state
= ordered_load_mtx(lock
);
2459 if (LCK_MTX_STATE_TO_THREAD(state
)) {
2460 if (allow_held_as_mutex
)
2461 lck_mtx_lock_contended(lock
, current_thread(), TRUE
);
2463 // "Always" variants can never block. If the lock is held and blocking is not allowed
2464 // then someone is mixing always and non-always calls on the same lock, which is
2466 panic("Attempting to block on a lock taken as spin-always %p", lock
);
2469 state
&= ARM_LCK_WAITERS
; // Preserve waiters bit
2470 state
|= (LCK_MTX_SPIN_TAG
| LCK_ILOCK
); // Add spin tag and maintain interlock
2471 ordered_store_mtx(lock
, state
);
2472 load_memory_barrier();
2475 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN_ACQUIRE
, lock
, 0);
2476 #endif /* CONFIG_DTRACE */
2480 * Routine: lck_mtx_lock_spin
2483 lck_mtx_lock_spin(lck_mtx_t
*lock
)
2485 lck_mtx_check_preemption(lock
);
2486 lck_mtx_lock_spin_internal(lock
, TRUE
);
2490 * Routine: lck_mtx_lock_spin_always
2493 lck_mtx_lock_spin_always(lck_mtx_t
*lock
)
2495 lck_mtx_lock_spin_internal(lock
, FALSE
);
2499 * Routine: lck_mtx_try_lock
2502 lck_mtx_try_lock(lck_mtx_t
*lock
)
2504 thread_t thread
= current_thread();
2506 lck_mtx_verify(lock
);
2507 if (atomic_compare_exchange(&lock
->lck_mtx_data
, 0, LCK_MTX_THREAD_TO_STATE(thread
),
2508 memory_order_acquire_smp
, FALSE
)) {
2510 LOCKSTAT_RECORD(LS_LCK_MTX_TRY_LOCK_ACQUIRE
, lock
, 0);
2511 #endif /* CONFIG_DTRACE */
2514 return lck_mtx_try_lock_contended(lock
, thread
);
2517 static boolean_t NOINLINE
2518 lck_mtx_try_lock_contended(lck_mtx_t
*lock
, thread_t thread
)
2520 thread_t holding_thread
;
2525 interlock_lock(lock
);
2526 state
= ordered_load_mtx(lock
);
2527 holding_thread
= LCK_MTX_STATE_TO_THREAD(state
);
2528 if (holding_thread
) {
2529 interlock_unlock(lock
);
2533 disable_preemption_for_thread(thread
);
2534 state
= ordered_load_mtx(lock
);
2535 if (state
& LCK_ILOCK
)
2536 panic("Unexpected interlock set (%p)", lock
);
2537 holding_thread
= LCK_MTX_STATE_TO_THREAD(state
);
2538 if (holding_thread
) {
2539 enable_preemption();
2543 ordered_store_mtx(lock
, state
);
2545 waiters
= lck_mtx_lock_acquire(lock
);
2546 state
= LCK_MTX_THREAD_TO_STATE(thread
);
2548 state
|= ARM_LCK_WAITERS
;
2550 state
|= LCK_ILOCK
; // Preserve interlock
2551 ordered_store_mtx(lock
, state
); // Set ownership
2552 interlock_unlock(lock
); // Release interlock, enable preemption
2554 ordered_store_mtx(lock
, state
); // Set ownership
2555 enable_preemption();
2557 load_memory_barrier();
/*
 * lck_mtx_try_lock_spin_internal: shared body of the try-lock-spin variants.
 *
 * Tries the interlock with interlock_try().  If the lock word then shows an
 * owning thread (the lock is held as a full mutex), the interlock is released
 * when allow_held_as_mutex is set; the panic below covers the remaining case
 * according to the existing comment.  Otherwise the lock word is rewritten to
 * keep only ARM_LCK_WAITERS and to carry LCK_MTX_SPIN_TAG | LCK_ILOCK.
 *
 * NOTE(review): the return statements, the branch keyword between the unlock
 * and the panic, and the #if paired with the #endif below are not present in
 * this listing.
 */
2561 static inline boolean_t
2562 lck_mtx_try_lock_spin_internal(lck_mtx_t
*lock
, boolean_t allow_held_as_mutex
)
// Non-blocking attempt on the interlock.
2566 if (!interlock_try(lock
))
2568 state
= ordered_load_mtx(lock
);
2569 if(LCK_MTX_STATE_TO_THREAD(state
)) {
2570 // Lock is held as mutex
2571 if (allow_held_as_mutex
)
2572 interlock_unlock(lock
);
2574 // "Always" variants can never block. If the lock is held as a normal mutex
2575 // then someone is mixing always and non-always calls on the same lock, which is
2577 panic("Spin-mutex held as full mutex %p", lock
);
2580 state
&= ARM_LCK_WAITERS
; // Preserve waiters bit
2581 state
|= (LCK_MTX_SPIN_TAG
| LCK_ILOCK
); // Add spin tag and maintain interlock
2582 ordered_store_mtx(lock
, state
);
2583 load_memory_barrier();
// Lockstat probe for a successful try-spin-lock (CONFIG_DTRACE section).
2586 LOCKSTAT_RECORD(LS_LCK_MTX_TRY_SPIN_LOCK_ACQUIRE
, lock
, 0);
2587 #endif /* CONFIG_DTRACE */
2592 * Routine: lck_mtx_try_lock_spin
/*
 * Try to take the mutex in spin mode.  Forwards to
 * lck_mtx_try_lock_spin_internal() with allow_held_as_mutex = TRUE and
 * returns its result unchanged.
 */
2595 lck_mtx_try_lock_spin(lck_mtx_t
*lock
)
2597 return lck_mtx_try_lock_spin_internal(lock
, TRUE
);
2601 * Routine: lck_mtx_try_lock_spin_always
/*
 * "Always" flavour of the try-lock-spin call.  Forwards to
 * lck_mtx_try_lock_spin_internal() with allow_held_as_mutex = FALSE and
 * returns its result unchanged.  That routine contains a panic for the case
 * where the lock is found held as a full mutex.
 */
2604 lck_mtx_try_lock_spin_always(lck_mtx_t
*lock
)
2606 return lck_mtx_try_lock_spin_internal(lock
, FALSE
);
2612 * Routine: lck_mtx_unlock
/*
 * lck_mtx_unlock: release a mutex held by the current thread.
 *
 * Reads the lock word first.  When LCK_ILOCK is set and the owner field holds
 * LCK_MTX_SPIN_TAG, ilk_held is set to TRUE.  The "locked as a mutex" path
 * tries a single compare-and-exchange from
 * LCK_MTX_THREAD_TO_STATE(current thread) to 0 with release ordering.
 * lck_mtx_unlock_contended() is called with the ilk_held flag for the cases
 * the fast path does not finish.
 *
 * NOTE(review): several original lines (return type, braces, else/goto or
 * return statements, the #if paired with the #endif below) are missing from
 * this listing, so the exact branch structure cannot be confirmed here.
 */
2615 lck_mtx_unlock(lck_mtx_t
*lock
)
2617 thread_t thread
= current_thread();
2619 boolean_t ilk_held
= FALSE
;
2621 lck_mtx_verify(lock
);
2623 state
= ordered_load_mtx(lock
);
2624 if (state
& LCK_ILOCK
) {
2625 if(LCK_MTX_STATE_TO_THREAD(state
) == (thread_t
)LCK_MTX_SPIN_TAG
)
2626 ilk_held
= TRUE
; // Interlock is held by (presumably) this thread
2629 // Locked as a mutex
// Fast path: succeeds only if the lock word is exactly this thread's state value.
2630 if (atomic_compare_exchange(&lock
->lck_mtx_data
, LCK_MTX_THREAD_TO_STATE(thread
), 0,
2631 memory_order_release_smp
, FALSE
)) {
// Lockstat probe for the release (CONFIG_DTRACE section).
2633 LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE
, lock
, 0);
2634 #endif /* CONFIG_DTRACE */
// Slow path: hand over together with the ilk_held flag computed above.
2638 lck_mtx_unlock_contended(lock
, thread
, ilk_held
);
/*
 * lck_mtx_unlock_contended: slow path of lck_mtx_unlock().
 *
 * Visible steps: reload the lock word; in one sequence take the interlock and
 * panic if 'thread' is not the recorded owner; in the other sequence disable
 * preemption, panic if LCK_ILOCK is already set or if 'thread' is not the
 * owner; call lck_mtx_unlock_wakeup() when ARM_LCK_WAITERS is set; finally
 * reduce the lock word to the ARM_LCK_WAITERS bit, store it, and release the
 * interlock or re-enable preemption.
 *
 * NOTE(review): the conditionals on ilk_held, the else keywords, and the
 * preprocessor lines selecting between the interlock-based and the
 * preemption-based sequences are missing from this listing.  Confirm the
 * exact nesting against the full file before relying on this summary.
 */
2641 static void NOINLINE
2642 lck_mtx_unlock_contended(lck_mtx_t
*lock
, thread_t thread
, boolean_t ilk_held
)
2647 state
= ordered_load_mtx(lock
);
// Interlock-based sequence: take the interlock, then verify ownership.
2650 interlock_lock(lock
);
2651 state
= ordered_load_mtx(lock
);
2652 if (thread
!= LCK_MTX_STATE_TO_THREAD(state
))
2653 panic("lck_mtx_unlock(): Attempt to release lock not owned by thread (%p)", lock
);
// Preemption-based sequence: the interlock bit must not already be set.
2655 disable_preemption_for_thread(thread
);
2656 state
= ordered_load_mtx(lock
);
2657 if (state
& LCK_ILOCK
)
2658 panic("lck_mtx_unlock(): Unexpected interlock set (%p)", lock
);
2659 if (thread
!= LCK_MTX_STATE_TO_THREAD(state
))
2660 panic("lck_mtx_unlock(): Attempt to release lock not owned by thread (%p)", lock
);
2662 ordered_store_mtx(lock
, state
);
// Waiters recorded: call the wakeup routine, then re-read the lock word.
2665 if (state
& ARM_LCK_WAITERS
) {
2666 lck_mtx_unlock_wakeup(lock
, thread
);
2667 state
= ordered_load_mtx(lock
);
// Debug check that lck_mtx_pri is 0 (the condition selecting this line is not visible).
2669 assertf(lock
->lck_mtx_pri
== 0, "pri=0x%x", lock
->lck_mtx_pri
);
2671 state
&= ARM_LCK_WAITERS
; // Retain waiters bit
2674 ordered_store_mtx(lock
, state
);
2675 interlock_unlock(lock
);
2677 ordered_store_mtx(lock
, state
);
2678 enable_preemption();
// Lockstat probe for the release (CONFIG_DTRACE section).
2682 LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE
, lock
, 0);
2683 #endif /* CONFIG_DTRACE */
2687 * Routine: lck_mtx_assert
/*
 * lck_mtx_assert: panic if the mutex ownership does not match 'type'.
 *
 *   LCK_MTX_ASSERT_OWNED     panics when the current thread is not the holder
 *                            recorded in the lock word.
 *   LCK_MTX_ASSERT_NOTOWNED  panics when the current thread is the holder.
 *   anything else            panics with "invalid arg".
 *
 * When the holder field equals LCK_MTX_SPIN_TAG the lock is held in spin mode
 * and the owner is unknown (see the existing comment).
 *
 * NOTE(review): the statement executed in the spin-mode case and the final
 * else keyword are not present in this listing.
 */
2690 lck_mtx_assert(lck_mtx_t
*lock
, unsigned int type
)
2692 thread_t thread
, holder
;
2695 state
= ordered_load_mtx(lock
);
2696 holder
= LCK_MTX_STATE_TO_THREAD(state
);
2697 if (holder
== (thread_t
)LCK_MTX_SPIN_TAG
) {
2698 // Lock is held in spin mode, owner is unknown.
2701 thread
= current_thread();
2702 if (type
== LCK_MTX_ASSERT_OWNED
) {
2703 if (thread
!= holder
)
2704 panic("lck_mtx_assert(): mutex (%p) owned", lock
);
2705 } else if (type
== LCK_MTX_ASSERT_NOTOWNED
) {
2706 if (thread
== holder
)
2707 panic("lck_mtx_assert(): mutex (%p) not owned", lock
);
2709 panic("lck_mtx_assert(): invalid arg (%u)", type
);
2713 * Routine: lck_mtx_ilk_unlock
/*
 * Release the mutex's interlock by calling interlock_unlock() on it.
 * No ownership or state checks are performed in the visible code.
 */
2716 lck_mtx_ilk_unlock(lck_mtx_t
*lock
)
2718 interlock_unlock(lock
);
2723 * Routine: lck_mtx_convert_spin
2725 * Convert a mutex held for spin into a held full mutex
/*
 * lck_mtx_convert_spin: turn a spin-mode hold into a full mutex hold.
 *
 * Returns immediately when the lock word already names the current thread as
 * owner.  Panics unless LCK_ILOCK is set and the owner field holds
 * LCK_MTX_SPIN_TAG.  Otherwise the spin tag is cleared, lck_mtx_lock_acquire()
 * is called, and the current thread is stored as owner before the interlock
 * is released or preemption is re-enabled.
 *
 * NOTE(review): the test on 'waiters' guarding the ARM_LCK_WAITERS update and
 * the preprocessor lines selecting between the two store/unlock sequences at
 * the end are missing from this listing.
 */
2728 lck_mtx_convert_spin(lck_mtx_t
*lock
)
2730 thread_t thread
= current_thread();
2734 state
= ordered_load_mtx(lock
);
2735 if (LCK_MTX_STATE_TO_THREAD(state
) == thread
)
2736 return; // Already owned as mutex, return
// Anything other than "interlock set + spin tag" is a caller error.
2737 if ((state
& LCK_ILOCK
) == 0 || (LCK_MTX_STATE_TO_THREAD(state
) != (thread_t
)LCK_MTX_SPIN_TAG
))
2738 panic("lck_mtx_convert_spin: Not held as spinlock (%p)", lock
);
2739 state
&= ~(LCK_MTX_THREAD_MASK
); // Clear the spin tag
2740 ordered_store_mtx(lock
, state
);
2741 waiters
= lck_mtx_lock_acquire(lock
); // Acquire to manage priority boosts
2742 state
= LCK_MTX_THREAD_TO_STATE(thread
);
2744 state
|= ARM_LCK_WAITERS
;
2747 ordered_store_mtx(lock
, state
); // Set ownership
2748 interlock_unlock(lock
); // Release interlock, enable preemption
2750 ordered_store_mtx(lock
, state
); // Set ownership
2751 enable_preemption();
2757 * Routine: lck_mtx_destroy
/*
 * Body of lck_mtx_destroy (the signature lines are not present in this
 * listing; 'lck' and 'grp' are presumably its parameters -- confirm).
 *
 * Panics if lck_mtx_type is not LCK_MTX_TYPE or if the lock is already tagged
 * LCK_MTX_TAG_DESTROYED, asserts that the current thread does not own the
 * mutex, then tags it destroyed and updates the lock group via
 * lck_grp_lckcnt_decr() and lck_grp_deallocate().
 */
2764 if (lck
->lck_mtx_type
!= LCK_MTX_TYPE
)
2765 panic("Destroying invalid mutex %p", lck
);
2766 if (lck
->lck_mtx_tag
== LCK_MTX_TAG_DESTROYED
)
2767 panic("Destroying previously destroyed lock %p", lck
);
2768 lck_mtx_assert(lck
, LCK_MTX_ASSERT_NOTOWNED
);
// Mark the lock so that a second destroy is caught by the check above.
2769 lck
->lck_mtx_tag
= LCK_MTX_TAG_DESTROYED
;
2770 lck_grp_lckcnt_decr(grp
, LCK_TYPE_MTX
);
2771 lck_grp_deallocate(grp
);
2776 * Routine: lck_spin_assert
/*
 * lck_spin_assert: panic if the spin lock state does not match 'type'.
 *
 * Panics first if lock->type is not LCK_SPIN_TYPE.  The holder is taken from
 * lck_spin_data with the LCK_ILOCK bit masked off.
 *
 *   LCK_ASSERT_OWNED     panics if the holder is not the current thread or if
 *                        LCK_ILOCK is clear (plus a "Lock not owned" panic
 *                        whose condition is not visible).
 *   LCK_ASSERT_NOTOWNED  panics if the holder is the current thread or if
 *                        LCK_ILOCK is set (plus an "owned by thread" panic
 *                        whose condition is not visible).
 *   anything else        panics with "invalid arg".
 *
 * NOTE(review): several guarding if/else lines are missing from this listing;
 * the conditions for the panics at original lines 2792 and 2802 cannot be
 * confirmed here.
 */
2779 lck_spin_assert(lck_spin_t
*lock
, unsigned int type
)
2781 thread_t thread
, holder
;
2784 if (lock
->type
!= LCK_SPIN_TYPE
)
2785 panic("Invalid spinlock %p", lock
);
2787 state
= lock
->lck_spin_data
;
// Holder is the lock word with the interlock bit masked off.
2788 holder
= (thread_t
)(state
& ~LCK_ILOCK
);
2789 thread
= current_thread();
2790 if (type
== LCK_ASSERT_OWNED
) {
2792 panic("Lock not owned %p = %lx", lock
, state
);
2793 if (holder
!= thread
)
2794 panic("Lock not owned by current thread %p = %lx", lock
, state
);
2795 if ((state
& LCK_ILOCK
) == 0)
2796 panic("Lock bit not set %p = %lx", lock
, state
);
2797 } else if (type
== LCK_ASSERT_NOTOWNED
) {
2799 if (holder
== thread
)
2800 panic("Lock owned by current thread %p = %lx", lock
, state
);
2802 panic("Lock %p owned by thread %p", lock
, holder
);
2804 if (state
& LCK_ILOCK
)
2805 panic("Lock bit set %p = %lx", lock
, state
);
2807 panic("lck_spin_assert(): invalid arg (%u)", type
);
/*
 * lck_rw_lock_yield_shared: briefly give up a shared hold on a rw lock.
 *
 * Asserts the lock is held shared, then reads the lock word.  If want_excl or
 * want_upgrade is set in it, or force_yield is true, the shared hold is
 * dropped and taken again.
 *
 * NOTE(review): the return type, the return statements and any statement
 * between the unlock and the re-lock are not present in this listing.
 */
2811 lck_rw_lock_yield_shared(lck_rw_t
*lck
, boolean_t force_yield
)
2815 lck_rw_assert(lck
, LCK_RW_ASSERT_SHARED
);
2817 word
.data
= ordered_load_rw(lck
);
2818 if (word
.want_excl
|| word
.want_upgrade
|| force_yield
) {
2819 lck_rw_unlock_shared(lck
);
2821 lck_rw_lock_shared(lck
);
2829 * Routine: kdp_lck_mtx_lock_spin_is_acquired
2830 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
/*
 * Debugger-only query on a mutex's lock word (see the "NOT SAFE" note in the
 * routine header).
 *
 * Visible checks: the lock word is compared against LCK_MTX_TAG_DESTROYED,
 * and then tested for a non-zero owner field or a set LCK_ILOCK bit.
 *
 * NOTE(review): the condition guarding the panic and the values returned for
 * each check are not present in this listing.
 */
2833 kdp_lck_mtx_lock_spin_is_acquired(lck_mtx_t
*lck
)
2838 panic("panic: spinlock acquired check done outside of kernel debugger");
2840 state
= ordered_load_mtx(lck
);
2841 if (state
== LCK_MTX_TAG_DESTROYED
)
2843 if (LCK_MTX_STATE_TO_THREAD(state
) || (state
& LCK_ILOCK
))
/*
 * kdp_lck_mtx_find_owner: fill 'waitinfo' for a thread waiting on a mutex.
 *
 * The mutex is recovered from 'event' with LCK_EVENT_TO_MUTEX().
 * waitinfo->context receives VM_KERNEL_UNSLIDE_OR_PERM(mutex).
 * waitinfo->owner is set to STACKSHOT_WAITOWNER_MTXSPIN when the owner field
 * holds LCK_MTX_SPIN_TAG; the other visible assignment stores
 * thread_tid(holder), after asserting that the lock word is neither
 * LCK_MTX_TAG_DESTROYED nor LCK_MTX_TAG_INDIRECT.
 *
 * NOTE(review): the else keyword separating the two owner assignments is not
 * present in this listing.
 */
2849 kdp_lck_mtx_find_owner(__unused
struct waitq
* waitq
, event64_t event
, thread_waitinfo_t
* waitinfo
)
2851 lck_mtx_t
* mutex
= LCK_EVENT_TO_MUTEX(event
);
2852 waitinfo
->context
= VM_KERNEL_UNSLIDE_OR_PERM(mutex
);
2853 uintptr_t state
= ordered_load_mtx(mutex
);
2854 thread_t holder
= LCK_MTX_STATE_TO_THREAD(state
);
// Spin-mode hold: report the spin marker instead of a thread id.
2855 if ((uintptr_t)holder
== (uintptr_t)LCK_MTX_SPIN_TAG
) {
2856 waitinfo
->owner
= STACKSHOT_WAITOWNER_MTXSPIN
;
2858 assertf(state
!= (uintptr_t)LCK_MTX_TAG_DESTROYED
, "state=0x%llx", (uint64_t)state
);
2859 assertf(state
!= (uintptr_t)LCK_MTX_TAG_INDIRECT
, "state=0x%llx", (uint64_t)state
);
2860 waitinfo
->owner
= thread_tid(holder
);
/*
 * kdp_rwlck_find_owner: fill 'waitinfo' for a thread waiting on a rw lock.
 *
 * The rw lock is recovered from 'event' according to waitinfo->wait_type:
 * READ_EVENT_TO_RWLOCK() for kThreadWaitKernelRWLockRead, and
 * WRITE_EVENT_TO_RWLOCK() for kThreadWaitKernelRWLockWrite and
 * kThreadWaitKernelRWLockUpgrade.  A panic for an invalid blocking type is
 * also visible.  waitinfo->context then receives
 * VM_KERNEL_UNSLIDE_OR_PERM(rwlck) and waitinfo->owner receives
 * thread_tid(rwlck->lck_rw_owner).
 *
 * NOTE(review): the break statements, the default label, and the closing
 * braces are not present in this listing.
 */
2865 kdp_rwlck_find_owner(__unused
struct waitq
* waitq
, event64_t event
, thread_waitinfo_t
* waitinfo
)
2867 lck_rw_t
*rwlck
= NULL
;
2868 switch(waitinfo
->wait_type
) {
2869 case kThreadWaitKernelRWLockRead
:
2870 rwlck
= READ_EVENT_TO_RWLOCK(event
);
2872 case kThreadWaitKernelRWLockWrite
:
2873 case kThreadWaitKernelRWLockUpgrade
:
2874 rwlck
= WRITE_EVENT_TO_RWLOCK(event
);
2877 panic("%s was called with an invalid blocking type", __FUNCTION__
);
2880 waitinfo
->context
= VM_KERNEL_UNSLIDE_OR_PERM(rwlck
);
2881 waitinfo
->owner
= thread_tid(rwlck
->lck_rw_owner
);