1 /*
2 * Copyright (c) 2007-2018 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System Copyright (c) 1991,1990,1989,1988,1987 Carnegie
33 * Mellon University All Rights Reserved.
34 *
35 * Permission to use, copy, modify and distribute this software and its
36 * documentation is hereby granted, provided that both the copyright notice
37 * and this permission notice appear in all copies of the software,
38 * derivative works or modified versions, and any portions thereof, and that
39 * both notices appear in supporting documentation.
40 *
41 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.
42 * CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES
43 * WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
44 *
45 * Carnegie Mellon requests users of this software to return to
46 *
47 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
48 * School of Computer Science Carnegie Mellon University Pittsburgh PA
49 * 15213-3890
50 *
51 * any improvements or extensions that they make and grant Carnegie Mellon the
52 * rights to redistribute these changes.
53 */
54 /*
55 * File: kern/lock.c
56 * Author: Avadis Tevanian, Jr., Michael Wayne Young
57 * Date: 1985
58 *
59 * Locking primitives implementation
60 */
61
62 #define LOCK_PRIVATE 1
63
64 #include <mach_ldebug.h>
65
66 #include <kern/kalloc.h>
67 #include <kern/lock_stat.h>
68 #include <kern/locks.h>
69 #include <kern/misc_protos.h>
70 #include <kern/thread.h>
71 #include <kern/processor.h>
72 #include <kern/sched_prim.h>
73 #include <kern/debug.h>
74 #include <kern/kcdata.h>
75 #include <string.h>
76
77 #include <arm/cpu_data_internal.h>
78 #include <arm/proc_reg.h>
79 #include <arm/smp.h>
80 #include <machine/atomic.h>
81 #include <machine/machine_cpu.h>
82
83 #include <sys/kdebug.h>
84
85 #if CONFIG_DTRACE
86 #define DTRACE_RW_SHARED 0x0 //reader
87 #define DTRACE_RW_EXCL 0x1 //writer
88 #define DTRACE_NO_FLAG 0x0 //not applicable
89 #endif /* CONFIG_DTRACE */
90
91 #define LCK_RW_LCK_EXCLUSIVE_CODE 0x100
92 #define LCK_RW_LCK_EXCLUSIVE1_CODE 0x101
93 #define LCK_RW_LCK_SHARED_CODE 0x102
94 #define LCK_RW_LCK_SH_TO_EX_CODE 0x103
95 #define LCK_RW_LCK_SH_TO_EX1_CODE 0x104
96 #define LCK_RW_LCK_EX_TO_SH_CODE 0x105
97
98
99 #define ANY_LOCK_DEBUG (USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG)
100
101 // Panic in tests that check lock usage correctness.
102 // Such panics are undesirable when already panicking or while a debugger is running.
103 #define LOCK_CORRECTNESS_PANIC() (kernel_debugger_entry_count == 0)
104
105 unsigned int LcksOpts = 0;
106
107 #define ADAPTIVE_SPIN_ENABLE 0x1
108
109 #if __SMP__
110 int lck_mtx_adaptive_spin_mode = ADAPTIVE_SPIN_ENABLE;
111 #else /* __SMP__ */
112 int lck_mtx_adaptive_spin_mode = 0;
113 #endif /* __SMP__ */
114
115 #define SPINWAIT_OWNER_CHECK_COUNT 4
116
117 typedef enum {
118 SPINWAIT_ACQUIRED, /* Got the lock. */
119 SPINWAIT_INTERLOCK, /* Got the interlock, no owner, but caller must finish acquiring the lock. */
120 SPINWAIT_DID_SPIN, /* Got the interlock, spun, but failed to get the lock. */
121 SPINWAIT_DID_NOT_SPIN, /* Got the interlock, did not spin. */
122 } spinwait_result_t;
123
124 #if CONFIG_DTRACE && __SMP__
125 extern uint64_t dtrace_spin_threshold;
126 #endif
127
128 /* Forwards */
129
130 extern unsigned int not_in_kdp;
131
132 /*
133 * We often want to know the addresses of the callers
134 * of the various lock routines. However, this information
135 * is only used for debugging and statistics.
136 */
137 typedef void *pc_t;
138 #define INVALID_PC ((void *) VM_MAX_KERNEL_ADDRESS)
139 #define INVALID_THREAD ((void *) VM_MAX_KERNEL_ADDRESS)
140
141 #ifdef lint
142 /*
143 * Eliminate lint complaints about unused local pc variables.
144 */
145 #define OBTAIN_PC(pc, l) ++pc
146 #else /* lint */
147 #define OBTAIN_PC(pc, l)
148 #endif /* lint */
149
150
151 /*
152 * Portable lock package implementation of usimple_locks.
153 */
154
155 /*
156 * Owner thread pointer when lock held in spin mode
157 */
158 #define LCK_MTX_SPIN_TAG 0xfffffff0
159
160
161 #define interlock_lock(lock) hw_lock_bit ((hw_lock_bit_t*)(&(lock)->lck_mtx_data), LCK_ILOCK_BIT, LCK_GRP_NULL)
162 #define interlock_try(lock) hw_lock_bit_try((hw_lock_bit_t*)(&(lock)->lck_mtx_data), LCK_ILOCK_BIT, LCK_GRP_NULL)
163 #define interlock_unlock(lock) hw_unlock_bit ((hw_lock_bit_t*)(&(lock)->lck_mtx_data), LCK_ILOCK_BIT)
164 #define lck_rw_ilk_lock(lock) hw_lock_bit ((hw_lock_bit_t*)(&(lock)->lck_rw_tag), LCK_RW_INTERLOCK_BIT, LCK_GRP_NULL)
165 #define lck_rw_ilk_unlock(lock) hw_unlock_bit((hw_lock_bit_t*)(&(lock)->lck_rw_tag), LCK_RW_INTERLOCK_BIT)
166
167 #define load_memory_barrier() os_atomic_thread_fence(acquire)
168
169 // Enforce program order of loads and stores.
170 #define ordered_load(target) \
171 os_atomic_load(target, compiler_acq_rel)
172 #define ordered_store(target, value) \
173 os_atomic_store(target, value, compiler_acq_rel)
174
175 #define ordered_load_mtx(lock) ordered_load(&(lock)->lck_mtx_data)
176 #define ordered_store_mtx(lock, value) ordered_store(&(lock)->lck_mtx_data, (value))
177 #define ordered_load_rw(lock) ordered_load(&(lock)->lck_rw_data)
178 #define ordered_store_rw(lock, value) ordered_store(&(lock)->lck_rw_data, (value))
179 #define ordered_load_rw_owner(lock) ordered_load(&(lock)->lck_rw_owner)
180 #define ordered_store_rw_owner(lock, value) ordered_store(&(lock)->lck_rw_owner, (value))
181 #define ordered_load_hw(lock) ordered_load(&(lock)->lock_data)
182 #define ordered_store_hw(lock, value) ordered_store(&(lock)->lock_data, (value))
183 #define ordered_load_bit(lock) ordered_load((lock))
184 #define ordered_store_bit(lock, value) ordered_store((lock), (value))
185
186
187 // Prevent the compiler from reordering memory operations around this
188 #define compiler_memory_fence() __asm__ volatile ("" ::: "memory")
189
190 #define LOCK_PANIC_TIMEOUT 0xc00000
191 #define NOINLINE __attribute__((noinline))
192
193
194 #if __arm__
195 #define interrupts_disabled(mask) (mask & PSR_INTMASK)
196 #else
197 #define interrupts_disabled(mask) (mask & DAIF_IRQF)
198 #endif
199
200
201 #if __arm__
202 #define enable_fiq() __asm__ volatile ("cpsie f" ::: "memory");
203 #define enable_interrupts() __asm__ volatile ("cpsie if" ::: "memory");
204 #endif
205
206 /*
207 * Forward declarations
208 */
209
210 static void lck_rw_lock_shared_gen(lck_rw_t *lck);
211 static void lck_rw_lock_exclusive_gen(lck_rw_t *lck);
212 static boolean_t lck_rw_lock_shared_to_exclusive_success(lck_rw_t *lck);
213 static boolean_t lck_rw_lock_shared_to_exclusive_failure(lck_rw_t *lck, uint32_t prior_lock_state);
214 static void lck_rw_lock_exclusive_to_shared_gen(lck_rw_t *lck, uint32_t prior_lock_state);
215 static lck_rw_type_t lck_rw_done_gen(lck_rw_t *lck, uint32_t prior_lock_state);
216 static boolean_t lck_rw_grab(lck_rw_t *lock, int mode, boolean_t wait);
217
218 /*
219 * atomic exchange API is a low level abstraction of the operations
220 * to atomically read, modify, and write a pointer. This abstraction works
221 * for both Intel and ARMv8.1 compare and exchange atomic instructions as
222 * well as the ARM exclusive instructions.
223 *
224 * atomic_exchange_begin() - begin exchange and retrieve current value
225 * atomic_exchange_complete() - conclude an exchange
226 * atomic_exchange_abort() - cancel an exchange started with atomic_exchange_begin()
227 */
228 __unused static uint32_t
229 load_exclusive32(uint32_t *target, enum memory_order ord)
230 {
231 uint32_t value;
232
233 #if __arm__
234 if (memory_order_has_release(ord)) {
235 // Pre-load release barrier
236 atomic_thread_fence(memory_order_release);
237 }
238 value = __builtin_arm_ldrex(target);
239 #else
240 if (memory_order_has_acquire(ord)) {
241 value = __builtin_arm_ldaex(target); // ldaxr
242 } else {
243 value = __builtin_arm_ldrex(target); // ldxr
244 }
245 #endif // __arm__
246 return value;
247 }
248
249 __unused static boolean_t
250 store_exclusive32(uint32_t *target, uint32_t value, enum memory_order ord)
251 {
252 boolean_t err;
253
254 #if __arm__
255 err = __builtin_arm_strex(value, target);
256 if (memory_order_has_acquire(ord)) {
257 // Post-store acquire barrier
258 atomic_thread_fence(memory_order_acquire);
259 }
260 #else
261 if (memory_order_has_release(ord)) {
262 err = __builtin_arm_stlex(value, target); // stlxr
263 } else {
264 err = __builtin_arm_strex(value, target); // stxr
265 }
266 #endif // __arm__
267 return !err;
268 }
269
270 static uint32_t
271 atomic_exchange_begin32(uint32_t *target, uint32_t *previous, enum memory_order ord)
272 {
273 uint32_t val;
274
275 #if __ARM_ATOMICS_8_1
276 ord = memory_order_relaxed;
277 #endif
278 val = load_exclusive32(target, ord);
279 *previous = val;
280 return val;
281 }
282
283 static boolean_t
284 atomic_exchange_complete32(uint32_t *target, uint32_t previous, uint32_t newval, enum memory_order ord)
285 {
286 #if __ARM_ATOMICS_8_1
287 return __c11_atomic_compare_exchange_strong((_Atomic uint32_t *)target, &previous, newval, ord, memory_order_relaxed);
288 #else
289 (void)previous; // Previous not needed, monitor is held
290 return store_exclusive32(target, newval, ord);
291 #endif
292 }
293
294 static void
295 atomic_exchange_abort(void)
296 {
297 os_atomic_clear_exclusive();
298 }
299
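/*
 * Routine: atomic_test_and_set32
 * Function:
 * Atomically set the bits in set_mask in *target, provided none of the
 * bits in test_mask are currently set. Returns TRUE on success. On
 * failure it returns FALSE, after either waiting for a change
 * (wait == TRUE, WFE with the monitor held) or clearing the exclusive monitor.
 */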
300 static boolean_t
301 atomic_test_and_set32(uint32_t *target, uint32_t test_mask, uint32_t set_mask, enum memory_order ord, boolean_t wait)
302 {
303 uint32_t value, prev;
304
305 for (;;) {
306 value = atomic_exchange_begin32(target, &prev, ord);
307 if (value & test_mask) {
308 if (wait) {
309 wait_for_event(); // Wait with monitor held
310 } else {
311 atomic_exchange_abort(); // Clear exclusive monitor
312 }
313 return FALSE;
314 }
315 value |= set_mask;
316 if (atomic_exchange_complete32(target, prev, value, ord)) {
317 return TRUE;
318 }
319 }
320 }
321
322 inline boolean_t
323 hw_atomic_test_and_set32(uint32_t *target, uint32_t test_mask, uint32_t set_mask, enum memory_order ord, boolean_t wait)
324 {
325 return atomic_test_and_set32(target, test_mask, set_mask, ord, wait);
326 }
327
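/*
 * Routine: _disable_preemption
 * Function:
 * Increment the current thread's preemption disable count,
 * panicking if the count overflows.
 */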
328 void
329 _disable_preemption(void)
330 {
331 thread_t thread = current_thread();
332 unsigned int count = thread->machine.preemption_count;
333
334 count += 1;
335 if (__improbable(count == 0)) {
336 panic("Preemption count overflow");
337 }
338
339 os_atomic_store(&thread->machine.preemption_count, count, compiler_acq_rel);
340 }
341
342 /*
343 * This function checks whether an AST_URGENT has been pended.
344 *
345 * It is called once preemption has been reenabled, which means the thread
346 * may have been preempted right before this was called, and by the time this
347 * function actually performs the check, we may have changed CPU.
348 *
349 * This race is however benign: the point of AST_URGENT is to trigger a context
350 * switch, so if one happened, there's nothing left to check for, and AST_URGENT
351 * was cleared in the process.
352 *
353 * It follows that this check cannot have false negatives, which allows us
354 * to avoid fiddling with interrupt state for the vast majority of cases
355 * when the check will actually be negative.
356 */
357 static NOINLINE void
358 kernel_preempt_check(thread_t thread)
359 {
360 cpu_data_t *cpu_data_ptr;
361 long state;
362
363 #if __arm__
364 #define INTERRUPT_MASK PSR_IRQF
365 #else // __arm__
366 #define INTERRUPT_MASK DAIF_IRQF
367 #endif // __arm__
368
369 /*
370 * This check is racy and could load from another CPU's pending_ast mask,
371 * but as described above, this can't have false negatives.
372 */
373 cpu_data_ptr = os_atomic_load(&thread->machine.CpuDatap, compiler_acq_rel);
374 if (__probable((cpu_data_ptr->cpu_pending_ast & AST_URGENT) == 0)) {
375 return;
376 }
377
378 /* If interrupts are masked, we can't take an AST here */
379 state = get_interrupts();
380 if ((state & INTERRUPT_MASK) == 0) {
381 disable_interrupts_noread(); // Disable interrupts
382
383 /*
384 * Reload cpu_data_ptr: a context switch would cause it to change.
385 * Now that interrupts are disabled, this will debounce false positives.
386 */
387 cpu_data_ptr = os_atomic_load(&thread->machine.CpuDatap, compiler_acq_rel);
388 if (thread->machine.CpuDatap->cpu_pending_ast & AST_URGENT) {
389 #if __arm__
390 #if __ARM_USER_PROTECT__
391 uintptr_t up = arm_user_protect_begin(thread);
392 #endif // __ARM_USER_PROTECT__
393 enable_fiq();
394 #endif // __arm__
395 ast_taken_kernel(); // Handle urgent AST
396 #if __arm__
397 #if __ARM_USER_PROTECT__
398 arm_user_protect_end(thread, up, TRUE);
399 #endif // __ARM_USER_PROTECT__
400 enable_interrupts();
401 return; // Return early on arm only due to FIQ enabling
402 #endif // __arm__
403 }
404 restore_interrupts(state); // Enable interrupts
405 }
406 }
407
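/*
 * Routine: _enable_preemption
 * Function:
 * Decrement the current thread's preemption disable count, panicking on
 * underflow, and check for a pending urgent AST once the count drops to zero.
 */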
408 void
409 _enable_preemption(void)
410 {
411 thread_t thread = current_thread();
412 unsigned int count = thread->machine.preemption_count;
413
414 if (__improbable(count == 0)) {
415 panic("Preemption count underflow");
416 }
417 count -= 1;
418
419 os_atomic_store(&thread->machine.preemption_count, count, compiler_acq_rel);
420 if (count == 0) {
421 kernel_preempt_check(thread);
422 }
423 }
424
425 int
426 get_preemption_level(void)
427 {
428 return current_thread()->machine.preemption_count;
429 }
430
431 #if __SMP__
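/*
 * Interlock helpers that also manage interrupt state:
 * interlock_try_disable_interrupts() disables interrupts before trying the
 * mutex interlock and restores them if the interlock cannot be taken;
 * interlock_unlock_enable_interrupts() drops the interlock and restores the
 * saved interrupt state.
 */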
432 static inline boolean_t
433 interlock_try_disable_interrupts(
434 lck_mtx_t *mutex,
435 boolean_t *istate)
436 {
437 *istate = ml_set_interrupts_enabled(FALSE);
438
439 if (interlock_try(mutex)) {
440 return 1;
441 } else {
442 ml_set_interrupts_enabled(*istate);
443 return 0;
444 }
445 }
446
447 static inline void
448 interlock_unlock_enable_interrupts(
449 lck_mtx_t *mutex,
450 boolean_t istate)
451 {
452 interlock_unlock(mutex);
453 ml_set_interrupts_enabled(istate);
454 }
455 #endif /* __SMP__ */
456
457 /*
458 * Routine: lck_spin_alloc_init
459 */
460 lck_spin_t *
461 lck_spin_alloc_init(
462 lck_grp_t * grp,
463 lck_attr_t * attr)
464 {
465 lck_spin_t *lck;
466
467 if ((lck = (lck_spin_t *) kalloc(sizeof(lck_spin_t))) != 0) {
468 lck_spin_init(lck, grp, attr);
469 }
470
471 return lck;
472 }
473
474 /*
475 * Routine: lck_spin_free
476 */
477 void
478 lck_spin_free(
479 lck_spin_t * lck,
480 lck_grp_t * grp)
481 {
482 lck_spin_destroy(lck, grp);
483 kfree(lck, sizeof(lck_spin_t));
484 }
485
486 /*
487 * Routine: lck_spin_init
488 */
489 void
490 lck_spin_init(
491 lck_spin_t * lck,
492 lck_grp_t * grp,
493 __unused lck_attr_t * attr)
494 {
495 lck->type = LCK_SPIN_TYPE;
496 hw_lock_init(&lck->hwlock);
497 if (grp) {
498 lck_grp_reference(grp);
499 lck_grp_lckcnt_incr(grp, LCK_TYPE_SPIN);
500 }
501 }
502
503 /*
504 * arm_usimple_lock is a lck_spin_t without a group or attributes
505 */
506 void inline
507 arm_usimple_lock_init(simple_lock_t lck, __unused unsigned short initial_value)
508 {
509 lck->type = LCK_SPIN_TYPE;
510 hw_lock_init(&lck->hwlock);
511 }
512
513
514 /*
515 * Routine: lck_spin_lock
516 */
517 void
518 lck_spin_lock(lck_spin_t *lock)
519 {
520 #if DEVELOPMENT || DEBUG
521 if (lock->type != LCK_SPIN_TYPE) {
522 panic("Invalid spinlock %p", lock);
523 }
524 #endif // DEVELOPMENT || DEBUG
525 hw_lock_lock(&lock->hwlock, LCK_GRP_NULL);
526 }
527
528 void
529 lck_spin_lock_grp(lck_spin_t *lock, lck_grp_t *grp)
530 {
531 #pragma unused(grp)
532 #if DEVELOPMENT || DEBUG
533 if (lock->type != LCK_SPIN_TYPE) {
534 panic("Invalid spinlock %p", lock);
535 }
536 #endif // DEVELOPMENT || DEBUG
537 hw_lock_lock(&lock->hwlock, grp);
538 }
539
540 /*
541 * Routine: lck_spin_lock_nopreempt
542 */
543 void
544 lck_spin_lock_nopreempt(lck_spin_t *lock)
545 {
546 #if DEVELOPMENT || DEBUG
547 if (lock->type != LCK_SPIN_TYPE) {
548 panic("Invalid spinlock %p", lock);
549 }
550 #endif // DEVELOPMENT || DEBUG
551 hw_lock_lock_nopreempt(&lock->hwlock, LCK_GRP_NULL);
552 }
553
554 void
555 lck_spin_lock_nopreempt_grp(lck_spin_t *lock, lck_grp_t *grp)
556 {
557 #pragma unused(grp)
558 #if DEVELOPMENT || DEBUG
559 if (lock->type != LCK_SPIN_TYPE) {
560 panic("Invalid spinlock %p", lock);
561 }
562 #endif // DEVELOPMENT || DEBUG
563 hw_lock_lock_nopreempt(&lock->hwlock, grp);
564 }
565
566 /*
567 * Routine: lck_spin_try_lock
568 */
569 int
570 lck_spin_try_lock(lck_spin_t *lock)
571 {
572 return hw_lock_try(&lock->hwlock, LCK_GRP_NULL);
573 }
574
575 int
576 lck_spin_try_lock_grp(lck_spin_t *lock, lck_grp_t *grp)
577 {
578 #pragma unused(grp)
579 return hw_lock_try(&lock->hwlock, grp);
580 }
581
582 /*
583 * Routine: lck_spin_try_lock_nopreempt
584 */
585 int
586 lck_spin_try_lock_nopreempt(lck_spin_t *lock)
587 {
588 return hw_lock_try_nopreempt(&lock->hwlock, LCK_GRP_NULL);
589 }
590
591 int
592 lck_spin_try_lock_nopreempt_grp(lck_spin_t *lock, lck_grp_t *grp)
593 {
594 #pragma unused(grp)
595 return hw_lock_try_nopreempt(&lock->hwlock, grp);
596 }
597
598 /*
599 * Routine: lck_spin_unlock
600 */
601 void
602 lck_spin_unlock(lck_spin_t *lock)
603 {
604 #if DEVELOPMENT || DEBUG
605 if ((LCK_MTX_STATE_TO_THREAD(lock->lck_spin_data) != current_thread()) && LOCK_CORRECTNESS_PANIC()) {
606 panic("Spinlock not owned by thread %p = %lx", lock, lock->lck_spin_data);
607 }
608 if (lock->type != LCK_SPIN_TYPE) {
609 panic("Invalid spinlock type %p", lock);
610 }
611 #endif // DEVELOPMENT || DEBUG
612 hw_lock_unlock(&lock->hwlock);
613 }
614
615 /*
616 * Routine: lck_spin_unlock_nopreempt
617 */
618 void
619 lck_spin_unlock_nopreempt(lck_spin_t *lock)
620 {
621 #if DEVELOPMENT || DEBUG
622 if ((LCK_MTX_STATE_TO_THREAD(lock->lck_spin_data) != current_thread()) && LOCK_CORRECTNESS_PANIC()) {
623 panic("Spinlock not owned by thread %p = %lx", lock, lock->lck_spin_data);
624 }
625 if (lock->type != LCK_SPIN_TYPE) {
626 panic("Invalid spinlock type %p", lock);
627 }
628 #endif // DEVELOPMENT || DEBUG
629 hw_lock_unlock_nopreempt(&lock->hwlock);
630 }
631
632 /*
633 * Routine: lck_spin_destroy
634 */
635 void
636 lck_spin_destroy(
637 lck_spin_t * lck,
638 lck_grp_t * grp)
639 {
640 if (lck->lck_spin_data == LCK_SPIN_TAG_DESTROYED) {
641 return;
642 }
643 lck->lck_spin_data = LCK_SPIN_TAG_DESTROYED;
644 if (grp) {
645 lck_grp_lckcnt_decr(grp, LCK_TYPE_SPIN);
646 lck_grp_deallocate(grp);
647 }
648 }
649
650 /*
651 * Routine: kdp_lck_spin_is_acquired
652 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
653 */
654 boolean_t
655 kdp_lck_spin_is_acquired(lck_spin_t *lck)
656 {
657 if (not_in_kdp) {
658 panic("panic: spinlock acquired check done outside of kernel debugger");
659 }
660 return ((lck->lck_spin_data & ~LCK_SPIN_TAG_DESTROYED) != 0) ? TRUE:FALSE;
661 }
662
663 /*
664 * Initialize a usimple_lock.
665 *
666 * No change in preemption state.
667 */
668 void
669 usimple_lock_init(
670 usimple_lock_t l,
671 unsigned short tag)
672 {
673 simple_lock_init((simple_lock_t) l, tag);
674 }
675
676
677 /*
678 * Acquire a usimple_lock.
679 *
680 * Returns with preemption disabled. Note
681 * that the hw_lock routines are responsible for
682 * maintaining preemption state.
683 */
684 void
685 (usimple_lock)(
686 usimple_lock_t l
687 LCK_GRP_ARG(lck_grp_t *grp))
688 {
689 simple_lock((simple_lock_t) l, LCK_GRP_PROBEARG(grp));
690 }
691
692
693 extern void sync(void);
694
695 /*
696 * Release a usimple_lock.
697 *
698 * Returns with preemption enabled. Note
699 * that the hw_lock routines are responsible for
700 * maintaining preemption state.
701 */
702 void
703 (usimple_unlock)(
704 usimple_lock_t l)
705 {
706 simple_unlock((simple_lock_t)l);
707 }
708
709
710 /*
711 * Conditionally acquire a usimple_lock.
712 *
713 * On success, returns with preemption disabled.
714 * On failure, returns with preemption in the same state
715 * as when first invoked. Note that the hw_lock routines
716 * are responsible for maintaining preemption state.
717 *
718 * XXX No stats are gathered on a miss; I preserved this
719 * behavior from the original assembly-language code, but
720 * doesn't it make sense to log misses? XXX
721 */
722 unsigned
723 int
724 (usimple_lock_try)(
725 usimple_lock_t l
726 LCK_GRP_ARG(lck_grp_t *grp))
727 {
728 return simple_lock_try((simple_lock_t) l, grp);
729 }
730
731 /*
732 * The C portion of the shared/exclusive locks package.
733 */
734
735 /*
736 * compute the deadline to spin against when
737 * waiting for a change of state on a lck_rw_t
738 */
739 #if __SMP__
740 static inline uint64_t
741 lck_rw_deadline_for_spin(lck_rw_t *lck)
742 {
743 lck_rw_word_t word;
744
745 word.data = ordered_load_rw(lck);
746 if (word.can_sleep) {
747 if (word.r_waiting || word.w_waiting || (word.shared_count > machine_info.max_cpus)) {
748 /*
749 * there are already threads waiting on this lock... this
750 * implies that they have spun beyond their deadlines waiting for
751 * the desired state to show up so we will not bother spinning at this time...
752 * or
753 * the current number of threads sharing this lock exceeds our capacity to run them
754 * concurrently and since all states we're going to spin for require the rw_shared_count
755 * to be at 0, we'll not bother spinning since the latency for this to happen is
756 * unpredictable...
757 */
758 return mach_absolute_time();
759 }
760 return mach_absolute_time() + MutexSpin;
761 } else {
762 return mach_absolute_time() + (100000LL * 1000000000LL);
763 }
764 }
765 #endif // __SMP__
766
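/*
 * Routine: lck_rw_drain_status
 * Function:
 * Wait for the bits in status_mask to clear in the lock word.
 * If wait is TRUE, spin (WFE) until the deadline computed by
 * lck_rw_deadline_for_spin(); otherwise sample the state once.
 * Returns TRUE once the masked bits are clear, FALSE on timeout.
 */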
767 static boolean_t
768 lck_rw_drain_status(lck_rw_t *lock, uint32_t status_mask, boolean_t wait __unused)
769 {
770 #if __SMP__
771 uint64_t deadline = 0;
772 uint32_t data;
773
774 if (wait) {
775 deadline = lck_rw_deadline_for_spin(lock);
776 }
777
778 for (;;) {
779 data = load_exclusive32(&lock->lck_rw_data, memory_order_acquire_smp);
780 if ((data & status_mask) == 0) {
781 break;
782 }
783 if (wait) {
784 wait_for_event();
785 } else {
786 os_atomic_clear_exclusive();
787 }
788 if (!wait || (mach_absolute_time() >= deadline)) {
789 return FALSE;
790 }
791 }
792 os_atomic_clear_exclusive();
793 return TRUE;
794 #else
795 uint32_t data;
796
797 data = ordered_load_rw(lock);
798 if ((data & status_mask) == 0) {
799 return TRUE;
800 } else {
801 return FALSE;
802 }
803 #endif // __SMP__
804 }
805
806 /*
807 * Spin while interlock is held.
808 */
809 static inline void
810 lck_rw_interlock_spin(lck_rw_t *lock)
811 {
812 #if __SMP__
813 uint32_t data;
814
815 for (;;) {
816 data = load_exclusive32(&lock->lck_rw_data, memory_order_relaxed);
817 if (data & LCK_RW_INTERLOCK) {
818 wait_for_event();
819 } else {
820 os_atomic_clear_exclusive();
821 return;
822 }
823 }
824 #else
825 panic("lck_rw_interlock_spin(): Interlock locked %p %x", lock, lock->lck_rw_data);
826 #endif
827 }
828
829 /*
830 * We disable interrupts while holding the RW interlock to prevent an
831 * interrupt from exacerbating hold time.
832 * Hence, local helper functions lck_interlock_lock()/lck_interlock_unlock().
833 */
834 static inline boolean_t
835 lck_interlock_lock(lck_rw_t *lck)
836 {
837 boolean_t istate;
838
839 istate = ml_set_interrupts_enabled(FALSE);
840 lck_rw_ilk_lock(lck);
841 return istate;
842 }
843
844 static inline void
845 lck_interlock_unlock(lck_rw_t *lck, boolean_t istate)
846 {
847 lck_rw_ilk_unlock(lck);
848 ml_set_interrupts_enabled(istate);
849 }
850
851
852 #define LCK_RW_GRAB_WANT 0
853 #define LCK_RW_GRAB_SHARED 1
854
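/*
 * Routine: lck_rw_grab
 * Function:
 * Attempt to update the lock word for the requested mode:
 * LCK_RW_GRAB_WANT sets LCK_RW_WANT_EXCL, LCK_RW_GRAB_SHARED adds a
 * reader reference when the state permits it. If wait is TRUE, keep
 * retrying (WFE) until the deadline from lck_rw_deadline_for_spin()
 * expires. Returns TRUE if the lock word was updated, FALSE otherwise.
 */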
855 static boolean_t
856 lck_rw_grab(lck_rw_t *lock, int mode, boolean_t wait)
857 {
858 uint64_t deadline = 0;
859 uint32_t data, prev;
860 boolean_t do_exch;
861
862 #if __SMP__
863 if (wait) {
864 deadline = lck_rw_deadline_for_spin(lock);
865 }
866 #else
867 wait = FALSE; // Don't spin on UP systems
868 #endif
869
870 for (;;) {
871 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
872 if (data & LCK_RW_INTERLOCK) {
873 atomic_exchange_abort();
874 lck_rw_interlock_spin(lock);
875 continue;
876 }
877 do_exch = FALSE;
878 if (mode == LCK_RW_GRAB_WANT) {
879 if ((data & LCK_RW_WANT_EXCL) == 0) {
880 data |= LCK_RW_WANT_EXCL;
881 do_exch = TRUE;
882 }
883 } else { // LCK_RW_GRAB_SHARED
884 if (((data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) == 0) ||
885 (((data & LCK_RW_SHARED_MASK)) && ((data & LCK_RW_PRIV_EXCL) == 0))) {
886 data += LCK_RW_SHARED_READER;
887 do_exch = TRUE;
888 }
889 }
890 if (do_exch) {
891 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
892 return TRUE;
893 }
894 } else {
895 if (wait) { // Wait with monitor held
896 wait_for_event();
897 } else {
898 atomic_exchange_abort();
899 }
900 if (!wait || (mach_absolute_time() >= deadline)) {
901 return FALSE;
902 }
903 }
904 }
905 }
906
907
908 /*
909 * Routine: lck_rw_alloc_init
910 */
911 lck_rw_t *
912 lck_rw_alloc_init(
913 lck_grp_t *grp,
914 lck_attr_t *attr)
915 {
916 lck_rw_t *lck;
917
918 if ((lck = (lck_rw_t *)kalloc(sizeof(lck_rw_t))) != 0) {
919 lck_rw_init(lck, grp, attr);
920 }
921
922 return lck;
923 }
924
925 /*
926 * Routine: lck_rw_free
927 */
928 void
929 lck_rw_free(
930 lck_rw_t *lck,
931 lck_grp_t *grp)
932 {
933 lck_rw_destroy(lck, grp);
934 kfree(lck, sizeof(lck_rw_t));
935 }
936
937 /*
938 * Routine: lck_rw_init
939 */
940 void
941 lck_rw_init(
942 lck_rw_t *lck,
943 lck_grp_t *grp,
944 lck_attr_t *attr)
945 {
946 if (attr == LCK_ATTR_NULL) {
947 attr = &LockDefaultLckAttr;
948 }
949 memset(lck, 0, sizeof(lck_rw_t));
950 lck->lck_rw_can_sleep = TRUE;
951 if ((attr->lck_attr_val & LCK_ATTR_RW_SHARED_PRIORITY) == 0) {
952 lck->lck_rw_priv_excl = TRUE;
953 }
954
955 lck_grp_reference(grp);
956 lck_grp_lckcnt_incr(grp, LCK_TYPE_RW);
957 }
958
959
960 /*
961 * Routine: lck_rw_destroy
962 */
963 void
964 lck_rw_destroy(
965 lck_rw_t *lck,
966 lck_grp_t *grp)
967 {
968 if (lck->lck_rw_tag == LCK_RW_TAG_DESTROYED) {
969 return;
970 }
971 #if MACH_LDEBUG
972 lck_rw_assert(lck, LCK_RW_ASSERT_NOTHELD);
973 #endif
974 lck->lck_rw_tag = LCK_RW_TAG_DESTROYED;
975 lck_grp_lckcnt_decr(grp, LCK_TYPE_RW);
976 lck_grp_deallocate(grp);
977 return;
978 }
979
980 /*
981 * Routine: lck_rw_lock
982 */
983 void
984 lck_rw_lock(
985 lck_rw_t *lck,
986 lck_rw_type_t lck_rw_type)
987 {
988 if (lck_rw_type == LCK_RW_TYPE_SHARED) {
989 lck_rw_lock_shared(lck);
990 } else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) {
991 lck_rw_lock_exclusive(lck);
992 } else {
993 panic("lck_rw_lock(): Invalid RW lock type: %x", lck_rw_type);
994 }
995 }
996
997 /*
998 * Routine: lck_rw_lock_exclusive
999 */
1000 void
1001 lck_rw_lock_exclusive(lck_rw_t *lock)
1002 {
1003 thread_t thread = current_thread();
1004
1005 thread->rwlock_count++;
1006 if (atomic_test_and_set32(&lock->lck_rw_data,
1007 (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK),
1008 LCK_RW_WANT_EXCL, memory_order_acquire_smp, FALSE)) {
1009 #if CONFIG_DTRACE
1010 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
1011 #endif /* CONFIG_DTRACE */
1012 } else {
1013 lck_rw_lock_exclusive_gen(lock);
1014 }
1015 #if MACH_ASSERT
1016 thread_t owner = ordered_load_rw_owner(lock);
1017 assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
1018 #endif
1019 ordered_store_rw_owner(lock, thread);
1020 }
1021
1022 /*
1023 * Routine: lck_rw_lock_shared
1024 */
1025 void
1026 lck_rw_lock_shared(lck_rw_t *lock)
1027 {
1028 uint32_t data, prev;
1029
1030 current_thread()->rwlock_count++;
1031 for (;;) {
1032 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
1033 if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK)) {
1034 atomic_exchange_abort();
1035 lck_rw_lock_shared_gen(lock);
1036 break;
1037 }
1038 data += LCK_RW_SHARED_READER;
1039 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
1040 break;
1041 }
1042 cpu_pause();
1043 }
1044 #if MACH_ASSERT
1045 thread_t owner = ordered_load_rw_owner(lock);
1046 assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
1047 #endif
1048 #if CONFIG_DTRACE
1049 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED);
1050 #endif /* CONFIG_DTRACE */
1051 return;
1052 }
1053
1054 /*
1055 * Routine: lck_rw_lock_shared_to_exclusive
1056 *
1057 * False returned upon failure, in this case the shared lock is dropped.
1058 */
1059 boolean_t
1060 lck_rw_lock_shared_to_exclusive(lck_rw_t *lock)
1061 {
1062 uint32_t data, prev;
1063
1064 for (;;) {
1065 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
1066 if (data & LCK_RW_INTERLOCK) {
1067 atomic_exchange_abort();
1068 lck_rw_interlock_spin(lock);
1069 continue;
1070 }
1071 if (data & LCK_RW_WANT_UPGRADE) {
1072 data -= LCK_RW_SHARED_READER;
1073 if ((data & LCK_RW_SHARED_MASK) == 0) { /* we were the last reader */
1074 data &= ~(LCK_RW_W_WAITING); /* so clear the wait indicator */
1075 }
1076 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
1077 return lck_rw_lock_shared_to_exclusive_failure(lock, prev);
1078 }
1079 } else {
1080 data |= LCK_RW_WANT_UPGRADE; /* ask for WANT_UPGRADE */
1081 data -= LCK_RW_SHARED_READER; /* and shed our read count */
1082 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
1083 break;
1084 }
1085 }
1086 cpu_pause();
1087 }
1088 /* we now own the WANT_UPGRADE */
1089 if (data & LCK_RW_SHARED_MASK) { /* check to see if all of the readers are drained */
1090 lck_rw_lock_shared_to_exclusive_success(lock); /* if not, we need to go wait */
1091 }
1092 #if MACH_ASSERT
1093 thread_t owner = ordered_load_rw_owner(lock);
1094 assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
1095 #endif
1096 ordered_store_rw_owner(lock, current_thread());
1097 #if CONFIG_DTRACE
1098 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lock, 0);
1099 #endif /* CONFIG_DTRACE */
1100 return TRUE;
1101 }
1102
1103
1104 /*
1105 * Routine: lck_rw_lock_shared_to_exclusive_failure
1106 * Function:
1107 * Fast path code has already dropped our read
1108 * count and determined that someone else owns 'lck_rw_want_upgrade'.
1109 * If 'lck_rw_shared_count' == 0, it has also already dropped 'lck_w_waiting';
1110 * all we need to do here is determine if a wakeup is needed.
1111 */
1112 static boolean_t
1113 lck_rw_lock_shared_to_exclusive_failure(
1114 lck_rw_t *lck,
1115 uint32_t prior_lock_state)
1116 {
1117 thread_t thread = current_thread();
1118 uint32_t rwlock_count;
1119
1120 /* Check if dropping the lock means that we need to unpromote */
1121 rwlock_count = thread->rwlock_count--;
1122 #if MACH_LDEBUG
1123 if (rwlock_count == 0) {
1124 panic("rw lock count underflow for thread %p", thread);
1125 }
1126 #endif
1127 if ((prior_lock_state & LCK_RW_W_WAITING) &&
1128 ((prior_lock_state & LCK_RW_SHARED_MASK) == LCK_RW_SHARED_READER)) {
1129 /*
1130 * Someone else has requested upgrade.
1131 * Since we've released the read lock, wake
1132 * him up if he's blocked waiting
1133 */
1134 thread_wakeup(LCK_RW_WRITER_EVENT(lck));
1135 }
1136
1137 if ((rwlock_count == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
1138 /* sched_flags checked without lock, but will be rechecked while clearing */
1139 lck_rw_clear_promotion(thread, unslide_for_kdebug(lck));
1140 }
1141
1142 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_NONE,
1143 VM_KERNEL_UNSLIDE_OR_PERM(lck), lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0);
1144
1145 return FALSE;
1146 }
1147
1148 /*
1149 * Routine: lck_rw_lock_shared_to_exclusive_success
1150 * Function:
1151 * Fast path code has already dropped our read
1152 * count and successfully acquired 'lck_rw_want_upgrade';
1153 * we just need to wait for the rest of the readers to drain
1154 * and then we can return as the exclusive holder of this lock
1155 */
1156 static boolean_t
1157 lck_rw_lock_shared_to_exclusive_success(
1158 lck_rw_t *lock)
1159 {
1160 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock);
1161 int slept = 0;
1162 lck_rw_word_t word;
1163 wait_result_t res;
1164 boolean_t istate;
1165 boolean_t not_shared;
1166
1167 #if CONFIG_DTRACE
1168 uint64_t wait_interval = 0;
1169 int readers_at_sleep = 0;
1170 boolean_t dtrace_ls_initialized = FALSE;
1171 boolean_t dtrace_rwl_shared_to_excl_spin, dtrace_rwl_shared_to_excl_block, dtrace_ls_enabled = FALSE;
1172 #endif
1173
1174 while (!lck_rw_drain_status(lock, LCK_RW_SHARED_MASK, FALSE)) {
1175 word.data = ordered_load_rw(lock);
1176 #if CONFIG_DTRACE
1177 if (dtrace_ls_initialized == FALSE) {
1178 dtrace_ls_initialized = TRUE;
1179 dtrace_rwl_shared_to_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN] != 0);
1180 dtrace_rwl_shared_to_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK] != 0);
1181 dtrace_ls_enabled = dtrace_rwl_shared_to_excl_spin || dtrace_rwl_shared_to_excl_block;
1182 if (dtrace_ls_enabled) {
1183 /*
1184 * Either sleeping or spinning is happening,
1185 * start a timing of our delay interval now.
1186 */
1187 readers_at_sleep = word.shared_count;
1188 wait_interval = mach_absolute_time();
1189 }
1190 }
1191 #endif
1192
1193 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_START,
1194 trace_lck, word.shared_count, 0, 0, 0);
1195
1196 not_shared = lck_rw_drain_status(lock, LCK_RW_SHARED_MASK, TRUE);
1197
1198 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_END,
1199 trace_lck, lock->lck_rw_shared_count, 0, 0, 0);
1200
1201 if (not_shared) {
1202 break;
1203 }
1204
1205 /*
1206 * if we get here, the spin deadline in lck_rw_drain_status()
1207 * has expired w/o the rw_shared_count having drained to 0;
1208 * check to see if we're allowed to do a thread_block
1209 */
1210 if (word.can_sleep) {
1211 istate = lck_interlock_lock(lock);
1212
1213 word.data = ordered_load_rw(lock);
1214 if (word.shared_count != 0) {
1215 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_START,
1216 trace_lck, word.shared_count, 0, 0, 0);
1217
1218 word.w_waiting = 1;
1219 ordered_store_rw(lock, word.data);
1220
1221 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockUpgrade);
1222 res = assert_wait(LCK_RW_WRITER_EVENT(lock),
1223 THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
1224 lck_interlock_unlock(lock, istate);
1225
1226 if (res == THREAD_WAITING) {
1227 res = thread_block(THREAD_CONTINUE_NULL);
1228 slept++;
1229 }
1230 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_END,
1231 trace_lck, res, slept, 0, 0);
1232 } else {
1233 lck_interlock_unlock(lock, istate);
1234 break;
1235 }
1236 }
1237 }
1238 #if CONFIG_DTRACE
1239 /*
1240 * We only record an event if we actually took the sleep/spin path above (dtrace_ls_enabled was set).
1241 */
1242 if (dtrace_ls_enabled == TRUE) {
1243 if (slept == 0) {
1244 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN, lock, mach_absolute_time() - wait_interval, 0);
1245 } else {
1246 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK, lock,
1247 mach_absolute_time() - wait_interval, 1,
1248 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1249 }
1250 }
1251 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lock, 1);
1252 #endif
1253 return TRUE;
1254 }
1255
1256
1257 /*
1258 * Routine: lck_rw_lock_exclusive_to_shared
1259 */
1260
1261 void
1262 lck_rw_lock_exclusive_to_shared(lck_rw_t *lock)
1263 {
1264 uint32_t data, prev;
1265
1266 assertf(lock->lck_rw_owner == current_thread(), "state=0x%x, owner=%p", lock->lck_rw_data, lock->lck_rw_owner);
1267 ordered_store_rw_owner(lock, THREAD_NULL);
1268 for (;;) {
1269 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_release_smp);
1270 if (data & LCK_RW_INTERLOCK) {
1271 #if __SMP__
1272 atomic_exchange_abort();
1273 lck_rw_interlock_spin(lock); /* wait for interlock to clear */
1274 continue;
1275 #else
1276 panic("lck_rw_lock_exclusive_to_shared(): Interlock locked (%p): %x", lock, data);
1277 #endif // __SMP__
1278 }
1279 data += LCK_RW_SHARED_READER;
1280 if (data & LCK_RW_WANT_UPGRADE) {
1281 data &= ~(LCK_RW_WANT_UPGRADE);
1282 } else {
1283 data &= ~(LCK_RW_WANT_EXCL);
1284 }
1285 if (!((prev & LCK_RW_W_WAITING) && (prev & LCK_RW_PRIV_EXCL))) {
1286 data &= ~(LCK_RW_W_WAITING);
1287 }
1288 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_release_smp)) {
1289 break;
1290 }
1291 cpu_pause();
1292 }
1293 return lck_rw_lock_exclusive_to_shared_gen(lock, prev);
1294 }
1295
1296 /*
1297 * Routine: lck_rw_lock_exclusive_to_shared_gen
1298 * Function:
1299 * Fast path has already dropped
1300 * our exclusive state and bumped lck_rw_shared_count
1301 * all we need to do here is determine if anyone
1302 * needs to be awakened.
1303 */
1304 static void
1305 lck_rw_lock_exclusive_to_shared_gen(
1306 lck_rw_t *lck,
1307 uint32_t prior_lock_state)
1308 {
1309 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
1310 lck_rw_word_t fake_lck;
1311
1312 /*
1313 * prior_lock state is a snapshot of the 1st word of the
1314 * lock in question... we'll fake up a lck_rw_word_t from it
1315 * and carefully not access anything beyond what's defined
1316 * in the first word of a lck_rw_t
1317 */
1318 fake_lck.data = prior_lock_state;
1319
1320 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_START,
1321 trace_lck, fake_lck.want_excl, fake_lck.want_upgrade, 0, 0);
1322
1323 /*
1324 * don't wake up anyone waiting to take the lock exclusively
1325 * since we hold a read count... when the read count drops to 0,
1326 * the writers will be woken.
1327 *
1328 * wake up any waiting readers if we don't have any writers waiting,
1329 * or the lock is NOT marked as rw_priv_excl (writers have privilege)
1330 */
1331 if (!(fake_lck.priv_excl && fake_lck.w_waiting) && fake_lck.r_waiting) {
1332 thread_wakeup(LCK_RW_READER_EVENT(lck));
1333 }
1334
1335 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_END,
1336 trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, lck->lck_rw_shared_count, 0);
1337
1338 #if CONFIG_DTRACE
1339 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_TO_SHARED_DOWNGRADE, lck, 0);
1340 #endif
1341 }
1342
1343
1344 /*
1345 * Routine: lck_rw_try_lock
1346 */
1347 boolean_t
1348 lck_rw_try_lock(
1349 lck_rw_t *lck,
1350 lck_rw_type_t lck_rw_type)
1351 {
1352 if (lck_rw_type == LCK_RW_TYPE_SHARED) {
1353 return lck_rw_try_lock_shared(lck);
1354 } else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) {
1355 return lck_rw_try_lock_exclusive(lck);
1356 } else {
1357 panic("lck_rw_try_lock(): Invalid rw lock type: %x", lck_rw_type);
1358 }
1359 return FALSE;
1360 }
1361
1362 /*
1363 * Routine: lck_rw_try_lock_shared
1364 */
1365
1366 boolean_t
1367 lck_rw_try_lock_shared(lck_rw_t *lock)
1368 {
1369 uint32_t data, prev;
1370
1371 for (;;) {
1372 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
1373 if (data & LCK_RW_INTERLOCK) {
1374 #if __SMP__
1375 atomic_exchange_abort();
1376 lck_rw_interlock_spin(lock);
1377 continue;
1378 #else
1379 panic("lck_rw_try_lock_shared(): Interlock locked (%p): %x", lock, data);
1380 #endif
1381 }
1382 if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) {
1383 atomic_exchange_abort();
1384 return FALSE; /* lock is busy */
1385 }
1386 data += LCK_RW_SHARED_READER; /* Increment reader refcount */
1387 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
1388 break;
1389 }
1390 cpu_pause();
1391 }
1392 #if MACH_ASSERT
1393 thread_t owner = ordered_load_rw_owner(lock);
1394 assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
1395 #endif
1396 current_thread()->rwlock_count++;
1397 #if CONFIG_DTRACE
1398 LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED);
1399 #endif /* CONFIG_DTRACE */
1400 return TRUE;
1401 }
1402
1403
1404 /*
1405 * Routine: lck_rw_try_lock_exclusive
1406 */
1407
1408 boolean_t
1409 lck_rw_try_lock_exclusive(lck_rw_t *lock)
1410 {
1411 uint32_t data, prev;
1412 thread_t thread;
1413
1414 for (;;) {
1415 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
1416 if (data & LCK_RW_INTERLOCK) {
1417 #if __SMP__
1418 atomic_exchange_abort();
1419 lck_rw_interlock_spin(lock);
1420 continue;
1421 #else
1422 panic("lck_rw_try_lock_exclusive(): Interlock locked (%p): %x", lock, data);
1423 #endif
1424 }
1425 if (data & (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) {
1426 atomic_exchange_abort();
1427 return FALSE;
1428 }
1429 data |= LCK_RW_WANT_EXCL;
1430 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
1431 break;
1432 }
1433 cpu_pause();
1434 }
1435 thread = current_thread();
1436 thread->rwlock_count++;
1437 #if MACH_ASSERT
1438 thread_t owner = ordered_load_rw_owner(lock);
1439 assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
1440 #endif
1441 ordered_store_rw_owner(lock, thread);
1442 #if CONFIG_DTRACE
1443 LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
1444 #endif /* CONFIG_DTRACE */
1445 return TRUE;
1446 }
1447
1448
1449 /*
1450 * Routine: lck_rw_unlock
1451 */
1452 void
1453 lck_rw_unlock(
1454 lck_rw_t *lck,
1455 lck_rw_type_t lck_rw_type)
1456 {
1457 if (lck_rw_type == LCK_RW_TYPE_SHARED) {
1458 lck_rw_unlock_shared(lck);
1459 } else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) {
1460 lck_rw_unlock_exclusive(lck);
1461 } else {
1462 panic("lck_rw_unlock(): Invalid RW lock type: %d", lck_rw_type);
1463 }
1464 }
1465
1466
1467 /*
1468 * Routine: lck_rw_unlock_shared
1469 */
1470 void
1471 lck_rw_unlock_shared(
1472 lck_rw_t *lck)
1473 {
1474 lck_rw_type_t ret;
1475
1476 assertf(lck->lck_rw_owner == THREAD_NULL, "state=0x%x, owner=%p", lck->lck_rw_data, lck->lck_rw_owner);
1477 assertf(lck->lck_rw_shared_count > 0, "shared_count=0x%x", lck->lck_rw_shared_count);
1478 ret = lck_rw_done(lck);
1479
1480 if (ret != LCK_RW_TYPE_SHARED) {
1481 panic("lck_rw_unlock_shared(): lock %p held in mode: %d", lck, ret);
1482 }
1483 }
1484
1485
1486 /*
1487 * Routine: lck_rw_unlock_exclusive
1488 */
1489 void
1490 lck_rw_unlock_exclusive(
1491 lck_rw_t *lck)
1492 {
1493 lck_rw_type_t ret;
1494
1495 assertf(lck->lck_rw_owner == current_thread(), "state=0x%x, owner=%p", lck->lck_rw_data, lck->lck_rw_owner);
1496 ret = lck_rw_done(lck);
1497
1498 if (ret != LCK_RW_TYPE_EXCLUSIVE) {
1499 panic("lck_rw_unlock_exclusive(): lock %p held in mode: %d", lck, ret);
1500 }
1501 }
1502
1503
1504 /*
1505 * Routine: lck_rw_lock_exclusive_gen
1506 */
1507 static void
1508 lck_rw_lock_exclusive_gen(
1509 lck_rw_t *lock)
1510 {
1511 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock);
1512 lck_rw_word_t word;
1513 int slept = 0;
1514 boolean_t gotlock = 0;
1515 boolean_t not_shared_or_upgrade = 0;
1516 wait_result_t res = 0;
1517 boolean_t istate;
1518
1519 #if CONFIG_DTRACE
1520 boolean_t dtrace_ls_initialized = FALSE;
1521 boolean_t dtrace_rwl_excl_spin, dtrace_rwl_excl_block, dtrace_ls_enabled = FALSE;
1522 uint64_t wait_interval = 0;
1523 int readers_at_sleep = 0;
1524 #endif
1525
1526 /*
1527 * Try to acquire the lck_rw_want_excl bit.
1528 */
1529 while (!lck_rw_grab(lock, LCK_RW_GRAB_WANT, FALSE)) {
1530 #if CONFIG_DTRACE
1531 if (dtrace_ls_initialized == FALSE) {
1532 dtrace_ls_initialized = TRUE;
1533 dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
1534 dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
1535 dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
1536 if (dtrace_ls_enabled) {
1537 /*
1538 * Either sleeping or spinning is happening,
1539 * start a timing of our delay interval now.
1540 */
1541 readers_at_sleep = lock->lck_rw_shared_count;
1542 wait_interval = mach_absolute_time();
1543 }
1544 }
1545 #endif
1546
1547 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
1548
1549 gotlock = lck_rw_grab(lock, LCK_RW_GRAB_WANT, TRUE);
1550
1551 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_END, trace_lck, 0, 0, gotlock, 0);
1552
1553 if (gotlock) {
1554 break;
1555 }
1556 /*
1557 * if we get here, the deadline has expired w/o us
1558 * being able to grab the lock exclusively
1559 * check to see if we're allowed to do a thread_block
1560 */
1561 word.data = ordered_load_rw(lock);
1562 if (word.can_sleep) {
1563 istate = lck_interlock_lock(lock);
1564 word.data = ordered_load_rw(lock);
1565
1566 if (word.want_excl) {
1567 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
1568
1569 word.w_waiting = 1;
1570 ordered_store_rw(lock, word.data);
1571
1572 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite);
1573 res = assert_wait(LCK_RW_WRITER_EVENT(lock),
1574 THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
1575 lck_interlock_unlock(lock, istate);
1576
1577 if (res == THREAD_WAITING) {
1578 res = thread_block(THREAD_CONTINUE_NULL);
1579 slept++;
1580 }
1581 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_END, trace_lck, res, slept, 0, 0);
1582 } else {
1583 word.want_excl = 1;
1584 ordered_store_rw(lock, word.data);
1585 lck_interlock_unlock(lock, istate);
1586 break;
1587 }
1588 }
1589 }
1590 /*
1591 * Wait for readers (and upgrades) to finish...
1592 */
1593 while (!lck_rw_drain_status(lock, LCK_RW_SHARED_MASK | LCK_RW_WANT_UPGRADE, FALSE)) {
1594 #if CONFIG_DTRACE
1595 /*
1596 * Either sleeping or spinning is happening, start
1597 * a timing of our delay interval now. If we set it
1598 * to -1 we don't have accurate data so we cannot later
1599 * decide to record a dtrace spin or sleep event.
1600 */
1601 if (dtrace_ls_initialized == FALSE) {
1602 dtrace_ls_initialized = TRUE;
1603 dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
1604 dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
1605 dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
1606 if (dtrace_ls_enabled) {
1607 /*
1608 * Either sleeping or spinning is happening,
1609 * start a timing of our delay interval now.
1610 */
1611 readers_at_sleep = lock->lck_rw_shared_count;
1612 wait_interval = mach_absolute_time();
1613 }
1614 }
1615 #endif
1616
1617 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
1618
1619 not_shared_or_upgrade = lck_rw_drain_status(lock, LCK_RW_SHARED_MASK | LCK_RW_WANT_UPGRADE, TRUE);
1620
1621 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_END, trace_lck, 0, 0, not_shared_or_upgrade, 0);
1622
1623 if (not_shared_or_upgrade) {
1624 break;
1625 }
1626 /*
1627 * if we get here, the deadline has expired w/o us
1628 * being able to grab the lock exclusively
1629 * check to see if we're allowed to do a thread_block
1630 */
1631 word.data = ordered_load_rw(lock);
1632 if (word.can_sleep) {
1633 istate = lck_interlock_lock(lock);
1634 word.data = ordered_load_rw(lock);
1635
1636 if (word.shared_count != 0 || word.want_upgrade) {
1637 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
1638
1639 word.w_waiting = 1;
1640 ordered_store_rw(lock, word.data);
1641
1642 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite);
1643 res = assert_wait(LCK_RW_WRITER_EVENT(lock),
1644 THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
1645 lck_interlock_unlock(lock, istate);
1646
1647 if (res == THREAD_WAITING) {
1648 res = thread_block(THREAD_CONTINUE_NULL);
1649 slept++;
1650 }
1651 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_END, trace_lck, res, slept, 0, 0);
1652 } else {
1653 lck_interlock_unlock(lock, istate);
1654 /*
1655 * must own the lock now, since we checked for
1656 * readers or upgrade owner behind the interlock
1657 * no need for a call to 'lck_rw_drain_status'
1658 */
1659 break;
1660 }
1661 }
1662 }
1663
1664 #if CONFIG_DTRACE
1665 /*
1666 * Decide what latencies we suffered that are Dtrace events.
1667 * If we have set wait_interval, then we either spun or slept.
1668 * At least we get out from under the interlock before we record
1669 * which is the best we can do here to minimize the impact
1670 * of the tracing.
1671 * If dtrace_ls_enabled was never set, then dtrace was not enabled when we
1672 * started sleeping/spinning so we don't record this event.
1673 */
1674 if (dtrace_ls_enabled == TRUE) {
1675 if (slept == 0) {
1676 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_SPIN, lock,
1677 mach_absolute_time() - wait_interval, 1);
1678 } else {
1679 /*
1680 * For the blocking case, we also record if when we blocked
1681 * it was held for read or write, and how many readers.
1682 * Notice that above we recorded this before we dropped
1683 * the interlock so the count is accurate.
1684 */
1685 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_BLOCK, lock,
1686 mach_absolute_time() - wait_interval, 1,
1687 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1688 }
1689 }
1690 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, 1);
1691 #endif /* CONFIG_DTRACE */
1692 }
1693
1694 /*
1695 * Routine: lck_rw_done
1696 */
1697
1698 lck_rw_type_t
1699 lck_rw_done(lck_rw_t *lock)
1700 {
1701 uint32_t data, prev;
1702 boolean_t once = FALSE;
1703
1704 for (;;) {
1705 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_release_smp);
1706 if (data & LCK_RW_INTERLOCK) { /* wait for interlock to clear */
1707 #if __SMP__
1708 atomic_exchange_abort();
1709 lck_rw_interlock_spin(lock);
1710 continue;
1711 #else
1712 panic("lck_rw_done(): Interlock locked (%p): %x", lock, data);
1713 #endif // __SMP__
1714 }
1715 if (data & LCK_RW_SHARED_MASK) { /* lock is held shared */
1716 assertf(lock->lck_rw_owner == THREAD_NULL, "state=0x%x, owner=%p", lock->lck_rw_data, lock->lck_rw_owner);
1717 data -= LCK_RW_SHARED_READER;
1718 if ((data & LCK_RW_SHARED_MASK) == 0) { /* if reader count has now gone to 0, check for waiters */
1719 goto check_waiters;
1720 }
1721 } else { /* if reader count == 0, must be exclusive lock */
1722 if (data & LCK_RW_WANT_UPGRADE) {
1723 data &= ~(LCK_RW_WANT_UPGRADE);
1724 } else {
1725 if (data & LCK_RW_WANT_EXCL) {
1726 data &= ~(LCK_RW_WANT_EXCL);
1727 } else { /* lock is not 'owned', panic */
1728 panic("Releasing non-exclusive RW lock without a reader refcount!");
1729 }
1730 }
1731 if (!once) {
1732 // Only check for holder and clear it once
1733 assertf(lock->lck_rw_owner == current_thread(), "state=0x%x, owner=%p", lock->lck_rw_data, lock->lck_rw_owner);
1734 ordered_store_rw_owner(lock, THREAD_NULL);
1735 once = TRUE;
1736 }
1737 check_waiters:
1738 /*
1739 * test the original values to match what
1740 * lck_rw_done_gen is going to do to determine
1741 * which wakeups need to happen...
1742 *
1743 * if !(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting)
1744 */
1745 if (prev & LCK_RW_W_WAITING) {
1746 data &= ~(LCK_RW_W_WAITING);
1747 if ((prev & LCK_RW_PRIV_EXCL) == 0) {
1748 data &= ~(LCK_RW_R_WAITING);
1749 }
1750 } else {
1751 data &= ~(LCK_RW_R_WAITING);
1752 }
1753 }
1754 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_release_smp)) {
1755 break;
1756 }
1757 cpu_pause();
1758 }
1759 return lck_rw_done_gen(lock, prev);
1760 }
1761
1762 /*
1763 * Routine: lck_rw_done_gen
1764 *
1765 * called from lck_rw_done()...
1766 * prior_lock_state is the value in the 1st
1767 * word of the lock at the time of a successful
1768 * atomic compare and exchange with the new value...
1769 * it represents the state of the lock before we
1770 * decremented the rw_shared_count or cleared either
1771 * rw_want_upgrade or rw_want_excl and
1772 * the lck_x_waiting bits... since lck_rw_done()
1773 * has already changed the state atomically,
1774 * we just need to decide if we should
1775 * wake up anyone and what value to return... we do
1776 * this by examining the state of the lock before
1777 * we changed it
1778 */
1779 static lck_rw_type_t
1780 lck_rw_done_gen(
1781 lck_rw_t *lck,
1782 uint32_t prior_lock_state)
1783 {
1784 lck_rw_word_t fake_lck;
1785 lck_rw_type_t lock_type;
1786 thread_t thread;
1787 uint32_t rwlock_count;
1788
1789 /*
1790 * prior_lock state is a snapshot of the 1st word of the
1791 * lock in question... we'll fake up a lck_rw_word_t from it
1792 * and carefully not access anything beyond what's defined
1793 * in the first word of a lck_rw_t
1794 */
1795 fake_lck.data = prior_lock_state;
1796
1797 if (fake_lck.shared_count <= 1) {
1798 if (fake_lck.w_waiting) {
1799 thread_wakeup(LCK_RW_WRITER_EVENT(lck));
1800 }
1801
1802 if (!(fake_lck.priv_excl && fake_lck.w_waiting) && fake_lck.r_waiting) {
1803 thread_wakeup(LCK_RW_READER_EVENT(lck));
1804 }
1805 }
1806 if (fake_lck.shared_count) {
1807 lock_type = LCK_RW_TYPE_SHARED;
1808 } else {
1809 lock_type = LCK_RW_TYPE_EXCLUSIVE;
1810 }
1811
1812 /* Check if dropping the lock means that we need to unpromote */
1813 thread = current_thread();
1814 rwlock_count = thread->rwlock_count--;
1815 #if MACH_LDEBUG
1816 if (rwlock_count == 0) {
1817 panic("rw lock count underflow for thread %p", thread);
1818 }
1819 #endif
1820 if ((rwlock_count == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
1821 /* sched_flags checked without lock, but will be rechecked while clearing */
1822 lck_rw_clear_promotion(thread, unslide_for_kdebug(lck));
1823 }
1824 #if CONFIG_DTRACE
1825 LOCKSTAT_RECORD(LS_LCK_RW_DONE_RELEASE, lck, lock_type == LCK_RW_TYPE_SHARED ? 0 : 1);
1826 #endif
1827 return lock_type;
1828 }
1829
1830 /*
1831 * Routine: lck_rw_lock_shared_gen
1832 * Function:
1833 * Fast path code has determined that this lock
1834 * is held exclusively... this is where we spin/block
1835 * until we can acquire the lock in the shared mode
1836 */
1837 static void
1838 lck_rw_lock_shared_gen(
1839 lck_rw_t *lck)
1840 {
1841 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
1842 lck_rw_word_t word;
1843 boolean_t gotlock = 0;
1844 int slept = 0;
1845 wait_result_t res = 0;
1846 boolean_t istate;
1847
1848 #if CONFIG_DTRACE
1849 uint64_t wait_interval = 0;
1850 int readers_at_sleep = 0;
1851 boolean_t dtrace_ls_initialized = FALSE;
1852 boolean_t dtrace_rwl_shared_spin, dtrace_rwl_shared_block, dtrace_ls_enabled = FALSE;
1853 #endif /* CONFIG_DTRACE */
1854
1855 while (!lck_rw_grab(lck, LCK_RW_GRAB_SHARED, FALSE)) {
1856 #if CONFIG_DTRACE
1857 if (dtrace_ls_initialized == FALSE) {
1858 dtrace_ls_initialized = TRUE;
1859 dtrace_rwl_shared_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_SPIN] != 0);
1860 dtrace_rwl_shared_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_BLOCK] != 0);
1861 dtrace_ls_enabled = dtrace_rwl_shared_spin || dtrace_rwl_shared_block;
1862 if (dtrace_ls_enabled) {
1863 /*
1864 * Either sleeping or spinning is happening,
1865 * start a timing of our delay interval now.
1866 */
1867 readers_at_sleep = lck->lck_rw_shared_count;
1868 wait_interval = mach_absolute_time();
1869 }
1870 }
1871 #endif
1872
1873 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_START,
1874 trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, 0, 0);
1875
1876 gotlock = lck_rw_grab(lck, LCK_RW_GRAB_SHARED, TRUE);
1877
1878 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_END,
1879 trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, gotlock, 0);
1880
1881 if (gotlock) {
1882 break;
1883 }
1884 /*
1885 * if we get here, the deadline has expired without us
1886 * being able to grab the lock for read;
1887 * check whether we're allowed to do a thread_block
1888 */
1889 if (lck->lck_rw_can_sleep) {
1890 istate = lck_interlock_lock(lck);
1891
1892 word.data = ordered_load_rw(lck);
1893 if ((word.want_excl || word.want_upgrade) &&
1894 ((word.shared_count == 0) || word.priv_excl)) {
1895 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_START,
1896 trace_lck, word.want_excl, word.want_upgrade, 0, 0);
1897
1898 word.r_waiting = 1;
1899 ordered_store_rw(lck, word.data);
1900
1901 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockRead);
1902 res = assert_wait(LCK_RW_READER_EVENT(lck),
1903 THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
1904 lck_interlock_unlock(lck, istate);
1905
1906 if (res == THREAD_WAITING) {
1907 res = thread_block(THREAD_CONTINUE_NULL);
1908 slept++;
1909 }
1910 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_END,
1911 trace_lck, res, slept, 0, 0);
1912 } else {
1913 word.shared_count++;
1914 ordered_store_rw(lck, word.data);
1915 lck_interlock_unlock(lck, istate);
1916 break;
1917 }
1918 }
1919 }
1920
1921 #if CONFIG_DTRACE
1922 if (dtrace_ls_enabled == TRUE) {
1923 if (slept == 0) {
1924 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_SPIN, lck, mach_absolute_time() - wait_interval, 0);
1925 } else {
1926 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_BLOCK, lck,
1927 mach_absolute_time() - wait_interval, 0,
1928 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1929 }
1930 }
1931 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lck, 0);
1932 #endif /* CONFIG_DTRACE */
1933 }
1934
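/*
 * Illustrative sketch (editorial addition): lck_rw_lock_shared() only falls
 * into lck_rw_lock_shared_gen() when the fast path observes want_excl or
 * want_upgrade; a typical reader does not care which path was taken. The
 * function below is hypothetical.
 */
#if 0   /* example only; not compiled */
static int
example_read_field(lck_rw_t *rwl, const int *shared_field)
{
	int value;

	lck_rw_lock_shared(rwl);        /* may spin and/or block in lck_rw_lock_shared_gen() */
	value = *shared_field;          /* read-only critical section */
	(void) lck_rw_done(rwl);        /* wakes a waiting writer if we were the last reader */
	return value;
}
#endif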
1935
1936 void
1937 lck_rw_assert(
1938 lck_rw_t *lck,
1939 unsigned int type)
1940 {
1941 switch (type) {
1942 case LCK_RW_ASSERT_SHARED:
1943 if ((lck->lck_rw_shared_count != 0) &&
1944 (lck->lck_rw_owner == THREAD_NULL)) {
1945 return;
1946 }
1947 break;
1948 case LCK_RW_ASSERT_EXCLUSIVE:
1949 if ((lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
1950 (lck->lck_rw_shared_count == 0) &&
1951 (lck->lck_rw_owner == current_thread())) {
1952 return;
1953 }
1954 break;
1955 case LCK_RW_ASSERT_HELD:
1956 if (lck->lck_rw_shared_count != 0) {
1957 return; // Held shared
1958 }
1959 if ((lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
1960 (lck->lck_rw_owner == current_thread())) {
1961 return; // Held exclusive
1962 }
1963 break;
1964 case LCK_RW_ASSERT_NOTHELD:
1965 if ((lck->lck_rw_shared_count == 0) &&
1966 !(lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
1967 (lck->lck_rw_owner == THREAD_NULL)) {
1968 return;
1969 }
1970 break;
1971 default:
1972 break;
1973 }
1974 panic("rw lock (%p)%s held (mode=%u)", lck, (type == LCK_RW_ASSERT_NOTHELD ? "" : " not"), type);
1975 }
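/*
 * Illustrative sketch (editorial addition): lck_rw_assert() is typically used
 * to document and enforce a locking precondition at function entry. The
 * function below is hypothetical.
 */
#if 0   /* example only; not compiled */
static void
example_requires_writer(lck_rw_t *rwl)
{
	lck_rw_assert(rwl, LCK_RW_ASSERT_EXCLUSIVE);    /* panics unless held exclusively by us */
	/* ... mutate state protected by rwl ... */
}
#endif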
1976
1977
1978 /*
1979 * Routine: kdp_lck_rw_lock_is_acquired_exclusive
1980 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
1981 */
1982 boolean_t
1983 kdp_lck_rw_lock_is_acquired_exclusive(lck_rw_t *lck)
1984 {
1985 if (not_in_kdp) {
1986 panic("panic: rw lock exclusive check done outside of kernel debugger");
1987 }
1988 return ((lck->lck_rw_want_upgrade || lck->lck_rw_want_excl) && (lck->lck_rw_shared_count == 0)) ? TRUE : FALSE;
1989 }
1990
1991 /*
1992 * The C portion of the mutex package. These routines are only invoked
1993 * if the optimized assembler routines can't do the work.
1994 */
1995
1996 /*
1997 * Forward declaration
1998 */
1999
2000 void
2001 lck_mtx_ext_init(
2002 lck_mtx_ext_t * lck,
2003 lck_grp_t * grp,
2004 lck_attr_t * attr);
2005
2006 /*
2007 * Routine: lck_mtx_alloc_init
2008 */
2009 lck_mtx_t *
2010 lck_mtx_alloc_init(
2011 lck_grp_t * grp,
2012 lck_attr_t * attr)
2013 {
2014 lck_mtx_t *lck;
2015
2016 if ((lck = (lck_mtx_t *) kalloc(sizeof(lck_mtx_t))) != 0) {
2017 lck_mtx_init(lck, grp, attr);
2018 }
2019
2020 return lck;
2021 }
2022
2023 /*
2024 * Routine: lck_mtx_free
2025 */
2026 void
2027 lck_mtx_free(
2028 lck_mtx_t * lck,
2029 lck_grp_t * grp)
2030 {
2031 lck_mtx_destroy(lck, grp);
2032 kfree(lck, sizeof(lck_mtx_t));
2033 }
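/*
 * Illustrative sketch (editorial addition): a typical dynamically allocated
 * mutex lifecycle built from the helpers above. The group/attribute setup is
 * an assumption about the caller, not something defined in this file.
 */
#if 0   /* example only; not compiled */
static void
example_mutex_lifecycle(void)
{
	lck_grp_t *grp = lck_grp_alloc_init("example", LCK_GRP_ATTR_NULL);
	lck_mtx_t *m = lck_mtx_alloc_init(grp, LCK_ATTR_NULL);

	lck_mtx_lock(m);
	/* ... critical section ... */
	lck_mtx_unlock(m);

	lck_mtx_free(m, grp);   /* lck_mtx_destroy() followed by kfree() */
	lck_grp_free(grp);
}
#endif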
2034
2035 /*
2036 * Routine: lck_mtx_init
2037 */
2038 void
2039 lck_mtx_init(
2040 lck_mtx_t * lck,
2041 lck_grp_t * grp,
2042 lck_attr_t * attr)
2043 {
2044 #ifdef BER_XXX
2045 lck_mtx_ext_t *lck_ext;
2046 #endif
2047 lck_attr_t *lck_attr;
2048
2049 if (attr != LCK_ATTR_NULL) {
2050 lck_attr = attr;
2051 } else {
2052 lck_attr = &LockDefaultLckAttr;
2053 }
2054
2055 #ifdef BER_XXX
2056 if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
2057 if ((lck_ext = (lck_mtx_ext_t *) kalloc(sizeof(lck_mtx_ext_t))) != 0) {
2058 lck_mtx_ext_init(lck_ext, grp, lck_attr);
2059 lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
2060 lck->lck_mtx_ptr = lck_ext;
2061 lck->lck_mtx_type = LCK_MTX_TYPE;
2062 }
2063 } else
2064 #endif
2065 {
2066 lck->lck_mtx_ptr = NULL; // Clear any padding in the union fields below
2067 lck->lck_mtx_waiters = 0;
2068 lck->lck_mtx_type = LCK_MTX_TYPE;
2069 ordered_store_mtx(lck, 0);
2070 }
2071 lck_grp_reference(grp);
2072 lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
2073 }
2074
2075 /*
2076 * Routine: lck_mtx_init_ext
2077 */
2078 void
2079 lck_mtx_init_ext(
2080 lck_mtx_t * lck,
2081 lck_mtx_ext_t * lck_ext,
2082 lck_grp_t * grp,
2083 lck_attr_t * attr)
2084 {
2085 lck_attr_t *lck_attr;
2086
2087 if (attr != LCK_ATTR_NULL) {
2088 lck_attr = attr;
2089 } else {
2090 lck_attr = &LockDefaultLckAttr;
2091 }
2092
2093 if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
2094 lck_mtx_ext_init(lck_ext, grp, lck_attr);
2095 lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
2096 lck->lck_mtx_ptr = lck_ext;
2097 lck->lck_mtx_type = LCK_MTX_TYPE;
2098 } else {
2099 lck->lck_mtx_waiters = 0;
2100 lck->lck_mtx_type = LCK_MTX_TYPE;
2101 ordered_store_mtx(lck, 0);
2102 }
2103 lck_grp_reference(grp);
2104 lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
2105 }
2106
2107 /*
2108 * Routine: lck_mtx_ext_init
2109 */
2110 void
2111 lck_mtx_ext_init(
2112 lck_mtx_ext_t * lck,
2113 lck_grp_t * grp,
2114 lck_attr_t * attr)
2115 {
2116 bzero((void *) lck, sizeof(lck_mtx_ext_t));
2117
2118 lck->lck_mtx.lck_mtx_type = LCK_MTX_TYPE;
2119
2120 if ((attr->lck_attr_val) & LCK_ATTR_DEBUG) {
2121 lck->lck_mtx_deb.type = MUTEX_TAG;
2122 lck->lck_mtx_attr |= LCK_MTX_ATTR_DEBUG;
2123 }
2124 lck->lck_mtx_grp = grp;
2125
2126 if (grp->lck_grp_attr & LCK_GRP_ATTR_STAT) {
2127 lck->lck_mtx_attr |= LCK_MTX_ATTR_STAT;
2128 }
2129 }
2130
2131 /* The slow versions */
2132 static void lck_mtx_lock_contended(lck_mtx_t *lock, thread_t thread, boolean_t interlocked);
2133 static boolean_t lck_mtx_try_lock_contended(lck_mtx_t *lock, thread_t thread);
2134 static void lck_mtx_unlock_contended(lck_mtx_t *lock, thread_t thread, boolean_t interlocked);
2135
2136 /* The adaptive spin function */
2137 static spinwait_result_t lck_mtx_lock_contended_spinwait_arm(lck_mtx_t *lock, thread_t thread, boolean_t interlocked);
2138
2139 /*
2140 * Routine: lck_mtx_verify
2141 *
2142 * Verify that a mutex is valid
2143 */
2144 static inline void
2145 lck_mtx_verify(lck_mtx_t *lock)
2146 {
2147 if (lock->lck_mtx_type != LCK_MTX_TYPE) {
2148 panic("Invalid mutex %p", lock);
2149 }
2150 #if DEVELOPMENT || DEBUG
2151 if (lock->lck_mtx_tag == LCK_MTX_TAG_DESTROYED) {
2152 panic("Mutex destroyed %p", lock);
2153 }
2154 #endif /* DEVELOPMENT || DEBUG */
2155 }
2156
2157 /*
2158 * Routine: lck_mtx_check_preemption
2159 *
2160 * Verify preemption is enabled when attempting to acquire a mutex.
2161 */
2162
2163 static inline void
2164 lck_mtx_check_preemption(lck_mtx_t *lock)
2165 {
2166 #if DEVELOPMENT || DEBUG
2167 int pl = get_preemption_level();
2168
2169 if (pl != 0) {
2170 panic("Attempt to take mutex with preemption disabled. Lock=%p, level=%d", lock, pl);
2171 }
2172 #else
2173 (void)lock;
2174 #endif
2175 }
2176
2177 /*
2178 * Routine: lck_mtx_lock
2179 */
2180 void
2181 lck_mtx_lock(lck_mtx_t *lock)
2182 {
2183 thread_t thread;
2184
2185 lck_mtx_verify(lock);
2186 lck_mtx_check_preemption(lock);
2187 thread = current_thread();
2188 if (os_atomic_cmpxchg(&lock->lck_mtx_data,
2189 0, LCK_MTX_THREAD_TO_STATE(thread), acquire)) {
2190 #if CONFIG_DTRACE
2191 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, lock, 0);
2192 #endif /* CONFIG_DTRACE */
2193 return;
2194 }
2195 lck_mtx_lock_contended(lock, thread, FALSE);
2196 }
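/*
 * Illustrative sketch (editorial addition): the uncontended fast path above is
 * a single compare-and-swap of the lock word from 0 (free) to the owner
 * encoding of the current thread; unlock reverses it with release ordering.
 * toy_word, toy_lock and toy_unlock are hypothetical names.
 */
#if 0   /* example only; not compiled */
static uintptr_t toy_word;

static boolean_t
toy_lock(thread_t self)
{
	/* acquire ordering: critical-section accesses cannot move before this */
	return os_atomic_cmpxchg(&toy_word, 0, (uintptr_t)self, acquire);
}

static boolean_t
toy_unlock(thread_t self)
{
	/* release ordering: critical-section stores are visible before the word clears */
	return os_atomic_cmpxchg(&toy_word, (uintptr_t)self, 0, release);
}
#endif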
2197
2198 /*
2199 * This is the slow version of mutex locking.
2200 */
2201 static void NOINLINE
2202 lck_mtx_lock_contended(lck_mtx_t *lock, thread_t thread, boolean_t interlocked)
2203 {
2204 thread_t holding_thread;
2205 uintptr_t state;
2206 int waiters = 0;
2207 spinwait_result_t sw_res;
2208 struct turnstile *ts = NULL;
2209
2210 /* Loop waiting until I see that the mutex is unowned */
2211 for (;;) {
2212 sw_res = lck_mtx_lock_contended_spinwait_arm(lock, thread, interlocked);
2213 interlocked = FALSE;
2214
2215 switch (sw_res) {
2216 case SPINWAIT_ACQUIRED:
2217 if (ts != NULL) {
2218 interlock_lock(lock);
2219 turnstile_complete((uintptr_t)lock, NULL, NULL, TURNSTILE_KERNEL_MUTEX);
2220 interlock_unlock(lock);
2221 }
2222 goto done;
2223 case SPINWAIT_INTERLOCK:
2224 goto set_owner;
2225 default:
2226 break;
2227 }
2228
2229 state = ordered_load_mtx(lock);
2230 holding_thread = LCK_MTX_STATE_TO_THREAD(state);
2231 if (holding_thread == NULL) {
2232 break;
2233 }
2234 ordered_store_mtx(lock, (state | LCK_ILOCK | ARM_LCK_WAITERS)); // Set waiters bit and wait
2235 lck_mtx_lock_wait(lock, holding_thread, &ts);
2236 /* returns interlock unlocked */
2237 }
2238
2239 set_owner:
2240 /* Hooray, I'm the new owner! */
2241 state = ordered_load_mtx(lock);
2242
2243 if (state & ARM_LCK_WAITERS) {
2244 /* Skip lck_mtx_lock_acquire if there are no waiters. */
2245 waiters = lck_mtx_lock_acquire(lock, ts);
2246 /*
2247 * lck_mtx_lock_acquire will call
2248 * turnstile_complete
2249 */
2250 } else {
2251 if (ts != NULL) {
2252 turnstile_complete((uintptr_t)lock, NULL, NULL, TURNSTILE_KERNEL_MUTEX);
2253 }
2254 }
2255
2256 state = LCK_MTX_THREAD_TO_STATE(thread);
2257 if (waiters != 0) {
2258 state |= ARM_LCK_WAITERS;
2259 }
2260 #if __SMP__
2261 state |= LCK_ILOCK; // Preserve interlock
2262 ordered_store_mtx(lock, state); // Set ownership
2263 interlock_unlock(lock); // Release interlock, enable preemption
2264 #else
2265 ordered_store_mtx(lock, state); // Set ownership
2266 enable_preemption();
2267 #endif
2268
2269 done:
2270 load_memory_barrier();
2271
2272 assert(thread->turnstile != NULL);
2273
2274 if (ts != NULL) {
2275 turnstile_cleanup();
2276 }
2277
2278 #if CONFIG_DTRACE
2279 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, lock, 0);
2280 #endif /* CONFIG_DTRACE */
2281 }
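/*
 * Illustrative sketch (editorial addition): how the contended path above
 * rebuilds the mutex word before releasing the interlock, assuming (as the
 * code above relies on) that LCK_MTX_STATE_TO_THREAD() masks off the low
 * flag bits. The function name is hypothetical.
 */
#if 0   /* example only; not compiled */
static void
example_compose_state(void)
{
	uintptr_t new_state = LCK_MTX_THREAD_TO_STATE(current_thread());

	new_state |= ARM_LCK_WAITERS;   /* other threads remain blocked on the turnstile */
	new_state |= LCK_ILOCK;         /* interlock stays held until interlock_unlock() */
	assert(LCK_MTX_STATE_TO_THREAD(new_state) == current_thread());
}
#endif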
2282
2283 /*
2284 * Routine: lck_mtx_lock_contended_spinwait_arm
2285 *
2286 * Invoked when trying to acquire a mutex and there is contention but
2287 * the holder is running on another processor. We spin for up to a maximum
2288 * time waiting for the lock to be released.
2289 */
2290 static spinwait_result_t
2291 lck_mtx_lock_contended_spinwait_arm(lck_mtx_t *lock, thread_t thread, boolean_t interlocked)
2292 {
2293 int has_interlock = (int)interlocked;
2294 #if __SMP__
2295 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock);
2296 thread_t holder;
2297 uint64_t overall_deadline;
2298 uint64_t check_owner_deadline;
2299 uint64_t cur_time;
2300 spinwait_result_t retval = SPINWAIT_DID_SPIN;
2301 int loopcount = 0;
2302 uintptr_t state;
2303 boolean_t istate;
2304
2305 if (__improbable(!(lck_mtx_adaptive_spin_mode & ADAPTIVE_SPIN_ENABLE))) {
2306 if (!has_interlock) {
2307 interlock_lock(lock);
2308 }
2309
2310 return SPINWAIT_DID_NOT_SPIN;
2311 }
2312
2313 state = ordered_load_mtx(lock);
2314
2315 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_START,
2316 trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(LCK_MTX_STATE_TO_THREAD(state)), lock->lck_mtx_waiters, 0, 0);
2317
2318 cur_time = mach_absolute_time();
2319 overall_deadline = cur_time + MutexSpin;
2320 check_owner_deadline = cur_time;
2321
2322 if (has_interlock) {
2323 istate = ml_get_interrupts_enabled();
2324 }
2325
2326 /* Snoop the lock state */
2327 state = ordered_load_mtx(lock);
2328
2329 /*
2330 * Spin while:
2331 * - mutex is locked, and
2332 * - it's locked as a spin lock, and
2333 * - owner is running on another processor, and
2334 * - owner (processor) is not idling, and
2335 * - we haven't spun for long enough.
2336 */
2337 do {
2338 if (!(state & LCK_ILOCK) || has_interlock) {
2339 if (!has_interlock) {
2340 has_interlock = interlock_try_disable_interrupts(lock, &istate);
2341 }
2342
2343 if (has_interlock) {
2344 state = ordered_load_mtx(lock);
2345 holder = LCK_MTX_STATE_TO_THREAD(state);
2346
2347 if (holder == NULL) {
2348 retval = SPINWAIT_INTERLOCK;
2349
2350 if (istate) {
2351 ml_set_interrupts_enabled(istate);
2352 }
2353
2354 break;
2355 }
2356
2357 if (!(holder->machine.machine_thread_flags & MACHINE_THREAD_FLAGS_ON_CPU) ||
2358 (holder->state & TH_IDLE)) {
2359 if (loopcount == 0) {
2360 retval = SPINWAIT_DID_NOT_SPIN;
2361 }
2362
2363 if (istate) {
2364 ml_set_interrupts_enabled(istate);
2365 }
2366
2367 break;
2368 }
2369
2370 interlock_unlock_enable_interrupts(lock, istate);
2371 has_interlock = 0;
2372 }
2373 }
2374
2375 cur_time = mach_absolute_time();
2376
2377 if (cur_time >= overall_deadline) {
2378 break;
2379 }
2380
2381 check_owner_deadline = cur_time + (MutexSpin / SPINWAIT_OWNER_CHECK_COUNT);
2382
2383 if (cur_time < check_owner_deadline) {
2384 machine_delay_until(check_owner_deadline - cur_time, check_owner_deadline);
2385 }
2386
2387 /* Snoop the lock state */
2388 state = ordered_load_mtx(lock);
2389
2390 if (state == 0) {
2391 /* Try to grab the lock. */
2392 if (os_atomic_cmpxchg(&lock->lck_mtx_data,
2393 0, LCK_MTX_THREAD_TO_STATE(thread), acquire)) {
2394 retval = SPINWAIT_ACQUIRED;
2395 break;
2396 }
2397 }
2398
2399 loopcount++;
2400 } while (TRUE);
2401
2402 #if CONFIG_DTRACE
2403 /*
2404 * We did not record a separate spin-start timestamp; overall_deadline
2405 * was computed as start + MutexSpin, so if dtrace is active we work
2406 * backwards from it to determine how long we spun.
2407 *
2408 * Note that we record a different probe id depending on whether
2409 * this is a direct or indirect mutex. This allows us to
2410 * penalize only lock groups that have debug/stats enabled
2411 * with dtrace processing if desired.
2412 */
2413 if (__probable(lock->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)) {
2414 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN, lock,
2415 mach_absolute_time() - (overall_deadline - MutexSpin));
2416 } else {
2417 LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_SPIN, lock,
2418 mach_absolute_time() - (overall_deadline - MutexSpin));
2419 }
2420 /* The lockstat acquire event is recorded by the caller. */
2421 #endif
2422
2423 state = ordered_load_mtx(lock);
2424
2425 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_END,
2426 trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(LCK_MTX_STATE_TO_THREAD(state)), lock->lck_mtx_waiters, retval, 0);
2427 #else /* __SMP__ */
2428 /* Spinwaiting is not useful on UP systems. */
2429 #pragma unused(lock, thread)
2430 int retval = SPINWAIT_DID_NOT_SPIN;
2431 #endif /* __SMP__ */
2432 if ((!has_interlock) && (retval != SPINWAIT_ACQUIRED)) {
2433 /* We must own either the lock or the interlock on return. */
2434 interlock_lock(lock);
2435 }
2436
2437 return retval;
2438 }
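/*
 * Illustrative sketch (editorial addition): the adaptive spin above is bounded
 * to MutexSpin absolute-time units overall, with owner re-checks spaced at
 * most MutexSpin / SPINWAIT_OWNER_CHECK_COUNT apart. The helper below only
 * restates that arithmetic; its name is hypothetical.
 */
#if 0   /* example only; not compiled */
static void
example_spin_budget(void)
{
	uint64_t start = mach_absolute_time();
	uint64_t overall_deadline = start + MutexSpin;
	uint64_t owner_check_deadline = start + (MutexSpin / SPINWAIT_OWNER_CHECK_COUNT);

	/* The spin loop delays until owner_check_deadline between owner checks
	 * and gives up entirely once overall_deadline has passed. */
	assert(owner_check_deadline <= overall_deadline);
}
#endif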
2439
2440 /*
2441 * Common code for mutex locking as spinlock
2442 */
2443 static inline void
2444 lck_mtx_lock_spin_internal(lck_mtx_t *lock, boolean_t allow_held_as_mutex)
2445 {
2446 uintptr_t state;
2447
2448 interlock_lock(lock);
2449 state = ordered_load_mtx(lock);
2450 if (LCK_MTX_STATE_TO_THREAD(state)) {
2451 if (allow_held_as_mutex) {
2452 lck_mtx_lock_contended(lock, current_thread(), TRUE);
2453 } else {
2454 // "Always" variants can never block. If the lock is held and blocking is not allowed
2455 // then someone is mixing always and non-always calls on the same lock, which is
2456 // forbidden.
2457 panic("Attempting to block on a lock taken as spin-always %p", lock);
2458 }
2459 return;
2460 }
2461 state &= ARM_LCK_WAITERS; // Preserve waiters bit
2462 state |= (LCK_MTX_SPIN_TAG | LCK_ILOCK); // Add spin tag and maintain interlock
2463 ordered_store_mtx(lock, state);
2464 load_memory_barrier();
2465
2466 #if CONFIG_DTRACE
2467 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN_ACQUIRE, lock, 0);
2468 #endif /* CONFIG_DTRACE */
2469 }
2470
2471 /*
2472 * Routine: lck_mtx_lock_spin
2473 */
2474 void
2475 lck_mtx_lock_spin(lck_mtx_t *lock)
2476 {
2477 lck_mtx_check_preemption(lock);
2478 lck_mtx_lock_spin_internal(lock, TRUE);
2479 }
2480
2481 /*
2482 * Routine: lck_mtx_lock_spin_always
2483 */
2484 void
2485 lck_mtx_lock_spin_always(lck_mtx_t *lock)
2486 {
2487 lck_mtx_lock_spin_internal(lock, FALSE);
2488 }
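/*
 * Illustrative sketch (editorial addition): a short critical section taken in
 * spin mode. lck_mtx_unlock() recognizes LCK_MTX_SPIN_TAG and releases the
 * interlock through its slow path. Names below are hypothetical.
 */
#if 0   /* example only; not compiled */
static void
example_spin_section(lck_mtx_t *m, int *counter)
{
	lck_mtx_lock_spin(m);   /* interlock held, preemption disabled, must not block */
	(*counter)++;           /* keep this section short */
	lck_mtx_unlock(m);      /* slow path clears the spin tag and drops the interlock */
}
#endif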
2489
2490 /*
2491 * Routine: lck_mtx_try_lock
2492 */
2493 boolean_t
2494 lck_mtx_try_lock(lck_mtx_t *lock)
2495 {
2496 thread_t thread = current_thread();
2497
2498 lck_mtx_verify(lock);
2499 if (os_atomic_cmpxchg(&lock->lck_mtx_data,
2500 0, LCK_MTX_THREAD_TO_STATE(thread), acquire)) {
2501 #if CONFIG_DTRACE
2502 LOCKSTAT_RECORD(LS_LCK_MTX_TRY_LOCK_ACQUIRE, lock, 0);
2503 #endif /* CONFIG_DTRACE */
2504 return TRUE;
2505 }
2506 return lck_mtx_try_lock_contended(lock, thread);
2507 }
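/*
 * Illustrative sketch (editorial addition): callers of lck_mtx_try_lock() must
 * tolerate a FALSE return and must not touch the protected state in that
 * case. The function below is hypothetical.
 */
#if 0   /* example only; not compiled */
static boolean_t
example_try_update(lck_mtx_t *m, int *counter)
{
	if (!lck_mtx_try_lock(m)) {
		return FALSE;   /* lock owned elsewhere; caller backs off */
	}
	(*counter)++;           /* critical section */
	lck_mtx_unlock(m);
	return TRUE;
}
#endif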
2508
2509 static boolean_t NOINLINE
2510 lck_mtx_try_lock_contended(lck_mtx_t *lock, thread_t thread)
2511 {
2512 thread_t holding_thread;
2513 uintptr_t state;
2514 int waiters;
2515
2516 #if __SMP__
2517 interlock_lock(lock);
2518 state = ordered_load_mtx(lock);
2519 holding_thread = LCK_MTX_STATE_TO_THREAD(state);
2520 if (holding_thread) {
2521 interlock_unlock(lock);
2522 return FALSE;
2523 }
2524 #else
2525 disable_preemption_for_thread(thread);
2526 state = ordered_load_mtx(lock);
2527 if (state & LCK_ILOCK) {
2528 panic("Unexpected interlock set (%p)", lock);
2529 }
2530 holding_thread = LCK_MTX_STATE_TO_THREAD(state);
2531 if (holding_thread) {
2532 enable_preemption();
2533 return FALSE;
2534 }
2535 state |= LCK_ILOCK;
2536 ordered_store_mtx(lock, state);
2537 #endif // __SMP__
2538 waiters = lck_mtx_lock_acquire(lock, NULL);
2539 state = LCK_MTX_THREAD_TO_STATE(thread);
2540 if (waiters != 0) {
2541 state |= ARM_LCK_WAITERS;
2542 }
2543 #if __SMP__
2544 state |= LCK_ILOCK; // Preserve interlock
2545 ordered_store_mtx(lock, state); // Set ownership
2546 interlock_unlock(lock); // Release interlock, enable preemption
2547 #else
2548 ordered_store_mtx(lock, state); // Set ownership
2549 enable_preemption();
2550 #endif
2551 load_memory_barrier();
2552
2553 turnstile_cleanup();
2554
2555 return TRUE;
2556 }
2557
2558 static inline boolean_t
2559 lck_mtx_try_lock_spin_internal(lck_mtx_t *lock, boolean_t allow_held_as_mutex)
2560 {
2561 uintptr_t state;
2562
2563 if (!interlock_try(lock)) {
2564 return FALSE;
2565 }
2566 state = ordered_load_mtx(lock);
2567 if (LCK_MTX_STATE_TO_THREAD(state)) {
2568 // Lock is held as mutex
2569 if (allow_held_as_mutex) {
2570 interlock_unlock(lock);
2571 } else {
2572 // "Always" variants can never block. If the lock is held as a normal mutex
2573 // then someone is mixing always and non-always calls on the same lock, which is
2574 // forbidden.
2575 panic("Spin-mutex held as full mutex %p", lock);
2576 }
2577 return FALSE;
2578 }
2579 state &= ARM_LCK_WAITERS; // Preserve waiters bit
2580 state |= (LCK_MTX_SPIN_TAG | LCK_ILOCK); // Add spin tag and maintain interlock
2581 ordered_store_mtx(lock, state);
2582 load_memory_barrier();
2583
2584 #if CONFIG_DTRACE
2585 LOCKSTAT_RECORD(LS_LCK_MTX_TRY_SPIN_LOCK_ACQUIRE, lock, 0);
2586 #endif /* CONFIG_DTRACE */
2587 return TRUE;
2588 }
2589
2590 /*
2591 * Routine: lck_mtx_try_lock_spin
2592 */
2593 boolean_t
2594 lck_mtx_try_lock_spin(lck_mtx_t *lock)
2595 {
2596 return lck_mtx_try_lock_spin_internal(lock, TRUE);
2597 }
2598
2599 /*
2600 * Routine: lck_mtx_try_lock_spin_always
2601 */
2602 boolean_t
2603 lck_mtx_try_lock_spin_always(lck_mtx_t *lock)
2604 {
2605 return lck_mtx_try_lock_spin_internal(lock, FALSE);
2606 }
2607
2608
2609
2610 /*
2611 * Routine: lck_mtx_unlock
2612 */
2613 void
2614 lck_mtx_unlock(lck_mtx_t *lock)
2615 {
2616 thread_t thread = current_thread();
2617 uintptr_t state;
2618 boolean_t ilk_held = FALSE;
2619
2620 lck_mtx_verify(lock);
2621
2622 state = ordered_load_mtx(lock);
2623 if (state & LCK_ILOCK) {
2624 if (LCK_MTX_STATE_TO_THREAD(state) == (thread_t)LCK_MTX_SPIN_TAG) {
2625 ilk_held = TRUE; // Interlock is held by (presumably) this thread
2626 }
2627 goto slow_case;
2628 }
2629 // Locked as a mutex
2630 if (os_atomic_cmpxchg(&lock->lck_mtx_data,
2631 LCK_MTX_THREAD_TO_STATE(thread), 0, release)) {
2632 #if CONFIG_DTRACE
2633 LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, lock, 0);
2634 #endif /* CONFIG_DTRACE */
2635 return;
2636 }
2637 slow_case:
2638 lck_mtx_unlock_contended(lock, thread, ilk_held);
2639 }
2640
2641 static void NOINLINE
2642 lck_mtx_unlock_contended(lck_mtx_t *lock, thread_t thread, boolean_t ilk_held)
2643 {
2644 uintptr_t state;
2645 boolean_t cleanup = FALSE;
2646
2647 if (ilk_held) {
2648 state = ordered_load_mtx(lock);
2649 } else {
2650 #if __SMP__
2651 interlock_lock(lock);
2652 state = ordered_load_mtx(lock);
2653 if (thread != LCK_MTX_STATE_TO_THREAD(state)) {
2654 panic("lck_mtx_unlock(): Attempt to release lock not owned by thread (%p)", lock);
2655 }
2656 #else
2657 disable_preemption_for_thread(thread);
2658 state = ordered_load_mtx(lock);
2659 if (state & LCK_ILOCK) {
2660 panic("lck_mtx_unlock(): Unexpected interlock set (%p)", lock);
2661 }
2662 if (thread != LCK_MTX_STATE_TO_THREAD(state)) {
2663 panic("lck_mtx_unlock(): Attempt to release lock not owned by thread (%p)", lock);
2664 }
2665 state |= LCK_ILOCK;
2666 ordered_store_mtx(lock, state);
2667 #endif
2668 if (state & ARM_LCK_WAITERS) {
2669 if (lck_mtx_unlock_wakeup(lock, thread)) {
2670 state = ARM_LCK_WAITERS;
2671 } else {
2672 state = 0;
2673 }
2674 cleanup = TRUE;
2675 goto unlock;
2676 }
2677 }
2678 state &= ARM_LCK_WAITERS; /* Clear state, retain waiters bit */
2679 unlock:
2680 #if __SMP__
2681 state |= LCK_ILOCK;
2682 ordered_store_mtx(lock, state);
2683 interlock_unlock(lock);
2684 #else
2685 ordered_store_mtx(lock, state);
2686 enable_preemption();
2687 #endif
2688 if (cleanup) {
2689 /*
2690 * Do not do any turnstile operations outside of this block.
2691 * lock/unlock is called at an early stage of boot with a single thread,
2692 * when turnstiles are not yet initialized.
2693 * Even without contention we can come through the slow path
2694 * if the mutex is acquired as a spin lock.
2695 */
2696 turnstile_cleanup();
2697 }
2698
2699 #if CONFIG_DTRACE
2700 LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, lock, 0);
2701 #endif /* CONFIG_DTRACE */
2702 }
2703
2704 /*
2705 * Routine: lck_mtx_assert
2706 */
2707 void
2708 lck_mtx_assert(lck_mtx_t *lock, unsigned int type)
2709 {
2710 thread_t thread, holder;
2711 uintptr_t state;
2712
2713 state = ordered_load_mtx(lock);
2714 holder = LCK_MTX_STATE_TO_THREAD(state);
2715 if (holder == (thread_t)LCK_MTX_SPIN_TAG) {
2716 // Lock is held in spin mode, owner is unknown.
2717 return; // Punt
2718 }
2719 thread = current_thread();
2720 if (type == LCK_MTX_ASSERT_OWNED) {
2721 if (thread != holder) {
2722 panic("lck_mtx_assert(): mutex (%p) owned", lock);
2723 }
2724 } else if (type == LCK_MTX_ASSERT_NOTOWNED) {
2725 if (thread == holder) {
2726 panic("lck_mtx_assert(): mutex (%p) not owned", lock);
2727 }
2728 } else {
2729 panic("lck_mtx_assert(): invalid arg (%u)", type);
2730 }
2731 }
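/*
 * Illustrative sketch (editorial addition): lck_mtx_assert() documents a
 * locking precondition; note the punt above when the mutex is held in spin
 * mode, since the owner is then unknown. The function below is hypothetical.
 */
#if 0   /* example only; not compiled */
static void
example_requires_mutex(lck_mtx_t *m)
{
	lck_mtx_assert(m, LCK_MTX_ASSERT_OWNED);    /* panics if not owned by this thread */
	/* ... operate on state protected by m ... */
}
#endif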
2732
2733 /*
2734 * Routine: lck_mtx_ilk_unlock
2735 */
2736 boolean_t
2737 lck_mtx_ilk_unlock(lck_mtx_t *lock)
2738 {
2739 interlock_unlock(lock);
2740 return TRUE;
2741 }
2742
2743 /*
2744 * Routine: lck_mtx_convert_spin
2745 *
2746 * Convert a mutex held for spin into a held full mutex
2747 */
2748 void
2749 lck_mtx_convert_spin(lck_mtx_t *lock)
2750 {
2751 thread_t thread = current_thread();
2752 uintptr_t state;
2753 int waiters;
2754
2755 state = ordered_load_mtx(lock);
2756 if (LCK_MTX_STATE_TO_THREAD(state) == thread) {
2757 return; // Already owned as mutex, return
2758 }
2759 if ((state & LCK_ILOCK) == 0 || (LCK_MTX_STATE_TO_THREAD(state) != (thread_t)LCK_MTX_SPIN_TAG)) {
2760 panic("lck_mtx_convert_spin: Not held as spinlock (%p)", lock);
2761 }
2762 state &= ~(LCK_MTX_THREAD_MASK); // Clear the spin tag
2763 ordered_store_mtx(lock, state);
2764 waiters = lck_mtx_lock_acquire(lock, NULL); // Acquire to manage priority boosts
2765 state = LCK_MTX_THREAD_TO_STATE(thread);
2766 if (waiters != 0) {
2767 state |= ARM_LCK_WAITERS;
2768 }
2769 #if __SMP__
2770 state |= LCK_ILOCK;
2771 ordered_store_mtx(lock, state); // Set ownership
2772 interlock_unlock(lock); // Release interlock, enable preemption
2773 #else
2774 ordered_store_mtx(lock, state); // Set ownership
2775 enable_preemption();
2776 #endif
2777 turnstile_cleanup();
2778 }
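/*
 * Illustrative sketch (editorial addition): the intended pattern is to take
 * the mutex in spin mode and convert to a full mutex only when the code path
 * turns out to need to block. Names below are hypothetical.
 */
#if 0   /* example only; not compiled */
static void
example_spin_then_convert(lck_mtx_t *m, boolean_t need_to_block)
{
	lck_mtx_lock_spin(m);                   /* cheap, non-blocking hold */
	if (need_to_block) {
		lck_mtx_convert_spin(m);        /* become the full mutex owner */
		/* ... code that may block while holding m ... */
	}
	lck_mtx_unlock(m);                      /* correct for either mode */
}
#endif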
2779
2780
2781 /*
2782 * Routine: lck_mtx_destroy
2783 */
2784 void
2785 lck_mtx_destroy(
2786 lck_mtx_t * lck,
2787 lck_grp_t * grp)
2788 {
2789 if (lck->lck_mtx_type != LCK_MTX_TYPE) {
2790 panic("Destroying invalid mutex %p", lck);
2791 }
2792 if (lck->lck_mtx_tag == LCK_MTX_TAG_DESTROYED) {
2793 panic("Destroying previously destroyed lock %p", lck);
2794 }
2795 lck_mtx_assert(lck, LCK_MTX_ASSERT_NOTOWNED);
2796 lck->lck_mtx_tag = LCK_MTX_TAG_DESTROYED;
2797 lck_grp_lckcnt_decr(grp, LCK_TYPE_MTX);
2798 lck_grp_deallocate(grp);
2799 return;
2800 }
2801
2802 /*
2803 * Routine: lck_spin_assert
2804 */
2805 void
2806 lck_spin_assert(lck_spin_t *lock, unsigned int type)
2807 {
2808 thread_t thread, holder;
2809 uintptr_t state;
2810
2811 if (lock->type != LCK_SPIN_TYPE) {
2812 panic("Invalid spinlock %p", lock);
2813 }
2814
2815 state = lock->lck_spin_data;
2816 holder = (thread_t)(state & ~LCK_ILOCK);
2817 thread = current_thread();
2818 if (type == LCK_ASSERT_OWNED) {
2819 if (holder == 0) {
2820 panic("Lock not owned %p = %lx", lock, state);
2821 }
2822 if (holder != thread) {
2823 panic("Lock not owned by current thread %p = %lx", lock, state);
2824 }
2825 if ((state & LCK_ILOCK) == 0) {
2826 panic("Lock bit not set %p = %lx", lock, state);
2827 }
2828 } else if (type == LCK_ASSERT_NOTOWNED) {
2829 if (holder != 0) {
2830 if (holder == thread) {
2831 panic("Lock owned by current thread %p = %lx", lock, state);
2832 }
2833 }
2834 } else {
2835 panic("lck_spin_assert(): invalid arg (%u)", type);
2836 }
2837 }
2838
2839 boolean_t
2840 lck_rw_lock_yield_shared(lck_rw_t *lck, boolean_t force_yield)
2841 {
2842 lck_rw_word_t word;
2843
2844 lck_rw_assert(lck, LCK_RW_ASSERT_SHARED);
2845
2846 word.data = ordered_load_rw(lck);
2847 if (word.want_excl || word.want_upgrade || force_yield) {
2848 lck_rw_unlock_shared(lck);
2849 mutex_pause(2);
2850 lck_rw_lock_shared(lck);
2851 return TRUE;
2852 }
2853
2854 return FALSE;
2855 }
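/*
 * Illustrative sketch (editorial addition): lck_rw_lock_yield_shared() is
 * intended for long read-side scans, letting a waiting writer in and then
 * re-taking the lock shared. Names below are hypothetical.
 */
#if 0   /* example only; not compiled */
static void
example_long_scan(lck_rw_t *rwl, const int *items, int count)
{
	lck_rw_lock_shared(rwl);
	for (int i = 0; i < count; i++) {
		/* ... examine items[i] under the shared lock ... */
		if (lck_rw_lock_yield_shared(rwl, FALSE)) {
			/* the lock was dropped and re-acquired; any state cached
			 * from the protected data may now be stale */
		}
	}
	(void) lck_rw_done(rwl);
}
#endif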
2856
2857 /*
2858 * Routine: kdp_lck_mtx_lock_spin_is_acquired
2859 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
2860 */
2861 boolean_t
2862 kdp_lck_mtx_lock_spin_is_acquired(lck_mtx_t *lck)
2863 {
2864 uintptr_t state;
2865
2866 if (not_in_kdp) {
2867 panic("panic: spinlock acquired check done outside of kernel debugger");
2868 }
2869 state = ordered_load_mtx(lck);
2870 if (state == LCK_MTX_TAG_DESTROYED) {
2871 return FALSE;
2872 }
2873 if (LCK_MTX_STATE_TO_THREAD(state) || (state & LCK_ILOCK)) {
2874 return TRUE;
2875 }
2876 return FALSE;
2877 }
2878
2879 void
2880 kdp_lck_mtx_find_owner(__unused struct waitq * waitq, event64_t event, thread_waitinfo_t * waitinfo)
2881 {
2882 lck_mtx_t * mutex = LCK_EVENT_TO_MUTEX(event);
2883 waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(mutex);
2884 uintptr_t state = ordered_load_mtx(mutex);
2885 thread_t holder = LCK_MTX_STATE_TO_THREAD(state);
2886 if ((uintptr_t)holder == (uintptr_t)LCK_MTX_SPIN_TAG) {
2887 waitinfo->owner = STACKSHOT_WAITOWNER_MTXSPIN;
2888 } else {
2889 assertf(state != (uintptr_t)LCK_MTX_TAG_DESTROYED, "state=0x%llx", (uint64_t)state);
2890 assertf(state != (uintptr_t)LCK_MTX_TAG_INDIRECT, "state=0x%llx", (uint64_t)state);
2891 waitinfo->owner = thread_tid(holder);
2892 }
2893 }
2894
2895 void
2896 kdp_rwlck_find_owner(__unused struct waitq * waitq, event64_t event, thread_waitinfo_t * waitinfo)
2897 {
2898 lck_rw_t *rwlck = NULL;
2899 switch (waitinfo->wait_type) {
2900 case kThreadWaitKernelRWLockRead:
2901 rwlck = READ_EVENT_TO_RWLOCK(event);
2902 break;
2903 case kThreadWaitKernelRWLockWrite:
2904 case kThreadWaitKernelRWLockUpgrade:
2905 rwlck = WRITE_EVENT_TO_RWLOCK(event);
2906 break;
2907 default:
2908 panic("%s was called with an invalid blocking type", __FUNCTION__);
2909 break;
2910 }
2911 waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(rwlck);
2912 waitinfo->owner = thread_tid(rwlck->lck_rw_owner);
2913 }