apple/xnu (xnu-4903.270.47) - osfmk/arm/locks_arm.c
1 /*
2 * Copyright (c) 2007-2017 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System Copyright (c) 1991,1990,1989,1988,1987 Carnegie
33 * Mellon University All Rights Reserved.
34 *
35 * Permission to use, copy, modify and distribute this software and its
36 * documentation is hereby granted, provided that both the copyright notice
37 * and this permission notice appear in all copies of the software,
38 * derivative works or modified versions, and any portions thereof, and that
39 * both notices appear in supporting documentation.
40 *
41 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.
42 * CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES
43 * WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
44 *
45 * Carnegie Mellon requests users of this software to return to
46 *
47 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
48 * School of Computer Science Carnegie Mellon University Pittsburgh PA
49 * 15213-3890
50 *
51 * any improvements or extensions that they make and grant Carnegie Mellon the
52 * rights to redistribute these changes.
53 */
54 /*
55 * File: kern/lock.c
56 * Author: Avadis Tevanian, Jr., Michael Wayne Young
57 * Date: 1985
58 *
59 * Locking primitives implementation
60 */
61
62 #define ATOMIC_PRIVATE 1
63 #define LOCK_PRIVATE 1
64
65 #include <mach_ldebug.h>
66
67 #include <kern/kalloc.h>
68 #include <kern/lock_stat.h>
69 #include <kern/locks.h>
70 #include <kern/misc_protos.h>
71 #include <kern/thread.h>
72 #include <kern/processor.h>
73 #include <kern/sched_prim.h>
74 #include <kern/xpr.h>
75 #include <kern/debug.h>
76 #include <kern/kcdata.h>
77 #include <string.h>
78
79 #include <arm/cpu_data_internal.h>
80 #include <arm/proc_reg.h>
81 #include <arm/smp.h>
82 #include <machine/atomic.h>
83 #include <machine/machine_cpu.h>
84
85 #include <sys/kdebug.h>
86
87 #if CONFIG_DTRACE
88 #define DTRACE_RW_SHARED 0x0 //reader
89 #define DTRACE_RW_EXCL 0x1 //writer
90 #define DTRACE_NO_FLAG 0x0 //not applicable
91 #endif /* CONFIG_DTRACE */
92
93 #define LCK_RW_LCK_EXCLUSIVE_CODE 0x100
94 #define LCK_RW_LCK_EXCLUSIVE1_CODE 0x101
95 #define LCK_RW_LCK_SHARED_CODE 0x102
96 #define LCK_RW_LCK_SH_TO_EX_CODE 0x103
97 #define LCK_RW_LCK_SH_TO_EX1_CODE 0x104
98 #define LCK_RW_LCK_EX_TO_SH_CODE 0x105
99
100
101 #define ANY_LOCK_DEBUG (USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG)
102
103 // Panic in tests that check lock usage correctness
104 // These are undesirable when already in a panic or when a debugger is running.
105 #define LOCK_CORRECTNESS_PANIC() (kernel_debugger_entry_count == 0)
106
107 unsigned int LcksOpts = 0;
108
109 #define ADAPTIVE_SPIN_ENABLE 0x1
110
111 #if __SMP__
112 int lck_mtx_adaptive_spin_mode = ADAPTIVE_SPIN_ENABLE;
113 #else /* __SMP__ */
114 int lck_mtx_adaptive_spin_mode = 0;
115 #endif /* __SMP__ */
116
117 #define SPINWAIT_OWNER_CHECK_COUNT 4
118
119 typedef enum {
120 SPINWAIT_ACQUIRED, /* Got the lock. */
121 SPINWAIT_INTERLOCK, /* Got the interlock, no owner, but caller must finish acquiring the lock. */
122 SPINWAIT_DID_SPIN, /* Got the interlock, spun, but failed to get the lock. */
123 SPINWAIT_DID_NOT_SPIN, /* Got the interlock, did not spin. */
124 } spinwait_result_t;
125
126 #if CONFIG_DTRACE && __SMP__
127 extern uint64_t dtrace_spin_threshold;
128 #endif
129
130 /* Forwards */
131
132
133 #if USLOCK_DEBUG
134 /*
135 * Perform simple lock checks.
136 */
137 int uslock_check = 1;
138 int max_lock_loops = 100000000;
139 decl_simple_lock_data(extern, printf_lock)
140 decl_simple_lock_data(extern, panic_lock)
141 #endif /* USLOCK_DEBUG */
142
143 extern unsigned int not_in_kdp;
144
145 /*
146 * We often want to know the addresses of the callers
147 * of the various lock routines. However, this information
148 * is only used for debugging and statistics.
149 */
150 typedef void *pc_t;
151 #define INVALID_PC ((void *) VM_MAX_KERNEL_ADDRESS)
152 #define INVALID_THREAD ((void *) VM_MAX_KERNEL_ADDRESS)
153
154 #ifdef lint
155 /*
156 * Eliminate lint complaints about unused local pc variables.
157 */
158 #define OBTAIN_PC(pc, l) ++pc
159 #else /* lint */
160 #define OBTAIN_PC(pc, l)
161 #endif /* lint */
162
163
164 /*
165 * Portable lock package implementation of usimple_locks.
166 */
167
168 #if USLOCK_DEBUG
169 #define USLDBG(stmt) stmt
170 void usld_lock_init(usimple_lock_t, unsigned short);
171 void usld_lock_pre(usimple_lock_t, pc_t);
172 void usld_lock_post(usimple_lock_t, pc_t);
173 void usld_unlock(usimple_lock_t, pc_t);
174 void usld_lock_try_pre(usimple_lock_t, pc_t);
175 void usld_lock_try_post(usimple_lock_t, pc_t);
176 int usld_lock_common_checks(usimple_lock_t, const char *);
177 #else /* USLOCK_DEBUG */
178 #define USLDBG(stmt)
179 #endif /* USLOCK_DEBUG */
180
181 /*
182 * Owner thread pointer when lock held in spin mode
183 */
184 #define LCK_MTX_SPIN_TAG 0xfffffff0
185
186
187 #define interlock_lock(lock) hw_lock_bit ((hw_lock_bit_t*)(&(lock)->lck_mtx_data), LCK_ILOCK_BIT, LCK_GRP_NULL)
188 #define interlock_try(lock) hw_lock_bit_try((hw_lock_bit_t*)(&(lock)->lck_mtx_data), LCK_ILOCK_BIT, LCK_GRP_NULL)
189 #define interlock_unlock(lock) hw_unlock_bit ((hw_lock_bit_t*)(&(lock)->lck_mtx_data), LCK_ILOCK_BIT)
190 #define lck_rw_ilk_lock(lock) hw_lock_bit ((hw_lock_bit_t*)(&(lock)->lck_rw_tag), LCK_RW_INTERLOCK_BIT, LCK_GRP_NULL)
191 #define lck_rw_ilk_unlock(lock) hw_unlock_bit((hw_lock_bit_t*)(&(lock)->lck_rw_tag), LCK_RW_INTERLOCK_BIT)
192
193 #define memory_barrier() __c11_atomic_thread_fence(memory_order_acq_rel_smp)
194 #define load_memory_barrier() __c11_atomic_thread_fence(memory_order_acquire_smp)
195 #define store_memory_barrier() __c11_atomic_thread_fence(memory_order_release_smp)
196
197 // Enforce program order of loads and stores.
198 #define ordered_load(target, type) \
199 __c11_atomic_load((_Atomic type *)(target), memory_order_relaxed)
200 #define ordered_store(target, type, value) \
201 __c11_atomic_store((_Atomic type *)(target), value, memory_order_relaxed)
202
203 #define ordered_load_mtx(lock) ordered_load(&(lock)->lck_mtx_data, uintptr_t)
204 #define ordered_store_mtx(lock, value) ordered_store(&(lock)->lck_mtx_data, uintptr_t, (value))
205 #define ordered_load_rw(lock) ordered_load(&(lock)->lck_rw_data, uint32_t)
206 #define ordered_store_rw(lock, value) ordered_store(&(lock)->lck_rw_data, uint32_t, (value))
207 #define ordered_load_rw_owner(lock) ordered_load(&(lock)->lck_rw_owner, thread_t)
208 #define ordered_store_rw_owner(lock, value) ordered_store(&(lock)->lck_rw_owner, thread_t, (value))
209 #define ordered_load_hw(lock) ordered_load(&(lock)->lock_data, uintptr_t)
210 #define ordered_store_hw(lock, value) ordered_store(&(lock)->lock_data, uintptr_t, (value))
211 #define ordered_load_bit(lock) ordered_load((lock), uint32_t)
212 #define ordered_store_bit(lock, value) ordered_store((lock), uint32_t, (value))
213
214
215 // Prevent the compiler from reordering memory operations around this
216 #define compiler_memory_fence() __asm__ volatile ("" ::: "memory")
217
218 #define LOCK_PANIC_TIMEOUT 0xc00000
219 #define NOINLINE __attribute__((noinline))
220
221
222 #if __arm__
223 #define interrupts_disabled(mask) (mask & PSR_INTMASK)
224 #else
225 #define interrupts_disabled(mask) (mask & DAIF_IRQF)
226 #endif
227
228
229 #if __arm__
230 #define enable_fiq() __asm__ volatile ("cpsie f" ::: "memory");
231 #define enable_interrupts() __asm__ volatile ("cpsie if" ::: "memory");
232 #endif
233
234 /*
235 * Forward declarations
236 */
237
238 static void lck_rw_lock_shared_gen(lck_rw_t *lck);
239 static void lck_rw_lock_exclusive_gen(lck_rw_t *lck);
240 static boolean_t lck_rw_lock_shared_to_exclusive_success(lck_rw_t *lck);
241 static boolean_t lck_rw_lock_shared_to_exclusive_failure(lck_rw_t *lck, uint32_t prior_lock_state);
242 static void lck_rw_lock_exclusive_to_shared_gen(lck_rw_t *lck, uint32_t prior_lock_state);
243 static lck_rw_type_t lck_rw_done_gen(lck_rw_t *lck, uint32_t prior_lock_state);
244 static boolean_t lck_rw_grab(lck_rw_t *lock, int mode, boolean_t wait);
245
246 /*
247 * atomic exchange API is a low level abstraction of the operations
248 * to atomically read, modify, and write a pointer. This abstraction works
249 * for both Intel and ARMv8.1 compare and exchange atomic instructions as
250 * well as the ARM exclusive instructions.
251 *
252 * atomic_exchange_begin() - begin exchange and retrieve current value
253 * atomic_exchange_complete() - conclude an exchange
254 * atomic_exchange_abort() - cancel an exchange started with atomic_exchange_begin()
255 */
256 static uint32_t
257 atomic_exchange_begin32(uint32_t *target, uint32_t *previous, enum memory_order ord)
258 {
259 uint32_t val;
260
261 val = load_exclusive32(target, ord);
262 *previous = val;
263 return val;
264 }
265
266 static boolean_t
267 atomic_exchange_complete32(uint32_t *target, uint32_t previous, uint32_t newval, enum memory_order ord)
268 {
269 (void)previous; // Previous not needed, monitor is held
270 return store_exclusive32(target, newval, ord);
271 }
272
273 static void
274 atomic_exchange_abort(void)
275 {
276 clear_exclusive();
277 }
278
279 static boolean_t
280 atomic_test_and_set32(uint32_t *target, uint32_t test_mask, uint32_t set_mask, enum memory_order ord, boolean_t wait)
281 {
282 uint32_t value, prev;
283
284 for (;;) {
285 value = atomic_exchange_begin32(target, &prev, ord);
286 if (value & test_mask) {
287 if (wait) {
288 wait_for_event(); // Wait with monitor held
289 } else {
290 atomic_exchange_abort(); // Clear exclusive monitor
291 }
292 return FALSE;
293 }
294 value |= set_mask;
295 if (atomic_exchange_complete32(target, prev, value, ord)) {
296 return TRUE;
297 }
298 }
299 }
300
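/*
 * Illustrative sketch (not part of the original file): the canonical retry
 * loop built on the exchange helpers above. The helper name and its
 * bounded-add semantics are hypothetical; the pattern is the same one used
 * by atomic_test_and_set32() and the rw-lock fast paths below.
 */
static inline boolean_t
atomic_add_below32(uint32_t *target, uint32_t delta, uint32_t limit)
{
	uint32_t value, prev;

	for (;;) {
		value = atomic_exchange_begin32(target, &prev, memory_order_relaxed);
		if (value >= limit) {
			atomic_exchange_abort();	// Drop the exclusive reservation
			return FALSE;
		}
		if (atomic_exchange_complete32(target, prev, value + delta, memory_order_relaxed)) {
			return TRUE;
		}
		cpu_pause();				// Reservation lost; retry
	}
}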
301 void
302 _disable_preemption(void)
303 {
304 thread_t thread = current_thread();
305 unsigned int count;
306
307 count = thread->machine.preemption_count + 1;
308 ordered_store(&thread->machine.preemption_count, unsigned int, count);
309 }
310
311 void
312 _enable_preemption(void)
313 {
314 thread_t thread = current_thread();
315 long state;
316 unsigned int count;
317 #if __arm__
318 #define INTERRUPT_MASK PSR_IRQF
319 #else // __arm__
320 #define INTERRUPT_MASK DAIF_IRQF
321 #endif // __arm__
322
323 count = thread->machine.preemption_count;
324 if (count == 0) {
325 panic("Preemption count negative"); // Count will go negative when released
326 }
327 count--;
328 if (count > 0) {
329 goto update_count; // Preemption is still disabled, just update
330 }
331 state = get_interrupts(); // Get interrupt state
332 if (state & INTERRUPT_MASK) {
333 goto update_count; // Interrupts are already masked, can't take AST here
334 }
335 disable_interrupts_noread(); // Disable interrupts
336 ordered_store(&thread->machine.preemption_count, unsigned int, count);
337 if (thread->machine.CpuDatap->cpu_pending_ast & AST_URGENT) {
338 #if __arm__
339 #if __ARM_USER_PROTECT__
340 uintptr_t up = arm_user_protect_begin(thread);
341 #endif // __ARM_USER_PROTECT__
342 enable_fiq();
343 #endif // __arm__
344 ast_taken_kernel(); // Handle urgent AST
345 #if __arm__
346 #if __ARM_USER_PROTECT__
347 arm_user_protect_end(thread, up, TRUE);
348 #endif // __ARM_USER_PROTECT__
349 enable_interrupts();
350 return; // Return early on arm only due to FIQ enabling
351 #endif // __arm__
352 }
353 restore_interrupts(state); // Enable interrupts
354 return;
355
356 update_count:
357 ordered_store(&thread->machine.preemption_count, unsigned int, count);
358 return;
359 }
360
361 int
362 get_preemption_level(void)
363 {
364 return current_thread()->machine.preemption_count;
365 }
366
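/*
 * Illustrative sketch (not part of the original file): the preemption count
 * must always be balanced, typically bracketing a short window in which
 * per-CPU state is touched. The helper below is hypothetical.
 */
static inline void
preemption_bracket_example(void)
{
	_disable_preemption();			// Raise the count; no preemptive switch
	/* ... access per-CPU data here ... */
	_enable_preemption();			// Drop the count; handle any pending urgent AST
}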
367 #if __SMP__
368 static unsigned int
369 hw_lock_bit_to_contended(hw_lock_bit_t *lock, uint32_t mask, uint32_t timeout LCK_GRP_ARG(lck_grp_t *grp));
370 #endif
371
372 static inline unsigned int
373 hw_lock_bit_to_internal(hw_lock_bit_t *lock, unsigned int bit, uint32_t timeout LCK_GRP_ARG(lck_grp_t *grp))
374 {
375 unsigned int success = 0;
376 uint32_t mask = (1 << bit);
377 #if !__SMP__
378 uint32_t state;
379 #endif
380
381 #if __SMP__
382 if (__improbable(!atomic_test_and_set32(lock, mask, mask, memory_order_acquire, FALSE))) {
383 success = hw_lock_bit_to_contended(lock, mask, timeout LCK_GRP_ARG(grp));
384 } else {
385 success = 1;
386 }
387 #else // __SMP__
388 (void)timeout;
389 state = ordered_load_bit(lock);
390 if (!(mask & state)) {
391 ordered_store_bit(lock, state | mask);
392 success = 1;
393 }
394 #endif // __SMP__
395
396 if (success) {
397 lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
398 }
399
400 return success;
401 }
402
403 unsigned
404 int
405 (hw_lock_bit_to)(hw_lock_bit_t * lock, unsigned int bit, uint32_t timeout LCK_GRP_ARG(lck_grp_t *grp))
406 {
407 _disable_preemption();
408 return hw_lock_bit_to_internal(lock, bit, timeout LCK_GRP_ARG(grp));
409 }
410
411 #if __SMP__
412 static unsigned int NOINLINE
413 hw_lock_bit_to_contended(hw_lock_bit_t *lock, uint32_t mask, uint32_t timeout LCK_GRP_ARG(lck_grp_t *grp))
414 {
415 uint64_t end = 0;
416 int i;
417 #if CONFIG_DTRACE || LOCK_STATS
418 uint64_t begin = 0;
419 boolean_t stat_enabled = lck_grp_spin_spin_enabled(lock LCK_GRP_ARG(grp));
420 #endif /* CONFIG_DTRACE || LOCK_STATS */
421
422 #if LOCK_STATS || CONFIG_DTRACE
423 if (__improbable(stat_enabled)) {
424 begin = mach_absolute_time();
425 }
426 #endif /* LOCK_STATS || CONFIG_DTRACE */
427 for (;;) {
428 for (i = 0; i < LOCK_SNOOP_SPINS; i++) {
429 // Always load-exclusive before wfe
430 // This grabs the monitor and wakes up on a release event
431 if (atomic_test_and_set32(lock, mask, mask, memory_order_acquire, TRUE)) {
432 goto end;
433 }
434 }
435 if (end == 0) {
436 end = ml_get_timebase() + timeout;
437 } else if (ml_get_timebase() >= end) {
438 break;
439 }
440 }
441 return 0;
442 end:
443 #if CONFIG_DTRACE || LOCK_STATS
444 if (__improbable(stat_enabled)) {
445 lck_grp_spin_update_spin(lock LCK_GRP_ARG(grp), mach_absolute_time() - begin);
446 }
447 lck_grp_spin_update_miss(lock LCK_GRP_ARG(grp));
448 #endif /* CONFIG_DTRACE || LOCK_STATS */
449
450 return 1;
451 }
452 #endif // __SMP__
453
454 void
455 (hw_lock_bit)(hw_lock_bit_t * lock, unsigned int bit LCK_GRP_ARG(lck_grp_t *grp))
456 {
457 if (hw_lock_bit_to(lock, bit, LOCK_PANIC_TIMEOUT, LCK_GRP_PROBEARG(grp))) {
458 return;
459 }
460 #if __SMP__
461 panic("hw_lock_bit(): timed out (%p)", lock);
462 #else
463 panic("hw_lock_bit(): interlock held (%p)", lock);
464 #endif
465 }
466
467 void
468 (hw_lock_bit_nopreempt)(hw_lock_bit_t * lock, unsigned int bit LCK_GRP_ARG(lck_grp_t *grp))
469 {
470 if (__improbable(get_preemption_level() == 0)) {
471 panic("Attempt to take no-preempt bitlock %p in preemptible context", lock);
472 }
473 if (hw_lock_bit_to_internal(lock, bit, LOCK_PANIC_TIMEOUT LCK_GRP_ARG(grp))) {
474 return;
475 }
476 #if __SMP__
477 panic("hw_lock_bit_nopreempt(): timed out (%p)", lock);
478 #else
479 panic("hw_lock_bit_nopreempt(): interlock held (%p)", lock);
480 #endif
481 }
482
483 unsigned
484 int
485 (hw_lock_bit_try)(hw_lock_bit_t * lock, unsigned int bit LCK_GRP_ARG(lck_grp_t *grp))
486 {
487 uint32_t mask = (1 << bit);
488 #if !__SMP__
489 uint32_t state;
490 #endif
491 boolean_t success = FALSE;
492
493 _disable_preemption();
494 #if __SMP__
495 // TODO: consider weak (non-looping) atomic test-and-set
496 success = atomic_test_and_set32(lock, mask, mask, memory_order_acquire, FALSE);
497 #else
498 state = ordered_load_bit(lock);
499 if (!(mask & state)) {
500 ordered_store_bit(lock, state | mask);
501 success = TRUE;
502 }
503 #endif // __SMP__
504 if (!success) {
505 _enable_preemption();
506 }
507
508 if (success) {
509 lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
510 }
511
512 return success;
513 }
514
515 static inline void
516 hw_unlock_bit_internal(hw_lock_bit_t *lock, unsigned int bit)
517 {
518 uint32_t mask = (1 << bit);
519 #if !__SMP__
520 uint32_t state;
521 #endif
522
523 #if __SMP__
524 __c11_atomic_fetch_and((_Atomic uint32_t *)lock, ~mask, memory_order_release);
525 set_event();
526 #else // __SMP__
527 state = ordered_load_bit(lock);
528 ordered_store_bit(lock, state & ~mask);
529 #endif // __SMP__
530 #if CONFIG_DTRACE
531 LOCKSTAT_RECORD(LS_LCK_SPIN_UNLOCK_RELEASE, lock, bit);
532 #endif
533 }
534
535 /*
536 * Routine: hw_unlock_bit
537 *
538 * Release spin-lock. The second parameter is the bit number to test and set.
539 * Decrement the preemption level.
540 */
541 void
542 hw_unlock_bit(hw_lock_bit_t * lock, unsigned int bit)
543 {
544 hw_unlock_bit_internal(lock, bit);
545 _enable_preemption();
546 }
547
548 void
549 hw_unlock_bit_nopreempt(hw_lock_bit_t * lock, unsigned int bit)
550 {
551 if (__improbable(get_preemption_level() == 0)) {
552 panic("Attempt to release no-preempt bitlock %p in preemptible context", lock);
553 }
554 hw_unlock_bit_internal(lock, bit);
555 }
556
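/*
 * Illustrative sketch (not part of the original file): bracketing a short
 * critical section with the bit-lock routines above. The caller supplies the
 * lock word; bit 0 is chosen arbitrarily here and no lck_grp is attributed.
 */
static inline void
bit_lock_example(hw_lock_bit_t *bits)
{
	hw_lock_bit(bits, 0, LCK_GRP_NULL);	// Spins (panics on timeout) and disables preemption
	/* ... protected work ... */
	hw_unlock_bit(bits, 0);			// Clears the bit and re-enables preemption
}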
557 #if __SMP__
558 static inline boolean_t
559 interlock_try_disable_interrupts(
560 lck_mtx_t *mutex,
561 boolean_t *istate)
562 {
563 *istate = ml_set_interrupts_enabled(FALSE);
564
565 if (interlock_try(mutex)) {
566 return 1;
567 } else {
568 ml_set_interrupts_enabled(*istate);
569 return 0;
570 }
571 }
572
573 static inline void
574 interlock_unlock_enable_interrupts(
575 lck_mtx_t *mutex,
576 boolean_t istate)
577 {
578 interlock_unlock(mutex);
579 ml_set_interrupts_enabled(istate);
580 }
581 #endif /* __SMP__ */
582
583 /*
584 * Routine: lck_spin_alloc_init
585 */
586 lck_spin_t *
587 lck_spin_alloc_init(
588 lck_grp_t * grp,
589 lck_attr_t * attr)
590 {
591 lck_spin_t *lck;
592
593 if ((lck = (lck_spin_t *) kalloc(sizeof(lck_spin_t))) != 0) {
594 lck_spin_init(lck, grp, attr);
595 }
596
597 return lck;
598 }
599
600 /*
601 * Routine: lck_spin_free
602 */
603 void
604 lck_spin_free(
605 lck_spin_t * lck,
606 lck_grp_t * grp)
607 {
608 lck_spin_destroy(lck, grp);
609 kfree(lck, sizeof(lck_spin_t));
610 }
611
612 /*
613 * Routine: lck_spin_init
614 */
615 void
616 lck_spin_init(
617 lck_spin_t * lck,
618 lck_grp_t * grp,
619 __unused lck_attr_t * attr)
620 {
621 hw_lock_init(&lck->hwlock);
622 lck->type = LCK_SPIN_TYPE;
623 lck_grp_reference(grp);
624 lck_grp_lckcnt_incr(grp, LCK_TYPE_SPIN);
625 store_memory_barrier();
626 }
627
628 /*
629 * arm_usimple_lock is a lck_spin_t without a group or attributes
630 */
631 void inline
632 arm_usimple_lock_init(simple_lock_t lck, __unused unsigned short initial_value)
633 {
634 lck->type = LCK_SPIN_TYPE;
635 hw_lock_init(&lck->hwlock);
636 store_memory_barrier();
637 }
638
639
640 /*
641 * Routine: lck_spin_lock
642 */
643 void
644 lck_spin_lock(lck_spin_t *lock)
645 {
646 #if DEVELOPMENT || DEBUG
647 if (lock->type != LCK_SPIN_TYPE) {
648 panic("Invalid spinlock %p", lock);
649 }
650 #endif // DEVELOPMENT || DEBUG
651 hw_lock_lock(&lock->hwlock, LCK_GRP_NULL);
652 }
653
654 void
655 lck_spin_lock_grp(lck_spin_t *lock, lck_grp_t *grp)
656 {
657 #pragma unused(grp)
658 #if DEVELOPMENT || DEBUG
659 if (lock->type != LCK_SPIN_TYPE) {
660 panic("Invalid spinlock %p", lock);
661 }
662 #endif // DEVELOPMENT || DEBUG
663 hw_lock_lock(&lock->hwlock, grp);
664 }
665
666 /*
667 * Routine: lck_spin_lock_nopreempt
668 */
669 void
670 lck_spin_lock_nopreempt(lck_spin_t *lock)
671 {
672 #if DEVELOPMENT || DEBUG
673 if (lock->type != LCK_SPIN_TYPE) {
674 panic("Invalid spinlock %p", lock);
675 }
676 #endif // DEVELOPMENT || DEBUG
677 hw_lock_lock_nopreempt(&lock->hwlock, LCK_GRP_NULL);
678 }
679
680 void
681 lck_spin_lock_nopreempt_grp(lck_spin_t *lock, lck_grp_t *grp)
682 {
683 #pragma unused(grp)
684 #if DEVELOPMENT || DEBUG
685 if (lock->type != LCK_SPIN_TYPE) {
686 panic("Invalid spinlock %p", lock);
687 }
688 #endif // DEVELOPMENT || DEBUG
689 hw_lock_lock_nopreempt(&lock->hwlock, grp);
690 }
691
692 /*
693 * Routine: lck_spin_try_lock
694 */
695 int
696 lck_spin_try_lock(lck_spin_t *lock)
697 {
698 return hw_lock_try(&lock->hwlock, LCK_GRP_NULL);
699 }
700
701 int
702 lck_spin_try_lock_grp(lck_spin_t *lock, lck_grp_t *grp)
703 {
704 #pragma unused(grp)
705 return hw_lock_try(&lock->hwlock, grp);
706 }
707
708 /*
709 * Routine: lck_spin_try_lock_nopreempt
710 */
711 int
712 lck_spin_try_lock_nopreempt(lck_spin_t *lock)
713 {
714 return hw_lock_try_nopreempt(&lock->hwlock, LCK_GRP_NULL);
715 }
716
717 int
718 lck_spin_try_lock_nopreempt_grp(lck_spin_t *lock, lck_grp_t *grp)
719 {
720 #pragma unused(grp)
721 return hw_lock_try_nopreempt(&lock->hwlock, grp);
722 }
723
724 /*
725 * Routine: lck_spin_unlock
726 */
727 void
728 lck_spin_unlock(lck_spin_t *lock)
729 {
730 #if DEVELOPMENT || DEBUG
731 if ((LCK_MTX_STATE_TO_THREAD(lock->lck_spin_data) != current_thread()) && LOCK_CORRECTNESS_PANIC()) {
732 panic("Spinlock not owned by thread %p = %lx", lock, lock->lck_spin_data);
733 }
734 if (lock->type != LCK_SPIN_TYPE) {
735 panic("Invalid spinlock type %p", lock);
736 }
737 #endif // DEVELOPMENT || DEBUG
738 hw_lock_unlock(&lock->hwlock);
739 }
740
741 /*
742 * Routine: lck_spin_unlock_nopreempt
743 */
744 void
745 lck_spin_unlock_nopreempt(lck_spin_t *lock)
746 {
747 #if DEVELOPMENT || DEBUG
748 if ((LCK_MTX_STATE_TO_THREAD(lock->lck_spin_data) != current_thread()) && LOCK_CORRECTNESS_PANIC()) {
749 panic("Spinlock not owned by thread %p = %lx", lock, lock->lck_spin_data);
750 }
751 if (lock->type != LCK_SPIN_TYPE) {
752 panic("Invalid spinlock type %p", lock);
753 }
754 #endif // DEVELOPMENT || DEBUG
755 hw_lock_unlock_nopreempt(&lock->hwlock);
756 }
757
758 /*
759 * Routine: lck_spin_destroy
760 */
761 void
762 lck_spin_destroy(
763 lck_spin_t * lck,
764 lck_grp_t * grp)
765 {
766 if (lck->lck_spin_data == LCK_SPIN_TAG_DESTROYED) {
767 return;
768 }
769 lck->lck_spin_data = LCK_SPIN_TAG_DESTROYED;
770 lck_grp_lckcnt_decr(grp, LCK_TYPE_SPIN);
771 lck_grp_deallocate(grp);
772 }
773
774 /*
775 * Routine: kdp_lck_spin_is_acquired
776 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
777 */
778 boolean_t
779 kdp_lck_spin_is_acquired(lck_spin_t *lck)
780 {
781 if (not_in_kdp) {
782 panic("panic: spinlock acquired check done outside of kernel debugger");
783 }
784 return ((lck->lck_spin_data & ~LCK_SPIN_TAG_DESTROYED) != 0) ? TRUE:FALSE;
785 }
786
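/*
 * Illustrative sketch (not part of the original file): typical lifecycle of a
 * lck_spin_t using the routines above. 'grp' is a caller-supplied lock group.
 */
static inline void
spin_lock_lifecycle_example(lck_grp_t *grp)
{
	lck_spin_t *lock = lck_spin_alloc_init(grp, LCK_ATTR_NULL);

	if (lock == NULL) {
		return;				// kalloc failure
	}
	lck_spin_lock(lock);			// Disables preemption and spins for the lock
	/* ... short critical section, no blocking ... */
	lck_spin_unlock(lock);			// Releases and re-enables preemption
	lck_spin_free(lock, grp);		// Destroys the lock and drops the group count
}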
787 /*
788 * Initialize a usimple_lock.
789 *
790 * No change in preemption state.
791 */
792 void
793 usimple_lock_init(
794 usimple_lock_t l,
795 unsigned short tag)
796 {
797 #ifndef MACHINE_SIMPLE_LOCK
798 USLDBG(usld_lock_init(l, tag));
799 hw_lock_init(&l->lck_spin_data);
800 #else
801 simple_lock_init((simple_lock_t) l, tag);
802 #endif
803 }
804
805
806 /*
807 * Acquire a usimple_lock.
808 *
809 * Returns with preemption disabled. Note
810 * that the hw_lock routines are responsible for
811 * maintaining preemption state.
812 */
813 void
814 (usimple_lock)(
815 usimple_lock_t l
816 LCK_GRP_ARG(lck_grp_t *grp))
817 {
818 #ifndef MACHINE_SIMPLE_LOCK
819 pc_t pc;
820
821 OBTAIN_PC(pc, l);
822 USLDBG(usld_lock_pre(l, pc));
823
824 if (!hw_lock_to(&l->lck_spin_data, LockTimeOut, LCK_GRP_ARG(grp))) { /* Try to get the lock
825 * with a timeout */
826 panic("simple lock deadlock detection - l=%p, cpu=%d, ret=%p", &l, cpu_number(), pc);
827 }
828
829 USLDBG(usld_lock_post(l, pc));
830 #else
831 simple_lock((simple_lock_t) l, LCK_GRP_PROBEARG(grp));
832 #endif
833 }
834
835
836 extern void sync(void);
837
838 /*
839 * Release a usimple_lock.
840 *
841 * Returns with preemption enabled. Note
842 * that the hw_lock routines are responsible for
843 * maintaining preemption state.
844 */
845 void
846 (usimple_unlock)(
847 usimple_lock_t l)
848 {
849 #ifndef MACHINE_SIMPLE_LOCK
850 pc_t pc;
851
852 OBTAIN_PC(pc, l);
853 USLDBG(usld_unlock(l, pc));
854 sync();
855 hw_lock_unlock(&l->lck_spin_data);
856 #else
857 simple_unlock((simple_lock_t)l);
858 #endif
859 }
860
861
862 /*
863 * Conditionally acquire a usimple_lock.
864 *
865 * On success, returns with preemption disabled.
866 * On failure, returns with preemption in the same state
867 * as when first invoked. Note that the hw_lock routines
868 * are responsible for maintaining preemption state.
869 *
870 * XXX No stats are gathered on a miss; I preserved this
871 * behavior from the original assembly-language code, but
872 * doesn't it make sense to log misses? XXX
873 */
874 unsigned
875 int
876 (usimple_lock_try)(
877 usimple_lock_t l
878 LCK_GRP_ARG(lck_grp_t *grp))
879 {
880 #ifndef MACHINE_SIMPLE_LOCK
881 pc_t pc;
882 unsigned int success;
883
884 OBTAIN_PC(pc, l);
885 USLDBG(usld_lock_try_pre(l, pc));
886 if ((success = hw_lock_try(&l->lck_spin_data LCK_GRP_ARG(grp)))) {
887 USLDBG(usld_lock_try_post(l, pc));
888 }
889 return success;
890 #else
891 return simple_lock_try((simple_lock_t) l, grp);
892 #endif
893 }
894
895 #if USLOCK_DEBUG
896 /*
897 * States of a usimple_lock. The default when initializing
898 * a usimple_lock is setting it up for debug checking.
899 */
900 #define USLOCK_CHECKED 0x0001 /* lock is being checked */
901 #define USLOCK_TAKEN 0x0002 /* lock has been taken */
902 #define USLOCK_INIT 0xBAA0 /* lock has been initialized */
903 #define USLOCK_INITIALIZED (USLOCK_INIT|USLOCK_CHECKED)
904 #define USLOCK_CHECKING(l) (uslock_check && \
905 ((l)->debug.state & USLOCK_CHECKED))
906
907 /*
908 * Trace activities of a particularly interesting lock.
909 */
910 void usl_trace(usimple_lock_t, int, pc_t, const char *);
911
912
913 /*
914 * Initialize the debugging information contained
915 * in a usimple_lock.
916 */
917 void
918 usld_lock_init(
919 usimple_lock_t l,
920 __unused unsigned short tag)
921 {
922 if (l == USIMPLE_LOCK_NULL) {
923 panic("lock initialization: null lock pointer");
924 }
925 l->lock_type = USLOCK_TAG;
926 l->debug.state = uslock_check ? USLOCK_INITIALIZED : 0;
927 l->debug.lock_cpu = l->debug.unlock_cpu = 0;
928 l->debug.lock_pc = l->debug.unlock_pc = INVALID_PC;
929 l->debug.lock_thread = l->debug.unlock_thread = INVALID_THREAD;
930 l->debug.duration[0] = l->debug.duration[1] = 0;
931 l->debug.unlock_cpu = l->debug.unlock_cpu = 0;
932 l->debug.unlock_pc = l->debug.unlock_pc = INVALID_PC;
933 l->debug.unlock_thread = l->debug.unlock_thread = INVALID_THREAD;
934 }
935
936
937 /*
938 * These checks apply to all usimple_locks, not just
939 * those with USLOCK_CHECKED turned on.
940 */
941 int
942 usld_lock_common_checks(
943 usimple_lock_t l,
944 const char *caller)
945 {
946 if (l == USIMPLE_LOCK_NULL) {
947 panic("%s: null lock pointer", caller);
948 }
949 if (l->lock_type != USLOCK_TAG) {
950 panic("%s: 0x%x is not a usimple lock", caller, (integer_t) l);
951 }
952 if (!(l->debug.state & USLOCK_INIT)) {
953 panic("%s: 0x%x is not an initialized lock",
954 caller, (integer_t) l);
955 }
956 return USLOCK_CHECKING(l);
957 }
958
959
960 /*
961 * Debug checks on a usimple_lock just before attempting
962 * to acquire it.
963 */
964 /* ARGSUSED */
965 void
966 usld_lock_pre(
967 usimple_lock_t l,
968 pc_t pc)
969 {
970 const char *caller = "usimple_lock";
971
972
973 if (!usld_lock_common_checks(l, caller)) {
974 return;
975 }
976
977 /*
978 * Note that we have a weird case where we are getting a lock when we are
979 * in the process of putting the system to sleep. We are running with no
980 * current threads, therefore we can't tell if we are trying to retake a lock
981 * we have or someone on the other processor has it. Therefore we just
982 * ignore this test if the locking thread is 0.
983 */
984
985 if ((l->debug.state & USLOCK_TAKEN) && l->debug.lock_thread &&
986 l->debug.lock_thread == (void *) current_thread()) {
987 printf("%s: lock 0x%x already locked (at %p) by",
988 caller, (integer_t) l, l->debug.lock_pc);
989 printf(" current thread %p (new attempt at pc %p)\n",
990 l->debug.lock_thread, pc);
991 panic("%s", caller);
992 }
993 mp_disable_preemption();
994 usl_trace(l, cpu_number(), pc, caller);
995 mp_enable_preemption();
996 }
997
998
999 /*
1000 * Debug checks on a usimple_lock just after acquiring it.
1001 *
1002 * Pre-emption has been disabled at this point,
1003 * so we are safe in using cpu_number.
1004 */
1005 void
1006 usld_lock_post(
1007 usimple_lock_t l,
1008 pc_t pc)
1009 {
1010 int mycpu;
1011 const char *caller = "successful usimple_lock";
1012
1013
1014 if (!usld_lock_common_checks(l, caller)) {
1015 return;
1016 }
1017
1018 if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED)) {
1019 panic("%s: lock 0x%x became uninitialized",
1020 caller, (integer_t) l);
1021 }
1022 if ((l->debug.state & USLOCK_TAKEN)) {
1023 panic("%s: lock 0x%x became TAKEN by someone else",
1024 caller, (integer_t) l);
1025 }
1026
1027 mycpu = cpu_number();
1028 l->debug.lock_thread = (void *) current_thread();
1029 l->debug.state |= USLOCK_TAKEN;
1030 l->debug.lock_pc = pc;
1031 l->debug.lock_cpu = mycpu;
1032
1033 usl_trace(l, mycpu, pc, caller);
1034 }
1035
1036
1037 /*
1038 * Debug checks on a usimple_lock just before
1039 * releasing it. Note that the caller has not
1040 * yet released the hardware lock.
1041 *
1042 * Preemption is still disabled, so there's
1043 * no problem using cpu_number.
1044 */
1045 void
1046 usld_unlock(
1047 usimple_lock_t l,
1048 pc_t pc)
1049 {
1050 int mycpu;
1051 const char *caller = "usimple_unlock";
1052
1053
1054 if (!usld_lock_common_checks(l, caller)) {
1055 return;
1056 }
1057
1058 mycpu = cpu_number();
1059
1060 if (!(l->debug.state & USLOCK_TAKEN)) {
1061 panic("%s: lock 0x%x hasn't been taken",
1062 caller, (integer_t) l);
1063 }
1064 if (l->debug.lock_thread != (void *) current_thread()) {
1065 panic("%s: unlocking lock 0x%x, owned by thread %p",
1066 caller, (integer_t) l, l->debug.lock_thread);
1067 }
1068 if (l->debug.lock_cpu != mycpu) {
1069 printf("%s: unlocking lock 0x%x on cpu 0x%x",
1070 caller, (integer_t) l, mycpu);
1071 printf(" (acquired on cpu 0x%x)\n", l->debug.lock_cpu);
1072 panic("%s", caller);
1073 }
1074 usl_trace(l, mycpu, pc, caller);
1075
1076 l->debug.unlock_thread = l->debug.lock_thread;
1077 l->debug.lock_thread = INVALID_THREAD;
1078 l->debug.state &= ~USLOCK_TAKEN;
1079 l->debug.unlock_pc = pc;
1080 l->debug.unlock_cpu = mycpu;
1081 }
1082
1083
1084 /*
1085 * Debug checks on a usimple_lock just before
1086 * attempting to acquire it.
1087 *
1088 * Preemption isn't guaranteed to be disabled.
1089 */
1090 void
1091 usld_lock_try_pre(
1092 usimple_lock_t l,
1093 pc_t pc)
1094 {
1095 const char *caller = "usimple_lock_try";
1096
1097 if (!usld_lock_common_checks(l, caller)) {
1098 return;
1099 }
1100 mp_disable_preemption();
1101 usl_trace(l, cpu_number(), pc, caller);
1102 mp_enable_preemption();
1103 }
1104
1105
1106 /*
1107 * Debug checks on a usimple_lock just after
1108 * successfully attempting to acquire it.
1109 *
1110 * Preemption has been disabled by the
1111 * lock acquisition attempt, so it's safe
1112 * to use cpu_number.
1113 */
1114 void
1115 usld_lock_try_post(
1116 usimple_lock_t l,
1117 pc_t pc)
1118 {
1119 int mycpu;
1120 const char *caller = "successful usimple_lock_try";
1121
1122 if (!usld_lock_common_checks(l, caller)) {
1123 return;
1124 }
1125
1126 if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED)) {
1127 panic("%s: lock 0x%x became uninitialized",
1128 caller, (integer_t) l);
1129 }
1130 if ((l->debug.state & USLOCK_TAKEN)) {
1131 panic("%s: lock 0x%x became TAKEN by someone else",
1132 caller, (integer_t) l);
1133 }
1134
1135 mycpu = cpu_number();
1136 l->debug.lock_thread = (void *) current_thread();
1137 l->debug.state |= USLOCK_TAKEN;
1138 l->debug.lock_pc = pc;
1139 l->debug.lock_cpu = mycpu;
1140
1141 usl_trace(l, mycpu, pc, caller);
1142 }
1143
1144
1145 /*
1146 * For very special cases, set traced_lock to point to a
1147 * specific lock of interest. The result is a series of
1148 * XPRs showing lock operations on that lock. The lock_seq
1149 * value is used to show the order of those operations.
1150 */
1151 usimple_lock_t traced_lock;
1152 unsigned int lock_seq;
1153
1154 void
1155 usl_trace(
1156 usimple_lock_t l,
1157 int mycpu,
1158 pc_t pc,
1159 const char *op_name)
1160 {
1161 if (traced_lock == l) {
1162 XPR(XPR_SLOCK,
1163 "seq %d, cpu %d, %s @ %x\n",
1164 (integer_t) lock_seq, (integer_t) mycpu,
1165 (integer_t) op_name, (integer_t) pc, 0);
1166 lock_seq++;
1167 }
1168 }
1169
1170
1171 #endif /* USLOCK_DEBUG */
1172
1173 /*
1174 * The C portion of the shared/exclusive locks package.
1175 */
1176
1177 /*
1178 * compute the deadline to spin against when
1179 * waiting for a change of state on a lck_rw_t
1180 */
1181 #if __SMP__
1182 static inline uint64_t
1183 lck_rw_deadline_for_spin(lck_rw_t *lck)
1184 {
1185 lck_rw_word_t word;
1186
1187 word.data = ordered_load_rw(lck);
1188 if (word.can_sleep) {
1189 if (word.r_waiting || word.w_waiting || (word.shared_count > machine_info.max_cpus)) {
1190 /*
1191 * there are already threads waiting on this lock... this
1192 * implies that they have spun beyond their deadlines waiting for
1193 * the desired state to show up so we will not bother spinning at this time...
1194 * or
1195 * the current number of threads sharing this lock exceeds our capacity to run them
1196 * concurrently and since all states we're going to spin for require the rw_shared_count
1197 * to be at 0, we'll not bother spinning since the latency for this to happen is
1198 * unpredictable...
1199 */
1200 return mach_absolute_time();
1201 }
1202 return mach_absolute_time() + MutexSpin;
1203 } else {
1204 return mach_absolute_time() + (100000LL * 1000000000LL);
1205 }
1206 }
1207 #endif // __SMP__
1208
1209 static boolean_t
1210 lck_rw_drain_status(lck_rw_t *lock, uint32_t status_mask, boolean_t wait __unused)
1211 {
1212 #if __SMP__
1213 uint64_t deadline = 0;
1214 uint32_t data;
1215
1216 if (wait) {
1217 deadline = lck_rw_deadline_for_spin(lock);
1218 }
1219
1220 for (;;) {
1221 data = load_exclusive32(&lock->lck_rw_data, memory_order_acquire_smp);
1222 if ((data & status_mask) == 0) {
1223 break;
1224 }
1225 if (wait) {
1226 wait_for_event();
1227 } else {
1228 clear_exclusive();
1229 }
1230 if (!wait || (mach_absolute_time() >= deadline)) {
1231 return FALSE;
1232 }
1233 }
1234 clear_exclusive();
1235 return TRUE;
1236 #else
1237 uint32_t data;
1238
1239 data = ordered_load_rw(lock);
1240 if ((data & status_mask) == 0) {
1241 return TRUE;
1242 } else {
1243 return FALSE;
1244 }
1245 #endif // __SMP__
1246 }
1247
1248 /*
1249 * Spin while interlock is held.
1250 */
1251 static inline void
1252 lck_rw_interlock_spin(lck_rw_t *lock)
1253 {
1254 #if __SMP__
1255 uint32_t data;
1256
1257 for (;;) {
1258 data = load_exclusive32(&lock->lck_rw_data, memory_order_relaxed);
1259 if (data & LCK_RW_INTERLOCK) {
1260 wait_for_event();
1261 } else {
1262 clear_exclusive();
1263 return;
1264 }
1265 }
1266 #else
1267 panic("lck_rw_interlock_spin(): Interlock locked %p %x", lock, lock->lck_rw_data);
1268 #endif
1269 }
1270
1271 /*
1272 * We disable interrupts while holding the RW interlock to prevent an
1273 * interrupt from exacerbating hold time.
1274 * Hence, local helper functions lck_interlock_lock()/lck_interlock_unlock().
1275 */
1276 static inline boolean_t
1277 lck_interlock_lock(lck_rw_t *lck)
1278 {
1279 boolean_t istate;
1280
1281 istate = ml_set_interrupts_enabled(FALSE);
1282 lck_rw_ilk_lock(lck);
1283 return istate;
1284 }
1285
1286 static inline void
1287 lck_interlock_unlock(lck_rw_t *lck, boolean_t istate)
1288 {
1289 lck_rw_ilk_unlock(lck);
1290 ml_set_interrupts_enabled(istate);
1291 }
1292
1293
1294 #define LCK_RW_GRAB_WANT 0
1295 #define LCK_RW_GRAB_SHARED 1
1296
1297 static boolean_t
1298 lck_rw_grab(lck_rw_t *lock, int mode, boolean_t wait)
1299 {
1300 uint64_t deadline = 0;
1301 uint32_t data, prev;
1302 boolean_t do_exch;
1303
1304 #if __SMP__
1305 if (wait) {
1306 deadline = lck_rw_deadline_for_spin(lock);
1307 }
1308 #else
1309 wait = FALSE; // Don't spin on UP systems
1310 #endif
1311
1312 for (;;) {
1313 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
1314 if (data & LCK_RW_INTERLOCK) {
1315 atomic_exchange_abort();
1316 lck_rw_interlock_spin(lock);
1317 continue;
1318 }
1319 do_exch = FALSE;
1320 if (mode == LCK_RW_GRAB_WANT) {
1321 if ((data & LCK_RW_WANT_EXCL) == 0) {
1322 data |= LCK_RW_WANT_EXCL;
1323 do_exch = TRUE;
1324 }
1325 } else { // LCK_RW_GRAB_SHARED
1326 if (((data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) == 0) ||
1327 (((data & LCK_RW_SHARED_MASK)) && ((data & LCK_RW_PRIV_EXCL) == 0))) {
1328 data += LCK_RW_SHARED_READER;
1329 do_exch = TRUE;
1330 }
1331 }
1332 if (do_exch) {
1333 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
1334 return TRUE;
1335 }
1336 } else {
1337 if (wait) { // Wait with the exclusive monitor held
1338 wait_for_event();
1339 } else {
1340 atomic_exchange_abort();
1341 }
1342 if (!wait || (mach_absolute_time() >= deadline)) {
1343 return FALSE;
1344 }
1345 }
1346 }
1347 }
1348
1349
1350 /*
1351 * Routine: lck_rw_alloc_init
1352 */
1353 lck_rw_t *
1354 lck_rw_alloc_init(
1355 lck_grp_t *grp,
1356 lck_attr_t *attr)
1357 {
1358 lck_rw_t *lck;
1359
1360 if ((lck = (lck_rw_t *)kalloc(sizeof(lck_rw_t))) != 0) {
1361 lck_rw_init(lck, grp, attr);
1362 }
1363
1364 return lck;
1365 }
1366
1367 /*
1368 * Routine: lck_rw_free
1369 */
1370 void
1371 lck_rw_free(
1372 lck_rw_t *lck,
1373 lck_grp_t *grp)
1374 {
1375 lck_rw_destroy(lck, grp);
1376 kfree(lck, sizeof(lck_rw_t));
1377 }
1378
1379 /*
1380 * Routine: lck_rw_init
1381 */
1382 void
1383 lck_rw_init(
1384 lck_rw_t *lck,
1385 lck_grp_t *grp,
1386 lck_attr_t *attr)
1387 {
1388 if (attr == LCK_ATTR_NULL) {
1389 attr = &LockDefaultLckAttr;
1390 }
1391 memset(lck, 0, sizeof(lck_rw_t));
1392 lck->lck_rw_can_sleep = TRUE;
1393 if ((attr->lck_attr_val & LCK_ATTR_RW_SHARED_PRIORITY) == 0) {
1394 lck->lck_rw_priv_excl = TRUE;
1395 }
1396
1397 lck_grp_reference(grp);
1398 lck_grp_lckcnt_incr(grp, LCK_TYPE_RW);
1399 }
1400
1401
1402 /*
1403 * Routine: lck_rw_destroy
1404 */
1405 void
1406 lck_rw_destroy(
1407 lck_rw_t *lck,
1408 lck_grp_t *grp)
1409 {
1410 if (lck->lck_rw_tag == LCK_RW_TAG_DESTROYED) {
1411 return;
1412 }
1413 #if MACH_LDEBUG
1414 lck_rw_assert(lck, LCK_RW_ASSERT_NOTHELD);
1415 #endif
1416 lck->lck_rw_tag = LCK_RW_TAG_DESTROYED;
1417 lck_grp_lckcnt_decr(grp, LCK_TYPE_RW);
1418 lck_grp_deallocate(grp);
1419 return;
1420 }
1421
1422 /*
1423 * Routine: lck_rw_lock
1424 */
1425 void
1426 lck_rw_lock(
1427 lck_rw_t *lck,
1428 lck_rw_type_t lck_rw_type)
1429 {
1430 if (lck_rw_type == LCK_RW_TYPE_SHARED) {
1431 lck_rw_lock_shared(lck);
1432 } else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) {
1433 lck_rw_lock_exclusive(lck);
1434 } else {
1435 panic("lck_rw_lock(): Invalid RW lock type: %x", lck_rw_type);
1436 }
1437 }
1438
1439 /*
1440 * Routine: lck_rw_lock_exclusive
1441 */
1442 void
1443 lck_rw_lock_exclusive(lck_rw_t *lock)
1444 {
1445 thread_t thread = current_thread();
1446
1447 thread->rwlock_count++;
1448 if (atomic_test_and_set32(&lock->lck_rw_data,
1449 (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK),
1450 LCK_RW_WANT_EXCL, memory_order_acquire_smp, FALSE)) {
1451 #if CONFIG_DTRACE
1452 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
1453 #endif /* CONFIG_DTRACE */
1454 } else {
1455 lck_rw_lock_exclusive_gen(lock);
1456 }
1457 #if MACH_ASSERT
1458 thread_t owner = ordered_load_rw_owner(lock);
1459 assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
1460 #endif
1461 ordered_store_rw_owner(lock, thread);
1462 }
1463
1464 /*
1465 * Routine: lck_rw_lock_shared
1466 */
1467 void
1468 lck_rw_lock_shared(lck_rw_t *lock)
1469 {
1470 uint32_t data, prev;
1471
1472 current_thread()->rwlock_count++;
1473 for (;;) {
1474 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
1475 if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK)) {
1476 atomic_exchange_abort();
1477 lck_rw_lock_shared_gen(lock);
1478 break;
1479 }
1480 data += LCK_RW_SHARED_READER;
1481 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
1482 break;
1483 }
1484 cpu_pause();
1485 }
1486 #if MACH_ASSERT
1487 thread_t owner = ordered_load_rw_owner(lock);
1488 assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
1489 #endif
1490 #if CONFIG_DTRACE
1491 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED);
1492 #endif /* CONFIG_DTRACE */
1493 return;
1494 }
1495
1496 /*
1497 * Routine: lck_rw_lock_shared_to_exclusive
1498 */
1499 boolean_t
1500 lck_rw_lock_shared_to_exclusive(lck_rw_t *lock)
1501 {
1502 uint32_t data, prev;
1503
1504 for (;;) {
1505 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
1506 if (data & LCK_RW_INTERLOCK) {
1507 atomic_exchange_abort();
1508 lck_rw_interlock_spin(lock);
1509 continue;
1510 }
1511 if (data & LCK_RW_WANT_UPGRADE) {
1512 data -= LCK_RW_SHARED_READER;
1513 if ((data & LCK_RW_SHARED_MASK) == 0) { /* we were the last reader */
1514 data &= ~(LCK_RW_W_WAITING); /* so clear the wait indicator */
1515 }
1516 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
1517 return lck_rw_lock_shared_to_exclusive_failure(lock, prev);
1518 }
1519 } else {
1520 data |= LCK_RW_WANT_UPGRADE; /* ask for WANT_UPGRADE */
1521 data -= LCK_RW_SHARED_READER; /* and shed our read count */
1522 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
1523 break;
1524 }
1525 }
1526 cpu_pause();
1527 }
1528 /* we now own the WANT_UPGRADE */
1529 if (data & LCK_RW_SHARED_MASK) { /* check to see if all of the readers are drained */
1530 lck_rw_lock_shared_to_exclusive_success(lock); /* if not, we need to go wait */
1531 }
1532 #if MACH_ASSERT
1533 thread_t owner = ordered_load_rw_owner(lock);
1534 assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
1535 #endif
1536 ordered_store_rw_owner(lock, current_thread());
1537 #if CONFIG_DTRACE
1538 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lock, 0);
1539 #endif /* CONFIG_DTRACE */
1540 return TRUE;
1541 }
1542
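/*
 * Illustrative sketch (not part of the original file): upgrading a shared
 * hold with the routine above. On failure the shared hold has already been
 * dropped, so the caller must reacquire the lock before touching the data.
 */
static inline void
rw_upgrade_example(lck_rw_t *lock)
{
	lck_rw_lock_shared(lock);
	/* ... read-side checks ... */
	if (!lck_rw_lock_shared_to_exclusive(lock)) {
		/* Upgrade lost the race: we no longer hold the lock at all. */
		lck_rw_lock_exclusive(lock);
		/* ... revalidate state before modifying ... */
	}
	/* ... exclusive update ... */
	lck_rw_unlock_exclusive(lock);
}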
1543
1544 /*
1545 * Routine: lck_rw_lock_shared_to_exclusive_failure
1546 * Function:
1547 * Fast path code has already dropped our read
1548 * count and determined that someone else owns 'lck_rw_want_upgrade'
1549 * if 'lck_rw_shared_count' == 0, it has also already dropped 'lck_w_waiting';
1550 * all we need to do here is determine if a wakeup is needed
1551 */
1552 static boolean_t
1553 lck_rw_lock_shared_to_exclusive_failure(
1554 lck_rw_t *lck,
1555 uint32_t prior_lock_state)
1556 {
1557 thread_t thread = current_thread();
1558 uint32_t rwlock_count;
1559
1560 /* Check if dropping the lock means that we need to unpromote */
1561 rwlock_count = thread->rwlock_count--;
1562 #if MACH_LDEBUG
1563 if (rwlock_count == 0) {
1564 panic("rw lock count underflow for thread %p", thread);
1565 }
1566 #endif
1567 if ((prior_lock_state & LCK_RW_W_WAITING) &&
1568 ((prior_lock_state & LCK_RW_SHARED_MASK) == LCK_RW_SHARED_READER)) {
1569 /*
1570 * Someone else has requested upgrade.
1571 * Since we've released the read lock, wake
1572 * him up if he's blocked waiting
1573 */
1574 thread_wakeup(LCK_RW_WRITER_EVENT(lck));
1575 }
1576
1577 if ((rwlock_count == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
1578 /* sched_flags checked without lock, but will be rechecked while clearing */
1579 lck_rw_clear_promotion(thread, unslide_for_kdebug(lck));
1580 }
1581
1582 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_NONE,
1583 VM_KERNEL_UNSLIDE_OR_PERM(lck), lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0);
1584
1585 return FALSE;
1586 }
1587
1588 /*
1589 * Routine: lck_rw_lock_shared_to_exclusive_success
1590 * Function:
1591 * assembly fast path code has already dropped our read
1592 * count and successfully acquired 'lck_rw_want_upgrade'
1593 * we just need to wait for the rest of the readers to drain
1594 * and then we can return as the exclusive holder of this lock
1595 */
1596 static boolean_t
1597 lck_rw_lock_shared_to_exclusive_success(
1598 lck_rw_t *lock)
1599 {
1600 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock);
1601 int slept = 0;
1602 lck_rw_word_t word;
1603 wait_result_t res;
1604 boolean_t istate;
1605 boolean_t not_shared;
1606
1607 #if CONFIG_DTRACE
1608 uint64_t wait_interval = 0;
1609 int readers_at_sleep = 0;
1610 boolean_t dtrace_ls_initialized = FALSE;
1611 boolean_t dtrace_rwl_shared_to_excl_spin, dtrace_rwl_shared_to_excl_block, dtrace_ls_enabled = FALSE;
1612 #endif
1613
1614 while (!lck_rw_drain_status(lock, LCK_RW_SHARED_MASK, FALSE)) {
1615 word.data = ordered_load_rw(lock);
1616 #if CONFIG_DTRACE
1617 if (dtrace_ls_initialized == FALSE) {
1618 dtrace_ls_initialized = TRUE;
1619 dtrace_rwl_shared_to_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN] != 0);
1620 dtrace_rwl_shared_to_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK] != 0);
1621 dtrace_ls_enabled = dtrace_rwl_shared_to_excl_spin || dtrace_rwl_shared_to_excl_block;
1622 if (dtrace_ls_enabled) {
1623 /*
1624 * Either sleeping or spinning is happening,
1625 * start a timing of our delay interval now.
1626 */
1627 readers_at_sleep = word.shared_count;
1628 wait_interval = mach_absolute_time();
1629 }
1630 }
1631 #endif
1632
1633 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_START,
1634 trace_lck, word.shared_count, 0, 0, 0);
1635
1636 not_shared = lck_rw_drain_status(lock, LCK_RW_SHARED_MASK, TRUE);
1637
1638 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_END,
1639 trace_lck, lock->lck_rw_shared_count, 0, 0, 0);
1640
1641 if (not_shared) {
1642 break;
1643 }
1644
1645 /*
1646 * if we get here, the spin deadline in lck_rw_drain_status()
1647 * has expired w/o the rw_shared_count having drained to 0
1648 * check to see if we're allowed to do a thread_block
1649 */
1650 if (word.can_sleep) {
1651 istate = lck_interlock_lock(lock);
1652
1653 word.data = ordered_load_rw(lock);
1654 if (word.shared_count != 0) {
1655 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_START,
1656 trace_lck, word.shared_count, 0, 0, 0);
1657
1658 word.w_waiting = 1;
1659 ordered_store_rw(lock, word.data);
1660
1661 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockUpgrade);
1662 res = assert_wait(LCK_RW_WRITER_EVENT(lock),
1663 THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
1664 lck_interlock_unlock(lock, istate);
1665
1666 if (res == THREAD_WAITING) {
1667 res = thread_block(THREAD_CONTINUE_NULL);
1668 slept++;
1669 }
1670 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_END,
1671 trace_lck, res, slept, 0, 0);
1672 } else {
1673 lck_interlock_unlock(lock, istate);
1674 break;
1675 }
1676 }
1677 }
1678 #if CONFIG_DTRACE
1679 /*
1680 * We infer whether we took the sleep/spin path above by checking readers_at_sleep.
1681 */
1682 if (dtrace_ls_enabled == TRUE) {
1683 if (slept == 0) {
1684 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN, lock, mach_absolute_time() - wait_interval, 0);
1685 } else {
1686 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK, lock,
1687 mach_absolute_time() - wait_interval, 1,
1688 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1689 }
1690 }
1691 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lock, 1);
1692 #endif
1693 return TRUE;
1694 }
1695
1696
1697 /*
1698 * Routine: lck_rw_lock_exclusive_to_shared
1699 */
1700
1701 void
1702 lck_rw_lock_exclusive_to_shared(lck_rw_t *lock)
1703 {
1704 uint32_t data, prev;
1705
1706 assertf(lock->lck_rw_owner == current_thread(), "state=0x%x, owner=%p", lock->lck_rw_data, lock->lck_rw_owner);
1707 ordered_store_rw_owner(lock, THREAD_NULL);
1708 for (;;) {
1709 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_release_smp);
1710 if (data & LCK_RW_INTERLOCK) {
1711 #if __SMP__
1712 atomic_exchange_abort();
1713 lck_rw_interlock_spin(lock); /* wait for interlock to clear */
1714 continue;
1715 #else
1716 panic("lck_rw_lock_exclusive_to_shared(): Interlock locked (%p): %x", lock, data);
1717 #endif // __SMP__
1718 }
1719 data += LCK_RW_SHARED_READER;
1720 if (data & LCK_RW_WANT_UPGRADE) {
1721 data &= ~(LCK_RW_WANT_UPGRADE);
1722 } else {
1723 data &= ~(LCK_RW_WANT_EXCL);
1724 }
1725 if (!((prev & LCK_RW_W_WAITING) && (prev & LCK_RW_PRIV_EXCL))) {
1726 data &= ~(LCK_RW_W_WAITING);
1727 }
1728 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_release_smp)) {
1729 break;
1730 }
1731 cpu_pause();
1732 }
1733 return lck_rw_lock_exclusive_to_shared_gen(lock, prev);
1734 }
1735
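/*
 * Illustrative sketch (not part of the original file): downgrading an
 * exclusive hold with the routine above; the caller continues with a shared
 * hold and releases it normally.
 */
static inline void
rw_downgrade_example(lck_rw_t *lock)
{
	lck_rw_lock_exclusive(lock);
	/* ... exclusive update ... */
	lck_rw_lock_exclusive_to_shared(lock);	// Now held shared; waiting readers may run
	/* ... read-only follow-up work ... */
	lck_rw_unlock_shared(lock);
}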
1736 /*
1737 * Routine: lck_rw_lock_exclusive_to_shared_gen
1738 * Function:
1739 * Fast path has already dropped
1740 * our exclusive state and bumped lck_rw_shared_count
1741 * all we need to do here is determine if anyone
1742 * needs to be awakened.
1743 */
1744 static void
1745 lck_rw_lock_exclusive_to_shared_gen(
1746 lck_rw_t *lck,
1747 uint32_t prior_lock_state)
1748 {
1749 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
1750 lck_rw_word_t fake_lck;
1751
1752 /*
1753 * prior_lock_state is a snapshot of the 1st word of the
1754 * lock in question... we'll fake up a lock word from it
1755 * and carefully not access anything beyond what's defined
1756 * in the first word of a lck_rw_t
1757 */
1758 fake_lck.data = prior_lock_state;
1759
1760 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_START,
1761 trace_lck, fake_lck.want_excl, fake_lck.want_upgrade, 0, 0);
1762
1763 /*
1764 * don't wake up anyone waiting to take the lock exclusively
1765 * since we hold a read count... when the read count drops to 0,
1766 * the writers will be woken.
1767 *
1768 * wake up any waiting readers if we don't have any writers waiting,
1769 * or the lock is NOT marked as rw_priv_excl (writers have privilege)
1770 */
1771 if (!(fake_lck.priv_excl && fake_lck.w_waiting) && fake_lck.r_waiting) {
1772 thread_wakeup(LCK_RW_READER_EVENT(lck));
1773 }
1774
1775 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_END,
1776 trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, lck->lck_rw_shared_count, 0);
1777
1778 #if CONFIG_DTRACE
1779 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_TO_SHARED_DOWNGRADE, lck, 0);
1780 #endif
1781 }
1782
1783
1784 /*
1785 * Routine: lck_rw_try_lock
1786 */
1787 boolean_t
1788 lck_rw_try_lock(
1789 lck_rw_t *lck,
1790 lck_rw_type_t lck_rw_type)
1791 {
1792 if (lck_rw_type == LCK_RW_TYPE_SHARED) {
1793 return lck_rw_try_lock_shared(lck);
1794 } else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) {
1795 return lck_rw_try_lock_exclusive(lck);
1796 } else {
1797 panic("lck_rw_try_lock(): Invalid rw lock type: %x", lck_rw_type);
1798 }
1799 return FALSE;
1800 }
1801
1802 /*
1803 * Routine: lck_rw_try_lock_shared
1804 */
1805
1806 boolean_t
1807 lck_rw_try_lock_shared(lck_rw_t *lock)
1808 {
1809 uint32_t data, prev;
1810
1811 for (;;) {
1812 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
1813 if (data & LCK_RW_INTERLOCK) {
1814 #if __SMP__
1815 atomic_exchange_abort();
1816 lck_rw_interlock_spin(lock);
1817 continue;
1818 #else
1819 panic("lck_rw_try_lock_shared(): Interlock locked (%p): %x", lock, data);
1820 #endif
1821 }
1822 if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) {
1823 atomic_exchange_abort();
1824 return FALSE; /* lock is busy */
1825 }
1826 data += LCK_RW_SHARED_READER; /* Increment reader refcount */
1827 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
1828 break;
1829 }
1830 cpu_pause();
1831 }
1832 #if MACH_ASSERT
1833 thread_t owner = ordered_load_rw_owner(lock);
1834 assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
1835 #endif
1836 current_thread()->rwlock_count++;
1837 #if CONFIG_DTRACE
1838 LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED);
1839 #endif /* CONFIG_DTRACE */
1840 return TRUE;
1841 }
1842
1843
1844 /*
1845 * Routine: lck_rw_try_lock_exclusive
1846 */
1847
1848 boolean_t
1849 lck_rw_try_lock_exclusive(lck_rw_t *lock)
1850 {
1851 uint32_t data, prev;
1852 thread_t thread;
1853
1854 for (;;) {
1855 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
1856 if (data & LCK_RW_INTERLOCK) {
1857 #if __SMP__
1858 atomic_exchange_abort();
1859 lck_rw_interlock_spin(lock);
1860 continue;
1861 #else
1862 panic("lck_rw_try_lock_exclusive(): Interlock locked (%p): %x", lock, data);
1863 #endif
1864 }
1865 if (data & (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) {
1866 atomic_exchange_abort();
1867 return FALSE;
1868 }
1869 data |= LCK_RW_WANT_EXCL;
1870 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
1871 break;
1872 }
1873 cpu_pause();
1874 }
1875 thread = current_thread();
1876 thread->rwlock_count++;
1877 #if MACH_ASSERT
1878 thread_t owner = ordered_load_rw_owner(lock);
1879 assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
1880 #endif
1881 ordered_store_rw_owner(lock, thread);
1882 #if CONFIG_DTRACE
1883 LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
1884 #endif /* CONFIG_DTRACE */
1885 return TRUE;
1886 }
1887
1888
1889 /*
1890 * Routine: lck_rw_unlock
1891 */
1892 void
1893 lck_rw_unlock(
1894 lck_rw_t *lck,
1895 lck_rw_type_t lck_rw_type)
1896 {
1897 if (lck_rw_type == LCK_RW_TYPE_SHARED) {
1898 lck_rw_unlock_shared(lck);
1899 } else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) {
1900 lck_rw_unlock_exclusive(lck);
1901 } else {
1902 panic("lck_rw_unlock(): Invalid RW lock type: %d", lck_rw_type);
1903 }
1904 }
1905
1906
1907 /*
1908 * Routine: lck_rw_unlock_shared
1909 */
1910 void
1911 lck_rw_unlock_shared(
1912 lck_rw_t *lck)
1913 {
1914 lck_rw_type_t ret;
1915
1916 assertf(lck->lck_rw_owner == THREAD_NULL, "state=0x%x, owner=%p", lck->lck_rw_data, lck->lck_rw_owner);
1917 assertf(lck->lck_rw_shared_count > 0, "shared_count=0x%x", lck->lck_rw_shared_count);
1918 ret = lck_rw_done(lck);
1919
1920 if (ret != LCK_RW_TYPE_SHARED) {
1921 panic("lck_rw_unlock_shared(): lock %p held in mode: %d", lck, ret);
1922 }
1923 }
1924
1925
1926 /*
1927 * Routine: lck_rw_unlock_exclusive
1928 */
1929 void
1930 lck_rw_unlock_exclusive(
1931 lck_rw_t *lck)
1932 {
1933 lck_rw_type_t ret;
1934
1935 assertf(lck->lck_rw_owner == current_thread(), "state=0x%x, owner=%p", lck->lck_rw_data, lck->lck_rw_owner);
1936 ret = lck_rw_done(lck);
1937
1938 if (ret != LCK_RW_TYPE_EXCLUSIVE) {
1939 panic("lck_rw_unlock_exclusive(): lock %p held in mode: %d", lck, ret);
1940 }
1941 }
1942
1943
1944 /*
1945 * Routine: lck_rw_lock_exclusive_gen
1946 */
1947 static void
1948 lck_rw_lock_exclusive_gen(
1949 lck_rw_t *lock)
1950 {
1951 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock);
1952 lck_rw_word_t word;
1953 int slept = 0;
1954 boolean_t gotlock = 0;
1955 boolean_t not_shared_or_upgrade = 0;
1956 wait_result_t res = 0;
1957 boolean_t istate;
1958
1959 #if CONFIG_DTRACE
1960 boolean_t dtrace_ls_initialized = FALSE;
1961 boolean_t dtrace_rwl_excl_spin, dtrace_rwl_excl_block, dtrace_ls_enabled = FALSE;
1962 uint64_t wait_interval = 0;
1963 int readers_at_sleep = 0;
1964 #endif
1965
1966 /*
1967 * Try to acquire the lck_rw_want_excl bit.
1968 */
1969 while (!lck_rw_grab(lock, LCK_RW_GRAB_WANT, FALSE)) {
1970 #if CONFIG_DTRACE
1971 if (dtrace_ls_initialized == FALSE) {
1972 dtrace_ls_initialized = TRUE;
1973 dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
1974 dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
1975 dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
1976 if (dtrace_ls_enabled) {
1977 /*
1978 * Either sleeping or spinning is happening,
1979 * start a timing of our delay interval now.
1980 */
1981 readers_at_sleep = lock->lck_rw_shared_count;
1982 wait_interval = mach_absolute_time();
1983 }
1984 }
1985 #endif
1986
1987 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
1988
1989 gotlock = lck_rw_grab(lock, LCK_RW_GRAB_WANT, TRUE);
1990
1991 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_END, trace_lck, 0, 0, gotlock, 0);
1992
1993 if (gotlock) {
1994 break;
1995 }
1996 /*
1997 * if we get here, the deadline has expired w/o us
1998 * being able to grab the lock exclusively;
1999 * check to see if we're allowed to do a thread_block
2000 */
2001 word.data = ordered_load_rw(lock);
2002 if (word.can_sleep) {
2003 istate = lck_interlock_lock(lock);
2004 word.data = ordered_load_rw(lock);
2005
2006 if (word.want_excl) {
2007 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
2008
2009 word.w_waiting = 1;
2010 ordered_store_rw(lock, word.data);
2011
2012 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite);
2013 res = assert_wait(LCK_RW_WRITER_EVENT(lock),
2014 THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
2015 lck_interlock_unlock(lock, istate);
2016
2017 if (res == THREAD_WAITING) {
2018 res = thread_block(THREAD_CONTINUE_NULL);
2019 slept++;
2020 }
2021 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_END, trace_lck, res, slept, 0, 0);
2022 } else {
2023 word.want_excl = 1;
2024 ordered_store_rw(lock, word.data);
2025 lck_interlock_unlock(lock, istate);
2026 break;
2027 }
2028 }
2029 }
2030 /*
2031 * Wait for readers (and upgrades) to finish...
2032 */
2033 while (!lck_rw_drain_status(lock, LCK_RW_SHARED_MASK | LCK_RW_WANT_UPGRADE, FALSE)) {
2034 #if CONFIG_DTRACE
2035 /*
2036 * Either sleeping or spinning is happening, start
2037 * a timing of our delay interval now. If we set it
2038 * to -1 we don't have accurate data so we cannot later
2039 * decide to record a dtrace spin or sleep event.
2040 */
2041 if (dtrace_ls_initialized == FALSE) {
2042 dtrace_ls_initialized = TRUE;
2043 dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
2044 dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
2045 dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
2046 if (dtrace_ls_enabled) {
2047 /*
2048 * Either sleeping or spinning is happening,
2049 * start a timing of our delay interval now.
2050 */
2051 readers_at_sleep = lock->lck_rw_shared_count;
2052 wait_interval = mach_absolute_time();
2053 }
2054 }
2055 #endif
2056
2057 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
2058
2059 not_shared_or_upgrade = lck_rw_drain_status(lock, LCK_RW_SHARED_MASK | LCK_RW_WANT_UPGRADE, TRUE);
2060
2061 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_END, trace_lck, 0, 0, not_shared_or_upgrade, 0);
2062
2063 if (not_shared_or_upgrade) {
2064 break;
2065 }
2066 /*
2067 * if we get here, the deadline has expired w/o us
2068 * being able to grab the lock exclusively;
2069 * check to see if we're allowed to do a thread_block
2070 */
2071 word.data = ordered_load_rw(lock);
2072 if (word.can_sleep) {
2073 istate = lck_interlock_lock(lock);
2074 word.data = ordered_load_rw(lock);
2075
2076 if (word.shared_count != 0 || word.want_upgrade) {
2077 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
2078
2079 word.w_waiting = 1;
2080 ordered_store_rw(lock, word.data);
2081
2082 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite);
2083 res = assert_wait(LCK_RW_WRITER_EVENT(lock),
2084 THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
2085 lck_interlock_unlock(lock, istate);
2086
2087 if (res == THREAD_WAITING) {
2088 res = thread_block(THREAD_CONTINUE_NULL);
2089 slept++;
2090 }
2091 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_END, trace_lck, res, slept, 0, 0);
2092 } else {
2093 lck_interlock_unlock(lock, istate);
2094 /*
2095 * must own the lock now, since we checked for
2096 * readers or upgrade owner behind the interlock;
2097 * no need for a call to 'lck_rw_drain_status'
2098 */
2099 break;
2100 }
2101 }
2102 }
2103
2104 #if CONFIG_DTRACE
2105 /*
2106 * Decide what latencies we suffered that are Dtrace events.
2107 * If we have set wait_interval, then we either spun or slept.
2108 * At least we get out from under the interlock before we record,
2109 * which is the best we can do here to minimize the impact
2110 * of the tracing.
2111 * If we have set wait_interval to -1, then dtrace was not enabled when we
2112 * started sleeping/spinning so we don't record this event.
2113 */
2114 if (dtrace_ls_enabled == TRUE) {
2115 if (slept == 0) {
2116 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_SPIN, lock,
2117 mach_absolute_time() - wait_interval, 1);
2118 } else {
2119 /*
2120 * For the blocking case, we also record whether the lock was
2121 * held for read or write when we blocked, and how many readers.
2122 * Notice that above we recorded this before we dropped
2123 * the interlock, so the count is accurate.
2124 */
2125 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_BLOCK, lock,
2126 mach_absolute_time() - wait_interval, 1,
2127 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
2128 }
2129 }
2130 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, 1);
2131 #endif /* CONFIG_DTRACE */
2132 }
2133
2134 /*
2135 * Routine: lck_rw_done
2136 */
2137
2138 lck_rw_type_t
2139 lck_rw_done(lck_rw_t *lock)
2140 {
2141 uint32_t data, prev;
2142 boolean_t once = FALSE;
2143
2144 for (;;) {
2145 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_release_smp);
2146 if (data & LCK_RW_INTERLOCK) { /* wait for interlock to clear */
2147 #if __SMP__
2148 atomic_exchange_abort();
2149 lck_rw_interlock_spin(lock);
2150 continue;
2151 #else
2152 panic("lck_rw_done(): Interlock locked (%p): %x", lock, data);
2153 #endif // __SMP__
2154 }
2155 if (data & LCK_RW_SHARED_MASK) { /* lock is held shared */
2156 assertf(lock->lck_rw_owner == THREAD_NULL, "state=0x%x, owner=%p", lock->lck_rw_data, lock->lck_rw_owner);
2157 data -= LCK_RW_SHARED_READER;
2158 if ((data & LCK_RW_SHARED_MASK) == 0) { /* if reader count has now gone to 0, check for waiters */
2159 goto check_waiters;
2160 }
2161 } else { /* if reader count == 0, must be exclusive lock */
2162 if (data & LCK_RW_WANT_UPGRADE) {
2163 data &= ~(LCK_RW_WANT_UPGRADE);
2164 } else {
2165 if (data & LCK_RW_WANT_EXCL) {
2166 data &= ~(LCK_RW_WANT_EXCL);
2167 } else { /* lock is not 'owned', panic */
2168 panic("Releasing non-exclusive RW lock without a reader refcount!");
2169 }
2170 }
2171 if (!once) {
2172 // Only check for holder and clear it once
2173 assertf(lock->lck_rw_owner == current_thread(), "state=0x%x, owner=%p", lock->lck_rw_data, lock->lck_rw_owner);
2174 ordered_store_rw_owner(lock, THREAD_NULL);
2175 once = TRUE;
2176 }
2177 check_waiters:
2178 /*
2179 * test the original values to match what
2180 * lck_rw_done_gen is going to do to determine
2181 * which wakeups need to happen...
2182 *
2183 * if !(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting)
2184 */
2185 if (prev & LCK_RW_W_WAITING) {
2186 data &= ~(LCK_RW_W_WAITING);
2187 if ((prev & LCK_RW_PRIV_EXCL) == 0) {
2188 data &= ~(LCK_RW_R_WAITING);
2189 }
2190 } else {
2191 data &= ~(LCK_RW_R_WAITING);
2192 }
2193 }
2194 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_release_smp)) {
2195 break;
2196 }
2197 cpu_pause();
2198 }
2199 return lck_rw_done_gen(lock, prev);
2200 }
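
/*
 * Usage sketch: lck_rw_done() releases whichever mode is currently held and
 * reports it, which suits callers whose acquire mode was chosen at runtime.
 * The lock name and need_write flag are hypothetical.
 */
#if 0	/* usage sketch only */
static void
example_scan(lck_rw_t *example_lck, boolean_t need_write)
{
	if (need_write) {
		lck_rw_lock_exclusive(example_lck);
	} else {
		lck_rw_lock_shared(example_lck);
	}
	/* ... work on state protected by example_lck ... */
	(void) lck_rw_done(example_lck);	/* releases shared or exclusive as appropriate */
}
#endif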
2201
2202 /*
2203 * Routine: lck_rw_done_gen
2204 *
2205 * called from lck_rw_done()...
2206 * prior_lock_state is the value in the 1st
2207 * word of the lock at the time of a successful
2208 * atomic compare and exchange with the new value...
2209 * it represents the state of the lock before we
2210 * decremented the rw_shared_count or cleared either
2211 * rw_want_upgrade or rw_want_excl and
2212 * the lck_x_waiting bits... since the caller
2213 * has already changed the state atomically,
2214 * we just need to decide if we should
2215 * wake up anyone and what value to return... we do
2216 * this by examining the state of the lock before
2217 * we changed it
2218 */
2219 static lck_rw_type_t
2220 lck_rw_done_gen(
2221 lck_rw_t *lck,
2222 uint32_t prior_lock_state)
2223 {
2224 lck_rw_word_t fake_lck;
2225 lck_rw_type_t lock_type;
2226 thread_t thread;
2227 uint32_t rwlock_count;
2228
2229 /*
2230 * prior_lock_state is a snapshot of the 1st word of the
2231 * lock in question... we'll fake up a lck_rw_word_t from it
2232 * and carefully not access anything beyond what's defined
2233 * in the first word of a lck_rw_t
2234 */
2235 fake_lck.data = prior_lock_state;
2236
2237 if (fake_lck.shared_count <= 1) {
2238 if (fake_lck.w_waiting) {
2239 thread_wakeup(LCK_RW_WRITER_EVENT(lck));
2240 }
2241
2242 if (!(fake_lck.priv_excl && fake_lck.w_waiting) && fake_lck.r_waiting) {
2243 thread_wakeup(LCK_RW_READER_EVENT(lck));
2244 }
2245 }
2246 if (fake_lck.shared_count) {
2247 lock_type = LCK_RW_TYPE_SHARED;
2248 } else {
2249 lock_type = LCK_RW_TYPE_EXCLUSIVE;
2250 }
2251
2252 /* Check if dropping the lock means that we need to unpromote */
2253 thread = current_thread();
2254 rwlock_count = thread->rwlock_count--;
2255 #if MACH_LDEBUG
2256 if (rwlock_count == 0) {
2257 panic("rw lock count underflow for thread %p", thread);
2258 }
2259 #endif
2260 if ((rwlock_count == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
2261 /* sched_flags checked without lock, but will be rechecked while clearing */
2262 lck_rw_clear_promotion(thread, unslide_for_kdebug(lck));
2263 }
2264 #if CONFIG_DTRACE
2265 LOCKSTAT_RECORD(LS_LCK_RW_DONE_RELEASE, lck, lock_type == LCK_RW_TYPE_SHARED ? 0 : 1);
2266 #endif
2267 return lock_type;
2268 }
2269
2270 /*
2271 * Routine: lck_rw_lock_shared_gen
2272 * Function:
2273 * Fast path code has determined that this lock
2274 * is held exclusively... this is where we spin/block
2275 * until we can acquire the lock in the shared mode
2276 */
2277 static void
2278 lck_rw_lock_shared_gen(
2279 lck_rw_t *lck)
2280 {
2281 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
2282 lck_rw_word_t word;
2283 boolean_t gotlock = 0;
2284 int slept = 0;
2285 wait_result_t res = 0;
2286 boolean_t istate;
2287
2288 #if CONFIG_DTRACE
2289 uint64_t wait_interval = 0;
2290 int readers_at_sleep = 0;
2291 boolean_t dtrace_ls_initialized = FALSE;
2292 boolean_t dtrace_rwl_shared_spin, dtrace_rwl_shared_block, dtrace_ls_enabled = FALSE;
2293 #endif /* CONFIG_DTRACE */
2294
2295 while (!lck_rw_grab(lck, LCK_RW_GRAB_SHARED, FALSE)) {
2296 #if CONFIG_DTRACE
2297 if (dtrace_ls_initialized == FALSE) {
2298 dtrace_ls_initialized = TRUE;
2299 dtrace_rwl_shared_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_SPIN] != 0);
2300 dtrace_rwl_shared_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_BLOCK] != 0);
2301 dtrace_ls_enabled = dtrace_rwl_shared_spin || dtrace_rwl_shared_block;
2302 if (dtrace_ls_enabled) {
2303 /*
2304 * Either sleeping or spinning is happening,
2305 * start a timing of our delay interval now.
2306 */
2307 readers_at_sleep = lck->lck_rw_shared_count;
2308 wait_interval = mach_absolute_time();
2309 }
2310 }
2311 #endif
2312
2313 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_START,
2314 trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, 0, 0);
2315
2316 gotlock = lck_rw_grab(lck, LCK_RW_GRAB_SHARED, TRUE);
2317
2318 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_END,
2319 trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, gotlock, 0);
2320
2321 if (gotlock) {
2322 break;
2323 }
2324 /*
2325 * if we get here, the deadline has expired w/o us
2326 * being able to grab the lock for read;
2327 * check to see if we're allowed to do a thread_block
2328 */
2329 if (lck->lck_rw_can_sleep) {
2330 istate = lck_interlock_lock(lck);
2331
2332 word.data = ordered_load_rw(lck);
2333 if ((word.want_excl || word.want_upgrade) &&
2334 ((word.shared_count == 0) || word.priv_excl)) {
2335 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_START,
2336 trace_lck, word.want_excl, word.want_upgrade, 0, 0);
2337
2338 word.r_waiting = 1;
2339 ordered_store_rw(lck, word.data);
2340
2341 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockRead);
2342 res = assert_wait(LCK_RW_READER_EVENT(lck),
2343 THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
2344 lck_interlock_unlock(lck, istate);
2345
2346 if (res == THREAD_WAITING) {
2347 res = thread_block(THREAD_CONTINUE_NULL);
2348 slept++;
2349 }
2350 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_END,
2351 trace_lck, res, slept, 0, 0);
2352 } else {
2353 word.shared_count++;
2354 ordered_store_rw(lck, word.data);
2355 lck_interlock_unlock(lck, istate);
2356 break;
2357 }
2358 }
2359 }
2360
2361 #if CONFIG_DTRACE
2362 if (dtrace_ls_enabled == TRUE) {
2363 if (slept == 0) {
2364 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_SPIN, lck, mach_absolute_time() - wait_interval, 0);
2365 } else {
2366 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_BLOCK, lck,
2367 mach_absolute_time() - wait_interval, 0,
2368 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
2369 }
2370 }
2371 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lck, 0);
2372 #endif /* CONFIG_DTRACE */
2373 }
2374
2375
2376 void
2377 lck_rw_assert(
2378 lck_rw_t *lck,
2379 unsigned int type)
2380 {
2381 switch (type) {
2382 case LCK_RW_ASSERT_SHARED:
2383 if ((lck->lck_rw_shared_count != 0) &&
2384 (lck->lck_rw_owner == THREAD_NULL)) {
2385 return;
2386 }
2387 break;
2388 case LCK_RW_ASSERT_EXCLUSIVE:
2389 if ((lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
2390 (lck->lck_rw_shared_count == 0) &&
2391 (lck->lck_rw_owner == current_thread())) {
2392 return;
2393 }
2394 break;
2395 case LCK_RW_ASSERT_HELD:
2396 if (lck->lck_rw_shared_count != 0) {
2397 return; // Held shared
2398 }
2399 if ((lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
2400 (lck->lck_rw_owner == current_thread())) {
2401 return; // Held exclusive
2402 }
2403 break;
2404 case LCK_RW_ASSERT_NOTHELD:
2405 if ((lck->lck_rw_shared_count == 0) &&
2406 !(lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
2407 (lck->lck_rw_owner == THREAD_NULL)) {
2408 return;
2409 }
2410 break;
2411 default:
2412 break;
2413 }
2414 panic("rw lock (%p)%s held (mode=%u)", lck, (type == LCK_RW_ASSERT_NOTHELD ? "" : " not"), type);
2415 }
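
/*
 * Usage sketch: lck_rw_assert() documents and enforces a routine's locking
 * contract at its entry point.  The function and lock names are hypothetical.
 */
#if 0	/* usage sketch only */
static void
example_requires_exclusive(lck_rw_t *example_lck)
{
	lck_rw_assert(example_lck, LCK_RW_ASSERT_EXCLUSIVE);
	/* ... mutate state that may only change under the exclusive hold ... */
}
#endif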
2416
2417
2418 /*
2419 * Routine: kdp_lck_rw_lock_is_acquired_exclusive
2420 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
2421 */
2422 boolean_t
2423 kdp_lck_rw_lock_is_acquired_exclusive(lck_rw_t *lck)
2424 {
2425 if (not_in_kdp) {
2426 panic("rw lock exclusive check done outside of kernel debugger");
2427 }
2428 return ((lck->lck_rw_want_upgrade || lck->lck_rw_want_excl) && (lck->lck_rw_shared_count == 0)) ? TRUE : FALSE;
2429 }
2430
2431 /*
2432 * The C portion of the mutex package. These routines are only invoked
2433 * if the optimized assembler routines can't do the work.
2434 */
2435
2436 /*
2437 * Forward declaration
2438 */
2439
2440 void
2441 lck_mtx_ext_init(
2442 lck_mtx_ext_t * lck,
2443 lck_grp_t * grp,
2444 lck_attr_t * attr);
2445
2446 /*
2447 * Routine: lck_mtx_alloc_init
2448 */
2449 lck_mtx_t *
2450 lck_mtx_alloc_init(
2451 lck_grp_t * grp,
2452 lck_attr_t * attr)
2453 {
2454 lck_mtx_t *lck;
2455
2456 if ((lck = (lck_mtx_t *) kalloc(sizeof(lck_mtx_t))) != 0) {
2457 lck_mtx_init(lck, grp, attr);
2458 }
2459
2460 return lck;
2461 }
2462
2463 /*
2464 * Routine: lck_mtx_free
2465 */
2466 void
2467 lck_mtx_free(
2468 lck_mtx_t * lck,
2469 lck_grp_t * grp)
2470 {
2471 lck_mtx_destroy(lck, grp);
2472 kfree(lck, sizeof(lck_mtx_t));
2473 }
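
/*
 * Lifecycle sketch: a heap-allocated mutex is created against a lock group
 * and released through lck_mtx_free() so the group accounting stays
 * balanced.  The group name string and local variables are hypothetical.
 */
#if 0	/* usage sketch only */
static void
example_mtx_lifecycle(void)
{
	lck_grp_t *grp = lck_grp_alloc_init("example.grp", LCK_GRP_ATTR_NULL);
	lck_mtx_t *mtx = lck_mtx_alloc_init(grp, LCK_ATTR_NULL);

	lck_mtx_lock(mtx);
	/* ... critical section ... */
	lck_mtx_unlock(mtx);

	lck_mtx_free(mtx, grp);		/* destroys the mutex and drops the group count */
	lck_grp_free(grp);
}
#endif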
2474
2475 /*
2476 * Routine: lck_mtx_init
2477 */
2478 void
2479 lck_mtx_init(
2480 lck_mtx_t * lck,
2481 lck_grp_t * grp,
2482 lck_attr_t * attr)
2483 {
2484 #ifdef BER_XXX
2485 lck_mtx_ext_t *lck_ext;
2486 #endif
2487 lck_attr_t *lck_attr;
2488
2489 if (attr != LCK_ATTR_NULL) {
2490 lck_attr = attr;
2491 } else {
2492 lck_attr = &LockDefaultLckAttr;
2493 }
2494
2495 #ifdef BER_XXX
2496 if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
2497 if ((lck_ext = (lck_mtx_ext_t *) kalloc(sizeof(lck_mtx_ext_t))) != 0) {
2498 lck_mtx_ext_init(lck_ext, grp, lck_attr);
2499 lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
2500 lck->lck_mtx_ptr = lck_ext;
2501 lck->lck_mtx_type = LCK_MTX_TYPE;
2502 }
2503 } else
2504 #endif
2505 {
2506 lck->lck_mtx_ptr = NULL; // Clear any padding in the union fields below
2507 lck->lck_mtx_waiters = 0;
2508 lck->lck_mtx_pri = 0;
2509 lck->lck_mtx_type = LCK_MTX_TYPE;
2510 ordered_store_mtx(lck, 0);
2511 }
2512 lck_grp_reference(grp);
2513 lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
2514 }
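
/*
 * Usage sketch: lck_mtx_init() sets up a mutex embedded in a caller-owned
 * structure instead of heap-allocating one.  The structure, field, and
 * group names are hypothetical.
 */
#if 0	/* usage sketch only */
struct example_obj {
	lck_mtx_t	eo_lock;
	int		eo_refcnt;
};

static void
example_obj_setup(struct example_obj *obj, lck_grp_t *grp)
{
	lck_mtx_init(&obj->eo_lock, grp, LCK_ATTR_NULL);
	obj->eo_refcnt = 1;
}
#endif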
2515
2516 /*
2517 * Routine: lck_mtx_init_ext
2518 */
2519 void
2520 lck_mtx_init_ext(
2521 lck_mtx_t * lck,
2522 lck_mtx_ext_t * lck_ext,
2523 lck_grp_t * grp,
2524 lck_attr_t * attr)
2525 {
2526 lck_attr_t *lck_attr;
2527
2528 if (attr != LCK_ATTR_NULL) {
2529 lck_attr = attr;
2530 } else {
2531 lck_attr = &LockDefaultLckAttr;
2532 }
2533
2534 if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
2535 lck_mtx_ext_init(lck_ext, grp, lck_attr);
2536 lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
2537 lck->lck_mtx_ptr = lck_ext;
2538 lck->lck_mtx_type = LCK_MTX_TYPE;
2539 } else {
2540 lck->lck_mtx_waiters = 0;
2541 lck->lck_mtx_pri = 0;
2542 lck->lck_mtx_type = LCK_MTX_TYPE;
2543 ordered_store_mtx(lck, 0);
2544 }
2545 lck_grp_reference(grp);
2546 lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
2547 }
2548
2549 /*
2550 * Routine: lck_mtx_ext_init
2551 */
2552 void
2553 lck_mtx_ext_init(
2554 lck_mtx_ext_t * lck,
2555 lck_grp_t * grp,
2556 lck_attr_t * attr)
2557 {
2558 bzero((void *) lck, sizeof(lck_mtx_ext_t));
2559
2560 lck->lck_mtx.lck_mtx_type = LCK_MTX_TYPE;
2561
2562 if ((attr->lck_attr_val) & LCK_ATTR_DEBUG) {
2563 lck->lck_mtx_deb.type = MUTEX_TAG;
2564 lck->lck_mtx_attr |= LCK_MTX_ATTR_DEBUG;
2565 }
2566 lck->lck_mtx_grp = grp;
2567
2568 if (grp->lck_grp_attr & LCK_GRP_ATTR_STAT) {
2569 lck->lck_mtx_attr |= LCK_MTX_ATTR_STAT;
2570 }
2571 }
2572
2573 /* The slow versions */
2574 static void lck_mtx_lock_contended(lck_mtx_t *lock, thread_t thread, boolean_t interlocked);
2575 static boolean_t lck_mtx_try_lock_contended(lck_mtx_t *lock, thread_t thread);
2576 static void lck_mtx_unlock_contended(lck_mtx_t *lock, thread_t thread, boolean_t interlocked);
2577
2578 /* The adaptive spin function */
2579 static spinwait_result_t lck_mtx_lock_contended_spinwait_arm(lck_mtx_t *lock, thread_t thread, boolean_t interlocked);
2580
2581 /*
2582 * Routine: lck_mtx_verify
2583 *
2584 * Verify if a mutex is valid
2585 */
2586 static inline void
2587 lck_mtx_verify(lck_mtx_t *lock)
2588 {
2589 if (lock->lck_mtx_type != LCK_MTX_TYPE) {
2590 panic("Invalid mutex %p", lock);
2591 }
2592 #if DEVELOPMENT || DEBUG
2593 if (lock->lck_mtx_tag == LCK_MTX_TAG_DESTROYED) {
2594 panic("Mutex destroyed %p", lock);
2595 }
2596 #endif /* DEVELOPMENT || DEBUG */
2597 }
2598
2599 /*
2600 * Routine: lck_mtx_check_preemption
2601 *
2602 * Verify preemption is enabled when attempting to acquire a mutex.
2603 */
2604
2605 static inline void
2606 lck_mtx_check_preemption(lck_mtx_t *lock)
2607 {
2608 #if DEVELOPMENT || DEBUG
2609 int pl = get_preemption_level();
2610
2611 if (pl != 0) {
2612 panic("Attempt to take mutex with preemption disabled. Lock=%p, level=%d", lock, pl);
2613 }
2614 #else
2615 (void)lock;
2616 #endif
2617 }
2618
2619 /*
2620 * Routine: lck_mtx_lock
2621 */
2622 void
2623 lck_mtx_lock(lck_mtx_t *lock)
2624 {
2625 thread_t thread;
2626
2627 lck_mtx_verify(lock);
2628 lck_mtx_check_preemption(lock);
2629 thread = current_thread();
2630 if (atomic_compare_exchange(&lock->lck_mtx_data, 0, LCK_MTX_THREAD_TO_STATE(thread),
2631 memory_order_acquire_smp, FALSE)) {
2632 #if CONFIG_DTRACE
2633 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, lock, 0);
2634 #endif /* CONFIG_DTRACE */
2635 return;
2636 }
2637 lck_mtx_lock_contended(lock, thread, FALSE);
2638 }
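
/*
 * Usage sketch: the common lock/unlock pairing around a blocking-safe
 * critical section; preemption must be enabled on entry (see
 * lck_mtx_check_preemption() above).  Names are hypothetical.
 */
#if 0	/* usage sketch only */
static void
example_mtx_update(lck_mtx_t *example_mtx, uint64_t *counter)
{
	lck_mtx_lock(example_mtx);
	lck_mtx_assert(example_mtx, LCK_MTX_ASSERT_OWNED);
	(*counter)++;			/* protected by example_mtx */
	lck_mtx_unlock(example_mtx);
}
#endif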
2639
2640 /*
2641 * This is the slow version of mutex locking.
2642 */
2643 static void NOINLINE
2644 lck_mtx_lock_contended(lck_mtx_t *lock, thread_t thread, boolean_t interlocked)
2645 {
2646 thread_t holding_thread;
2647 uintptr_t state;
2648 int waiters = 0;
2649 spinwait_result_t sw_res;
2650
2651 /* Loop waiting until I see that the mutex is unowned */
2652 for (;;) {
2653 sw_res = lck_mtx_lock_contended_spinwait_arm(lock, thread, interlocked);
2654 interlocked = FALSE;
2655
2656 switch (sw_res) {
2657 case SPINWAIT_ACQUIRED:
2658 goto done;
2659 case SPINWAIT_INTERLOCK:
2660 goto set_owner;
2661 default:
2662 break;
2663 }
2664
2665 state = ordered_load_mtx(lock);
2666 holding_thread = LCK_MTX_STATE_TO_THREAD(state);
2667 if (holding_thread == NULL) {
2668 break;
2669 }
2670 ordered_store_mtx(lock, (state | LCK_ILOCK | ARM_LCK_WAITERS)); // Set waiters bit and wait
2671 lck_mtx_lock_wait(lock, holding_thread);
2672 /* returns interlock unlocked */
2673 }
2674
2675 set_owner:
2676 /* Hooray, I'm the new owner! */
2677 state = ordered_load_mtx(lock);
2678
2679 if (state & ARM_LCK_WAITERS) {
2680 /* Skip lck_mtx_lock_acquire if there are no waiters. */
2681 waiters = lck_mtx_lock_acquire(lock);
2682 }
2683
2684 state = LCK_MTX_THREAD_TO_STATE(thread);
2685 if (waiters != 0) {
2686 state |= ARM_LCK_WAITERS;
2687 }
2688 #if __SMP__
2689 state |= LCK_ILOCK; // Preserve interlock
2690 ordered_store_mtx(lock, state); // Set ownership
2691 interlock_unlock(lock); // Release interlock, enable preemption
2692 #else
2693 ordered_store_mtx(lock, state); // Set ownership
2694 enable_preemption();
2695 #endif
2696
2697 done:
2698 load_memory_barrier();
2699
2700 #if CONFIG_DTRACE
2701 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, lock, 0);
2702 #endif /* CONFIG_DTRACE */
2703 }
2704
2705 /*
2706 * Routine: lck_mtx_lock_contended_spinwait_arm
2707 *
2708 * Invoked trying to acquire a mutex when there is contention but
2709 * the holder is running on another processor. We spin for up to a maximum
2710 * time waiting for the lock to be released.
2711 */
2712 static spinwait_result_t
2713 lck_mtx_lock_contended_spinwait_arm(lck_mtx_t *lock, thread_t thread, boolean_t interlocked)
2714 {
2715 int has_interlock = (int)interlocked;
2716 #if __SMP__
2717 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock);
2718 thread_t holder;
2719 uint64_t overall_deadline;
2720 uint64_t check_owner_deadline;
2721 uint64_t cur_time;
2722 spinwait_result_t retval = SPINWAIT_DID_SPIN;
2723 int loopcount = 0;
2724 uintptr_t state;
2725 boolean_t istate;
2726
2727 if (__improbable(!(lck_mtx_adaptive_spin_mode & ADAPTIVE_SPIN_ENABLE))) {
2728 if (!has_interlock) {
2729 interlock_lock(lock);
2730 }
2731
2732 return SPINWAIT_DID_NOT_SPIN;
2733 }
2734
2735 state = ordered_load_mtx(lock);
2736
2737 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_START,
2738 trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(LCK_MTX_STATE_TO_THREAD(state)), lock->lck_mtx_waiters, 0, 0);
2739
2740 cur_time = mach_absolute_time();
2741 overall_deadline = cur_time + MutexSpin;
2742 check_owner_deadline = cur_time;
2743
2744 if (has_interlock) {
2745 istate = ml_get_interrupts_enabled();
2746 }
2747
2748 /* Snoop the lock state */
2749 state = ordered_load_mtx(lock);
2750
2751 /*
2752 * Spin while:
2753 * - mutex is locked, and
2754 * - it's locked as a spin lock, and
2755 * - owner is running on another processor, and
2756 * - owner (processor) is not idling, and
2757 * - we haven't spun for long enough.
2758 */
2759 do {
2760 if (!(state & LCK_ILOCK) || has_interlock) {
2761 if (!has_interlock) {
2762 has_interlock = interlock_try_disable_interrupts(lock, &istate);
2763 }
2764
2765 if (has_interlock) {
2766 state = ordered_load_mtx(lock);
2767 holder = LCK_MTX_STATE_TO_THREAD(state);
2768
2769 if (holder == NULL) {
2770 retval = SPINWAIT_INTERLOCK;
2771
2772 if (istate) {
2773 ml_set_interrupts_enabled(istate);
2774 }
2775
2776 break;
2777 }
2778
2779 if (!(holder->machine.machine_thread_flags & MACHINE_THREAD_FLAGS_ON_CPU) ||
2780 (holder->state & TH_IDLE)) {
2781 if (loopcount == 0) {
2782 retval = SPINWAIT_DID_NOT_SPIN;
2783 }
2784
2785 if (istate) {
2786 ml_set_interrupts_enabled(istate);
2787 }
2788
2789 break;
2790 }
2791
2792 interlock_unlock_enable_interrupts(lock, istate);
2793 has_interlock = 0;
2794 }
2795 }
2796
2797 cur_time = mach_absolute_time();
2798
2799 if (cur_time >= overall_deadline) {
2800 break;
2801 }
2802
2803 check_owner_deadline = cur_time + (MutexSpin / SPINWAIT_OWNER_CHECK_COUNT);
2804
2805 if (cur_time < check_owner_deadline) {
2806 machine_delay_until(check_owner_deadline - cur_time, check_owner_deadline);
2807 }
2808
2809 /* Snoop the lock state */
2810 state = ordered_load_mtx(lock);
2811
2812 if (state == 0) {
2813 /* Try to grab the lock. */
2814 if (os_atomic_cmpxchg(&lock->lck_mtx_data,
2815 0, LCK_MTX_THREAD_TO_STATE(thread), acquire)) {
2816 retval = SPINWAIT_ACQUIRED;
2817 break;
2818 }
2819 }
2820
2821 loopcount++;
2822 } while (TRUE);
2823
2824 #if CONFIG_DTRACE
2825 /*
2826 * overall_deadline was set MutexSpin beyond the time we started spinning,
2827 * so if dtrace is active we can compute backwards to decide how
2828 * long we spun.
2829 *
2830 * Note that we record a different probe id depending on whether
2831 * this is a direct or indirect mutex. This allows us to
2832 * penalize only lock groups that have debug/stats enabled
2833 * with dtrace processing if desired.
2834 */
2835 if (__probable(lock->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)) {
2836 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN, lock,
2837 mach_absolute_time() - (overall_deadline - MutexSpin));
2838 } else {
2839 LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_SPIN, lock,
2840 mach_absolute_time() - (overall_deadline - MutexSpin));
2841 }
2842 /* The lockstat acquire event is recorded by the caller. */
2843 #endif
2844
2845 state = ordered_load_mtx(lock);
2846
2847 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_END,
2848 trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(LCK_MTX_STATE_TO_THREAD(state)), lock->lck_mtx_waiters, retval, 0);
2849 #else /* __SMP__ */
2850 /* Spinwaiting is not useful on UP systems. */
2851 #pragma unused(lock, thread)
2852 int retval = SPINWAIT_DID_NOT_SPIN;
2853 #endif /* __SMP__ */
2854 if ((!has_interlock) && (retval != SPINWAIT_ACQUIRED)) {
2855 /* We must own either the lock or the interlock on return. */
2856 interlock_lock(lock);
2857 }
2858
2859 return retval;
2860 }
2861
2862 /*
2863 * Common code for mutex locking as spinlock
2864 */
2865 static inline void
2866 lck_mtx_lock_spin_internal(lck_mtx_t *lock, boolean_t allow_held_as_mutex)
2867 {
2868 uintptr_t state;
2869
2870 interlock_lock(lock);
2871 state = ordered_load_mtx(lock);
2872 if (LCK_MTX_STATE_TO_THREAD(state)) {
2873 if (allow_held_as_mutex) {
2874 lck_mtx_lock_contended(lock, current_thread(), TRUE);
2875 } else {
2876 // "Always" variants can never block. If the lock is held and blocking is not allowed
2877 // then someone is mixing always and non-always calls on the same lock, which is
2878 // forbidden.
2879 panic("Attempting to block on a lock taken as spin-always %p", lock);
2880 }
2881 return;
2882 }
2883 state &= ARM_LCK_WAITERS; // Preserve waiters bit
2884 state |= (LCK_MTX_SPIN_TAG | LCK_ILOCK); // Add spin tag and maintain interlock
2885 ordered_store_mtx(lock, state);
2886 load_memory_barrier();
2887
2888 #if CONFIG_DTRACE
2889 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN_ACQUIRE, lock, 0);
2890 #endif /* CONFIG_DTRACE */
2891 }
2892
2893 /*
2894 * Routine: lck_mtx_lock_spin
2895 */
2896 void
2897 lck_mtx_lock_spin(lck_mtx_t *lock)
2898 {
2899 lck_mtx_check_preemption(lock);
2900 lck_mtx_lock_spin_internal(lock, TRUE);
2901 }
2902
2903 /*
2904 * Routine: lck_mtx_lock_spin_always
2905 */
2906 void
2907 lck_mtx_lock_spin_always(lck_mtx_t *lock)
2908 {
2909 lck_mtx_lock_spin_internal(lock, FALSE);
2910 }
2911
2912 /*
2913 * Routine: lck_mtx_try_lock
2914 */
2915 boolean_t
2916 lck_mtx_try_lock(lck_mtx_t *lock)
2917 {
2918 thread_t thread = current_thread();
2919
2920 lck_mtx_verify(lock);
2921 if (atomic_compare_exchange(&lock->lck_mtx_data, 0, LCK_MTX_THREAD_TO_STATE(thread),
2922 memory_order_acquire_smp, FALSE)) {
2923 #if CONFIG_DTRACE
2924 LOCKSTAT_RECORD(LS_LCK_MTX_TRY_LOCK_ACQUIRE, lock, 0);
2925 #endif /* CONFIG_DTRACE */
2926 return TRUE;
2927 }
2928 return lck_mtx_try_lock_contended(lock, thread);
2929 }
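
/*
 * Usage sketch: lck_mtx_try_lock() suits opportunistic work that is skipped
 * rather than blocked on when the mutex is already held.  Names are
 * hypothetical.
 */
#if 0	/* usage sketch only */
static boolean_t
example_try_flush(lck_mtx_t *example_mtx)
{
	if (!lck_mtx_try_lock(example_mtx)) {
		return FALSE;		/* busy: caller retries later */
	}
	/* ... flush state protected by example_mtx ... */
	lck_mtx_unlock(example_mtx);
	return TRUE;
}
#endif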
2930
2931 static boolean_t NOINLINE
2932 lck_mtx_try_lock_contended(lck_mtx_t *lock, thread_t thread)
2933 {
2934 thread_t holding_thread;
2935 uintptr_t state;
2936 int waiters;
2937
2938 #if __SMP__
2939 interlock_lock(lock);
2940 state = ordered_load_mtx(lock);
2941 holding_thread = LCK_MTX_STATE_TO_THREAD(state);
2942 if (holding_thread) {
2943 interlock_unlock(lock);
2944 return FALSE;
2945 }
2946 #else
2947 disable_preemption_for_thread(thread);
2948 state = ordered_load_mtx(lock);
2949 if (state & LCK_ILOCK) {
2950 panic("Unexpected interlock set (%p)", lock);
2951 }
2952 holding_thread = LCK_MTX_STATE_TO_THREAD(state);
2953 if (holding_thread) {
2954 enable_preemption();
2955 return FALSE;
2956 }
2957 state |= LCK_ILOCK;
2958 ordered_store_mtx(lock, state);
2959 #endif // __SMP__
2960 waiters = lck_mtx_lock_acquire(lock);
2961 state = LCK_MTX_THREAD_TO_STATE(thread);
2962 if (waiters != 0) {
2963 state |= ARM_LCK_WAITERS;
2964 }
2965 #if __SMP__
2966 state |= LCK_ILOCK; // Preserve interlock
2967 ordered_store_mtx(lock, state); // Set ownership
2968 interlock_unlock(lock); // Release interlock, enable preemption
2969 #else
2970 ordered_store_mtx(lock, state); // Set ownership
2971 enable_preemption();
2972 #endif
2973 load_memory_barrier();
2974 return TRUE;
2975 }
2976
2977 static inline boolean_t
2978 lck_mtx_try_lock_spin_internal(lck_mtx_t *lock, boolean_t allow_held_as_mutex)
2979 {
2980 uintptr_t state;
2981
2982 if (!interlock_try(lock)) {
2983 return FALSE;
2984 }
2985 state = ordered_load_mtx(lock);
2986 if (LCK_MTX_STATE_TO_THREAD(state)) {
2987 // Lock is held as mutex
2988 if (allow_held_as_mutex) {
2989 interlock_unlock(lock);
2990 } else {
2991 // "Always" variants can never block. If the lock is held as a normal mutex
2992 // then someone is mixing always and non-always calls on the same lock, which is
2993 // forbidden.
2994 panic("Spin-mutex held as full mutex %p", lock);
2995 }
2996 return FALSE;
2997 }
2998 state &= ARM_LCK_WAITERS; // Preserve waiters bit
2999 state |= (LCK_MTX_SPIN_TAG | LCK_ILOCK); // Add spin tag and maintain interlock
3000 ordered_store_mtx(lock, state);
3001 load_memory_barrier();
3002
3003 #if CONFIG_DTRACE
3004 LOCKSTAT_RECORD(LS_LCK_MTX_TRY_SPIN_LOCK_ACQUIRE, lock, 0);
3005 #endif /* CONFIG_DTRACE */
3006 return TRUE;
3007 }
3008
3009 /*
3010 * Routine: lck_mtx_try_lock_spin
3011 */
3012 boolean_t
3013 lck_mtx_try_lock_spin(lck_mtx_t *lock)
3014 {
3015 return lck_mtx_try_lock_spin_internal(lock, TRUE);
3016 }
3017
3018 /*
3019 * Routine: lck_mtx_try_lock_spin_always
3020 */
3021 boolean_t
3022 lck_mtx_try_lock_spin_always(lck_mtx_t *lock)
3023 {
3024 return lck_mtx_try_lock_spin_internal(lock, FALSE);
3025 }
3026
3027
3028
3029 /*
3030 * Routine: lck_mtx_unlock
3031 */
3032 void
3033 lck_mtx_unlock(lck_mtx_t *lock)
3034 {
3035 thread_t thread = current_thread();
3036 uintptr_t state;
3037 boolean_t ilk_held = FALSE;
3038
3039 lck_mtx_verify(lock);
3040
3041 state = ordered_load_mtx(lock);
3042 if (state & LCK_ILOCK) {
3043 if (LCK_MTX_STATE_TO_THREAD(state) == (thread_t)LCK_MTX_SPIN_TAG) {
3044 ilk_held = TRUE; // Interlock is held by (presumably) this thread
3045 }
3046 goto slow_case;
3047 }
3048 // Locked as a mutex
3049 if (atomic_compare_exchange(&lock->lck_mtx_data, LCK_MTX_THREAD_TO_STATE(thread), 0,
3050 memory_order_release_smp, FALSE)) {
3051 #if CONFIG_DTRACE
3052 LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, lock, 0);
3053 #endif /* CONFIG_DTRACE */
3054 return;
3055 }
3056 slow_case:
3057 lck_mtx_unlock_contended(lock, thread, ilk_held);
3058 }
3059
3060 static void NOINLINE
3061 lck_mtx_unlock_contended(lck_mtx_t *lock, thread_t thread, boolean_t ilk_held)
3062 {
3063 uintptr_t state;
3064
3065 if (ilk_held) {
3066 state = ordered_load_mtx(lock);
3067 } else {
3068 #if __SMP__
3069 interlock_lock(lock);
3070 state = ordered_load_mtx(lock);
3071 if (thread != LCK_MTX_STATE_TO_THREAD(state)) {
3072 panic("lck_mtx_unlock(): Attempt to release lock not owned by thread (%p)", lock);
3073 }
3074 #else
3075 disable_preemption_for_thread(thread);
3076 state = ordered_load_mtx(lock);
3077 if (state & LCK_ILOCK) {
3078 panic("lck_mtx_unlock(): Unexpected interlock set (%p)", lock);
3079 }
3080 if (thread != LCK_MTX_STATE_TO_THREAD(state)) {
3081 panic("lck_mtx_unlock(): Attempt to release lock not owned by thread (%p)", lock);
3082 }
3083 state |= LCK_ILOCK;
3084 ordered_store_mtx(lock, state);
3085 #endif
3086 if (state & ARM_LCK_WAITERS) {
3087 lck_mtx_unlock_wakeup(lock, thread);
3088 state = ordered_load_mtx(lock);
3089 } else {
3090 assertf(lock->lck_mtx_pri == 0, "pri=0x%x", lock->lck_mtx_pri);
3091 }
3092 }
3093 state &= ARM_LCK_WAITERS; /* Clear state, retain waiters bit */
3094 #if __SMP__
3095 state |= LCK_ILOCK;
3096 ordered_store_mtx(lock, state);
3097 interlock_unlock(lock);
3098 #else
3099 ordered_store_mtx(lock, state);
3100 enable_preemption();
3101 #endif
3102
3103 #if CONFIG_DTRACE
3104 LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, lock, 0);
3105 #endif /* CONFIG_DTRACE */
3106 }
3107
3108 /*
3109 * Routine: lck_mtx_assert
3110 */
3111 void
3112 lck_mtx_assert(lck_mtx_t *lock, unsigned int type)
3113 {
3114 thread_t thread, holder;
3115 uintptr_t state;
3116
3117 state = ordered_load_mtx(lock);
3118 holder = LCK_MTX_STATE_TO_THREAD(state);
3119 if (holder == (thread_t)LCK_MTX_SPIN_TAG) {
3120 // Lock is held in spin mode, owner is unknown.
3121 return; // Punt
3122 }
3123 thread = current_thread();
3124 if (type == LCK_MTX_ASSERT_OWNED) {
3125 if (thread != holder) {
3126 panic("lck_mtx_assert(): mutex (%p) not owned", lock);
3127 }
3128 } else if (type == LCK_MTX_ASSERT_NOTOWNED) {
3129 if (thread == holder) {
3130 panic("lck_mtx_assert(): mutex (%p) owned", lock);
3131 }
3132 } else {
3133 panic("lck_mtx_assert(): invalid arg (%u)", type);
3134 }
3135 }
3136
3137 /*
3138 * Routine: lck_mtx_ilk_unlock
3139 */
3140 boolean_t
3141 lck_mtx_ilk_unlock(lck_mtx_t *lock)
3142 {
3143 interlock_unlock(lock);
3144 return TRUE;
3145 }
3146
3147 /*
3148 * Routine: lck_mtx_convert_spin
3149 *
3150 * Convert a mutex held for spin into a held full mutex
3151 */
3152 void
3153 lck_mtx_convert_spin(lck_mtx_t *lock)
3154 {
3155 thread_t thread = current_thread();
3156 uintptr_t state;
3157 int waiters;
3158
3159 state = ordered_load_mtx(lock);
3160 if (LCK_MTX_STATE_TO_THREAD(state) == thread) {
3161 return; // Already owned as mutex, return
3162 }
3163 if ((state & LCK_ILOCK) == 0 || (LCK_MTX_STATE_TO_THREAD(state) != (thread_t)LCK_MTX_SPIN_TAG)) {
3164 panic("lck_mtx_convert_spin: Not held as spinlock (%p)", lock);
3165 }
3166 state &= ~(LCK_MTX_THREAD_MASK); // Clear the spin tag
3167 ordered_store_mtx(lock, state);
3168 waiters = lck_mtx_lock_acquire(lock); // Acquire to manage priority boosts
3169 state = LCK_MTX_THREAD_TO_STATE(thread);
3170 if (waiters != 0) {
3171 state |= ARM_LCK_WAITERS;
3172 }
3173 #if __SMP__
3174 state |= LCK_ILOCK;
3175 ordered_store_mtx(lock, state); // Set ownership
3176 interlock_unlock(lock); // Release interlock, enable preemption
3177 #else
3178 ordered_store_mtx(lock, state); // Set ownership
3179 enable_preemption();
3180 #endif
3181 }
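
/*
 * Usage sketch: take the mutex in spin mode for a cheap check, then
 * lck_mtx_convert_spin() to a full hold only when blocking work is actually
 * needed.  Names are hypothetical.
 */
#if 0	/* usage sketch only */
static void
example_check_then_work(lck_mtx_t *example_mtx, boolean_t *needs_work)
{
	lck_mtx_lock_spin(example_mtx);
	if (!*needs_work) {
		lck_mtx_unlock(example_mtx);	/* cheap case: never blocked */
		return;
	}
	lck_mtx_convert_spin(example_mtx);	/* now safe to block under the mutex */
	/* ... potentially blocking work protected by example_mtx ... */
	lck_mtx_unlock(example_mtx);
}
#endif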
3182
3183
3184 /*
3185 * Routine: lck_mtx_destroy
3186 */
3187 void
3188 lck_mtx_destroy(
3189 lck_mtx_t * lck,
3190 lck_grp_t * grp)
3191 {
3192 if (lck->lck_mtx_type != LCK_MTX_TYPE) {
3193 panic("Destroying invalid mutex %p", lck);
3194 }
3195 if (lck->lck_mtx_tag == LCK_MTX_TAG_DESTROYED) {
3196 panic("Destroying previously destroyed lock %p", lck);
3197 }
3198 lck_mtx_assert(lck, LCK_MTX_ASSERT_NOTOWNED);
3199 lck->lck_mtx_tag = LCK_MTX_TAG_DESTROYED;
3200 lck_grp_lckcnt_decr(grp, LCK_TYPE_MTX);
3201 lck_grp_deallocate(grp);
3202 return;
3203 }
3204
3205 /*
3206 * Routine: lck_spin_assert
3207 */
3208 void
3209 lck_spin_assert(lck_spin_t *lock, unsigned int type)
3210 {
3211 thread_t thread, holder;
3212 uintptr_t state;
3213
3214 if (lock->type != LCK_SPIN_TYPE) {
3215 panic("Invalid spinlock %p", lock);
3216 }
3217
3218 state = lock->lck_spin_data;
3219 holder = (thread_t)(state & ~LCK_ILOCK);
3220 thread = current_thread();
3221 if (type == LCK_ASSERT_OWNED) {
3222 if (holder == 0) {
3223 panic("Lock not owned %p = %lx", lock, state);
3224 }
3225 if (holder != thread) {
3226 panic("Lock not owned by current thread %p = %lx", lock, state);
3227 }
3228 if ((state & LCK_ILOCK) == 0) {
3229 panic("Lock bit not set %p = %lx", lock, state);
3230 }
3231 } else if (type == LCK_ASSERT_NOTOWNED) {
3232 if (holder != 0) {
3233 if (holder == thread) {
3234 panic("Lock owned by current thread %p = %lx", lock, state);
3235 } else {
3236 panic("Lock %p owned by thread %p", lock, holder);
3237 }
3238 }
3239 if (state & LCK_ILOCK) {
3240 panic("Lock bit set %p = %lx", lock, state);
3241 }
3242 } else {
3243 panic("lck_spin_assert(): invalid arg (%u)", type);
3244 }
3245 }
3246
3247 boolean_t
3248 lck_rw_lock_yield_shared(lck_rw_t *lck, boolean_t force_yield)
3249 {
3250 lck_rw_word_t word;
3251
3252 lck_rw_assert(lck, LCK_RW_ASSERT_SHARED);
3253
3254 word.data = ordered_load_rw(lck);
3255 if (word.want_excl || word.want_upgrade || force_yield) {
3256 lck_rw_unlock_shared(lck);
3257 mutex_pause(2);
3258 lck_rw_lock_shared(lck);
3259 return TRUE;
3260 }
3261
3262 return FALSE;
3263 }
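
/*
 * Usage sketch: a long scan holding the lock shared can use
 * lck_rw_lock_yield_shared() to periodically let waiting writers in.  A TRUE
 * return means the lock was dropped and re-acquired, so cached state derived
 * from the protected data must be revalidated.  Names are hypothetical.
 */
#if 0	/* usage sketch only */
static void
example_long_scan(lck_rw_t *example_lck, int nelem)
{
	int i;

	lck_rw_lock_shared(example_lck);
	for (i = 0; i < nelem; i++) {
		/* ... examine element i under the shared hold ... */
		if (lck_rw_lock_yield_shared(example_lck, FALSE)) {
			/* ... lock was briefly dropped; revalidate cached state ... */
		}
	}
	lck_rw_unlock_shared(example_lck);
}
#endif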
3264
3265 /*
3266 * Routine: kdp_lck_mtx_lock_spin_is_acquired
3267 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
3268 */
3269 boolean_t
3270 kdp_lck_mtx_lock_spin_is_acquired(lck_mtx_t *lck)
3271 {
3272 uintptr_t state;
3273
3274 if (not_in_kdp) {
3275 panic("spinlock acquired check done outside of kernel debugger");
3276 }
3277 state = ordered_load_mtx(lck);
3278 if (state == LCK_MTX_TAG_DESTROYED) {
3279 return FALSE;
3280 }
3281 if (LCK_MTX_STATE_TO_THREAD(state) || (state & LCK_ILOCK)) {
3282 return TRUE;
3283 }
3284 return FALSE;
3285 }
3286
3287 void
3288 kdp_lck_mtx_find_owner(__unused struct waitq * waitq, event64_t event, thread_waitinfo_t * waitinfo)
3289 {
3290 lck_mtx_t * mutex = LCK_EVENT_TO_MUTEX(event);
3291 waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(mutex);
3292 uintptr_t state = ordered_load_mtx(mutex);
3293 thread_t holder = LCK_MTX_STATE_TO_THREAD(state);
3294 if ((uintptr_t)holder == (uintptr_t)LCK_MTX_SPIN_TAG) {
3295 waitinfo->owner = STACKSHOT_WAITOWNER_MTXSPIN;
3296 } else {
3297 assertf(state != (uintptr_t)LCK_MTX_TAG_DESTROYED, "state=0x%llx", (uint64_t)state);
3298 assertf(state != (uintptr_t)LCK_MTX_TAG_INDIRECT, "state=0x%llx", (uint64_t)state);
3299 waitinfo->owner = thread_tid(holder);
3300 }
3301 }
3302
3303 void
3304 kdp_rwlck_find_owner(__unused struct waitq * waitq, event64_t event, thread_waitinfo_t * waitinfo)
3305 {
3306 lck_rw_t *rwlck = NULL;
3307 switch (waitinfo->wait_type) {
3308 case kThreadWaitKernelRWLockRead:
3309 rwlck = READ_EVENT_TO_RWLOCK(event);
3310 break;
3311 case kThreadWaitKernelRWLockWrite:
3312 case kThreadWaitKernelRWLockUpgrade:
3313 rwlck = WRITE_EVENT_TO_RWLOCK(event);
3314 break;
3315 default:
3316 panic("%s was called with an invalid blocking type", __FUNCTION__);
3317 break;
3318 }
3319 waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(rwlck);
3320 waitinfo->owner = thread_tid(rwlck->lck_rw_owner);
3321 }