]> git.saurik.com Git - apple/xnu.git/blob - osfmk/arm/locks_arm.c
xnu-4570.71.2.tar.gz
[apple/xnu.git] / osfmk / arm / locks_arm.c
1 /*
2 * Copyright (c) 2007-2017 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System Copyright (c) 1991,1990,1989,1988,1987 Carnegie
33 * Mellon University All Rights Reserved.
34 *
35 * Permission to use, copy, modify and distribute this software and its
36 * documentation is hereby granted, provided that both the copyright notice
37 * and this permission notice appear in all copies of the software,
38 * derivative works or modified versions, and any portions thereof, and that
39 * both notices appear in supporting documentation.
40 *
41 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.
42 * CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES
43 * WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
44 *
45 * Carnegie Mellon requests users of this software to return to
46 *
47 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
48 * School of Computer Science Carnegie Mellon University Pittsburgh PA
49 * 15213-3890
50 *
51 * any improvements or extensions that they make and grant Carnegie Mellon the
52 * rights to redistribute these changes.
53 */
54 /*
55 * File: kern/lock.c
56 * Author: Avadis Tevanian, Jr., Michael Wayne Young
57 * Date: 1985
58 *
59 * Locking primitives implementation
60 */
61
62 #define ATOMIC_PRIVATE 1
63 #define LOCK_PRIVATE 1
64
65 #include <mach_ldebug.h>
66
67 #include <kern/kalloc.h>
68 #include <kern/locks.h>
69 #include <kern/misc_protos.h>
70 #include <kern/thread.h>
71 #include <kern/processor.h>
72 #include <kern/sched_prim.h>
73 #include <kern/xpr.h>
74 #include <kern/debug.h>
75 #include <kern/kcdata.h>
76 #include <string.h>
77
78 #include <arm/cpu_data_internal.h>
79 #include <arm/proc_reg.h>
80 #include <arm/smp.h>
81 #include <machine/atomic.h>
82 #include <machine/machine_cpu.h>
83
84 #include <sys/kdebug.h>
85
86 /*
87 * We need only enough declarations from the BSD-side to be able to
88 * test if our probe is active, and to call __dtrace_probe(). Setting
89 * NEED_DTRACE_DEFS gets a local copy of those definitions pulled in.
90 */
#if CONFIG_DTRACE
/* Ask lockstat.h for its local copy of the DTrace probe definitions. */
#define NEED_DTRACE_DEFS
#include <../bsd/sys/lockstat.h>

/*
 * Flag values for DTrace lock probes.
 * NOTE(review): the probe sites that pass these are not in this part of
 * the file -- confirm usage there.
 */
#define DTRACE_RW_SHARED	0x0	// reader
#define DTRACE_RW_EXCL		0x1	// writer
#define DTRACE_NO_FLAG		0x0	// not applicable

#endif	/* CONFIG_DTRACE */
100
/*
 * Codes identifying the individual read-write lock operations.
 * NOTE(review): presumably kdebug trace sub-codes; the trace points that
 * use them are not in this part of the file -- confirm.
 */
#define LCK_RW_LCK_EXCLUSIVE_CODE	0x100
#define LCK_RW_LCK_EXCLUSIVE1_CODE	0x101
#define LCK_RW_LCK_SHARED_CODE		0x102
#define LCK_RW_LCK_SH_TO_EX_CODE	0x103
#define LCK_RW_LCK_SH_TO_EX1_CODE	0x104
#define LCK_RW_LCK_EX_TO_SH_CODE	0x105


/* Non-zero when any of the lock debugging configurations is enabled. */
#define ANY_LOCK_DEBUG	(USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG)

// Panic in tests that check lock usage correctness
// These are undesirable when in a panic or when a debugger is running.
// Evaluates to true only while kernel_debugger_entry_count is zero.
#define LOCK_CORRECTNESS_PANIC() (kernel_debugger_entry_count == 0)

/* Lock option flags; starts out with no options set. */
unsigned int    LcksOpts = 0;

#if CONFIG_DTRACE && __SMP__
/* Spin times above this threshold are reported to the lockstat spin probe. */
extern uint64_t dtrace_spin_threshold;
#endif
120
121 /* Forwards */
122
123
#if	USLOCK_DEBUG
/*
 *	Perform simple lock checks.
 *
 *	uslock_check gates USLOCK_CHECKING() and selects the initial debug
 *	state written by usld_lock_init().
 */
int             uslock_check = 1;
/* NOTE(review): max_lock_loops is not referenced in this part of the file. */
int             max_lock_loops = 100000000;
decl_simple_lock_data(extern, printf_lock)
decl_simple_lock_data(extern, panic_lock)
#endif				/* USLOCK_DEBUG */

/*
 * kdp_lck_spin_is_acquired() panics when this is non-zero, i.e. when it is
 * called outside the kernel debugger.
 */
extern unsigned int not_in_kdp;
135
/*
 *	We often want to know the addresses of the callers
 *	of the various lock routines.  However, this information
 *	is only used for debugging and statistics.
 */
typedef void   *pc_t;
/* Sentinel values stored in the usimple_lock debug record when no pc/thread is known. */
#define	INVALID_PC	((void *) VM_MAX_KERNEL_ADDRESS)
#define	INVALID_THREAD	((void *) VM_MAX_KERNEL_ADDRESS)

#ifdef  lint
/*
 *	Eliminate lint complaints about unused local pc variables.
 */
#define	OBTAIN_PC(pc,l)	++pc
#else                           /* lint */
/* Outside of lint builds OBTAIN_PC expands to nothing: the pc is not captured. */
#define	OBTAIN_PC(pc,l)
#endif                          /* lint */
153
154
/*
 *	Portable lock package implementation of usimple_locks.
 *
 *	USLDBG(stmt) emits stmt only when USLOCK_DEBUG is enabled; otherwise
 *	it expands to nothing, so the usld_* debug hooks compile away.
 */

#if	USLOCK_DEBUG
#define	USLDBG(stmt)	stmt
	void            usld_lock_init(usimple_lock_t, unsigned short);
	void            usld_lock_pre(usimple_lock_t, pc_t);
	void            usld_lock_post(usimple_lock_t, pc_t);
	void            usld_unlock(usimple_lock_t, pc_t);
	void            usld_lock_try_pre(usimple_lock_t, pc_t);
	void            usld_lock_try_post(usimple_lock_t, pc_t);
	int             usld_lock_common_checks(usimple_lock_t, const char *);
#else                           /* USLOCK_DEBUG */
#define	USLDBG(stmt)
#endif                          /* USLOCK_DEBUG */
171
/*
 * Owner thread pointer when lock held in spin mode
 */
#define LCK_MTX_SPIN_TAG  0xfffffff0


/*
 * Interlock helpers: take/try/release a single bit of the lock word through
 * the hw_lock_bit family. The mutex variants use LCK_ILOCK_BIT of
 * lck_mtx_data; the rw variants use LCK_RW_INTERLOCK_BIT of lck_rw_tag.
 */
#define interlock_lock(lock)	hw_lock_bit    ((hw_lock_bit_t*)(&(lock)->lck_mtx_data), LCK_ILOCK_BIT)
#define interlock_try(lock)		hw_lock_bit_try((hw_lock_bit_t*)(&(lock)->lck_mtx_data), LCK_ILOCK_BIT)
#define interlock_unlock(lock)	hw_unlock_bit  ((hw_lock_bit_t*)(&(lock)->lck_mtx_data), LCK_ILOCK_BIT)
#define lck_rw_ilk_lock(lock)	hw_lock_bit  ((hw_lock_bit_t*)(&(lock)->lck_rw_tag), LCK_RW_INTERLOCK_BIT)
#define lck_rw_ilk_unlock(lock)	hw_unlock_bit((hw_lock_bit_t*)(&(lock)->lck_rw_tag), LCK_RW_INTERLOCK_BIT)

/* Thread fences using the *_smp memory orders. */
#define memory_barrier()	__c11_atomic_thread_fence(memory_order_acq_rel_smp)
#define load_memory_barrier()	__c11_atomic_thread_fence(memory_order_acquire_smp)
#define store_memory_barrier()	__c11_atomic_thread_fence(memory_order_release_smp)

// Enforce program order of loads and stores.
// Implemented as relaxed C11 atomic accesses on the target reinterpreted as
// an _Atomic object of the given type.
#define ordered_load(target, type) \
		__c11_atomic_load((_Atomic type *)(target), memory_order_relaxed)
#define ordered_store(target, type, value) \
		__c11_atomic_store((_Atomic type *)(target), value, memory_order_relaxed)

/* Typed wrappers for the individual lock words. */
#define ordered_load_mtx(lock)			ordered_load(&(lock)->lck_mtx_data, uintptr_t)
#define ordered_store_mtx(lock, value)	ordered_store(&(lock)->lck_mtx_data, uintptr_t, (value))
#define ordered_load_rw(lock)			ordered_load(&(lock)->lck_rw_data, uint32_t)
#define ordered_store_rw(lock, value)	ordered_store(&(lock)->lck_rw_data, uint32_t, (value))
#define ordered_load_rw_owner(lock)		ordered_load(&(lock)->lck_rw_owner, thread_t)
#define ordered_store_rw_owner(lock, value)	ordered_store(&(lock)->lck_rw_owner, thread_t, (value))
#define ordered_load_hw(lock)			ordered_load(&(lock)->lock_data, uintptr_t)
#define ordered_store_hw(lock, value)	ordered_store(&(lock)->lock_data, uintptr_t, (value))
#define ordered_load_bit(lock)			ordered_load((lock), uint32_t)
#define ordered_store_bit(lock, value)	ordered_store((lock), uint32_t, (value))
204
205
// Prevent the compiler from reordering memory operations around this
#define compiler_memory_fence()	__asm__ volatile ("" ::: "memory")

// Timeout hw_lock_bit() hands to hw_lock_bit_to() before it panics; the
// contended path adds it to ml_get_timebase(), so it is in timebase units.
#define LOCK_PANIC_TIMEOUT	0xc00000
#define NOINLINE		__attribute__((noinline))
211
212
/*
 * interrupts_disabled(mask): non-zero when the interrupt-mask bit(s) for the
 * current architecture are set in the saved interrupt state `mask`.
 * The argument is parenthesized so that compound expressions such as
 * (a | b) are masked as a whole.
 */
#if __arm__
#define interrupts_disabled(mask) ((mask) & PSR_INTMASK)
#else
#define interrupts_disabled(mask) ((mask) & DAIF_IRQF)
#endif


#if __arm__
/*
 * Unmask FIQ only / unmask both IRQ and FIQ (32-bit ARM only).
 * NOTE: both expansions already end in ';' -- kept for compatibility with
 * existing callers, so do not use them as the sole body of an if/else.
 */
#define enable_fiq()		__asm__ volatile ("cpsie  f" ::: "memory");
#define enable_interrupts()	__asm__ volatile ("cpsie if" ::: "memory");
#endif
224
/*
 * Forward declarations
 *
 * Slow-path and helper routines of the read-write lock implementation.
 * Of these, only lck_rw_grab() is defined in this part of the file.
 */

static void lck_rw_lock_shared_gen(lck_rw_t *lck);
static void lck_rw_lock_exclusive_gen(lck_rw_t *lck);
static boolean_t lck_rw_lock_shared_to_exclusive_success(lck_rw_t *lck);
static boolean_t lck_rw_lock_shared_to_exclusive_failure(lck_rw_t *lck, uint32_t prior_lock_state);
static void lck_rw_lock_exclusive_to_shared_gen(lck_rw_t *lck, uint32_t prior_lock_state);
static lck_rw_type_t lck_rw_done_gen(lck_rw_t *lck, uint32_t prior_lock_state);
/* NOTE(review): "_x86" name in an ARM file -- presumably a shared cross-architecture entry point; confirm. */
void lck_rw_clear_promotions_x86(thread_t thread);
static boolean_t lck_rw_grab(lck_rw_t *lock, int mode, boolean_t wait);
237
238 /*
239 * atomic exchange API is a low level abstraction of the operations
240 * to atomically read, modify, and write a pointer. This abstraction works
241 * for both Intel and ARMv8.1 compare and exchange atomic instructions as
242 * well as the ARM exclusive instructions.
243 *
244 * atomic_exchange_begin() - begin exchange and retrieve current value
245 * atomic_exchange_complete() - conclude an exchange
246 * atomic_exchange_abort() - cancel an exchange started with atomic_exchange_begin()
247 */
248 static uint32_t
249 atomic_exchange_begin32(uint32_t *target, uint32_t *previous, enum memory_order ord)
250 {
251 uint32_t val;
252
253 val = load_exclusive32(target, ord);
254 *previous = val;
255 return val;
256 }
257
258 static boolean_t
259 atomic_exchange_complete32(uint32_t *target, uint32_t previous, uint32_t newval, enum memory_order ord)
260 {
261 (void)previous; // Previous not needed, monitor is held
262 return store_exclusive32(target, newval, ord);
263 }
264
/*
 * Cancel an exchange started with atomic_exchange_begin32() by clearing
 * the exclusive monitor.
 */
static void
atomic_exchange_abort(void)
{
	clear_exclusive();
}
270
271 static boolean_t
272 atomic_test_and_set32(uint32_t *target, uint32_t test_mask, uint32_t set_mask, enum memory_order ord, boolean_t wait)
273 {
274 uint32_t value, prev;
275
276 for ( ; ; ) {
277 value = atomic_exchange_begin32(target, &prev, ord);
278 if (value & test_mask) {
279 if (wait)
280 wait_for_event(); // Wait with monitor held
281 else
282 atomic_exchange_abort(); // Clear exclusive monitor
283 return FALSE;
284 }
285 value |= set_mask;
286 if (atomic_exchange_complete32(target, prev, value, ord))
287 return TRUE;
288 }
289 }
290
291 void _disable_preemption(void)
292 {
293 thread_t thread = current_thread();
294 unsigned int count;
295
296 count = thread->machine.preemption_count + 1;
297 ordered_store(&thread->machine.preemption_count, unsigned int, count);
298 }
299
/*
 * Drop one level of the current thread's preemption count.
 *
 * Panics if the count is already zero. If the count stays above zero, or
 * interrupts are already masked, only the new count is stored. Otherwise
 * interrupts are masked while the count is stored and the CPU's pending
 * ASTs are inspected; an urgent AST is handled via ast_taken_kernel()
 * before the interrupt state is re-established.
 */
void _enable_preemption(void)
{
	thread_t	thread = current_thread();
	long		state;
	unsigned int	count;
	/* Architecture-specific IRQ mask bit in the value returned by get_interrupts(). */
#if __arm__
#define INTERRUPT_MASK PSR_IRQF
#else	// __arm__
#define INTERRUPT_MASK DAIF_IRQF
#endif	// __arm__

	count = thread->machine.preemption_count;
	if (count == 0)
		panic("Preemption count negative");	// Count will go negative when released
	count--;
	if (count > 0)
		goto update_count;			// Preemption is still disabled, just update
	state = get_interrupts();			// Get interrupt state
	if (state & INTERRUPT_MASK)
		goto update_count;			// Interrupts are already masked, can't take AST here

	disable_interrupts_noread();			// Disable interrupts
	ordered_store(&thread->machine.preemption_count, unsigned int, count);
	if (thread->machine.CpuDatap->cpu_pending_ast & AST_URGENT) {
#if __arm__
#if __ARM_USER_PROTECT__
        uintptr_t up = arm_user_protect_begin(thread);
#endif	// __ARM_USER_PROTECT__
		enable_fiq();
#endif	// __arm__
		ast_taken_kernel();			// Handle urgent AST
#if __arm__
#if __ARM_USER_PROTECT__
		arm_user_protect_end(thread, up, TRUE);
#endif	// __ARM_USER_PROTECT__
		enable_interrupts();
		return;					// Return early on arm only due to FIQ enabling
#endif	// __arm__
	}
	restore_interrupts(state);			// Enable interrupts
	return;

	/* Common exit for the paths that must not look at pending ASTs. */
update_count:
	ordered_store(&thread->machine.preemption_count, unsigned int, count);
	return;
}
346
347 int get_preemption_level(void)
348 {
349 return current_thread()->machine.preemption_count;
350 }
351
/* Forward declarations for unexported functions that are used externally */
void hw_lock_bit(hw_lock_bit_t *lock, unsigned int bit);
void hw_unlock_bit(hw_lock_bit_t *lock, unsigned int bit);

#if	__SMP__
/* Contended (spinning) slow path of hw_lock_bit_to(); takes the bit mask, not the bit number. */
static unsigned int
hw_lock_bit_to_contended(hw_lock_bit_t *lock, uint32_t mask, uint32_t timeout);
#endif
360
361 unsigned int
362 hw_lock_bit_to(hw_lock_bit_t *lock, unsigned int bit, uint32_t timeout)
363 {
364 unsigned int success = 0;
365 uint32_t mask = (1 << bit);
366 #if !__SMP__
367 uint32_t state;
368 #endif
369
370 _disable_preemption();
371 #if __SMP__
372 if (__improbable(!atomic_test_and_set32(lock, mask, mask, memory_order_acquire, FALSE)))
373 success = hw_lock_bit_to_contended(lock, mask, timeout);
374 else
375 success = 1;
376 #else // __SMP__
377 (void)timeout;
378 state = ordered_load_bit(lock);
379 if (!(mask & state)) {
380 ordered_store_bit(lock, state | mask);
381 success = 1;
382 }
383 #endif // __SMP__
384
385 #if CONFIG_DTRACE
386 if (success)
387 LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE, lock, bit);
388 #endif
389
390 return success;
391 }
392
#if	__SMP__
/*
 * Contended path of hw_lock_bit_to(): keep retrying the test-and-set of
 * `mask` in bursts of LOCK_SNOOP_SPINS attempts. The deadline is armed
 * after the first unsuccessful burst (timebase + timeout) and checked after
 * each later one.
 *
 * Returns 1 once the bit was taken, 0 when the deadline passed. With DTrace
 * configured and the spin probe active, spin times above
 * dtrace_spin_threshold are recorded on success.
 */
static unsigned int NOINLINE
hw_lock_bit_to_contended(hw_lock_bit_t *lock, uint32_t mask, uint32_t timeout)
{
	uint64_t	deadline = 0;
	boolean_t	acquired = FALSE;
	int		spins;
#if CONFIG_DTRACE
	uint64_t	spin_start = 0;
	boolean_t	probe_active = lockstat_probemap[LS_LCK_SPIN_LOCK_SPIN] != 0;

	if (__improbable(probe_active))
		spin_start = mach_absolute_time();
#endif

	while (!acquired) {
		for (spins = 0; !acquired && spins < LOCK_SNOOP_SPINS; spins++) {
			// Always load-exclusive before wfe
			// This grabs the monitor and wakes up on a release event
			acquired = atomic_test_and_set32(lock, mask, mask, memory_order_acquire, TRUE);
		}
		if (acquired)
			break;
		if (deadline == 0)
			deadline = ml_get_timebase() + timeout;
		else if (ml_get_timebase() >= deadline)
			return 0;
	}

#if CONFIG_DTRACE
	if (__improbable(probe_active)) {
		uint64_t	spun_for = mach_absolute_time() - spin_start;

		if (spun_for > dtrace_spin_threshold)
			LOCKSTAT_RECORD2(LS_LCK_SPIN_LOCK_SPIN, lock, spun_for, mask);
	}
#endif
	return 1;
}
#endif	// __SMP__
430
431 void
432 hw_lock_bit(hw_lock_bit_t *lock, unsigned int bit)
433 {
434 if (hw_lock_bit_to(lock, bit, LOCK_PANIC_TIMEOUT))
435 return;
436 #if __SMP__
437 panic("hw_lock_bit(): timed out (%p)", lock);
438 #else
439 panic("hw_lock_bit(): interlock held (%p)", lock);
440 #endif
441 }
442
443 unsigned int
444 hw_lock_bit_try(hw_lock_bit_t *lock, unsigned int bit)
445 {
446 long intmask;
447 uint32_t mask = (1 << bit);
448 #if !__SMP__
449 uint32_t state;
450 #endif
451 boolean_t success = FALSE;
452
453 intmask = disable_interrupts();
454 #if __SMP__
455 // TODO: consider weak (non-looping) atomic test-and-set
456 success = atomic_test_and_set32(lock, mask, mask, memory_order_acquire, FALSE);
457 #else
458 state = ordered_load_bit(lock);
459 if (!(mask & state)) {
460 ordered_store_bit(lock, state | mask);
461 success = TRUE;
462 }
463 #endif // __SMP__
464 if (success)
465 disable_preemption();
466 restore_interrupts(intmask);
467
468 #if CONFIG_DTRACE
469 if (success)
470 LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE, lock, bit);
471 #endif
472
473 return success;
474 }
475
476 /*
477 * Routine: hw_unlock_bit
478 *
479 * Release spin-lock. The second parameter is the bit number to test and set.
480 * Decrement the preemption level.
481 */
482 void
483 hw_unlock_bit(hw_lock_bit_t *lock, unsigned int bit)
484 {
485 uint32_t mask = (1 << bit);
486 #if !__SMP__
487 uint32_t state;
488 #endif
489
490 #if __SMP__
491 __c11_atomic_fetch_and((_Atomic uint32_t *)lock, ~mask, memory_order_release);
492 set_event();
493 #else // __SMP__
494 state = ordered_load_bit(lock);
495 ordered_store_bit(lock, state & ~mask);
496 #endif // __SMP__
497 #if CONFIG_DTRACE
498 LOCKSTAT_RECORD(LS_LCK_SPIN_UNLOCK_RELEASE, lock, bit);
499 #endif
500 enable_preemption();
501 }
502
503
504 /*
505 * Routine: lck_spin_alloc_init
506 */
507 lck_spin_t *
508 lck_spin_alloc_init(
509 lck_grp_t * grp,
510 lck_attr_t * attr)
511 {
512 lck_spin_t *lck;
513
514 if ((lck = (lck_spin_t *) kalloc(sizeof(lck_spin_t))) != 0)
515 lck_spin_init(lck, grp, attr);
516
517 return (lck);
518 }
519
520 /*
521 * Routine: lck_spin_free
522 */
523 void
524 lck_spin_free(
525 lck_spin_t * lck,
526 lck_grp_t * grp)
527 {
528 lck_spin_destroy(lck, grp);
529 kfree((void *) lck, sizeof(lck_spin_t));
530 }
531
532 /*
533 * Routine: lck_spin_init
534 */
535 void
536 lck_spin_init(
537 lck_spin_t * lck,
538 lck_grp_t * grp,
539 __unused lck_attr_t * attr)
540 {
541 hw_lock_init(&lck->hwlock);
542 lck->type = LCK_SPIN_TYPE;
543 lck_grp_reference(grp);
544 lck_grp_lckcnt_incr(grp, LCK_TYPE_SPIN);
545 store_memory_barrier();
546 }
547
548 /*
549 * arm_usimple_lock is a lck_spin_t without a group or attributes
550 */
551 void inline
552 arm_usimple_lock_init(simple_lock_t lck, __unused unsigned short initial_value)
553 {
554 lck->type = LCK_SPIN_TYPE;
555 hw_lock_init(&lck->hwlock);
556 store_memory_barrier();
557 }
558
559
560 /*
561 * Routine: lck_spin_lock
562 */
563 void
564 lck_spin_lock(lck_spin_t *lock)
565 {
566 #if DEVELOPMENT || DEBUG
567 if (lock->type != LCK_SPIN_TYPE)
568 panic("Invalid spinlock %p", lock);
569 #endif // DEVELOPMENT || DEBUG
570 hw_lock_lock(&lock->hwlock);
571 }
572
573 /*
574 * Routine: lck_spin_try_lock
575 */
576 int
577 lck_spin_try_lock(lck_spin_t *lock)
578 {
579 return hw_lock_try(&lock->hwlock);
580 }
581
582 /*
583 * Routine: lck_spin_unlock
584 */
585 void
586 lck_spin_unlock(lck_spin_t *lock)
587 {
588 #if DEVELOPMENT || DEBUG
589 if ((LCK_MTX_STATE_TO_THREAD(lock->lck_spin_data) != current_thread()) && LOCK_CORRECTNESS_PANIC())
590 panic("Spinlock not owned by thread %p = %lx", lock, lock->lck_spin_data);
591 if (lock->type != LCK_SPIN_TYPE)
592 panic("Invalid spinlock type %p", lock);
593 #endif // DEVELOPMENT || DEBUG
594 hw_lock_unlock(&lock->hwlock);
595 }
596
597 /*
598 * Routine: lck_spin_destroy
599 */
600 void
601 lck_spin_destroy(
602 lck_spin_t * lck,
603 lck_grp_t * grp)
604 {
605 if (lck->lck_spin_data == LCK_SPIN_TAG_DESTROYED)
606 return;
607 lck->lck_spin_data = LCK_SPIN_TAG_DESTROYED;
608 lck_grp_lckcnt_decr(grp, LCK_TYPE_SPIN);
609 lck_grp_deallocate(grp);
610 }
611
612 /*
613 * Routine: kdp_lck_spin_is_acquired
614 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
615 */
616 boolean_t
617 kdp_lck_spin_is_acquired(lck_spin_t *lck) {
618 if (not_in_kdp) {
619 panic("panic: spinlock acquired check done outside of kernel debugger");
620 }
621 return ((lck->lck_spin_data & ~LCK_SPIN_TAG_DESTROYED) != 0) ? TRUE:FALSE;
622 }
623
624 /*
625 * Initialize a usimple_lock.
626 *
627 * No change in preemption state.
628 */
629 void
630 usimple_lock_init(
631 usimple_lock_t l,
632 unsigned short tag)
633 {
634 #ifndef MACHINE_SIMPLE_LOCK
635 USLDBG(usld_lock_init(l, tag));
636 hw_lock_init(&l->lck_spin_data);
637 #else
638 simple_lock_init((simple_lock_t) l, tag);
639 #endif
640 }
641
642
643 /*
644 * Acquire a usimple_lock.
645 *
646 * Returns with preemption disabled. Note
647 * that the hw_lock routines are responsible for
648 * maintaining preemption state.
649 */
650 void
651 usimple_lock(
652 usimple_lock_t l)
653 {
654 #ifndef MACHINE_SIMPLE_LOCK
655 pc_t pc;
656
657 OBTAIN_PC(pc, l);
658 USLDBG(usld_lock_pre(l, pc));
659
660 if (!hw_lock_to(&l->lck_spin_data, LockTimeOut)) /* Try to get the lock
661 * with a timeout */
662 panic("simple lock deadlock detection - l=%p, cpu=%d, ret=%p", &l, cpu_number(), pc);
663
664 USLDBG(usld_lock_post(l, pc));
665 #else
666 simple_lock((simple_lock_t) l);
667 #endif
668 }
669
670
671 extern void sync(void);
672
673 /*
674 * Release a usimple_lock.
675 *
676 * Returns with preemption enabled. Note
677 * that the hw_lock routines are responsible for
678 * maintaining preemption state.
679 */
680 void
681 usimple_unlock(
682 usimple_lock_t l)
683 {
684 #ifndef MACHINE_SIMPLE_LOCK
685 pc_t pc;
686
687 OBTAIN_PC(pc, l);
688 USLDBG(usld_unlock(l, pc));
689 sync();
690 hw_lock_unlock(&l->lck_spin_data);
691 #else
692 simple_unlock((simple_lock_t) l);
693 #endif
694 }
695
696
697 /*
698 * Conditionally acquire a usimple_lock.
699 *
700 * On success, returns with preemption disabled.
701 * On failure, returns with preemption in the same state
702 * as when first invoked. Note that the hw_lock routines
703 * are responsible for maintaining preemption state.
704 *
705 * XXX No stats are gathered on a miss; I preserved this
706 * behavior from the original assembly-language code, but
707 * doesn't it make sense to log misses? XXX
708 */
709 unsigned int
710 usimple_lock_try(
711 usimple_lock_t l)
712 {
713 #ifndef MACHINE_SIMPLE_LOCK
714 pc_t pc;
715 unsigned int success;
716
717 OBTAIN_PC(pc, l);
718 USLDBG(usld_lock_try_pre(l, pc));
719 if ((success = hw_lock_try(&l->lck_spin_data))) {
720 USLDBG(usld_lock_try_post(l, pc));
721 }
722 return success;
723 #else
724 return (simple_lock_try((simple_lock_t) l));
725 #endif
726 }
727
728 #if USLOCK_DEBUG
/*
 *	States of a usimple_lock.  The default when initializing
 *	a usimple_lock is setting it up for debug checking.
 */
#define	USLOCK_CHECKED		0x0001	/* lock is being checked */
#define	USLOCK_TAKEN		0x0002	/* lock has been taken */
#define	USLOCK_INIT		0xBAA0	/* lock has been initialized */
#define	USLOCK_INITIALIZED	(USLOCK_INIT|USLOCK_CHECKED)
/* True when checking is globally enabled (uslock_check) and this lock is marked USLOCK_CHECKED. */
#define	USLOCK_CHECKING(l)	(uslock_check &&			\
				 ((l)->debug.state & USLOCK_CHECKED))

/*
 *	Trace activities of a particularly interesting lock.
 *	Only operations on the lock stored in traced_lock are recorded.
 */
void            usl_trace(usimple_lock_t, int, pc_t, const char *);
744
745
746 /*
747 * Initialize the debugging information contained
748 * in a usimple_lock.
749 */
750 void
751 usld_lock_init(
752 usimple_lock_t l,
753 __unused unsigned short tag)
754 {
755 if (l == USIMPLE_LOCK_NULL)
756 panic("lock initialization: null lock pointer");
757 l->lock_type = USLOCK_TAG;
758 l->debug.state = uslock_check ? USLOCK_INITIALIZED : 0;
759 l->debug.lock_cpu = l->debug.unlock_cpu = 0;
760 l->debug.lock_pc = l->debug.unlock_pc = INVALID_PC;
761 l->debug.lock_thread = l->debug.unlock_thread = INVALID_THREAD;
762 l->debug.duration[0] = l->debug.duration[1] = 0;
763 l->debug.unlock_cpu = l->debug.unlock_cpu = 0;
764 l->debug.unlock_pc = l->debug.unlock_pc = INVALID_PC;
765 l->debug.unlock_thread = l->debug.unlock_thread = INVALID_THREAD;
766 }
767
768
769 /*
770 * These checks apply to all usimple_locks, not just
771 * those with USLOCK_CHECKED turned on.
772 */
773 int
774 usld_lock_common_checks(
775 usimple_lock_t l,
776 const char *caller)
777 {
778 if (l == USIMPLE_LOCK_NULL)
779 panic("%s: null lock pointer", caller);
780 if (l->lock_type != USLOCK_TAG)
781 panic("%s: 0x%x is not a usimple lock", caller, (integer_t) l);
782 if (!(l->debug.state & USLOCK_INIT))
783 panic("%s: 0x%x is not an initialized lock",
784 caller, (integer_t) l);
785 return USLOCK_CHECKING(l);
786 }
787
788
789 /*
790 * Debug checks on a usimple_lock just before attempting
791 * to acquire it.
792 */
793 /* ARGSUSED */
794 void
795 usld_lock_pre(
796 usimple_lock_t l,
797 pc_t pc)
798 {
799 const char *caller = "usimple_lock";
800
801
802 if (!usld_lock_common_checks(l, caller))
803 return;
804
805 /*
806 * Note that we have a weird case where we are getting a lock when we are]
807 * in the process of putting the system to sleep. We are running with no
808 * current threads, therefore we can't tell if we are trying to retake a lock
809 * we have or someone on the other processor has it. Therefore we just
810 * ignore this test if the locking thread is 0.
811 */
812
813 if ((l->debug.state & USLOCK_TAKEN) && l->debug.lock_thread &&
814 l->debug.lock_thread == (void *) current_thread()) {
815 printf("%s: lock 0x%x already locked (at %p) by",
816 caller, (integer_t) l, l->debug.lock_pc);
817 printf(" current thread %p (new attempt at pc %p)\n",
818 l->debug.lock_thread, pc);
819 panic("%s", caller);
820 }
821 mp_disable_preemption();
822 usl_trace(l, cpu_number(), pc, caller);
823 mp_enable_preemption();
824 }
825
826
827 /*
828 * Debug checks on a usimple_lock just after acquiring it.
829 *
830 * Pre-emption has been disabled at this point,
831 * so we are safe in using cpu_number.
832 */
833 void
834 usld_lock_post(
835 usimple_lock_t l,
836 pc_t pc)
837 {
838 int mycpu;
839 const char *caller = "successful usimple_lock";
840
841
842 if (!usld_lock_common_checks(l, caller))
843 return;
844
845 if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED))
846 panic("%s: lock 0x%x became uninitialized",
847 caller, (integer_t) l);
848 if ((l->debug.state & USLOCK_TAKEN))
849 panic("%s: lock 0x%x became TAKEN by someone else",
850 caller, (integer_t) l);
851
852 mycpu = cpu_number();
853 l->debug.lock_thread = (void *) current_thread();
854 l->debug.state |= USLOCK_TAKEN;
855 l->debug.lock_pc = pc;
856 l->debug.lock_cpu = mycpu;
857
858 usl_trace(l, mycpu, pc, caller);
859 }
860
861
862 /*
863 * Debug checks on a usimple_lock just before
864 * releasing it. Note that the caller has not
865 * yet released the hardware lock.
866 *
867 * Preemption is still disabled, so there's
868 * no problem using cpu_number.
869 */
870 void
871 usld_unlock(
872 usimple_lock_t l,
873 pc_t pc)
874 {
875 int mycpu;
876 const char *caller = "usimple_unlock";
877
878
879 if (!usld_lock_common_checks(l, caller))
880 return;
881
882 mycpu = cpu_number();
883
884 if (!(l->debug.state & USLOCK_TAKEN))
885 panic("%s: lock 0x%x hasn't been taken",
886 caller, (integer_t) l);
887 if (l->debug.lock_thread != (void *) current_thread())
888 panic("%s: unlocking lock 0x%x, owned by thread %p",
889 caller, (integer_t) l, l->debug.lock_thread);
890 if (l->debug.lock_cpu != mycpu) {
891 printf("%s: unlocking lock 0x%x on cpu 0x%x",
892 caller, (integer_t) l, mycpu);
893 printf(" (acquired on cpu 0x%x)\n", l->debug.lock_cpu);
894 panic("%s", caller);
895 }
896 usl_trace(l, mycpu, pc, caller);
897
898 l->debug.unlock_thread = l->debug.lock_thread;
899 l->debug.lock_thread = INVALID_PC;
900 l->debug.state &= ~USLOCK_TAKEN;
901 l->debug.unlock_pc = pc;
902 l->debug.unlock_cpu = mycpu;
903 }
904
905
906 /*
907 * Debug checks on a usimple_lock just before
908 * attempting to acquire it.
909 *
910 * Preemption isn't guaranteed to be disabled.
911 */
912 void
913 usld_lock_try_pre(
914 usimple_lock_t l,
915 pc_t pc)
916 {
917 const char *caller = "usimple_lock_try";
918
919 if (!usld_lock_common_checks(l, caller))
920 return;
921 mp_disable_preemption();
922 usl_trace(l, cpu_number(), pc, caller);
923 mp_enable_preemption();
924 }
925
926
927 /*
928 * Debug checks on a usimple_lock just after
929 * successfully attempting to acquire it.
930 *
931 * Preemption has been disabled by the
932 * lock acquisition attempt, so it's safe
933 * to use cpu_number.
934 */
935 void
936 usld_lock_try_post(
937 usimple_lock_t l,
938 pc_t pc)
939 {
940 int mycpu;
941 const char *caller = "successful usimple_lock_try";
942
943 if (!usld_lock_common_checks(l, caller))
944 return;
945
946 if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED))
947 panic("%s: lock 0x%x became uninitialized",
948 caller, (integer_t) l);
949 if ((l->debug.state & USLOCK_TAKEN))
950 panic("%s: lock 0x%x became TAKEN by someone else",
951 caller, (integer_t) l);
952
953 mycpu = cpu_number();
954 l->debug.lock_thread = (void *) current_thread();
955 l->debug.state |= USLOCK_TAKEN;
956 l->debug.lock_pc = pc;
957 l->debug.lock_cpu = mycpu;
958
959 usl_trace(l, mycpu, pc, caller);
960 }
961
962
963 /*
964 * For very special cases, set traced_lock to point to a
965 * specific lock of interest. The result is a series of
966 * XPRs showing lock operations on that lock. The lock_seq
967 * value is used to show the order of those operations.
968 */
969 usimple_lock_t traced_lock;
970 unsigned int lock_seq;
971
972 void
973 usl_trace(
974 usimple_lock_t l,
975 int mycpu,
976 pc_t pc,
977 const char *op_name)
978 {
979 if (traced_lock == l) {
980 XPR(XPR_SLOCK,
981 "seq %d, cpu %d, %s @ %x\n",
982 (integer_t) lock_seq, (integer_t) mycpu,
983 (integer_t) op_name, (integer_t) pc, 0);
984 lock_seq++;
985 }
986 }
987
988
989 #endif /* USLOCK_DEBUG */
990
991 /*
992 * The C portion of the shared/exclusive locks package.
993 */
994
/*
 * Compute the deadline to spin against while waiting
 * for a lck_rw_t to change state.
 *
 *  - lock cannot sleep: effectively unbounded (now + 100000 * 10^9)
 *  - lock can sleep but already has waiters, or more sharers than
 *    machine_info.max_cpus: now (i.e. do not spin)
 *  - otherwise: now + MutexSpin
 */
#if	__SMP__
static inline uint64_t
lck_rw_deadline_for_spin(lck_rw_t *lck)
{
	lck_rw_word_t	state;

	state.data = ordered_load_rw(lck);

	if (!state.can_sleep)
		return (mach_absolute_time() + (100000LL * 1000000000LL));

	if (state.r_waiting || state.w_waiting || (state.shared_count > machine_info.max_cpus)) {
		/*
		 * Either threads are already waiting on this lock -- meaning they
		 * spun past their own deadlines without seeing the state they
		 * wanted, so spinning now is pointless --
		 * or
		 * more threads share the lock than we can run concurrently; every
		 * state we would spin for needs the shared count to reach 0, and
		 * how long that takes is unpredictable, so do not spin either.
		 */
		return (mach_absolute_time());
	}

	return (mach_absolute_time() + MutexSpin);
}
#endif	// __SMP__
1025
1026 static boolean_t
1027 lck_rw_drain_status(lck_rw_t *lock, uint32_t status_mask, boolean_t wait __unused)
1028 {
1029 #if __SMP__
1030 uint64_t deadline = 0;
1031 uint32_t data;
1032
1033 if (wait)
1034 deadline = lck_rw_deadline_for_spin(lock);
1035
1036 for ( ; ; ) {
1037 data = load_exclusive32(&lock->lck_rw_data, memory_order_acquire_smp);
1038 if ((data & status_mask) == 0)
1039 break;
1040 if (wait)
1041 wait_for_event();
1042 else
1043 clear_exclusive();
1044 if (!wait || (mach_absolute_time() >= deadline))
1045 return FALSE;
1046 }
1047 clear_exclusive();
1048 return TRUE;
1049 #else
1050 uint32_t data;
1051
1052 data = ordered_load_rw(lock);
1053 if ((data & status_mask) == 0)
1054 return TRUE;
1055 else
1056 return FALSE;
1057 #endif // __SMP__
1058 }
1059
1060 /*
1061 * Spin while interlock is held.
1062 */
1063 static inline void
1064 lck_rw_interlock_spin(lck_rw_t *lock)
1065 {
1066 #if __SMP__
1067 uint32_t data;
1068
1069 for ( ; ; ) {
1070 data = load_exclusive32(&lock->lck_rw_data, memory_order_relaxed);
1071 if (data & LCK_RW_INTERLOCK)
1072 wait_for_event();
1073 else {
1074 clear_exclusive();
1075 return;
1076 }
1077 }
1078 #else
1079 panic("lck_rw_interlock_spin(): Interlock locked %p %x", lock, lock->lck_rw_data);
1080 #endif
1081 }
1082
1083 /*
1084 * We disable interrupts while holding the RW interlock to prevent an
1085 * interrupt from exacerbating hold time.
1086 * Hence, local helper functions lck_interlock_lock()/lck_interlock_unlock().
1087 */
1088 static inline boolean_t
1089 lck_interlock_lock(lck_rw_t *lck)
1090 {
1091 boolean_t istate;
1092
1093 istate = ml_set_interrupts_enabled(FALSE);
1094 lck_rw_ilk_lock(lck);
1095 return istate;
1096 }
1097
1098 static inline void
1099 lck_interlock_unlock(lck_rw_t *lck, boolean_t istate)
1100 {
1101 lck_rw_ilk_unlock(lck);
1102 ml_set_interrupts_enabled(istate);
1103 }
1104
1105
1106 #define LCK_RW_GRAB_WANT 0
1107 #define LCK_RW_GRAB_SHARED 1
1108
1109 static boolean_t
1110 lck_rw_grab(lck_rw_t *lock, int mode, boolean_t wait)
1111 {
1112 uint64_t deadline = 0;
1113 uint32_t data, prev;
1114 boolean_t do_exch;
1115
1116 #if __SMP__
1117 if (wait)
1118 deadline = lck_rw_deadline_for_spin(lock);
1119 #else
1120 wait = FALSE; // Don't spin on UP systems
1121 #endif
1122
1123 for ( ; ; ) {
1124 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
1125 if (data & LCK_RW_INTERLOCK) {
1126 atomic_exchange_abort();
1127 lck_rw_interlock_spin(lock);
1128 continue;
1129 }
1130 do_exch = FALSE;
1131 if (mode == LCK_RW_GRAB_WANT) {
1132 if ((data & LCK_RW_WANT_EXCL) == 0) {
1133 data |= LCK_RW_WANT_EXCL;
1134 do_exch = TRUE;
1135 }
1136 } else { // LCK_RW_GRAB_SHARED
1137 if (((data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) == 0) ||
1138 (((data & LCK_RW_SHARED_MASK)) && ((data & LCK_RW_PRIV_EXCL) == 0))) {
1139 data += LCK_RW_SHARED_READER;
1140 do_exch = TRUE;
1141 }
1142 }
1143 if (do_exch) {
1144 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp))
1145 return TRUE;
1146 } else {
1147 if (wait) // Non-waiting
1148 wait_for_event();
1149 else
1150 atomic_exchange_abort();
1151 if (!wait || (mach_absolute_time() >= deadline))
1152 return FALSE;
1153 }
1154 }
1155 }
1156
1157
1158 /*
1159 * Routine: lck_rw_alloc_init
1160 */
1161 lck_rw_t *
1162 lck_rw_alloc_init(
1163 lck_grp_t *grp,
1164 lck_attr_t *attr)
1165 {
1166 lck_rw_t *lck;
1167
1168 if ((lck = (lck_rw_t *)kalloc(sizeof(lck_rw_t))) != 0)
1169 lck_rw_init(lck, grp, attr);
1170
1171 return lck;
1172 }
1173
1174 /*
1175 * Routine: lck_rw_free
1176 */
1177 void
1178 lck_rw_free(
1179 lck_rw_t *lck,
1180 lck_grp_t *grp)
1181 {
1182 lck_rw_destroy(lck, grp);
1183 kfree(lck, sizeof(lck_rw_t));
1184 }
1185
1186 /*
1187 * Routine: lck_rw_init
1188 */
1189 void
1190 lck_rw_init(
1191 lck_rw_t *lck,
1192 lck_grp_t *grp,
1193 lck_attr_t *attr)
1194 {
1195 if (attr == LCK_ATTR_NULL)
1196 attr = &LockDefaultLckAttr;
1197 memset(lck, 0, sizeof(lck_rw_t));
1198 lck->lck_rw_can_sleep = TRUE;
1199 if ((attr->lck_attr_val & LCK_ATTR_RW_SHARED_PRIORITY) == 0)
1200 lck->lck_rw_priv_excl = TRUE;
1201
1202 lck_grp_reference(grp);
1203 lck_grp_lckcnt_incr(grp, LCK_TYPE_RW);
1204 }
1205
1206
1207 /*
1208 * Routine: lck_rw_destroy
1209 */
1210 void
1211 lck_rw_destroy(
1212 lck_rw_t *lck,
1213 lck_grp_t *grp)
1214 {
1215 if (lck->lck_rw_tag == LCK_RW_TAG_DESTROYED)
1216 return;
1217 #if MACH_LDEBUG
1218 lck_rw_assert(lck, LCK_RW_ASSERT_NOTHELD);
1219 #endif
1220 lck->lck_rw_tag = LCK_RW_TAG_DESTROYED;
1221 lck_grp_lckcnt_decr(grp, LCK_TYPE_RW);
1222 lck_grp_deallocate(grp);
1223 return;
1224 }
1225
1226 /*
1227 * Routine: lck_rw_lock
1228 */
1229 void
1230 lck_rw_lock(
1231 lck_rw_t *lck,
1232 lck_rw_type_t lck_rw_type)
1233 {
1234 if (lck_rw_type == LCK_RW_TYPE_SHARED)
1235 lck_rw_lock_shared(lck);
1236 else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
1237 lck_rw_lock_exclusive(lck);
1238 else
1239 panic("lck_rw_lock(): Invalid RW lock type: %x", lck_rw_type);
1240 }
1241
1242 /*
1243 * Routine: lck_rw_lock_exclusive
1244 */
1245 void
1246 lck_rw_lock_exclusive(lck_rw_t *lock)
1247 {
1248 thread_t thread = current_thread();
1249
1250 thread->rwlock_count++;
1251 if (atomic_test_and_set32(&lock->lck_rw_data,
1252 (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK),
1253 LCK_RW_WANT_EXCL, memory_order_acquire_smp, FALSE)) {
1254 #if CONFIG_DTRACE
1255 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
1256 #endif /* CONFIG_DTRACE */
1257 } else
1258 lck_rw_lock_exclusive_gen(lock);
1259 #if MACH_ASSERT
1260 thread_t owner = ordered_load_rw_owner(lock);
1261 assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
1262 #endif
1263 ordered_store_rw_owner(lock, thread);
1264 }
1265
1266 /*
1267 * Routine: lck_rw_lock_shared
1268 */
1269 void
1270 lck_rw_lock_shared(lck_rw_t *lock)
1271 {
1272 uint32_t data, prev;
1273
1274 current_thread()->rwlock_count++;
1275 for ( ; ; ) {
1276 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
1277 if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK)) {
1278 atomic_exchange_abort();
1279 lck_rw_lock_shared_gen(lock);
1280 break;
1281 }
1282 data += LCK_RW_SHARED_READER;
1283 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp))
1284 break;
1285 cpu_pause();
1286 }
1287 #if MACH_ASSERT
1288 thread_t owner = ordered_load_rw_owner(lock);
1289 assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
1290 #endif
1291 #if CONFIG_DTRACE
1292 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED);
1293 #endif /* CONFIG_DTRACE */
1294 return;
1295 }
1296
1297 /*
1298 * Routine: lck_rw_lock_shared_to_exclusive
1299 */
1300 boolean_t
1301 lck_rw_lock_shared_to_exclusive(lck_rw_t *lock)
1302 {
1303 uint32_t data, prev;
1304
1305 for ( ; ; ) {
1306 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
1307 if (data & LCK_RW_INTERLOCK) {
1308 atomic_exchange_abort();
1309 lck_rw_interlock_spin(lock);
1310 continue;
1311 }
1312 if (data & LCK_RW_WANT_UPGRADE) {
1313 data -= LCK_RW_SHARED_READER;
1314 if ((data & LCK_RW_SHARED_MASK) == 0) /* we were the last reader */
1315 data &= ~(LCK_RW_W_WAITING); /* so clear the wait indicator */
1316 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp))
1317 return lck_rw_lock_shared_to_exclusive_failure(lock, prev);
1318 } else {
1319 data |= LCK_RW_WANT_UPGRADE; /* ask for WANT_UPGRADE */
1320 data -= LCK_RW_SHARED_READER; /* and shed our read count */
1321 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp))
1322 break;
1323 }
1324 cpu_pause();
1325 }
1326 /* we now own the WANT_UPGRADE */
1327 if (data & LCK_RW_SHARED_MASK) /* check to see if all of the readers are drained */
1328 lck_rw_lock_shared_to_exclusive_success(lock); /* if not, we need to go wait */
1329 #if MACH_ASSERT
1330 thread_t owner = ordered_load_rw_owner(lock);
1331 assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
1332 #endif
1333 ordered_store_rw_owner(lock, current_thread());
1334 #if CONFIG_DTRACE
1335 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lock, 0);
1336 #endif /* CONFIG_DTRACE */
1337 return TRUE;
1338 }
1339
1340
1341 /*
1342 * Routine: lck_rw_lock_shared_to_exclusive_failure
1343 * Function:
1344 * Fast path code has already dropped our read
1345 * count and determined that someone else owns 'lck_rw_want_upgrade'
1346 * if 'lck_rw_shared_count' == 0, its also already dropped 'lck_w_waiting'
1347 * all we need to do here is determine if a wakeup is needed
1348 */
1349 static boolean_t
1350 lck_rw_lock_shared_to_exclusive_failure(
1351 lck_rw_t *lck,
1352 uint32_t prior_lock_state)
1353 {
1354 thread_t thread = current_thread();
1355 uint32_t rwlock_count;
1356
1357 /* Check if dropping the lock means that we need to unpromote */
1358 rwlock_count = thread->rwlock_count--;
1359 #if MACH_LDEBUG
1360 if (rwlock_count == 0) {
1361 panic("rw lock count underflow for thread %p", thread);
1362 }
1363 #endif
1364 if ((prior_lock_state & LCK_RW_W_WAITING) &&
1365 ((prior_lock_state & LCK_RW_SHARED_MASK) == LCK_RW_SHARED_READER)) {
1366 /*
1367 * Someone else has requested upgrade.
1368 * Since we've released the read lock, wake
1369 * him up if he's blocked waiting
1370 */
1371 thread_wakeup(LCK_RW_WRITER_EVENT(lck));
1372 }
1373
1374 if ((rwlock_count == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
1375 /* sched_flags checked without lock, but will be rechecked while clearing */
1376 lck_rw_clear_promotion(thread);
1377 }
1378
1379 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_NONE,
1380 VM_KERNEL_UNSLIDE_OR_PERM(lck), lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0);
1381
1382 return (FALSE);
1383 }
1384
1385 /*
1386 * Routine: lck_rw_lock_shared_to_exclusive_success
1387 * Function:
1388 * assembly fast path code has already dropped our read
1389 * count and successfully acquired 'lck_rw_want_upgrade'
1390 * we just need to wait for the rest of the readers to drain
1391 * and then we can return as the exclusive holder of this lock
1392 */
1393 static boolean_t
1394 lck_rw_lock_shared_to_exclusive_success(
1395 lck_rw_t *lock)
1396 {
1397 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock);
1398 int slept = 0;
1399 lck_rw_word_t word;
1400 wait_result_t res;
1401 boolean_t istate;
1402 boolean_t not_shared;
1403
1404 #if CONFIG_DTRACE
1405 uint64_t wait_interval = 0;
1406 int readers_at_sleep = 0;
1407 boolean_t dtrace_ls_initialized = FALSE;
1408 boolean_t dtrace_rwl_shared_to_excl_spin, dtrace_rwl_shared_to_excl_block, dtrace_ls_enabled = FALSE;
1409 #endif
1410
1411 while (!lck_rw_drain_status(lock, LCK_RW_SHARED_MASK, FALSE)) {
1412
1413 word.data = ordered_load_rw(lock);
1414 #if CONFIG_DTRACE
1415 if (dtrace_ls_initialized == FALSE) {
1416 dtrace_ls_initialized = TRUE;
1417 dtrace_rwl_shared_to_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN] != 0);
1418 dtrace_rwl_shared_to_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK] != 0);
1419 dtrace_ls_enabled = dtrace_rwl_shared_to_excl_spin || dtrace_rwl_shared_to_excl_block;
1420 if (dtrace_ls_enabled) {
1421 /*
1422 * Either sleeping or spinning is happening,
1423 * start a timing of our delay interval now.
1424 */
1425 readers_at_sleep = word.shared_count;
1426 wait_interval = mach_absolute_time();
1427 }
1428 }
1429 #endif
1430
1431 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_START,
1432 trace_lck, word.shared_count, 0, 0, 0);
1433
1434 not_shared = lck_rw_drain_status(lock, LCK_RW_SHARED_MASK, TRUE);
1435
1436 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_END,
1437 trace_lck, lock->lck_rw_shared_count, 0, 0, 0);
1438
1439 if (not_shared)
1440 break;
1441
1442 /*
1443 * if we get here, the spin deadline in lck_rw_wait_on_status()
1444 * has expired w/o the rw_shared_count having drained to 0
1445 * check to see if we're allowed to do a thread_block
1446 */
1447 if (word.can_sleep) {
1448
1449 istate = lck_interlock_lock(lock);
1450
1451 word.data = ordered_load_rw(lock);
1452 if (word.shared_count != 0) {
1453 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_START,
1454 trace_lck, word.shared_count, 0, 0, 0);
1455
1456 word.w_waiting = 1;
1457 ordered_store_rw(lock, word.data);
1458
1459 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockUpgrade);
1460 res = assert_wait(LCK_RW_WRITER_EVENT(lock), THREAD_UNINT);
1461 lck_interlock_unlock(lock, istate);
1462
1463 if (res == THREAD_WAITING) {
1464 res = thread_block(THREAD_CONTINUE_NULL);
1465 slept++;
1466 }
1467 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_END,
1468 trace_lck, res, slept, 0, 0);
1469 } else {
1470 lck_interlock_unlock(lock, istate);
1471 break;
1472 }
1473 }
1474 }
1475 #if CONFIG_DTRACE
1476 /*
1477 * We infer whether we took the sleep/spin path above by checking readers_at_sleep.
1478 */
1479 if (dtrace_ls_enabled == TRUE) {
1480 if (slept == 0) {
1481 LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN, lock, mach_absolute_time() - wait_interval, 0);
1482 } else {
1483 LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK, lock,
1484 mach_absolute_time() - wait_interval, 1,
1485 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1486 }
1487 }
1488 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lock, 1);
1489 #endif
1490 return (TRUE);
1491 }
1492
1493
1494 /*
1495 * Routine: lck_rw_lock_exclusive_to_shared
1496 */
1497
1498 void lck_rw_lock_exclusive_to_shared(lck_rw_t *lock)
1499 {
1500 uint32_t data, prev;
1501
1502 assertf(lock->lck_rw_owner == current_thread(), "state=0x%x, owner=%p", lock->lck_rw_data, lock->lck_rw_owner);
1503 ordered_store_rw_owner(lock, THREAD_NULL);
1504 for ( ; ; ) {
1505 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_release_smp);
1506 if (data & LCK_RW_INTERLOCK) {
1507 #if __SMP__
1508 atomic_exchange_abort();
1509 lck_rw_interlock_spin(lock); /* wait for interlock to clear */
1510 continue;
1511 #else
1512 panic("lck_rw_lock_exclusive_to_shared(): Interlock locked (%p): %x", lock, data);
1513 #endif // __SMP__
1514 }
1515 data += LCK_RW_SHARED_READER;
1516 if (data & LCK_RW_WANT_UPGRADE)
1517 data &= ~(LCK_RW_WANT_UPGRADE);
1518 else
1519 data &= ~(LCK_RW_WANT_EXCL);
1520 if (!((prev & LCK_RW_W_WAITING) && (prev & LCK_RW_PRIV_EXCL)))
1521 data &= ~(LCK_RW_W_WAITING);
1522 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_release_smp))
1523 break;
1524 cpu_pause();
1525 }
1526 return lck_rw_lock_exclusive_to_shared_gen(lock, prev);
1527 }
1528
1529 /*
1530 * Routine: lck_rw_lock_exclusive_to_shared_gen
1531 * Function:
1532 * Fast path has already dropped
1533 * our exclusive state and bumped lck_rw_shared_count
1534 * all we need to do here is determine if anyone
1535 * needs to be awakened.
1536 */
1537 static void
1538 lck_rw_lock_exclusive_to_shared_gen(
1539 lck_rw_t *lck,
1540 uint32_t prior_lock_state)
1541 {
1542 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
1543 lck_rw_word_t fake_lck;
1544
1545 /*
1546 * prior_lock state is a snapshot of the 1st word of the
1547 * lock in question... we'll fake up a pointer to it
1548 * and carefully not access anything beyond whats defined
1549 * in the first word of a lck_rw_t
1550 */
1551 fake_lck.data = prior_lock_state;
1552
1553 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_START,
1554 trace_lck, fake_lck->want_excl, fake_lck->want_upgrade, 0, 0);
1555
1556 /*
1557 * don't wake up anyone waiting to take the lock exclusively
1558 * since we hold a read count... when the read count drops to 0,
1559 * the writers will be woken.
1560 *
1561 * wake up any waiting readers if we don't have any writers waiting,
1562 * or the lock is NOT marked as rw_priv_excl (writers have privilege)
1563 */
1564 if (!(fake_lck.priv_excl && fake_lck.w_waiting) && fake_lck.r_waiting)
1565 thread_wakeup(LCK_RW_READER_EVENT(lck));
1566
1567 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_END,
1568 trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, lck->lck_rw_shared_count, 0);
1569
1570 #if CONFIG_DTRACE
1571 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_TO_SHARED_DOWNGRADE, lck, 0);
1572 #endif
1573 }
1574
1575
1576 /*
1577 * Routine: lck_rw_try_lock
1578 */
1579 boolean_t
1580 lck_rw_try_lock(
1581 lck_rw_t *lck,
1582 lck_rw_type_t lck_rw_type)
1583 {
1584 if (lck_rw_type == LCK_RW_TYPE_SHARED)
1585 return lck_rw_try_lock_shared(lck);
1586 else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
1587 return lck_rw_try_lock_exclusive(lck);
1588 else
1589 panic("lck_rw_try_lock(): Invalid rw lock type: %x", lck_rw_type);
1590 return FALSE;
1591 }
1592
1593 /*
1594 * Routine: lck_rw_try_lock_shared
1595 */
1596
1597 boolean_t lck_rw_try_lock_shared(lck_rw_t *lock)
1598 {
1599 uint32_t data, prev;
1600
1601 for ( ; ; ) {
1602 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
1603 if (data & LCK_RW_INTERLOCK) {
1604 #if __SMP__
1605 atomic_exchange_abort();
1606 lck_rw_interlock_spin(lock);
1607 continue;
1608 #else
1609 panic("lck_rw_try_lock_shared(): Interlock locked (%p): %x", lock, data);
1610 #endif
1611 }
1612 if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) {
1613 atomic_exchange_abort();
1614 return FALSE; /* lock is busy */
1615 }
1616 data += LCK_RW_SHARED_READER; /* Increment reader refcount */
1617 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp))
1618 break;
1619 cpu_pause();
1620 }
1621 #if MACH_ASSERT
1622 thread_t owner = ordered_load_rw_owner(lock);
1623 assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
1624 #endif
1625 current_thread()->rwlock_count++;
1626 #if CONFIG_DTRACE
1627 LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED);
1628 #endif /* CONFIG_DTRACE */
1629 return TRUE;
1630 }
1631
1632
1633 /*
1634 * Routine: lck_rw_try_lock_exclusive
1635 */
1636
1637 boolean_t lck_rw_try_lock_exclusive(lck_rw_t *lock)
1638 {
1639 uint32_t data, prev;
1640 thread_t thread;
1641
1642 for ( ; ; ) {
1643 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
1644 if (data & LCK_RW_INTERLOCK) {
1645 #if __SMP__
1646 atomic_exchange_abort();
1647 lck_rw_interlock_spin(lock);
1648 continue;
1649 #else
1650 panic("lck_rw_try_lock_exclusive(): Interlock locked (%p): %x", lock, data);
1651 #endif
1652 }
1653 if (data & (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) {
1654 atomic_exchange_abort();
1655 return FALSE;
1656 }
1657 data |= LCK_RW_WANT_EXCL;
1658 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp))
1659 break;
1660 cpu_pause();
1661 }
1662 thread = current_thread();
1663 thread->rwlock_count++;
1664 #if MACH_ASSERT
1665 thread_t owner = ordered_load_rw_owner(lock);
1666 assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
1667 #endif
1668 ordered_store_rw_owner(lock, thread);
1669 #if CONFIG_DTRACE
1670 LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
1671 #endif /* CONFIG_DTRACE */
1672 return TRUE;
1673 }
1674
1675
1676 /*
1677 * Routine: lck_rw_unlock
1678 */
1679 void
1680 lck_rw_unlock(
1681 lck_rw_t *lck,
1682 lck_rw_type_t lck_rw_type)
1683 {
1684 if (lck_rw_type == LCK_RW_TYPE_SHARED)
1685 lck_rw_unlock_shared(lck);
1686 else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
1687 lck_rw_unlock_exclusive(lck);
1688 else
1689 panic("lck_rw_unlock(): Invalid RW lock type: %d", lck_rw_type);
1690 }
1691
1692
1693 /*
1694 * Routine: lck_rw_unlock_shared
1695 */
1696 void
1697 lck_rw_unlock_shared(
1698 lck_rw_t *lck)
1699 {
1700 lck_rw_type_t ret;
1701
1702 assertf(lck->lck_rw_owner == THREAD_NULL, "state=0x%x, owner=%p", lck->lck_rw_data, lck->lck_rw_owner);
1703 assertf(lck->lck_rw_shared_count > 0, "shared_count=0x%x", lck->lck_rw_shared_count);
1704 ret = lck_rw_done(lck);
1705
1706 if (ret != LCK_RW_TYPE_SHARED)
1707 panic("lck_rw_unlock_shared(): lock %p held in mode: %d", lck, ret);
1708 }
1709
1710
1711 /*
1712 * Routine: lck_rw_unlock_exclusive
1713 */
1714 void
1715 lck_rw_unlock_exclusive(
1716 lck_rw_t *lck)
1717 {
1718 lck_rw_type_t ret;
1719
1720 assertf(lck->lck_rw_owner == current_thread(), "state=0x%x, owner=%p", lck->lck_rw_data, lck->lck_rw_owner);
1721 ret = lck_rw_done(lck);
1722
1723 if (ret != LCK_RW_TYPE_EXCLUSIVE)
1724 panic("lck_rw_unlock_exclusive(): lock %p held in mode: %d", lck, ret);
1725 }
1726
1727
1728 /*
1729 * Routine: lck_rw_lock_exclusive_gen
1730 */
1731 static void
1732 lck_rw_lock_exclusive_gen(
1733 lck_rw_t *lock)
1734 {
1735 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock);
1736 lck_rw_word_t word;
1737 int slept = 0;
1738 boolean_t gotlock = 0;
1739 boolean_t not_shared_or_upgrade = 0;
1740 wait_result_t res = 0;
1741 boolean_t istate;
1742
1743 #if CONFIG_DTRACE
1744 boolean_t dtrace_ls_initialized = FALSE;
1745 boolean_t dtrace_rwl_excl_spin, dtrace_rwl_excl_block, dtrace_ls_enabled= FALSE;
1746 uint64_t wait_interval = 0;
1747 int readers_at_sleep = 0;
1748 #endif
1749
1750 /*
1751 * Try to acquire the lck_rw_want_excl bit.
1752 */
1753 while (!lck_rw_grab(lock, LCK_RW_GRAB_WANT, FALSE)) {
1754
1755 #if CONFIG_DTRACE
1756 if (dtrace_ls_initialized == FALSE) {
1757 dtrace_ls_initialized = TRUE;
1758 dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
1759 dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
1760 dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
1761 if (dtrace_ls_enabled) {
1762 /*
1763 * Either sleeping or spinning is happening,
1764 * start a timing of our delay interval now.
1765 */
1766 readers_at_sleep = lock->lck_rw_shared_count;
1767 wait_interval = mach_absolute_time();
1768 }
1769 }
1770 #endif
1771
1772 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
1773
1774 gotlock = lck_rw_grab(lock, LCK_RW_GRAB_WANT, TRUE);
1775
1776 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_END, trace_lck, 0, 0, gotlock, 0);
1777
1778 if (gotlock)
1779 break;
1780 /*
1781 * if we get here, the deadline has expired w/o us
1782 * being able to grab the lock exclusively
1783 * check to see if we're allowed to do a thread_block
1784 */
1785 word.data = ordered_load_rw(lock);
1786 if (word.can_sleep) {
1787
1788 istate = lck_interlock_lock(lock);
1789 word.data = ordered_load_rw(lock);
1790
1791 if (word.want_excl) {
1792
1793 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
1794
1795 word.w_waiting = 1;
1796 ordered_store_rw(lock, word.data);
1797
1798 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite);
1799 res = assert_wait(LCK_RW_WRITER_EVENT(lock), THREAD_UNINT);
1800 lck_interlock_unlock(lock, istate);
1801
1802 if (res == THREAD_WAITING) {
1803 res = thread_block(THREAD_CONTINUE_NULL);
1804 slept++;
1805 }
1806 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_END, trace_lck, res, slept, 0, 0);
1807 } else {
1808 word.want_excl = 1;
1809 ordered_store_rw(lock, word.data);
1810 lck_interlock_unlock(lock, istate);
1811 break;
1812 }
1813 }
1814 }
1815 /*
1816 * Wait for readers (and upgrades) to finish...
1817 */
1818 while (!lck_rw_drain_status(lock, LCK_RW_SHARED_MASK | LCK_RW_WANT_UPGRADE, FALSE)) {
1819
1820 #if CONFIG_DTRACE
1821 /*
1822 * Either sleeping or spinning is happening, start
1823 * a timing of our delay interval now. If we set it
1824 * to -1 we don't have accurate data so we cannot later
1825 * decide to record a dtrace spin or sleep event.
1826 */
1827 if (dtrace_ls_initialized == FALSE) {
1828 dtrace_ls_initialized = TRUE;
1829 dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
1830 dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
1831 dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
1832 if (dtrace_ls_enabled) {
1833 /*
1834 * Either sleeping or spinning is happening,
1835 * start a timing of our delay interval now.
1836 */
1837 readers_at_sleep = lock->lck_rw_shared_count;
1838 wait_interval = mach_absolute_time();
1839 }
1840 }
1841 #endif
1842
1843 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
1844
1845 not_shared_or_upgrade = lck_rw_drain_status(lock, LCK_RW_SHARED_MASK | LCK_RW_WANT_UPGRADE, TRUE);
1846
1847 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_END, trace_lck, 0, 0, not_shared_or_upgrade, 0);
1848
1849 if (not_shared_or_upgrade)
1850 break;
1851 /*
1852 * if we get here, the deadline has expired w/o us
1853 * being able to grab the lock exclusively
1854 * check to see if we're allowed to do a thread_block
1855 */
1856 word.data = ordered_load_rw(lock);
1857 if (word.can_sleep) {
1858
1859 istate = lck_interlock_lock(lock);
1860 word.data = ordered_load_rw(lock);
1861
1862 if (word.shared_count != 0 || word.want_upgrade) {
1863 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
1864
1865 word.w_waiting = 1;
1866 ordered_store_rw(lock, word.data);
1867
1868 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite);
1869 res = assert_wait(LCK_RW_WRITER_EVENT(lock), THREAD_UNINT);
1870 lck_interlock_unlock(lock, istate);
1871
1872 if (res == THREAD_WAITING) {
1873 res = thread_block(THREAD_CONTINUE_NULL);
1874 slept++;
1875 }
1876 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_END, trace_lck, res, slept, 0, 0);
1877 } else {
1878 lck_interlock_unlock(lock, istate);
1879 /*
1880 * must own the lock now, since we checked for
1881 * readers or upgrade owner behind the interlock
1882 * no need for a call to 'lck_rw_drain_status'
1883 */
1884 break;
1885 }
1886 }
1887 }
1888
1889 #if CONFIG_DTRACE
1890 /*
1891 * Decide what latencies we suffered that are Dtrace events.
1892 * If we have set wait_interval, then we either spun or slept.
1893 * At least we get out from under the interlock before we record
1894 * which is the best we can do here to minimize the impact
1895 * of the tracing.
1896 * If we have set wait_interval to -1, then dtrace was not enabled when we
1897 * started sleeping/spinning so we don't record this event.
1898 */
1899 if (dtrace_ls_enabled == TRUE) {
1900 if (slept == 0) {
1901 LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_EXCL_SPIN, lock,
1902 mach_absolute_time() - wait_interval, 1);
1903 } else {
1904 /*
1905 * For the blocking case, we also record if when we blocked
1906 * it was held for read or write, and how many readers.
1907 * Notice that above we recorded this before we dropped
1908 * the interlock so the count is accurate.
1909 */
1910 LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_EXCL_BLOCK, lock,
1911 mach_absolute_time() - wait_interval, 1,
1912 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1913 }
1914 }
1915 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, 1);
1916 #endif /* CONFIG_DTRACE */
1917 }
1918
1919 /*
1920 * Routine: lck_rw_done
1921 */
1922
1923 lck_rw_type_t lck_rw_done(lck_rw_t *lock)
1924 {
1925 uint32_t data, prev;
1926 boolean_t once = FALSE;
1927
1928 for ( ; ; ) {
1929 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_release_smp);
1930 if (data & LCK_RW_INTERLOCK) { /* wait for interlock to clear */
1931 #if __SMP__
1932 atomic_exchange_abort();
1933 lck_rw_interlock_spin(lock);
1934 continue;
1935 #else
1936 panic("lck_rw_done(): Interlock locked (%p): %x", lock, data);
1937 #endif // __SMP__
1938 }
1939 if (data & LCK_RW_SHARED_MASK) { /* lock is held shared */
1940 assertf(lock->lck_rw_owner == THREAD_NULL, "state=0x%x, owner=%p", lock->lck_rw_data, lock->lck_rw_owner);
1941 data -= LCK_RW_SHARED_READER;
1942 if ((data & LCK_RW_SHARED_MASK) == 0) /* if reader count has now gone to 0, check for waiters */
1943 goto check_waiters;
1944 } else { /* if reader count == 0, must be exclusive lock */
1945 if (data & LCK_RW_WANT_UPGRADE) {
1946 data &= ~(LCK_RW_WANT_UPGRADE);
1947 } else {
1948 if (data & LCK_RW_WANT_EXCL)
1949 data &= ~(LCK_RW_WANT_EXCL);
1950 else /* lock is not 'owned', panic */
1951 panic("Releasing non-exclusive RW lock without a reader refcount!");
1952 }
1953 if (!once) {
1954 // Only check for holder and clear it once
1955 assertf(lock->lck_rw_owner == current_thread(), "state=0x%x, owner=%p", lock->lck_rw_data, lock->lck_rw_owner);
1956 ordered_store_rw_owner(lock, THREAD_NULL);
1957 once = TRUE;
1958 }
1959 check_waiters:
1960 /*
1961 * test the original values to match what
1962 * lck_rw_done_gen is going to do to determine
1963 * which wakeups need to happen...
1964 *
1965 * if !(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting)
1966 */
1967 if (prev & LCK_RW_W_WAITING) {
1968 data &= ~(LCK_RW_W_WAITING);
1969 if ((prev & LCK_RW_PRIV_EXCL) == 0)
1970 data &= ~(LCK_RW_R_WAITING);
1971 } else
1972 data &= ~(LCK_RW_R_WAITING);
1973 }
1974 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_release_smp))
1975 break;
1976 cpu_pause();
1977 }
1978 return lck_rw_done_gen(lock, prev);
1979 }
1980
1981 /*
1982 * Routine: lck_rw_done_gen
1983 *
1984 * called from the assembly language wrapper...
1985 * prior_lock_state is the value in the 1st
1986 * word of the lock at the time of a successful
1987 * atomic compare and exchange with the new value...
1988 * it represents the state of the lock before we
1989 * decremented the rw_shared_count or cleared either
1990 * rw_want_upgrade or rw_want_write and
1991 * the lck_x_waiting bits... since the wrapper
1992 * routine has already changed the state atomically,
1993 * we just need to decide if we should
1994 * wake up anyone and what value to return... we do
1995 * this by examining the state of the lock before
1996 * we changed it
1997 */
1998 static lck_rw_type_t
1999 lck_rw_done_gen(
2000 lck_rw_t *lck,
2001 uint32_t prior_lock_state)
2002 {
2003 lck_rw_word_t fake_lck;
2004 lck_rw_type_t lock_type;
2005 thread_t thread;
2006 uint32_t rwlock_count;
2007
2008 /*
2009 * prior_lock state is a snapshot of the 1st word of the
2010 * lock in question... we'll fake up a pointer to it
2011 * and carefully not access anything beyond whats defined
2012 * in the first word of a lck_rw_t
2013 */
2014 fake_lck.data = prior_lock_state;
2015
2016 if (fake_lck.shared_count <= 1) {
2017 if (fake_lck.w_waiting)
2018 thread_wakeup(LCK_RW_WRITER_EVENT(lck));
2019
2020 if (!(fake_lck.priv_excl && fake_lck.w_waiting) && fake_lck.r_waiting)
2021 thread_wakeup(LCK_RW_READER_EVENT(lck));
2022 }
2023 if (fake_lck.shared_count)
2024 lock_type = LCK_RW_TYPE_SHARED;
2025 else
2026 lock_type = LCK_RW_TYPE_EXCLUSIVE;
2027
2028 /* Check if dropping the lock means that we need to unpromote */
2029 thread = current_thread();
2030 rwlock_count = thread->rwlock_count--;
2031 #if MACH_LDEBUG
2032 if (rwlock_count == 0)
2033 panic("rw lock count underflow for thread %p", thread);
2034 #endif
2035 if ((rwlock_count == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
2036 /* sched_flags checked without lock, but will be rechecked while clearing */
2037 lck_rw_clear_promotion(thread);
2038 }
2039 #if CONFIG_DTRACE
2040 LOCKSTAT_RECORD(LS_LCK_RW_DONE_RELEASE, lck, lock_type == LCK_RW_TYPE_SHARED ? 0 : 1);
2041 #endif
2042 return lock_type;
2043 }
2044
/*
 *	Routine:	lck_rw_lock_shared_gen
 *	Function:
 *		Slow path for taking a read/write lock in shared mode.
 *		Fast path code has determined that this lock
 *		is held exclusively... this is where we spin/block
 *		until we can acquire the lock in the shared mode.
 *
 *		Each pass of the loop calls lck_rw_grab() twice, first with a
 *		final argument of FALSE and then with TRUE (presumably "do not
 *		wait" vs. "spin until the deadline" -- confirm against
 *		lck_rw_grab).  If both attempts fail and the lock is allowed to
 *		sleep, the lock word is re-examined under the interlock and the
 *		thread either takes a shared reference directly or blocks on
 *		the reader event.  If the lock may not sleep, the loop simply
 *		retries.
 */
static void
lck_rw_lock_shared_gen(
	lck_rw_t	*lck)
{
	__kdebug_only uintptr_t	trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
	lck_rw_word_t		word;
	boolean_t		gotlock = 0;
	int			slept = 0;	/* number of times we actually blocked */
	wait_result_t		res = 0;
	boolean_t		istate;		/* value returned by lck_interlock_lock(), handed back on unlock */

#if	CONFIG_DTRACE
	uint64_t wait_interval = 0;
	int readers_at_sleep = 0;
	boolean_t dtrace_ls_initialized = FALSE;
	boolean_t dtrace_rwl_shared_spin, dtrace_rwl_shared_block, dtrace_ls_enabled = FALSE;
#endif /* CONFIG_DTRACE */

	while ( !lck_rw_grab(lck, LCK_RW_GRAB_SHARED, FALSE)) {

#if	CONFIG_DTRACE
		/*
		 * Sample the lockstat probe state once, on the first failed
		 * attempt, so the timing below covers the whole wait.
		 */
		if (dtrace_ls_initialized == FALSE) {
			dtrace_ls_initialized = TRUE;
			dtrace_rwl_shared_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_SPIN] != 0);
			dtrace_rwl_shared_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_BLOCK] != 0);
			dtrace_ls_enabled = dtrace_rwl_shared_spin || dtrace_rwl_shared_block;
			if (dtrace_ls_enabled) {
				/*
				 * Either sleeping or spinning is happening,
				 *  start a timing of our delay interval now.
				 */
				readers_at_sleep = lck->lck_rw_shared_count;
				wait_interval = mach_absolute_time();
			}
		}
#endif

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_START,
			     trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, 0, 0);

		/* Second attempt; the kdebug START/END pair brackets it as the spin phase. */
		gotlock = lck_rw_grab(lck, LCK_RW_GRAB_SHARED, TRUE);

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_END,
			     trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, gotlock, 0);

		if (gotlock)
			break;
		/*
		 * if we get here, the deadline has expired w/o us
		 * being able to grab the lock for read
		 * check to see if we're allowed to do a thread_block
		 */
		if (lck->lck_rw_can_sleep) {

			istate = lck_interlock_lock(lck);

			/* Re-read the lock word now that the interlock is held. */
			word.data = ordered_load_rw(lck);
			if ((word.want_excl || word.want_upgrade) &&
			    ((word.shared_count == 0) || word.priv_excl)) {
				/*
				 * An exclusive holder/upgrader is present and either
				 * there are no readers or priv_excl is set: record
				 * that a reader is waiting and go to sleep.
				 */

				KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_START,
					     trace_lck, word.want_excl, word.want_upgrade, 0, 0);

				word.r_waiting = 1;
				ordered_store_rw(lck, word.data);

				thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockRead);
				/* Register the wait before dropping the interlock. */
				res = assert_wait(LCK_RW_READER_EVENT(lck), THREAD_UNINT);
				lck_interlock_unlock(lck, istate);

				if (res == THREAD_WAITING) {
					res = thread_block(THREAD_CONTINUE_NULL);
					slept++;
				}
				KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_END,
					     trace_lck, res, slept, 0, 0);
			} else {
				/*
				 * The blocking condition no longer holds: take the
				 * shared reference directly while still holding the
				 * interlock, and we are done.
				 */
				word.shared_count++;
				ordered_store_rw(lck, word.data);
				lck_interlock_unlock(lck, istate);
				break;
			}
		}
	}

#if	CONFIG_DTRACE
	/* Report the wait as a spin if we never blocked, otherwise as a block. */
	if (dtrace_ls_enabled == TRUE) {
		if (slept == 0) {
			LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_SPIN, lck, mach_absolute_time() - wait_interval, 0);
		} else {
			LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_BLOCK, lck,
			    mach_absolute_time() - wait_interval, 0,
			    (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
		}
	}
	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lck, 0);
#endif	/* CONFIG_DTRACE */
}
2150
2151
2152 void
2153 lck_rw_assert(
2154 lck_rw_t *lck,
2155 unsigned int type)
2156 {
2157 switch (type) {
2158 case LCK_RW_ASSERT_SHARED:
2159 if ((lck->lck_rw_shared_count != 0) &&
2160 (lck->lck_rw_owner == THREAD_NULL)) {
2161 return;
2162 }
2163 break;
2164 case LCK_RW_ASSERT_EXCLUSIVE:
2165 if ((lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
2166 (lck->lck_rw_shared_count == 0) &&
2167 (lck->lck_rw_owner == current_thread())) {
2168 return;
2169 }
2170 break;
2171 case LCK_RW_ASSERT_HELD:
2172 if (lck->lck_rw_shared_count != 0)
2173 return; // Held shared
2174 if ((lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
2175 (lck->lck_rw_owner == current_thread())) {
2176 return; // Held exclusive
2177 }
2178 break;
2179 case LCK_RW_ASSERT_NOTHELD:
2180 if ((lck->lck_rw_shared_count == 0) &&
2181 !(lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
2182 (lck->lck_rw_owner == THREAD_NULL)) {
2183 return;
2184 }
2185 break;
2186 default:
2187 break;
2188 }
2189 panic("rw lock (%p)%s held (mode=%u)", lck, (type == LCK_RW_ASSERT_NOTHELD ? "" : " not"), type);
2190 }
2191
2192
2193 /*
2194 * Routine: kdp_lck_rw_lock_is_acquired_exclusive
2195 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
2196 */
2197 boolean_t
2198 kdp_lck_rw_lock_is_acquired_exclusive(lck_rw_t *lck) {
2199 if (not_in_kdp) {
2200 panic("panic: rw lock exclusive check done outside of kernel debugger");
2201 }
2202 return ((lck->lck_rw_want_upgrade || lck->lck_rw_want_excl) && (lck->lck_rw_shared_count == 0)) ? TRUE : FALSE;
2203 }
2204
2205 /*
2206 * The C portion of the mutex package. These routines are only invoked
2207 * if the optimized assembler routines can't do the work.
2208 */
2209
/*
 * Forward declaration: lck_mtx_ext_init() is defined further down in this
 * file but is called earlier, from lck_mtx_init_ext() (and from
 * lck_mtx_init() when BER_XXX is defined).
 */

void
lck_mtx_ext_init(
	lck_mtx_ext_t	*lck,
	lck_grp_t	*grp,
	lck_attr_t	*attr);
2219
2220 /*
2221 * Routine: lck_mtx_alloc_init
2222 */
2223 lck_mtx_t *
2224 lck_mtx_alloc_init(
2225 lck_grp_t * grp,
2226 lck_attr_t * attr)
2227 {
2228 lck_mtx_t *lck;
2229
2230 if ((lck = (lck_mtx_t *) kalloc(sizeof(lck_mtx_t))) != 0)
2231 lck_mtx_init(lck, grp, attr);
2232
2233 return (lck);
2234 }
2235
2236 /*
2237 * Routine: lck_mtx_free
2238 */
2239 void
2240 lck_mtx_free(
2241 lck_mtx_t * lck,
2242 lck_grp_t * grp)
2243 {
2244 lck_mtx_destroy(lck, grp);
2245 kfree((void *) lck, sizeof(lck_mtx_t));
2246 }
2247
2248 /*
2249 * Routine: lck_mtx_init
2250 */
2251 void
2252 lck_mtx_init(
2253 lck_mtx_t * lck,
2254 lck_grp_t * grp,
2255 lck_attr_t * attr)
2256 {
2257 #ifdef BER_XXX
2258 lck_mtx_ext_t *lck_ext;
2259 #endif
2260 lck_attr_t *lck_attr;
2261
2262 if (attr != LCK_ATTR_NULL)
2263 lck_attr = attr;
2264 else
2265 lck_attr = &LockDefaultLckAttr;
2266
2267 #ifdef BER_XXX
2268 if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
2269 if ((lck_ext = (lck_mtx_ext_t *) kalloc(sizeof(lck_mtx_ext_t))) != 0) {
2270 lck_mtx_ext_init(lck_ext, grp, lck_attr);
2271 lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
2272 lck->lck_mtx_ptr = lck_ext;
2273 lck->lck_mtx_type = LCK_MTX_TYPE;
2274 }
2275 } else
2276 #endif
2277 {
2278 lck->lck_mtx_ptr = NULL; // Clear any padding in the union fields below
2279 lck->lck_mtx_waiters = 0;
2280 lck->lck_mtx_pri = 0;
2281 lck->lck_mtx_type = LCK_MTX_TYPE;
2282 ordered_store_mtx(lck, 0);
2283 }
2284 lck_grp_reference(grp);
2285 lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
2286 }
2287
2288 /*
2289 * Routine: lck_mtx_init_ext
2290 */
2291 void
2292 lck_mtx_init_ext(
2293 lck_mtx_t * lck,
2294 lck_mtx_ext_t * lck_ext,
2295 lck_grp_t * grp,
2296 lck_attr_t * attr)
2297 {
2298 lck_attr_t *lck_attr;
2299
2300 if (attr != LCK_ATTR_NULL)
2301 lck_attr = attr;
2302 else
2303 lck_attr = &LockDefaultLckAttr;
2304
2305 if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
2306 lck_mtx_ext_init(lck_ext, grp, lck_attr);
2307 lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
2308 lck->lck_mtx_ptr = lck_ext;
2309 lck->lck_mtx_type = LCK_MTX_TYPE;
2310 } else {
2311 lck->lck_mtx_waiters = 0;
2312 lck->lck_mtx_pri = 0;
2313 lck->lck_mtx_type = LCK_MTX_TYPE;
2314 ordered_store_mtx(lck, 0);
2315 }
2316 lck_grp_reference(grp);
2317 lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
2318 }
2319
2320 /*
2321 * Routine: lck_mtx_ext_init
2322 */
2323 void
2324 lck_mtx_ext_init(
2325 lck_mtx_ext_t * lck,
2326 lck_grp_t * grp,
2327 lck_attr_t * attr)
2328 {
2329 bzero((void *) lck, sizeof(lck_mtx_ext_t));
2330
2331 lck->lck_mtx.lck_mtx_type = LCK_MTX_TYPE;
2332
2333 if ((attr->lck_attr_val) & LCK_ATTR_DEBUG) {
2334 lck->lck_mtx_deb.type = MUTEX_TAG;
2335 lck->lck_mtx_attr |= LCK_MTX_ATTR_DEBUG;
2336 }
2337 lck->lck_mtx_grp = grp;
2338
2339 if (grp->lck_grp_attr & LCK_GRP_ATTR_STAT)
2340 lck->lck_mtx_attr |= LCK_MTX_ATTR_STAT;
2341 }
2342
/*
 * The slow versions: out-of-line helpers used by lck_mtx_lock(),
 * lck_mtx_try_lock() and lck_mtx_unlock() when their inline
 * compare-and-exchange fast path fails or cannot be used.
 * lck_mtx_lock_contended() is also called from
 * lck_mtx_lock_spin_internal() with the interlock already held.
 */
static void lck_mtx_lock_contended(lck_mtx_t *lock, thread_t thread, boolean_t interlocked);
static boolean_t lck_mtx_try_lock_contended(lck_mtx_t *lock, thread_t thread);
static void lck_mtx_unlock_contended(lck_mtx_t *lock, thread_t thread, boolean_t interlocked);
2347
2348 /*
2349 * Routine: lck_mtx_verify
2350 *
2351 * Verify if a mutex is valid
2352 */
2353 static inline void
2354 lck_mtx_verify(lck_mtx_t *lock)
2355 {
2356 if (lock->lck_mtx_type != LCK_MTX_TYPE)
2357 panic("Invalid mutex %p", lock);
2358 #if DEVELOPMENT || DEBUG
2359 if (lock->lck_mtx_tag == LCK_MTX_TAG_DESTROYED)
2360 panic("Mutex destroyed %p", lock);
2361 #endif /* DEVELOPMENT || DEBUG */
2362 }
2363
2364 /*
2365 * Routine: lck_mtx_check_preemption
2366 *
2367 * Verify preemption is enabled when attempting to acquire a mutex.
2368 */
2369
2370 static inline void
2371 lck_mtx_check_preemption(lck_mtx_t *lock)
2372 {
2373 #if DEVELOPMENT || DEBUG
2374 int pl = get_preemption_level();
2375
2376 if (pl != 0)
2377 panic("Attempt to take mutex with preemption disabled. Lock=%p, level=%d", lock, pl);
2378 #else
2379 (void)lock;
2380 #endif
2381 }
2382
2383 /*
2384 * Routine: lck_mtx_lock
2385 */
2386 void
2387 lck_mtx_lock(lck_mtx_t *lock)
2388 {
2389 thread_t thread;
2390
2391 lck_mtx_verify(lock);
2392 lck_mtx_check_preemption(lock);
2393 thread = current_thread();
2394 if (atomic_compare_exchange(&lock->lck_mtx_data, 0, LCK_MTX_THREAD_TO_STATE(thread),
2395 memory_order_acquire_smp, FALSE)) {
2396 #if CONFIG_DTRACE
2397 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, lock, 0);
2398 #endif /* CONFIG_DTRACE */
2399 return;
2400 }
2401 lck_mtx_lock_contended(lock, thread, FALSE);
2402 }
2403
/*
 *	Routine:	lck_mtx_lock_contended
 *
 *	This is the slow version of mutex locking.
 *
 *	lock:		mutex to acquire
 *	thread:		the acquiring thread (callers pass current_thread())
 *	interlocked:	TRUE when the caller already holds the interlock, as
 *			lck_mtx_lock_spin_internal() does; the function then
 *			jumps straight to the point just after interlock_lock().
 *
 *	The loop retries the compare-and-exchange, then takes the interlock
 *	and inspects the holder.  While another thread holds the mutex the
 *	waiters bit is set and lck_mtx_lock_wait() is called; once no holder
 *	is recorded, ownership is stored and the interlock released.
 */
static void NOINLINE
lck_mtx_lock_contended(lck_mtx_t *lock, thread_t thread, boolean_t interlocked)
{
	thread_t	holding_thread;
	uintptr_t	state;
	int		waiters;

	if (interlocked)
		goto interlock_held;	/* skip the CAS and interlock_lock() below */

	for ( ; ; ) {
		/* The lock may have been released since the caller's attempt */
		if (atomic_compare_exchange(&lock->lck_mtx_data, 0, LCK_MTX_THREAD_TO_STATE(thread),
						memory_order_acquire_smp, FALSE))
			return;
		interlock_lock(lock);
interlock_held:
		state = ordered_load_mtx(lock);
		holding_thread = LCK_MTX_STATE_TO_THREAD(state);
		if (holding_thread == NULL)
			break;		/* no holder: leave the loop with the interlock held */
		ordered_store_mtx(lock, (state | LCK_ILOCK | ARM_LCK_WAITERS)); // Set waiters bit and wait
		/*
		 * NOTE(review): the loop re-takes the interlock after this call,
		 * so lck_mtx_lock_wait() presumably returns with it released --
		 * confirm against its definition.
		 */
		lck_mtx_lock_wait(lock, holding_thread);
	}
	/* lck_mtx_lock_acquire() reports whether waiters remain */
	waiters = lck_mtx_lock_acquire(lock);
	state = LCK_MTX_THREAD_TO_STATE(thread);
	if (waiters != 0)
		state |= ARM_LCK_WAITERS;
#if __SMP__
	state |= LCK_ILOCK;				// Preserve interlock
	ordered_store_mtx(lock, state);	// Set ownership
	interlock_unlock(lock);			// Release interlock, enable preemption
#else
	ordered_store_mtx(lock, state);	// Set ownership
	enable_preemption();
#endif
	load_memory_barrier();

#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, lock, 0);
#endif /* CONFIG_DTRACE */
}
2448
2449 /*
2450 * Common code for mutex locking as spinlock
2451 */
2452 static inline void
2453 lck_mtx_lock_spin_internal(lck_mtx_t *lock, boolean_t allow_held_as_mutex)
2454 {
2455 uintptr_t state;
2456
2457 interlock_lock(lock);
2458 state = ordered_load_mtx(lock);
2459 if (LCK_MTX_STATE_TO_THREAD(state)) {
2460 if (allow_held_as_mutex)
2461 lck_mtx_lock_contended(lock, current_thread(), TRUE);
2462 else
2463 // "Always" variants can never block. If the lock is held and blocking is not allowed
2464 // then someone is mixing always and non-always calls on the same lock, which is
2465 // forbidden.
2466 panic("Attempting to block on a lock taken as spin-always %p", lock);
2467 return;
2468 }
2469 state &= ARM_LCK_WAITERS; // Preserve waiters bit
2470 state |= (LCK_MTX_SPIN_TAG | LCK_ILOCK); // Add spin tag and maintain interlock
2471 ordered_store_mtx(lock, state);
2472 load_memory_barrier();
2473
2474 #if CONFIG_DTRACE
2475 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN_ACQUIRE, lock, 0);
2476 #endif /* CONFIG_DTRACE */
2477 }
2478
/*
 *	Routine:	lck_mtx_lock_spin
 *
 *	Take the mutex in spin mode after checking the preemption level.
 *	Passing TRUE to lck_mtx_lock_spin_internal() means that if a
 *	thread already holds the mutex, the call falls back to
 *	lck_mtx_lock_contended() instead of panicking.
 */
void
lck_mtx_lock_spin(lck_mtx_t *lock)
{
	lck_mtx_check_preemption(lock);
	lck_mtx_lock_spin_internal(lock, TRUE);
}
2488
/*
 *	Routine:	lck_mtx_lock_spin_always
 *
 *	Take the mutex in spin mode without the preemption-level check.
 *	Passing FALSE to lck_mtx_lock_spin_internal() means the call
 *	panics if a thread already holds the mutex.
 */
void
lck_mtx_lock_spin_always(lck_mtx_t *lock)
{
	lck_mtx_lock_spin_internal(lock, FALSE);
}
2497
2498 /*
2499 * Routine: lck_mtx_try_lock
2500 */
2501 boolean_t
2502 lck_mtx_try_lock(lck_mtx_t *lock)
2503 {
2504 thread_t thread = current_thread();
2505
2506 lck_mtx_verify(lock);
2507 if (atomic_compare_exchange(&lock->lck_mtx_data, 0, LCK_MTX_THREAD_TO_STATE(thread),
2508 memory_order_acquire_smp, FALSE)) {
2509 #if CONFIG_DTRACE
2510 LOCKSTAT_RECORD(LS_LCK_MTX_TRY_LOCK_ACQUIRE, lock, 0);
2511 #endif /* CONFIG_DTRACE */
2512 return TRUE;
2513 }
2514 return lck_mtx_try_lock_contended(lock, thread);
2515 }
2516
2517 static boolean_t NOINLINE
2518 lck_mtx_try_lock_contended(lck_mtx_t *lock, thread_t thread)
2519 {
2520 thread_t holding_thread;
2521 uintptr_t state;
2522 int waiters;
2523
2524 #if __SMP__
2525 interlock_lock(lock);
2526 state = ordered_load_mtx(lock);
2527 holding_thread = LCK_MTX_STATE_TO_THREAD(state);
2528 if (holding_thread) {
2529 interlock_unlock(lock);
2530 return FALSE;
2531 }
2532 #else
2533 disable_preemption_for_thread(thread);
2534 state = ordered_load_mtx(lock);
2535 if (state & LCK_ILOCK)
2536 panic("Unexpected interlock set (%p)", lock);
2537 holding_thread = LCK_MTX_STATE_TO_THREAD(state);
2538 if (holding_thread) {
2539 enable_preemption();
2540 return FALSE;
2541 }
2542 state |= LCK_ILOCK;
2543 ordered_store_mtx(lock, state);
2544 #endif // __SMP__
2545 waiters = lck_mtx_lock_acquire(lock);
2546 state = LCK_MTX_THREAD_TO_STATE(thread);
2547 if (waiters != 0)
2548 state |= ARM_LCK_WAITERS;
2549 #if __SMP__
2550 state |= LCK_ILOCK; // Preserve interlock
2551 ordered_store_mtx(lock, state); // Set ownership
2552 interlock_unlock(lock); // Release interlock, enable preemption
2553 #else
2554 ordered_store_mtx(lock, state); // Set ownership
2555 enable_preemption();
2556 #endif
2557 load_memory_barrier();
2558 return TRUE;
2559 }
2560
2561 static inline boolean_t
2562 lck_mtx_try_lock_spin_internal(lck_mtx_t *lock, boolean_t allow_held_as_mutex)
2563 {
2564 uintptr_t state;
2565
2566 if (!interlock_try(lock))
2567 return FALSE;
2568 state = ordered_load_mtx(lock);
2569 if(LCK_MTX_STATE_TO_THREAD(state)) {
2570 // Lock is held as mutex
2571 if (allow_held_as_mutex)
2572 interlock_unlock(lock);
2573 else
2574 // "Always" variants can never block. If the lock is held as a normal mutex
2575 // then someone is mixing always and non-always calls on the same lock, which is
2576 // forbidden.
2577 panic("Spin-mutex held as full mutex %p", lock);
2578 return FALSE;
2579 }
2580 state &= ARM_LCK_WAITERS; // Preserve waiters bit
2581 state |= (LCK_MTX_SPIN_TAG | LCK_ILOCK); // Add spin tag and maintain interlock
2582 ordered_store_mtx(lock, state);
2583 load_memory_barrier();
2584
2585 #if CONFIG_DTRACE
2586 LOCKSTAT_RECORD(LS_LCK_MTX_TRY_SPIN_LOCK_ACQUIRE, lock, 0);
2587 #endif /* CONFIG_DTRACE */
2588 return TRUE;
2589 }
2590
/*
 *	Routine:	lck_mtx_try_lock_spin
 *
 *	Try to take the mutex in spin mode.  Passing TRUE to
 *	lck_mtx_try_lock_spin_internal() means a mutex held by a thread
 *	makes the call return FALSE rather than panic.
 */
boolean_t
lck_mtx_try_lock_spin(lck_mtx_t *lock)
{
	return lck_mtx_try_lock_spin_internal(lock, TRUE);
}
2599
/*
 *	Routine:	lck_mtx_try_lock_spin_always
 *
 *	Try to take the mutex in spin mode.  Passing FALSE to
 *	lck_mtx_try_lock_spin_internal() means the call panics if the
 *	mutex is found held by a thread.
 */
boolean_t
lck_mtx_try_lock_spin_always(lck_mtx_t *lock)
{
	return lck_mtx_try_lock_spin_internal(lock, FALSE);
}
2608
2609
2610
2611 /*
2612 * Routine: lck_mtx_unlock
2613 */
2614 void
2615 lck_mtx_unlock(lck_mtx_t *lock)
2616 {
2617 thread_t thread = current_thread();
2618 uintptr_t state;
2619 boolean_t ilk_held = FALSE;
2620
2621 lck_mtx_verify(lock);
2622
2623 state = ordered_load_mtx(lock);
2624 if (state & LCK_ILOCK) {
2625 if(LCK_MTX_STATE_TO_THREAD(state) == (thread_t)LCK_MTX_SPIN_TAG)
2626 ilk_held = TRUE; // Interlock is held by (presumably) this thread
2627 goto slow_case;
2628 }
2629 // Locked as a mutex
2630 if (atomic_compare_exchange(&lock->lck_mtx_data, LCK_MTX_THREAD_TO_STATE(thread), 0,
2631 memory_order_release_smp, FALSE)) {
2632 #if CONFIG_DTRACE
2633 LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, lock, 0);
2634 #endif /* CONFIG_DTRACE */
2635 return;
2636 }
2637 slow_case:
2638 lck_mtx_unlock_contended(lock, thread, ilk_held);
2639 }
2640
/*
 *	Routine:	lck_mtx_unlock_contended
 *
 *	Slow path of lck_mtx_unlock().
 *
 *	lock:		mutex to release
 *	thread:		the releasing thread (the caller passes current_thread())
 *	ilk_held:	TRUE when the interlock is already held, which
 *			lck_mtx_unlock() reports when the lock word carries the
 *			spin tag; ownership is not checked in that case.
 *
 *	When the interlock is not already held it is taken here (SMP) or
 *	preemption is disabled (non-SMP), and the call panics unless
 *	'thread' is the recorded holder.  Waiters, if any, are handed to
 *	lck_mtx_unlock_wakeup(); the lock word is then reduced to its
 *	waiters bit and the interlock released.
 */
static void NOINLINE
lck_mtx_unlock_contended(lck_mtx_t *lock, thread_t thread, boolean_t ilk_held)
{
	uintptr_t	state;

	if (ilk_held) {
		state = ordered_load_mtx(lock);
	} else {
#if	__SMP__
		interlock_lock(lock);
		state = ordered_load_mtx(lock);
		if (thread != LCK_MTX_STATE_TO_THREAD(state))
			panic("lck_mtx_unlock(): Attempt to release lock not owned by thread (%p)", lock);
#else
		disable_preemption_for_thread(thread);
		state = ordered_load_mtx(lock);
		if (state & LCK_ILOCK)
			panic("lck_mtx_unlock(): Unexpected interlock set (%p)", lock);
		if (thread != LCK_MTX_STATE_TO_THREAD(state))
			panic("lck_mtx_unlock(): Attempt to release lock not owned by thread (%p)", lock);
		state |= LCK_ILOCK;
		ordered_store_mtx(lock, state);
#endif
	}
	if (state & ARM_LCK_WAITERS) {
		lck_mtx_unlock_wakeup(lock, thread);
		/* Re-read the lock word after the wakeup call before rewriting it below */
		state = ordered_load_mtx(lock);
	} else {
		/* With no waiters bit set, no inherited priority is expected */
		assertf(lock->lck_mtx_pri == 0, "pri=0x%x", lock->lck_mtx_pri);
	}
	state &= ARM_LCK_WAITERS;		// Retain waiters bit
#if __SMP__
	state |= LCK_ILOCK;
	ordered_store_mtx(lock, state);
	interlock_unlock(lock);
#else
	ordered_store_mtx(lock, state);
	enable_preemption();
#endif

#if	CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, lock, 0);
#endif /* CONFIG_DTRACE */
}
2685
2686 /*
2687 * Routine: lck_mtx_assert
2688 */
2689 void
2690 lck_mtx_assert(lck_mtx_t *lock, unsigned int type)
2691 {
2692 thread_t thread, holder;
2693 uintptr_t state;
2694
2695 state = ordered_load_mtx(lock);
2696 holder = LCK_MTX_STATE_TO_THREAD(state);
2697 if (holder == (thread_t)LCK_MTX_SPIN_TAG) {
2698 // Lock is held in spin mode, owner is unknown.
2699 return; // Punt
2700 }
2701 thread = current_thread();
2702 if (type == LCK_MTX_ASSERT_OWNED) {
2703 if (thread != holder)
2704 panic("lck_mtx_assert(): mutex (%p) owned", lock);
2705 } else if (type == LCK_MTX_ASSERT_NOTOWNED) {
2706 if (thread == holder)
2707 panic("lck_mtx_assert(): mutex (%p) not owned", lock);
2708 } else
2709 panic("lck_mtx_assert(): invalid arg (%u)", type);
2710 }
2711
/*
 *	Routine:	lck_mtx_ilk_unlock
 *
 *	Release the mutex's interlock with interlock_unlock().
 *	Always returns TRUE.
 */
boolean_t
lck_mtx_ilk_unlock(lck_mtx_t *lock)
{
	interlock_unlock(lock);
	return TRUE;
}
2721
2722 /*
2723 * Routine: lck_mtx_convert_spin
2724 *
2725 * Convert a mutex held for spin into a held full mutex
2726 */
2727 void
2728 lck_mtx_convert_spin(lck_mtx_t *lock)
2729 {
2730 thread_t thread = current_thread();
2731 uintptr_t state;
2732 int waiters;
2733
2734 state = ordered_load_mtx(lock);
2735 if (LCK_MTX_STATE_TO_THREAD(state) == thread)
2736 return; // Already owned as mutex, return
2737 if ((state & LCK_ILOCK) == 0 || (LCK_MTX_STATE_TO_THREAD(state) != (thread_t)LCK_MTX_SPIN_TAG))
2738 panic("lck_mtx_convert_spin: Not held as spinlock (%p)", lock);
2739 state &= ~(LCK_MTX_THREAD_MASK); // Clear the spin tag
2740 ordered_store_mtx(lock, state);
2741 waiters = lck_mtx_lock_acquire(lock); // Acquire to manage priority boosts
2742 state = LCK_MTX_THREAD_TO_STATE(thread);
2743 if (waiters != 0)
2744 state |= ARM_LCK_WAITERS;
2745 #if __SMP__
2746 state |= LCK_ILOCK;
2747 ordered_store_mtx(lock, state); // Set ownership
2748 interlock_unlock(lock); // Release interlock, enable preemption
2749 #else
2750 ordered_store_mtx(lock, state); // Set ownership
2751 enable_preemption();
2752 #endif
2753 }
2754
2755
2756 /*
2757 * Routine: lck_mtx_destroy
2758 */
2759 void
2760 lck_mtx_destroy(
2761 lck_mtx_t * lck,
2762 lck_grp_t * grp)
2763 {
2764 if (lck->lck_mtx_type != LCK_MTX_TYPE)
2765 panic("Destroying invalid mutex %p", lck);
2766 if (lck->lck_mtx_tag == LCK_MTX_TAG_DESTROYED)
2767 panic("Destroying previously destroyed lock %p", lck);
2768 lck_mtx_assert(lck, LCK_MTX_ASSERT_NOTOWNED);
2769 lck->lck_mtx_tag = LCK_MTX_TAG_DESTROYED;
2770 lck_grp_lckcnt_decr(grp, LCK_TYPE_MTX);
2771 lck_grp_deallocate(grp);
2772 return;
2773 }
2774
2775 /*
2776 * Routine: lck_spin_assert
2777 */
2778 void
2779 lck_spin_assert(lck_spin_t *lock, unsigned int type)
2780 {
2781 thread_t thread, holder;
2782 uintptr_t state;
2783
2784 if (lock->type != LCK_SPIN_TYPE)
2785 panic("Invalid spinlock %p", lock);
2786
2787 state = lock->lck_spin_data;
2788 holder = (thread_t)(state & ~LCK_ILOCK);
2789 thread = current_thread();
2790 if (type == LCK_ASSERT_OWNED) {
2791 if (holder == 0)
2792 panic("Lock not owned %p = %lx", lock, state);
2793 if (holder != thread)
2794 panic("Lock not owned by current thread %p = %lx", lock, state);
2795 if ((state & LCK_ILOCK) == 0)
2796 panic("Lock bit not set %p = %lx", lock, state);
2797 } else if (type == LCK_ASSERT_NOTOWNED) {
2798 if (holder != 0) {
2799 if (holder == thread)
2800 panic("Lock owned by current thread %p = %lx", lock, state);
2801 else
2802 panic("Lock %p owned by thread %p", lock, holder);
2803 }
2804 if (state & LCK_ILOCK)
2805 panic("Lock bit set %p = %lx", lock, state);
2806 } else
2807 panic("lck_spin_assert(): invalid arg (%u)", type);
2808 }
2809
2810 boolean_t
2811 lck_rw_lock_yield_shared(lck_rw_t *lck, boolean_t force_yield)
2812 {
2813 lck_rw_word_t word;
2814
2815 lck_rw_assert(lck, LCK_RW_ASSERT_SHARED);
2816
2817 word.data = ordered_load_rw(lck);
2818 if (word.want_excl || word.want_upgrade || force_yield) {
2819 lck_rw_unlock_shared(lck);
2820 mutex_pause(2);
2821 lck_rw_lock_shared(lck);
2822 return TRUE;
2823 }
2824
2825 return FALSE;
2826 }
2827
2828 /*
2829 * Routine: kdp_lck_mtx_lock_spin_is_acquired
2830 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
2831 */
2832 boolean_t
2833 kdp_lck_mtx_lock_spin_is_acquired(lck_mtx_t *lck)
2834 {
2835 uintptr_t state;
2836
2837 if (not_in_kdp) {
2838 panic("panic: spinlock acquired check done outside of kernel debugger");
2839 }
2840 state = ordered_load_mtx(lck);
2841 if (state == LCK_MTX_TAG_DESTROYED)
2842 return FALSE;
2843 if (LCK_MTX_STATE_TO_THREAD(state) || (state & LCK_ILOCK))
2844 return TRUE;
2845 return FALSE;
2846 }
2847
2848 void
2849 kdp_lck_mtx_find_owner(__unused struct waitq * waitq, event64_t event, thread_waitinfo_t * waitinfo)
2850 {
2851 lck_mtx_t * mutex = LCK_EVENT_TO_MUTEX(event);
2852 waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(mutex);
2853 uintptr_t state = ordered_load_mtx(mutex);
2854 thread_t holder = LCK_MTX_STATE_TO_THREAD(state);
2855 if ((uintptr_t)holder == (uintptr_t)LCK_MTX_SPIN_TAG) {
2856 waitinfo->owner = STACKSHOT_WAITOWNER_MTXSPIN;
2857 } else {
2858 assertf(state != (uintptr_t)LCK_MTX_TAG_DESTROYED, "state=0x%llx", (uint64_t)state);
2859 assertf(state != (uintptr_t)LCK_MTX_TAG_INDIRECT, "state=0x%llx", (uint64_t)state);
2860 waitinfo->owner = thread_tid(holder);
2861 }
2862 }
2863
2864 void
2865 kdp_rwlck_find_owner(__unused struct waitq * waitq, event64_t event, thread_waitinfo_t * waitinfo)
2866 {
2867 lck_rw_t *rwlck = NULL;
2868 switch(waitinfo->wait_type) {
2869 case kThreadWaitKernelRWLockRead:
2870 rwlck = READ_EVENT_TO_RWLOCK(event);
2871 break;
2872 case kThreadWaitKernelRWLockWrite:
2873 case kThreadWaitKernelRWLockUpgrade:
2874 rwlck = WRITE_EVENT_TO_RWLOCK(event);
2875 break;
2876 default:
2877 panic("%s was called with an invalid blocking type", __FUNCTION__);
2878 break;
2879 }
2880 waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(rwlck);
2881 waitinfo->owner = thread_tid(rwlck->lck_rw_owner);
2882 }