[apple/xnu.git] osfmk/arm/locks_arm.c (xnu-4903.241.1)
1 /*
2 * Copyright (c) 2007-2017 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System Copyright (c) 1991,1990,1989,1988,1987 Carnegie
33 * Mellon University All Rights Reserved.
34 *
35 * Permission to use, copy, modify and distribute this software and its
36 * documentation is hereby granted, provided that both the copyright notice
37 * and this permission notice appear in all copies of the software,
38 * derivative works or modified versions, and any portions thereof, and that
39 * both notices appear in supporting documentation.
40 *
41 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.
42 * CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES
43 * WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
44 *
45 * Carnegie Mellon requests users of this software to return to
46 *
47 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
48 * School of Computer Science Carnegie Mellon University Pittsburgh PA
49 * 15213-3890
50 *
51 * any improvements or extensions that they make and grant Carnegie Mellon the
52 * rights to redistribute these changes.
53 */
54 /*
55 * File: kern/lock.c
56 * Author: Avadis Tevanian, Jr., Michael Wayne Young
57 * Date: 1985
58 *
59 * Locking primitives implementation
60 */
61
62 #define ATOMIC_PRIVATE 1
63 #define LOCK_PRIVATE 1
64
65 #include <mach_ldebug.h>
66
67 #include <kern/kalloc.h>
68 #include <kern/locks.h>
69 #include <kern/misc_protos.h>
70 #include <kern/thread.h>
71 #include <kern/processor.h>
72 #include <kern/sched_prim.h>
73 #include <kern/xpr.h>
74 #include <kern/debug.h>
75 #include <kern/kcdata.h>
76 #include <string.h>
77
78 #include <arm/cpu_data_internal.h>
79 #include <arm/proc_reg.h>
80 #include <arm/smp.h>
81 #include <machine/atomic.h>
82 #include <machine/machine_cpu.h>
83
84 #include <sys/kdebug.h>
85
86 /*
87 * We need only enough declarations from the BSD-side to be able to
88 * test if our probe is active, and to call __dtrace_probe(). Setting
89 * NEED_DTRACE_DEFS gets a local copy of those definitions pulled in.
90 */
91 #if CONFIG_DTRACE
92 #define NEED_DTRACE_DEFS
93 #include <../bsd/sys/lockstat.h>
94
95 #define DTRACE_RW_SHARED 0x0 //reader
96 #define DTRACE_RW_EXCL 0x1 //writer
97 #define DTRACE_NO_FLAG 0x0 //not applicable
98
99 #endif /* CONFIG_DTRACE */
100
101 #define LCK_RW_LCK_EXCLUSIVE_CODE 0x100
102 #define LCK_RW_LCK_EXCLUSIVE1_CODE 0x101
103 #define LCK_RW_LCK_SHARED_CODE 0x102
104 #define LCK_RW_LCK_SH_TO_EX_CODE 0x103
105 #define LCK_RW_LCK_SH_TO_EX1_CODE 0x104
106 #define LCK_RW_LCK_EX_TO_SH_CODE 0x105
107
108
109 #define ANY_LOCK_DEBUG (USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG)
110
111 // Panic in tests that check lock usage correctness
112 // These are undesirable when in a panic or when a debugger is running.
113 #define LOCK_CORRECTNESS_PANIC() (kernel_debugger_entry_count == 0)
114
115 unsigned int LcksOpts = 0;
116
117 #if CONFIG_DTRACE && __SMP__
118 extern uint64_t dtrace_spin_threshold;
119 #endif
120
121 /* Forwards */
122
123
124 #if USLOCK_DEBUG
125 /*
126 * Perform simple lock checks.
127 */
128 int uslock_check = 1;
129 int max_lock_loops = 100000000;
130 decl_simple_lock_data(extern, printf_lock)
131 decl_simple_lock_data(extern, panic_lock)
132 #endif /* USLOCK_DEBUG */
133
134 extern unsigned int not_in_kdp;
135
136 /*
137 * We often want to know the addresses of the callers
138 * of the various lock routines. However, this information
139 * is only used for debugging and statistics.
140 */
141 typedef void *pc_t;
142 #define INVALID_PC ((void *) VM_MAX_KERNEL_ADDRESS)
143 #define INVALID_THREAD ((void *) VM_MAX_KERNEL_ADDRESS)
144
145 #ifdef lint
146 /*
147 * Eliminate lint complaints about unused local pc variables.
148 */
149 #define OBTAIN_PC(pc,l) ++pc
150 #else /* lint */
151 #define OBTAIN_PC(pc,l)
152 #endif /* lint */
153
154
155 /*
156 * Portable lock package implementation of usimple_locks.
157 */
158
159 #if USLOCK_DEBUG
160 #define USLDBG(stmt) stmt
161 void usld_lock_init(usimple_lock_t, unsigned short);
162 void usld_lock_pre(usimple_lock_t, pc_t);
163 void usld_lock_post(usimple_lock_t, pc_t);
164 void usld_unlock(usimple_lock_t, pc_t);
165 void usld_lock_try_pre(usimple_lock_t, pc_t);
166 void usld_lock_try_post(usimple_lock_t, pc_t);
167 int usld_lock_common_checks(usimple_lock_t, const char *);
168 #else /* USLOCK_DEBUG */
169 #define USLDBG(stmt)
170 #endif /* USLOCK_DEBUG */
171
172 /*
173 * Owner thread pointer when lock held in spin mode
174 */
175 #define LCK_MTX_SPIN_TAG 0xfffffff0
176
177
178 #define interlock_lock(lock) hw_lock_bit ((hw_lock_bit_t*)(&(lock)->lck_mtx_data), LCK_ILOCK_BIT)
179 #define interlock_try(lock) hw_lock_bit_try((hw_lock_bit_t*)(&(lock)->lck_mtx_data), LCK_ILOCK_BIT)
180 #define interlock_unlock(lock) hw_unlock_bit ((hw_lock_bit_t*)(&(lock)->lck_mtx_data), LCK_ILOCK_BIT)
181 #define lck_rw_ilk_lock(lock) hw_lock_bit ((hw_lock_bit_t*)(&(lock)->lck_rw_tag), LCK_RW_INTERLOCK_BIT)
182 #define lck_rw_ilk_unlock(lock) hw_unlock_bit((hw_lock_bit_t*)(&(lock)->lck_rw_tag), LCK_RW_INTERLOCK_BIT)
183
184 #define memory_barrier() __c11_atomic_thread_fence(memory_order_acq_rel_smp)
185 #define load_memory_barrier() __c11_atomic_thread_fence(memory_order_acquire_smp)
186 #define store_memory_barrier() __c11_atomic_thread_fence(memory_order_release_smp)
187
188 // Enforce program order of loads and stores.
189 #define ordered_load(target, type) \
190 __c11_atomic_load((_Atomic type *)(target), memory_order_relaxed)
191 #define ordered_store(target, type, value) \
192 __c11_atomic_store((_Atomic type *)(target), value, memory_order_relaxed)
193
194 #define ordered_load_mtx(lock) ordered_load(&(lock)->lck_mtx_data, uintptr_t)
195 #define ordered_store_mtx(lock, value) ordered_store(&(lock)->lck_mtx_data, uintptr_t, (value))
196 #define ordered_load_rw(lock) ordered_load(&(lock)->lck_rw_data, uint32_t)
197 #define ordered_store_rw(lock, value) ordered_store(&(lock)->lck_rw_data, uint32_t, (value))
198 #define ordered_load_rw_owner(lock) ordered_load(&(lock)->lck_rw_owner, thread_t)
199 #define ordered_store_rw_owner(lock, value) ordered_store(&(lock)->lck_rw_owner, thread_t, (value))
200 #define ordered_load_hw(lock) ordered_load(&(lock)->lock_data, uintptr_t)
201 #define ordered_store_hw(lock, value) ordered_store(&(lock)->lock_data, uintptr_t, (value))
202 #define ordered_load_bit(lock) ordered_load((lock), uint32_t)
203 #define ordered_store_bit(lock, value) ordered_store((lock), uint32_t, (value))
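/*
 * Illustrative note (not part of the original file): the ordered_* wrappers
 * above expand to single C11 atomic accesses with memory_order_relaxed, e.g.
 *
 *	ordered_store_rw(lock, word.data);
 *	// expands to:
 *	__c11_atomic_store((_Atomic uint32_t *)(&(lock)->lck_rw_data),
 *	    word.data, memory_order_relaxed);
 *
 * Any stronger ordering in the code below comes from the explicit barrier
 * macros above or from the acquire/release orderings passed to the
 * exclusive-monitor helpers (load_exclusive32/store_exclusive32).
 */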
204
205
206 // Prevent the compiler from reordering memory operations around this
207 #define compiler_memory_fence() __asm__ volatile ("" ::: "memory")
208
209 #define LOCK_PANIC_TIMEOUT 0xc00000
210 #define NOINLINE __attribute__((noinline))
211
212
213 #if __arm__
214 #define interrupts_disabled(mask) (mask & PSR_INTMASK)
215 #else
216 #define interrupts_disabled(mask) (mask & DAIF_IRQF)
217 #endif
218
219
220 #if __arm__
221 #define enable_fiq() __asm__ volatile ("cpsie f" ::: "memory");
222 #define enable_interrupts() __asm__ volatile ("cpsie if" ::: "memory");
223 #endif
224
225 /*
226 * Forward declarations
227 */
228
229 static void lck_rw_lock_shared_gen(lck_rw_t *lck);
230 static void lck_rw_lock_exclusive_gen(lck_rw_t *lck);
231 static boolean_t lck_rw_lock_shared_to_exclusive_success(lck_rw_t *lck);
232 static boolean_t lck_rw_lock_shared_to_exclusive_failure(lck_rw_t *lck, uint32_t prior_lock_state);
233 static void lck_rw_lock_exclusive_to_shared_gen(lck_rw_t *lck, uint32_t prior_lock_state);
234 static lck_rw_type_t lck_rw_done_gen(lck_rw_t *lck, uint32_t prior_lock_state);
235 static boolean_t lck_rw_grab(lck_rw_t *lock, int mode, boolean_t wait);
236
237 /*
238 * atomic exchange API is a low level abstraction of the operations
239 * to atomically read, modify, and write a pointer. This abstraction works
240 * for both Intel and ARMv8.1 compare and exchange atomic instructions as
241 * well as the ARM exclusive instructions.
242 *
243 * atomic_exchange_begin() - begin exchange and retrieve current value
244 * atomic_exchange_complete() - conclude an exchange
245 * atomic_exchange_abort() - cancel an exchange started with atomic_exchange_begin()
246 */
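/*
 * Illustrative sketch (not part of the original file): the typical retry
 * loop built on the exchange API above.  The function name
 * example_atomic_or32 is hypothetical; atomic_test_and_set32() below is a
 * real in-tree user of the same pattern.
 *
 *	static uint32_t
 *	example_atomic_or32(uint32_t *target, uint32_t bits)
 *	{
 *		uint32_t value, prev;
 *
 *		for ( ; ; ) {
 *			value = atomic_exchange_begin32(target, &prev, memory_order_relaxed);
 *			if ((value & bits) == bits) {
 *				atomic_exchange_abort();	// nothing to do, drop the monitor
 *				return value;
 *			}
 *			value |= bits;
 *			if (atomic_exchange_complete32(target, prev, value, memory_order_relaxed))
 *				return value;			// exchange succeeded
 *			// otherwise the monitor was lost; retry
 *		}
 *	}
 */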
247 static uint32_t
248 atomic_exchange_begin32(uint32_t *target, uint32_t *previous, enum memory_order ord)
249 {
250 uint32_t val;
251
252 val = load_exclusive32(target, ord);
253 *previous = val;
254 return val;
255 }
256
257 static boolean_t
258 atomic_exchange_complete32(uint32_t *target, uint32_t previous, uint32_t newval, enum memory_order ord)
259 {
260 (void)previous; // Previous not needed, monitor is held
261 return store_exclusive32(target, newval, ord);
262 }
263
264 static void
265 atomic_exchange_abort(void)
266 {
267 clear_exclusive();
268 }
269
270 static boolean_t
271 atomic_test_and_set32(uint32_t *target, uint32_t test_mask, uint32_t set_mask, enum memory_order ord, boolean_t wait)
272 {
273 uint32_t value, prev;
274
275 for ( ; ; ) {
276 value = atomic_exchange_begin32(target, &prev, ord);
277 if (value & test_mask) {
278 if (wait)
279 wait_for_event(); // Wait with monitor held
280 else
281 atomic_exchange_abort(); // Clear exclusive monitor
282 return FALSE;
283 }
284 value |= set_mask;
285 if (atomic_exchange_complete32(target, prev, value, ord))
286 return TRUE;
287 }
288 }
289
290 void _disable_preemption(void)
291 {
292 thread_t thread = current_thread();
293 unsigned int count;
294
295 count = thread->machine.preemption_count + 1;
296 ordered_store(&thread->machine.preemption_count, unsigned int, count);
297 }
298
299 void _enable_preemption(void)
300 {
301 thread_t thread = current_thread();
302 long state;
303 unsigned int count;
304 #if __arm__
305 #define INTERRUPT_MASK PSR_IRQF
306 #else // __arm__
307 #define INTERRUPT_MASK DAIF_IRQF
308 #endif // __arm__
309
310 count = thread->machine.preemption_count;
311 if (count == 0)
312 panic("Preemption count negative"); // Count will go negative when released
313 count--;
314 if (count > 0)
315 goto update_count; // Preemption is still disabled, just update
316 state = get_interrupts(); // Get interrupt state
317 if (state & INTERRUPT_MASK)
318 goto update_count; // Interrupts are already masked, can't take AST here
319
320 disable_interrupts_noread(); // Disable interrupts
321 ordered_store(&thread->machine.preemption_count, unsigned int, count);
322 if (thread->machine.CpuDatap->cpu_pending_ast & AST_URGENT) {
323 #if __arm__
324 #if __ARM_USER_PROTECT__
325 uintptr_t up = arm_user_protect_begin(thread);
326 #endif // __ARM_USER_PROTECT__
327 enable_fiq();
328 #endif // __arm__
329 ast_taken_kernel(); // Handle urgent AST
330 #if __arm__
331 #if __ARM_USER_PROTECT__
332 arm_user_protect_end(thread, up, TRUE);
333 #endif // __ARM_USER_PROTECT__
334 enable_interrupts();
335 return; // Return early on arm only due to FIQ enabling
336 #endif // __arm__
337 }
338 restore_interrupts(state); // Enable interrupts
339 return;
340
341 update_count:
342 ordered_store(&thread->machine.preemption_count, unsigned int, count);
343 return;
344 }
345
346 int get_preemption_level(void)
347 {
348 return current_thread()->machine.preemption_count;
349 }
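/*
 * Illustrative sketch (not part of the original file): preemption disable and
 * enable must be strictly paired; the final _enable_preemption() is where a
 * pending urgent AST is taken.
 *
 *	_disable_preemption();
 *	... work that must not be preempted ...
 *	_enable_preemption();	// may call ast_taken_kernel() if AST_URGENT is pending
 */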
350
351 /* Forward declarations for unexported functions that are used externally */
352 void hw_lock_bit(hw_lock_bit_t *lock, unsigned int bit);
353 void hw_unlock_bit(hw_lock_bit_t *lock, unsigned int bit);
354
355 #if __SMP__
356 static unsigned int
357 hw_lock_bit_to_contended(hw_lock_bit_t *lock, uint32_t mask, uint32_t timeout);
358 #endif
359
360 static inline unsigned int
361 hw_lock_bit_to_internal(hw_lock_bit_t *lock, unsigned int bit, uint32_t timeout)
362 {
363 unsigned int success = 0;
364 uint32_t mask = (1 << bit);
365 #if !__SMP__
366 uint32_t state;
367 #endif
368
369 #if __SMP__
370 if (__improbable(!atomic_test_and_set32(lock, mask, mask, memory_order_acquire, FALSE)))
371 success = hw_lock_bit_to_contended(lock, mask, timeout);
372 else
373 success = 1;
374 #else // __SMP__
375 (void)timeout;
376 state = ordered_load_bit(lock);
377 if (!(mask & state)) {
378 ordered_store_bit(lock, state | mask);
379 success = 1;
380 }
381 #endif // __SMP__
382
383 #if CONFIG_DTRACE
384 if (success)
385 LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE, lock, bit);
386 #endif
387
388 return success;
389 }
390
391 unsigned int
392 hw_lock_bit_to(hw_lock_bit_t *lock, unsigned int bit, uint32_t timeout)
393 {
394 _disable_preemption();
395 return hw_lock_bit_to_internal(lock, bit, timeout);
396 }
397
398 #if __SMP__
399 static unsigned int NOINLINE
400 hw_lock_bit_to_contended(hw_lock_bit_t *lock, uint32_t mask, uint32_t timeout)
401 {
402 uint64_t end = 0;
403 int i;
404 #if CONFIG_DTRACE
405 uint64_t begin;
406 boolean_t dtrace_enabled = lockstat_probemap[LS_LCK_SPIN_LOCK_SPIN] != 0;
407 if (__improbable(dtrace_enabled))
408 begin = mach_absolute_time();
409 #endif
410 for ( ; ; ) {
411 for (i = 0; i < LOCK_SNOOP_SPINS; i++) {
412 // Always load-exclusive before wfe
413 // This grabs the monitor and wakes up on a release event
414 if (atomic_test_and_set32(lock, mask, mask, memory_order_acquire, TRUE)) {
415 goto end;
416 }
417 }
418 if (end == 0)
419 end = ml_get_timebase() + timeout;
420 else if (ml_get_timebase() >= end)
421 break;
422 }
423 return 0;
424 end:
425 #if CONFIG_DTRACE
426 if (__improbable(dtrace_enabled)) {
427 uint64_t spintime = mach_absolute_time() - begin;
428 if (spintime > dtrace_spin_threshold)
429 LOCKSTAT_RECORD2(LS_LCK_SPIN_LOCK_SPIN, lock, spintime, mask);
430 }
431 #endif
432 return 1;
433 }
434 #endif // __SMP__
435
436 void
437 hw_lock_bit(hw_lock_bit_t *lock, unsigned int bit)
438 {
439 if (hw_lock_bit_to(lock, bit, LOCK_PANIC_TIMEOUT))
440 return;
441 #if __SMP__
442 panic("hw_lock_bit(): timed out (%p)", lock);
443 #else
444 panic("hw_lock_bit(): interlock held (%p)", lock);
445 #endif
446 }
447
448 void
449 hw_lock_bit_nopreempt(hw_lock_bit_t *lock, unsigned int bit)
450 {
451 if (__improbable(get_preemption_level() == 0))
452 panic("Attempt to take no-preempt bitlock %p in preemptible context", lock);
453 if (hw_lock_bit_to_internal(lock, bit, LOCK_PANIC_TIMEOUT))
454 return;
455 #if __SMP__
456 panic("hw_lock_bit_nopreempt(): timed out (%p)", lock);
457 #else
458 panic("hw_lock_bit_nopreempt(): interlock held (%p)", lock);
459 #endif
460 }
461
462 unsigned int
463 hw_lock_bit_try(hw_lock_bit_t *lock, unsigned int bit)
464 {
465 uint32_t mask = (1 << bit);
466 #if !__SMP__
467 uint32_t state;
468 #endif
469 boolean_t success = FALSE;
470
471 _disable_preemption();
472 #if __SMP__
473 // TODO: consider weak (non-looping) atomic test-and-set
474 success = atomic_test_and_set32(lock, mask, mask, memory_order_acquire, FALSE);
475 #else
476 state = ordered_load_bit(lock);
477 if (!(mask & state)) {
478 ordered_store_bit(lock, state | mask);
479 success = TRUE;
480 }
481 #endif // __SMP__
482 if (!success)
483 _enable_preemption();
484
485 #if CONFIG_DTRACE
486 if (success)
487 LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE, lock, bit);
488 #endif
489
490 return success;
491 }
492
493 static inline void
494 hw_unlock_bit_internal(hw_lock_bit_t *lock, unsigned int bit)
495 {
496 uint32_t mask = (1 << bit);
497 #if !__SMP__
498 uint32_t state;
499 #endif
500
501 #if __SMP__
502 __c11_atomic_fetch_and((_Atomic uint32_t *)lock, ~mask, memory_order_release);
503 set_event();
504 #else // __SMP__
505 state = ordered_load_bit(lock);
506 ordered_store_bit(lock, state & ~mask);
507 #endif // __SMP__
508 #if CONFIG_DTRACE
509 LOCKSTAT_RECORD(LS_LCK_SPIN_UNLOCK_RELEASE, lock, bit);
510 #endif
511 }
512
513 /*
514 * Routine: hw_unlock_bit
515 *
516 * Release spin-lock. The second parameter is the bit number to clear.
517 * Decrement the preemption level.
518 */
519 void
520 hw_unlock_bit(hw_lock_bit_t *lock, unsigned int bit)
521 {
522 hw_unlock_bit_internal(lock, bit);
523 _enable_preemption();
524 }
525
526 void
527 hw_unlock_bit_nopreempt(hw_lock_bit_t *lock, unsigned int bit)
528 {
529 if (__improbable(get_preemption_level() == 0))
530 panic("Attempt to release no-preempt bitlock %p in preemptible context", lock);
531 hw_unlock_bit_internal(lock, bit);
532 }
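/*
 * Illustrative sketch (not part of the original file): a bit lock uses one
 * bit of a word as a spinlock, leaving the remaining bits free for other
 * state.  The names example_flags and EXAMPLE_LOCK_BIT are hypothetical.
 *
 *	hw_lock_bit_t	example_flags;		// bit 0 is the lock, bits 1..31 carry data
 *	#define EXAMPLE_LOCK_BIT	0
 *
 *	hw_lock_bit(&example_flags, EXAMPLE_LOCK_BIT);		// disables preemption, spins if held
 *	... update state guarded by the bit ...
 *	hw_unlock_bit(&example_flags, EXAMPLE_LOCK_BIT);	// clears the bit, re-enables preemption
 */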
533
534 /*
535 * Routine: lck_spin_alloc_init
536 */
537 lck_spin_t *
538 lck_spin_alloc_init(
539 lck_grp_t * grp,
540 lck_attr_t * attr)
541 {
542 lck_spin_t *lck;
543
544 if ((lck = (lck_spin_t *) kalloc(sizeof(lck_spin_t))) != 0)
545 lck_spin_init(lck, grp, attr);
546
547 return (lck);
548 }
549
550 /*
551 * Routine: lck_spin_free
552 */
553 void
554 lck_spin_free(
555 lck_spin_t * lck,
556 lck_grp_t * grp)
557 {
558 lck_spin_destroy(lck, grp);
559 kfree((void *) lck, sizeof(lck_spin_t));
560 }
561
562 /*
563 * Routine: lck_spin_init
564 */
565 void
566 lck_spin_init(
567 lck_spin_t * lck,
568 lck_grp_t * grp,
569 __unused lck_attr_t * attr)
570 {
571 hw_lock_init(&lck->hwlock);
572 lck->type = LCK_SPIN_TYPE;
573 lck_grp_reference(grp);
574 lck_grp_lckcnt_incr(grp, LCK_TYPE_SPIN);
575 store_memory_barrier();
576 }
577
578 /*
579 * arm_usimple_lock is a lck_spin_t without a group or attributes
580 */
581 void inline
582 arm_usimple_lock_init(simple_lock_t lck, __unused unsigned short initial_value)
583 {
584 lck->type = LCK_SPIN_TYPE;
585 hw_lock_init(&lck->hwlock);
586 store_memory_barrier();
587 }
588
589
590 /*
591 * Routine: lck_spin_lock
592 */
593 void
594 lck_spin_lock(lck_spin_t *lock)
595 {
596 #if DEVELOPMENT || DEBUG
597 if (lock->type != LCK_SPIN_TYPE)
598 panic("Invalid spinlock %p", lock);
599 #endif // DEVELOPMENT || DEBUG
600 hw_lock_lock(&lock->hwlock);
601 }
602
603 /*
604 * Routine: lck_spin_lock_nopreempt
605 */
606 void
607 lck_spin_lock_nopreempt(lck_spin_t *lock)
608 {
609 #if DEVELOPMENT || DEBUG
610 if (lock->type != LCK_SPIN_TYPE)
611 panic("Invalid spinlock %p", lock);
612 #endif // DEVELOPMENT || DEBUG
613 hw_lock_lock_nopreempt(&lock->hwlock);
614 }
615
616 /*
617 * Routine: lck_spin_try_lock
618 */
619 int
620 lck_spin_try_lock(lck_spin_t *lock)
621 {
622 return hw_lock_try(&lock->hwlock);
623 }
624
625 /*
626 * Routine: lck_spin_try_lock_nopreempt
627 */
628 int
629 lck_spin_try_lock_nopreempt(lck_spin_t *lock)
630 {
631 return hw_lock_try_nopreempt(&lock->hwlock);
632 }
633
634 /*
635 * Routine: lck_spin_unlock
636 */
637 void
638 lck_spin_unlock(lck_spin_t *lock)
639 {
640 #if DEVELOPMENT || DEBUG
641 if ((LCK_MTX_STATE_TO_THREAD(lock->lck_spin_data) != current_thread()) && LOCK_CORRECTNESS_PANIC())
642 panic("Spinlock not owned by thread %p = %lx", lock, lock->lck_spin_data);
643 if (lock->type != LCK_SPIN_TYPE)
644 panic("Invalid spinlock type %p", lock);
645 #endif // DEVELOPMENT || DEBUG
646 hw_lock_unlock(&lock->hwlock);
647 }
648
649 /*
650 * Routine: lck_spin_unlock_nopreempt
651 */
652 void
653 lck_spin_unlock_nopreempt(lck_spin_t *lock)
654 {
655 #if DEVELOPMENT || DEBUG
656 if ((LCK_MTX_STATE_TO_THREAD(lock->lck_spin_data) != current_thread()) && LOCK_CORRECTNESS_PANIC())
657 panic("Spinlock not owned by thread %p = %lx", lock, lock->lck_spin_data);
658 if (lock->type != LCK_SPIN_TYPE)
659 panic("Invalid spinlock type %p", lock);
660 #endif // DEVELOPMENT || DEBUG
661 hw_lock_unlock_nopreempt(&lock->hwlock);
662 }
663
664 /*
665 * Routine: lck_spin_destroy
666 */
667 void
668 lck_spin_destroy(
669 lck_spin_t * lck,
670 lck_grp_t * grp)
671 {
672 if (lck->lck_spin_data == LCK_SPIN_TAG_DESTROYED)
673 return;
674 lck->lck_spin_data = LCK_SPIN_TAG_DESTROYED;
675 lck_grp_lckcnt_decr(grp, LCK_TYPE_SPIN);
676 lck_grp_deallocate(grp);
677 }
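/*
 * Illustrative sketch (not part of the original file): typical lck_spin_t
 * lifecycle using the routines above.  The group name example_grp is
 * hypothetical.
 *
 *	lck_grp_t	*example_grp = lck_grp_alloc_init("example", LCK_GRP_ATTR_NULL);
 *	lck_spin_t	*lock = lck_spin_alloc_init(example_grp, LCK_ATTR_NULL);
 *
 *	lck_spin_lock(lock);		// preemption stays disabled while held
 *	... short, non-blocking critical section ...
 *	lck_spin_unlock(lock);
 *
 *	lck_spin_free(lock, example_grp);
 */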
678
679 /*
680 * Routine: kdp_lck_spin_is_acquired
681 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
682 */
683 boolean_t
684 kdp_lck_spin_is_acquired(lck_spin_t *lck) {
685 if (not_in_kdp) {
686 panic("panic: spinlock acquired check done outside of kernel debugger");
687 }
688 return ((lck->lck_spin_data & ~LCK_SPIN_TAG_DESTROYED) != 0) ? TRUE:FALSE;
689 }
690
691 /*
692 * Initialize a usimple_lock.
693 *
694 * No change in preemption state.
695 */
696 void
697 usimple_lock_init(
698 usimple_lock_t l,
699 unsigned short tag)
700 {
701 #ifndef MACHINE_SIMPLE_LOCK
702 USLDBG(usld_lock_init(l, tag));
703 hw_lock_init(&l->lck_spin_data);
704 #else
705 simple_lock_init((simple_lock_t) l, tag);
706 #endif
707 }
708
709
710 /*
711 * Acquire a usimple_lock.
712 *
713 * Returns with preemption disabled. Note
714 * that the hw_lock routines are responsible for
715 * maintaining preemption state.
716 */
717 void
718 usimple_lock(
719 usimple_lock_t l)
720 {
721 #ifndef MACHINE_SIMPLE_LOCK
722 pc_t pc;
723
724 OBTAIN_PC(pc, l);
725 USLDBG(usld_lock_pre(l, pc));
726
727 if (!hw_lock_to(&l->lck_spin_data, LockTimeOut)) /* Try to get the lock
728 * with a timeout */
729 panic("simple lock deadlock detection - l=%p, cpu=%d, ret=%p", &l, cpu_number(), pc);
730
731 USLDBG(usld_lock_post(l, pc));
732 #else
733 simple_lock((simple_lock_t) l);
734 #endif
735 }
736
737
738 extern void sync(void);
739
740 /*
741 * Release a usimple_lock.
742 *
743 * Returns with preemption enabled. Note
744 * that the hw_lock routines are responsible for
745 * maintaining preemption state.
746 */
747 void
748 usimple_unlock(
749 usimple_lock_t l)
750 {
751 #ifndef MACHINE_SIMPLE_LOCK
752 pc_t pc;
753
754 OBTAIN_PC(pc, l);
755 USLDBG(usld_unlock(l, pc));
756 sync();
757 hw_lock_unlock(&l->lck_spin_data);
758 #else
759 simple_unlock((simple_lock_t) l);
760 #endif
761 }
762
763
764 /*
765 * Conditionally acquire a usimple_lock.
766 *
767 * On success, returns with preemption disabled.
768 * On failure, returns with preemption in the same state
769 * as when first invoked. Note that the hw_lock routines
770 * are responsible for maintaining preemption state.
771 *
772 * XXX No stats are gathered on a miss; I preserved this
773 * behavior from the original assembly-language code, but
774 * doesn't it make sense to log misses? XXX
775 */
776 unsigned int
777 usimple_lock_try(
778 usimple_lock_t l)
779 {
780 #ifndef MACHINE_SIMPLE_LOCK
781 pc_t pc;
782 unsigned int success;
783
784 OBTAIN_PC(pc, l);
785 USLDBG(usld_lock_try_pre(l, pc));
786 if ((success = hw_lock_try(&l->lck_spin_data))) {
787 USLDBG(usld_lock_try_post(l, pc));
788 }
789 return success;
790 #else
791 return (simple_lock_try((simple_lock_t) l));
792 #endif
793 }
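/*
 * Illustrative sketch (not part of the original file): usimple_lock usage.
 * The lock name example_usl is hypothetical.
 *
 *	decl_simple_lock_data(static, example_usl)
 *
 *	usimple_lock_init(&example_usl, 0);
 *	usimple_lock(&example_usl);	// returns with preemption disabled
 *	... critical section ...
 *	usimple_unlock(&example_usl);	// preemption re-enabled
 */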
794
795 #if USLOCK_DEBUG
796 /*
797 * States of a usimple_lock. The default when initializing
798 * a usimple_lock is setting it up for debug checking.
799 */
800 #define USLOCK_CHECKED 0x0001 /* lock is being checked */
801 #define USLOCK_TAKEN 0x0002 /* lock has been taken */
802 #define USLOCK_INIT 0xBAA0 /* lock has been initialized */
803 #define USLOCK_INITIALIZED (USLOCK_INIT|USLOCK_CHECKED)
804 #define USLOCK_CHECKING(l) (uslock_check && \
805 ((l)->debug.state & USLOCK_CHECKED))
806
807 /*
808 * Trace activities of a particularly interesting lock.
809 */
810 void usl_trace(usimple_lock_t, int, pc_t, const char *);
811
812
813 /*
814 * Initialize the debugging information contained
815 * in a usimple_lock.
816 */
817 void
818 usld_lock_init(
819 usimple_lock_t l,
820 __unused unsigned short tag)
821 {
822 if (l == USIMPLE_LOCK_NULL)
823 panic("lock initialization: null lock pointer");
824 l->lock_type = USLOCK_TAG;
825 l->debug.state = uslock_check ? USLOCK_INITIALIZED : 0;
826 l->debug.lock_cpu = l->debug.unlock_cpu = 0;
827 l->debug.lock_pc = l->debug.unlock_pc = INVALID_PC;
828 l->debug.lock_thread = l->debug.unlock_thread = INVALID_THREAD;
829 l->debug.duration[0] = l->debug.duration[1] = 0;
830 l->debug.unlock_cpu = 0;
831 l->debug.unlock_pc = INVALID_PC;
832 l->debug.unlock_thread = l->debug.unlock_thread = INVALID_THREAD;
833 }
834
835
836 /*
837 * These checks apply to all usimple_locks, not just
838 * those with USLOCK_CHECKED turned on.
839 */
840 int
841 usld_lock_common_checks(
842 usimple_lock_t l,
843 const char *caller)
844 {
845 if (l == USIMPLE_LOCK_NULL)
846 panic("%s: null lock pointer", caller);
847 if (l->lock_type != USLOCK_TAG)
848 panic("%s: 0x%x is not a usimple lock", caller, (integer_t) l);
849 if (!(l->debug.state & USLOCK_INIT))
850 panic("%s: 0x%x is not an initialized lock",
851 caller, (integer_t) l);
852 return USLOCK_CHECKING(l);
853 }
854
855
856 /*
857 * Debug checks on a usimple_lock just before attempting
858 * to acquire it.
859 */
860 /* ARGSUSED */
861 void
862 usld_lock_pre(
863 usimple_lock_t l,
864 pc_t pc)
865 {
866 const char *caller = "usimple_lock";
867
868
869 if (!usld_lock_common_checks(l, caller))
870 return;
871
872 /*
873 * Note that we have a weird case where we are getting a lock when we are
874 * in the process of putting the system to sleep. We are running with no
875 * current threads, therefore we can't tell whether we are trying to retake
876 * a lock we already hold or whether another processor holds it. Therefore
877 * we just ignore this test if the locking thread is 0.
878 */
879
880 if ((l->debug.state & USLOCK_TAKEN) && l->debug.lock_thread &&
881 l->debug.lock_thread == (void *) current_thread()) {
882 printf("%s: lock 0x%x already locked (at %p) by",
883 caller, (integer_t) l, l->debug.lock_pc);
884 printf(" current thread %p (new attempt at pc %p)\n",
885 l->debug.lock_thread, pc);
886 panic("%s", caller);
887 }
888 mp_disable_preemption();
889 usl_trace(l, cpu_number(), pc, caller);
890 mp_enable_preemption();
891 }
892
893
894 /*
895 * Debug checks on a usimple_lock just after acquiring it.
896 *
897 * Pre-emption has been disabled at this point,
898 * so we are safe in using cpu_number.
899 */
900 void
901 usld_lock_post(
902 usimple_lock_t l,
903 pc_t pc)
904 {
905 int mycpu;
906 const char *caller = "successful usimple_lock";
907
908
909 if (!usld_lock_common_checks(l, caller))
910 return;
911
912 if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED))
913 panic("%s: lock 0x%x became uninitialized",
914 caller, (integer_t) l);
915 if ((l->debug.state & USLOCK_TAKEN))
916 panic("%s: lock 0x%x became TAKEN by someone else",
917 caller, (integer_t) l);
918
919 mycpu = cpu_number();
920 l->debug.lock_thread = (void *) current_thread();
921 l->debug.state |= USLOCK_TAKEN;
922 l->debug.lock_pc = pc;
923 l->debug.lock_cpu = mycpu;
924
925 usl_trace(l, mycpu, pc, caller);
926 }
927
928
929 /*
930 * Debug checks on a usimple_lock just before
931 * releasing it. Note that the caller has not
932 * yet released the hardware lock.
933 *
934 * Preemption is still disabled, so there's
935 * no problem using cpu_number.
936 */
937 void
938 usld_unlock(
939 usimple_lock_t l,
940 pc_t pc)
941 {
942 int mycpu;
943 const char *caller = "usimple_unlock";
944
945
946 if (!usld_lock_common_checks(l, caller))
947 return;
948
949 mycpu = cpu_number();
950
951 if (!(l->debug.state & USLOCK_TAKEN))
952 panic("%s: lock 0x%x hasn't been taken",
953 caller, (integer_t) l);
954 if (l->debug.lock_thread != (void *) current_thread())
955 panic("%s: unlocking lock 0x%x, owned by thread %p",
956 caller, (integer_t) l, l->debug.lock_thread);
957 if (l->debug.lock_cpu != mycpu) {
958 printf("%s: unlocking lock 0x%x on cpu 0x%x",
959 caller, (integer_t) l, mycpu);
960 printf(" (acquired on cpu 0x%x)\n", l->debug.lock_cpu);
961 panic("%s", caller);
962 }
963 usl_trace(l, mycpu, pc, caller);
964
965 l->debug.unlock_thread = l->debug.lock_thread;
966 l->debug.lock_thread = INVALID_THREAD;
967 l->debug.state &= ~USLOCK_TAKEN;
968 l->debug.unlock_pc = pc;
969 l->debug.unlock_cpu = mycpu;
970 }
971
972
973 /*
974 * Debug checks on a usimple_lock just before
975 * attempting to acquire it.
976 *
977 * Preemption isn't guaranteed to be disabled.
978 */
979 void
980 usld_lock_try_pre(
981 usimple_lock_t l,
982 pc_t pc)
983 {
984 const char *caller = "usimple_lock_try";
985
986 if (!usld_lock_common_checks(l, caller))
987 return;
988 mp_disable_preemption();
989 usl_trace(l, cpu_number(), pc, caller);
990 mp_enable_preemption();
991 }
992
993
994 /*
995 * Debug checks on a usimple_lock just after
996 * successfully attempting to acquire it.
997 *
998 * Preemption has been disabled by the
999 * lock acquisition attempt, so it's safe
1000 * to use cpu_number.
1001 */
1002 void
1003 usld_lock_try_post(
1004 usimple_lock_t l,
1005 pc_t pc)
1006 {
1007 int mycpu;
1008 const char *caller = "successful usimple_lock_try";
1009
1010 if (!usld_lock_common_checks(l, caller))
1011 return;
1012
1013 if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED))
1014 panic("%s: lock 0x%x became uninitialized",
1015 caller, (integer_t) l);
1016 if ((l->debug.state & USLOCK_TAKEN))
1017 panic("%s: lock 0x%x became TAKEN by someone else",
1018 caller, (integer_t) l);
1019
1020 mycpu = cpu_number();
1021 l->debug.lock_thread = (void *) current_thread();
1022 l->debug.state |= USLOCK_TAKEN;
1023 l->debug.lock_pc = pc;
1024 l->debug.lock_cpu = mycpu;
1025
1026 usl_trace(l, mycpu, pc, caller);
1027 }
1028
1029
1030 /*
1031 * For very special cases, set traced_lock to point to a
1032 * specific lock of interest. The result is a series of
1033 * XPRs showing lock operations on that lock. The lock_seq
1034 * value is used to show the order of those operations.
1035 */
1036 usimple_lock_t traced_lock;
1037 unsigned int lock_seq;
1038
1039 void
1040 usl_trace(
1041 usimple_lock_t l,
1042 int mycpu,
1043 pc_t pc,
1044 const char *op_name)
1045 {
1046 if (traced_lock == l) {
1047 XPR(XPR_SLOCK,
1048 "seq %d, cpu %d, %s @ %x\n",
1049 (integer_t) lock_seq, (integer_t) mycpu,
1050 (integer_t) op_name, (integer_t) pc, 0);
1051 lock_seq++;
1052 }
1053 }
1054
1055
1056 #endif /* USLOCK_DEBUG */
1057
1058 /*
1059 * The C portion of the shared/exclusive locks package.
1060 */
1061
1062 /*
1063 * compute the deadline to spin against when
1064 * waiting for a change of state on a lck_rw_t
1065 */
1066 #if __SMP__
1067 static inline uint64_t
1068 lck_rw_deadline_for_spin(lck_rw_t *lck)
1069 {
1070 lck_rw_word_t word;
1071
1072 word.data = ordered_load_rw(lck);
1073 if (word.can_sleep) {
1074 if (word.r_waiting || word.w_waiting || (word.shared_count > machine_info.max_cpus)) {
1075 /*
1076 * there are already threads waiting on this lock... this
1077 * implies that they have spun beyond their deadlines waiting for
1078 * the desired state to show up so we will not bother spinning at this time...
1079 * or
1080 * the current number of threads sharing this lock exceeds our capacity to run them
1081 * concurrently and since all states we're going to spin for require the rw_shared_count
1082 * to be at 0, we'll not bother spinning since the latency for this to happen is
1083 * unpredictable...
1084 */
1085 return (mach_absolute_time());
1086 }
1087 return (mach_absolute_time() + MutexSpin);
1088 } else
1089 return (mach_absolute_time() + (100000LL * 1000000000LL));
1090 }
1091 #endif // __SMP__
1092
1093 static boolean_t
1094 lck_rw_drain_status(lck_rw_t *lock, uint32_t status_mask, boolean_t wait __unused)
1095 {
1096 #if __SMP__
1097 uint64_t deadline = 0;
1098 uint32_t data;
1099
1100 if (wait)
1101 deadline = lck_rw_deadline_for_spin(lock);
1102
1103 for ( ; ; ) {
1104 data = load_exclusive32(&lock->lck_rw_data, memory_order_acquire_smp);
1105 if ((data & status_mask) == 0)
1106 break;
1107 if (wait)
1108 wait_for_event();
1109 else
1110 clear_exclusive();
1111 if (!wait || (mach_absolute_time() >= deadline))
1112 return FALSE;
1113 }
1114 clear_exclusive();
1115 return TRUE;
1116 #else
1117 uint32_t data;
1118
1119 data = ordered_load_rw(lock);
1120 if ((data & status_mask) == 0)
1121 return TRUE;
1122 else
1123 return FALSE;
1124 #endif // __SMP__
1125 }
1126
1127 /*
1128 * Spin while interlock is held.
1129 */
1130 static inline void
1131 lck_rw_interlock_spin(lck_rw_t *lock)
1132 {
1133 #if __SMP__
1134 uint32_t data;
1135
1136 for ( ; ; ) {
1137 data = load_exclusive32(&lock->lck_rw_data, memory_order_relaxed);
1138 if (data & LCK_RW_INTERLOCK)
1139 wait_for_event();
1140 else {
1141 clear_exclusive();
1142 return;
1143 }
1144 }
1145 #else
1146 panic("lck_rw_interlock_spin(): Interlock locked %p %x", lock, lock->lck_rw_data);
1147 #endif
1148 }
1149
1150 /*
1151 * We disable interrupts while holding the RW interlock to prevent an
1152 * interrupt from exacerbating hold time.
1153 * Hence, local helper functions lck_interlock_lock()/lck_interlock_unlock().
1154 */
1155 static inline boolean_t
1156 lck_interlock_lock(lck_rw_t *lck)
1157 {
1158 boolean_t istate;
1159
1160 istate = ml_set_interrupts_enabled(FALSE);
1161 lck_rw_ilk_lock(lck);
1162 return istate;
1163 }
1164
1165 static inline void
1166 lck_interlock_unlock(lck_rw_t *lck, boolean_t istate)
1167 {
1168 lck_rw_ilk_unlock(lck);
1169 ml_set_interrupts_enabled(istate);
1170 }
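/*
 * Illustrative sketch (not part of the original file): the interlock is
 * always taken and released through the paired helpers above, as the waiter
 * paths below do.
 *
 *	boolean_t istate;
 *
 *	istate = lck_interlock_lock(lock);	// disables interrupts, takes LCK_RW_INTERLOCK
 *	... examine or update the lck_rw_t word and set up waits ...
 *	lck_interlock_unlock(lock, istate);	// drops the interlock, restores interrupt state
 */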
1171
1172
1173 #define LCK_RW_GRAB_WANT 0
1174 #define LCK_RW_GRAB_SHARED 1
1175
1176 static boolean_t
1177 lck_rw_grab(lck_rw_t *lock, int mode, boolean_t wait)
1178 {
1179 uint64_t deadline = 0;
1180 uint32_t data, prev;
1181 boolean_t do_exch;
1182
1183 #if __SMP__
1184 if (wait)
1185 deadline = lck_rw_deadline_for_spin(lock);
1186 #else
1187 wait = FALSE; // Don't spin on UP systems
1188 #endif
1189
1190 for ( ; ; ) {
1191 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
1192 if (data & LCK_RW_INTERLOCK) {
1193 atomic_exchange_abort();
1194 lck_rw_interlock_spin(lock);
1195 continue;
1196 }
1197 do_exch = FALSE;
1198 if (mode == LCK_RW_GRAB_WANT) {
1199 if ((data & LCK_RW_WANT_EXCL) == 0) {
1200 data |= LCK_RW_WANT_EXCL;
1201 do_exch = TRUE;
1202 }
1203 } else { // LCK_RW_GRAB_SHARED
1204 if (((data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) == 0) ||
1205 (((data & LCK_RW_SHARED_MASK)) && ((data & LCK_RW_PRIV_EXCL) == 0))) {
1206 data += LCK_RW_SHARED_READER;
1207 do_exch = TRUE;
1208 }
1209 }
1210 if (do_exch) {
1211 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp))
1212 return TRUE;
1213 } else {
1214 if (wait) // Wait with the exclusive monitor held
1215 wait_for_event();
1216 else
1217 atomic_exchange_abort();
1218 if (!wait || (mach_absolute_time() >= deadline))
1219 return FALSE;
1220 }
1221 }
1222 }
1223
1224
1225 /*
1226 * Routine: lck_rw_alloc_init
1227 */
1228 lck_rw_t *
1229 lck_rw_alloc_init(
1230 lck_grp_t *grp,
1231 lck_attr_t *attr)
1232 {
1233 lck_rw_t *lck;
1234
1235 if ((lck = (lck_rw_t *)kalloc(sizeof(lck_rw_t))) != 0)
1236 lck_rw_init(lck, grp, attr);
1237
1238 return lck;
1239 }
1240
1241 /*
1242 * Routine: lck_rw_free
1243 */
1244 void
1245 lck_rw_free(
1246 lck_rw_t *lck,
1247 lck_grp_t *grp)
1248 {
1249 lck_rw_destroy(lck, grp);
1250 kfree(lck, sizeof(lck_rw_t));
1251 }
1252
1253 /*
1254 * Routine: lck_rw_init
1255 */
1256 void
1257 lck_rw_init(
1258 lck_rw_t *lck,
1259 lck_grp_t *grp,
1260 lck_attr_t *attr)
1261 {
1262 if (attr == LCK_ATTR_NULL)
1263 attr = &LockDefaultLckAttr;
1264 memset(lck, 0, sizeof(lck_rw_t));
1265 lck->lck_rw_can_sleep = TRUE;
1266 if ((attr->lck_attr_val & LCK_ATTR_RW_SHARED_PRIORITY) == 0)
1267 lck->lck_rw_priv_excl = TRUE;
1268
1269 lck_grp_reference(grp);
1270 lck_grp_lckcnt_incr(grp, LCK_TYPE_RW);
1271 }
1272
1273
1274 /*
1275 * Routine: lck_rw_destroy
1276 */
1277 void
1278 lck_rw_destroy(
1279 lck_rw_t *lck,
1280 lck_grp_t *grp)
1281 {
1282 if (lck->lck_rw_tag == LCK_RW_TAG_DESTROYED)
1283 return;
1284 #if MACH_LDEBUG
1285 lck_rw_assert(lck, LCK_RW_ASSERT_NOTHELD);
1286 #endif
1287 lck->lck_rw_tag = LCK_RW_TAG_DESTROYED;
1288 lck_grp_lckcnt_decr(grp, LCK_TYPE_RW);
1289 lck_grp_deallocate(grp);
1290 return;
1291 }
1292
1293 /*
1294 * Routine: lck_rw_lock
1295 */
1296 void
1297 lck_rw_lock(
1298 lck_rw_t *lck,
1299 lck_rw_type_t lck_rw_type)
1300 {
1301 if (lck_rw_type == LCK_RW_TYPE_SHARED)
1302 lck_rw_lock_shared(lck);
1303 else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
1304 lck_rw_lock_exclusive(lck);
1305 else
1306 panic("lck_rw_lock(): Invalid RW lock type: %x", lck_rw_type);
1307 }
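/*
 * Illustrative sketch (not part of the original file): typical reader/writer
 * usage of these routines.  The variable rwlock and group example_grp are
 * hypothetical.
 *
 *	lck_rw_t	*rwlock = lck_rw_alloc_init(example_grp, LCK_ATTR_NULL);
 *
 *	lck_rw_lock_shared(rwlock);		// many readers may hold the lock concurrently
 *	... read-only access to protected state ...
 *	lck_rw_unlock_shared(rwlock);
 *
 *	lck_rw_lock_exclusive(rwlock);		// single writer, excludes all readers
 *	... modify protected state ...
 *	lck_rw_unlock_exclusive(rwlock);
 */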
1308
1309 /*
1310 * Routine: lck_rw_lock_exclusive
1311 */
1312 void
1313 lck_rw_lock_exclusive(lck_rw_t *lock)
1314 {
1315 thread_t thread = current_thread();
1316
1317 thread->rwlock_count++;
1318 if (atomic_test_and_set32(&lock->lck_rw_data,
1319 (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK),
1320 LCK_RW_WANT_EXCL, memory_order_acquire_smp, FALSE)) {
1321 #if CONFIG_DTRACE
1322 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
1323 #endif /* CONFIG_DTRACE */
1324 } else
1325 lck_rw_lock_exclusive_gen(lock);
1326 #if MACH_ASSERT
1327 thread_t owner = ordered_load_rw_owner(lock);
1328 assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
1329 #endif
1330 ordered_store_rw_owner(lock, thread);
1331 }
1332
1333 /*
1334 * Routine: lck_rw_lock_shared
1335 */
1336 void
1337 lck_rw_lock_shared(lck_rw_t *lock)
1338 {
1339 uint32_t data, prev;
1340
1341 current_thread()->rwlock_count++;
1342 for ( ; ; ) {
1343 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
1344 if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK)) {
1345 atomic_exchange_abort();
1346 lck_rw_lock_shared_gen(lock);
1347 break;
1348 }
1349 data += LCK_RW_SHARED_READER;
1350 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp))
1351 break;
1352 cpu_pause();
1353 }
1354 #if MACH_ASSERT
1355 thread_t owner = ordered_load_rw_owner(lock);
1356 assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
1357 #endif
1358 #if CONFIG_DTRACE
1359 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED);
1360 #endif /* CONFIG_DTRACE */
1361 return;
1362 }
1363
1364 /*
1365 * Routine: lck_rw_lock_shared_to_exclusive
1366 */
1367 boolean_t
1368 lck_rw_lock_shared_to_exclusive(lck_rw_t *lock)
1369 {
1370 uint32_t data, prev;
1371
1372 for ( ; ; ) {
1373 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
1374 if (data & LCK_RW_INTERLOCK) {
1375 atomic_exchange_abort();
1376 lck_rw_interlock_spin(lock);
1377 continue;
1378 }
1379 if (data & LCK_RW_WANT_UPGRADE) {
1380 data -= LCK_RW_SHARED_READER;
1381 if ((data & LCK_RW_SHARED_MASK) == 0) /* we were the last reader */
1382 data &= ~(LCK_RW_W_WAITING); /* so clear the wait indicator */
1383 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp))
1384 return lck_rw_lock_shared_to_exclusive_failure(lock, prev);
1385 } else {
1386 data |= LCK_RW_WANT_UPGRADE; /* ask for WANT_UPGRADE */
1387 data -= LCK_RW_SHARED_READER; /* and shed our read count */
1388 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp))
1389 break;
1390 }
1391 cpu_pause();
1392 }
1393 /* we now own the WANT_UPGRADE */
1394 if (data & LCK_RW_SHARED_MASK) /* check to see if all of the readers are drained */
1395 lck_rw_lock_shared_to_exclusive_success(lock); /* if not, we need to go wait */
1396 #if MACH_ASSERT
1397 thread_t owner = ordered_load_rw_owner(lock);
1398 assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
1399 #endif
1400 ordered_store_rw_owner(lock, current_thread());
1401 #if CONFIG_DTRACE
1402 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lock, 0);
1403 #endif /* CONFIG_DTRACE */
1404 return TRUE;
1405 }
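/*
 * Illustrative sketch (not part of the original file): a FALSE return from
 * lck_rw_lock_shared_to_exclusive() means the shared hold has already been
 * dropped, so the caller no longer owns the lock in any mode.
 *
 *	lck_rw_lock_shared(rwlock);
 *	if (lck_rw_lock_shared_to_exclusive(rwlock)) {
 *		... held exclusive ...
 *		lck_rw_unlock_exclusive(rwlock);
 *	} else {
 *		// lost the upgrade race: re-acquire from scratch if still needed
 *		lck_rw_lock_exclusive(rwlock);
 *		...
 *		lck_rw_unlock_exclusive(rwlock);
 *	}
 */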
1406
1407
1408 /*
1409 * Routine: lck_rw_lock_shared_to_exclusive_failure
1410 * Function:
1411 * Fast path code has already dropped our read
1412 * count and determined that someone else owns 'lck_rw_want_upgrade'.
1413 * If 'lck_rw_shared_count' == 0, it has also already dropped 'lck_w_waiting';
1414 * all we need to do here is determine if a wakeup is needed.
1415 */
1416 static boolean_t
1417 lck_rw_lock_shared_to_exclusive_failure(
1418 lck_rw_t *lck,
1419 uint32_t prior_lock_state)
1420 {
1421 thread_t thread = current_thread();
1422 uint32_t rwlock_count;
1423
1424 /* Check if dropping the lock means that we need to unpromote */
1425 rwlock_count = thread->rwlock_count--;
1426 #if MACH_LDEBUG
1427 if (rwlock_count == 0) {
1428 panic("rw lock count underflow for thread %p", thread);
1429 }
1430 #endif
1431 if ((prior_lock_state & LCK_RW_W_WAITING) &&
1432 ((prior_lock_state & LCK_RW_SHARED_MASK) == LCK_RW_SHARED_READER)) {
1433 /*
1434 * Someone else has requested upgrade.
1435 * Since we've released the read lock, wake
1436 * him up if he's blocked waiting
1437 */
1438 thread_wakeup(LCK_RW_WRITER_EVENT(lck));
1439 }
1440
1441 if ((rwlock_count == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
1442 /* sched_flags checked without lock, but will be rechecked while clearing */
1443 lck_rw_clear_promotion(thread, unslide_for_kdebug(lck));
1444 }
1445
1446 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_NONE,
1447 VM_KERNEL_UNSLIDE_OR_PERM(lck), lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0);
1448
1449 return (FALSE);
1450 }
1451
1452 /*
1453 * Routine: lck_rw_lock_shared_to_exclusive_success
1454 * Function:
1455 * Fast path code has already dropped our read
1456 * count and successfully acquired 'lck_rw_want_upgrade';
1457 * we just need to wait for the rest of the readers to drain
1458 * and then we can return as the exclusive holder of this lock.
1459 */
1460 static boolean_t
1461 lck_rw_lock_shared_to_exclusive_success(
1462 lck_rw_t *lock)
1463 {
1464 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock);
1465 int slept = 0;
1466 lck_rw_word_t word;
1467 wait_result_t res;
1468 boolean_t istate;
1469 boolean_t not_shared;
1470
1471 #if CONFIG_DTRACE
1472 uint64_t wait_interval = 0;
1473 int readers_at_sleep = 0;
1474 boolean_t dtrace_ls_initialized = FALSE;
1475 boolean_t dtrace_rwl_shared_to_excl_spin, dtrace_rwl_shared_to_excl_block, dtrace_ls_enabled = FALSE;
1476 #endif
1477
1478 while (!lck_rw_drain_status(lock, LCK_RW_SHARED_MASK, FALSE)) {
1479
1480 word.data = ordered_load_rw(lock);
1481 #if CONFIG_DTRACE
1482 if (dtrace_ls_initialized == FALSE) {
1483 dtrace_ls_initialized = TRUE;
1484 dtrace_rwl_shared_to_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN] != 0);
1485 dtrace_rwl_shared_to_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK] != 0);
1486 dtrace_ls_enabled = dtrace_rwl_shared_to_excl_spin || dtrace_rwl_shared_to_excl_block;
1487 if (dtrace_ls_enabled) {
1488 /*
1489 * Either sleeping or spinning is happening,
1490 * start a timing of our delay interval now.
1491 */
1492 readers_at_sleep = word.shared_count;
1493 wait_interval = mach_absolute_time();
1494 }
1495 }
1496 #endif
1497
1498 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_START,
1499 trace_lck, word.shared_count, 0, 0, 0);
1500
1501 not_shared = lck_rw_drain_status(lock, LCK_RW_SHARED_MASK, TRUE);
1502
1503 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_END,
1504 trace_lck, lock->lck_rw_shared_count, 0, 0, 0);
1505
1506 if (not_shared)
1507 break;
1508
1509 /*
1510 * if we get here, the spin deadline in lck_rw_drain_status()
1511 * has expired w/o the rw_shared_count having drained to 0
1512 * check to see if we're allowed to do a thread_block
1513 */
1514 if (word.can_sleep) {
1515
1516 istate = lck_interlock_lock(lock);
1517
1518 word.data = ordered_load_rw(lock);
1519 if (word.shared_count != 0) {
1520 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_START,
1521 trace_lck, word.shared_count, 0, 0, 0);
1522
1523 word.w_waiting = 1;
1524 ordered_store_rw(lock, word.data);
1525
1526 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockUpgrade);
1527 res = assert_wait(LCK_RW_WRITER_EVENT(lock),
1528 THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
1529 lck_interlock_unlock(lock, istate);
1530
1531 if (res == THREAD_WAITING) {
1532 res = thread_block(THREAD_CONTINUE_NULL);
1533 slept++;
1534 }
1535 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_END,
1536 trace_lck, res, slept, 0, 0);
1537 } else {
1538 lck_interlock_unlock(lock, istate);
1539 break;
1540 }
1541 }
1542 }
1543 #if CONFIG_DTRACE
1544 /*
1545 * We infer whether we took the sleep/spin path above by checking readers_at_sleep.
1546 */
1547 if (dtrace_ls_enabled == TRUE) {
1548 if (slept == 0) {
1549 LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN, lock, mach_absolute_time() - wait_interval, 0);
1550 } else {
1551 LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK, lock,
1552 mach_absolute_time() - wait_interval, 1,
1553 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1554 }
1555 }
1556 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lock, 1);
1557 #endif
1558 return (TRUE);
1559 }
1560
1561
1562 /*
1563 * Routine: lck_rw_lock_exclusive_to_shared
1564 */
1565
1566 void lck_rw_lock_exclusive_to_shared(lck_rw_t *lock)
1567 {
1568 uint32_t data, prev;
1569
1570 assertf(lock->lck_rw_owner == current_thread(), "state=0x%x, owner=%p", lock->lck_rw_data, lock->lck_rw_owner);
1571 ordered_store_rw_owner(lock, THREAD_NULL);
1572 for ( ; ; ) {
1573 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_release_smp);
1574 if (data & LCK_RW_INTERLOCK) {
1575 #if __SMP__
1576 atomic_exchange_abort();
1577 lck_rw_interlock_spin(lock); /* wait for interlock to clear */
1578 continue;
1579 #else
1580 panic("lck_rw_lock_exclusive_to_shared(): Interlock locked (%p): %x", lock, data);
1581 #endif // __SMP__
1582 }
1583 data += LCK_RW_SHARED_READER;
1584 if (data & LCK_RW_WANT_UPGRADE)
1585 data &= ~(LCK_RW_WANT_UPGRADE);
1586 else
1587 data &= ~(LCK_RW_WANT_EXCL);
1588 if (!((prev & LCK_RW_W_WAITING) && (prev & LCK_RW_PRIV_EXCL)))
1589 data &= ~(LCK_RW_W_WAITING);
1590 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_release_smp))
1591 break;
1592 cpu_pause();
1593 }
1594 return lck_rw_lock_exclusive_to_shared_gen(lock, prev);
1595 }
1596
1597 /*
1598 * Routine: lck_rw_lock_exclusive_to_shared_gen
1599 * Function:
1600 * Fast path has already dropped
1601 * our exclusive state and bumped lck_rw_shared_count
1602 * all we need to do here is determine if anyone
1603 * needs to be awakened.
1604 */
1605 static void
1606 lck_rw_lock_exclusive_to_shared_gen(
1607 lck_rw_t *lck,
1608 uint32_t prior_lock_state)
1609 {
1610 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
1611 lck_rw_word_t fake_lck;
1612
1613 /*
1614 * prior_lock state is a snapshot of the 1st word of the
1615 * lock in question... we'll fake up a lock word from it
1616 * and carefully not access anything beyond what's defined
1617 * in the first word of a lck_rw_t
1618 */
1619 fake_lck.data = prior_lock_state;
1620
1621 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_START,
1622 trace_lck, fake_lck.want_excl, fake_lck.want_upgrade, 0, 0);
1623
1624 /*
1625 * don't wake up anyone waiting to take the lock exclusively
1626 * since we hold a read count... when the read count drops to 0,
1627 * the writers will be woken.
1628 *
1629 * wake up any waiting readers if we don't have any writers waiting,
1630 * or the lock is NOT marked as rw_priv_excl (writers have privilege)
1631 */
1632 if (!(fake_lck.priv_excl && fake_lck.w_waiting) && fake_lck.r_waiting)
1633 thread_wakeup(LCK_RW_READER_EVENT(lck));
1634
1635 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_END,
1636 trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, lck->lck_rw_shared_count, 0);
1637
1638 #if CONFIG_DTRACE
1639 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_TO_SHARED_DOWNGRADE, lck, 0);
1640 #endif
1641 }
1642
1643
1644 /*
1645 * Routine: lck_rw_try_lock
1646 */
1647 boolean_t
1648 lck_rw_try_lock(
1649 lck_rw_t *lck,
1650 lck_rw_type_t lck_rw_type)
1651 {
1652 if (lck_rw_type == LCK_RW_TYPE_SHARED)
1653 return lck_rw_try_lock_shared(lck);
1654 else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
1655 return lck_rw_try_lock_exclusive(lck);
1656 else
1657 panic("lck_rw_try_lock(): Invalid rw lock type: %x", lck_rw_type);
1658 return FALSE;
1659 }
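/*
 * Illustrative sketch (not part of the original file): try-lock callers must
 * handle the busy (FALSE) case and, on success, still balance the hold with
 * the matching unlock.
 *
 *	if (lck_rw_try_lock_exclusive(rwlock)) {
 *		... lock held exclusive ...
 *		lck_rw_unlock_exclusive(rwlock);
 *	} else {
 *		... fall back: defer the work or take the blocking path ...
 *	}
 */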
1660
1661 /*
1662 * Routine: lck_rw_try_lock_shared
1663 */
1664
1665 boolean_t lck_rw_try_lock_shared(lck_rw_t *lock)
1666 {
1667 uint32_t data, prev;
1668
1669 for ( ; ; ) {
1670 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
1671 if (data & LCK_RW_INTERLOCK) {
1672 #if __SMP__
1673 atomic_exchange_abort();
1674 lck_rw_interlock_spin(lock);
1675 continue;
1676 #else
1677 panic("lck_rw_try_lock_shared(): Interlock locked (%p): %x", lock, data);
1678 #endif
1679 }
1680 if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) {
1681 atomic_exchange_abort();
1682 return FALSE; /* lock is busy */
1683 }
1684 data += LCK_RW_SHARED_READER; /* Increment reader refcount */
1685 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp))
1686 break;
1687 cpu_pause();
1688 }
1689 #if MACH_ASSERT
1690 thread_t owner = ordered_load_rw_owner(lock);
1691 assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
1692 #endif
1693 current_thread()->rwlock_count++;
1694 #if CONFIG_DTRACE
1695 LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED);
1696 #endif /* CONFIG_DTRACE */
1697 return TRUE;
1698 }
1699
1700
1701 /*
1702 * Routine: lck_rw_try_lock_exclusive
1703 */
1704
1705 boolean_t lck_rw_try_lock_exclusive(lck_rw_t *lock)
1706 {
1707 uint32_t data, prev;
1708 thread_t thread;
1709
1710 for ( ; ; ) {
1711 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
1712 if (data & LCK_RW_INTERLOCK) {
1713 #if __SMP__
1714 atomic_exchange_abort();
1715 lck_rw_interlock_spin(lock);
1716 continue;
1717 #else
1718 panic("lck_rw_try_lock_exclusive(): Interlock locked (%p): %x", lock, data);
1719 #endif
1720 }
1721 if (data & (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) {
1722 atomic_exchange_abort();
1723 return FALSE;
1724 }
1725 data |= LCK_RW_WANT_EXCL;
1726 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp))
1727 break;
1728 cpu_pause();
1729 }
1730 thread = current_thread();
1731 thread->rwlock_count++;
1732 #if MACH_ASSERT
1733 thread_t owner = ordered_load_rw_owner(lock);
1734 assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
1735 #endif
1736 ordered_store_rw_owner(lock, thread);
1737 #if CONFIG_DTRACE
1738 LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
1739 #endif /* CONFIG_DTRACE */
1740 return TRUE;
1741 }
1742
1743
1744 /*
1745 * Routine: lck_rw_unlock
1746 */
1747 void
1748 lck_rw_unlock(
1749 lck_rw_t *lck,
1750 lck_rw_type_t lck_rw_type)
1751 {
1752 if (lck_rw_type == LCK_RW_TYPE_SHARED)
1753 lck_rw_unlock_shared(lck);
1754 else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
1755 lck_rw_unlock_exclusive(lck);
1756 else
1757 panic("lck_rw_unlock(): Invalid RW lock type: %d", lck_rw_type);
1758 }
1759
1760
1761 /*
1762 * Routine: lck_rw_unlock_shared
1763 */
1764 void
1765 lck_rw_unlock_shared(
1766 lck_rw_t *lck)
1767 {
1768 lck_rw_type_t ret;
1769
1770 assertf(lck->lck_rw_owner == THREAD_NULL, "state=0x%x, owner=%p", lck->lck_rw_data, lck->lck_rw_owner);
1771 assertf(lck->lck_rw_shared_count > 0, "shared_count=0x%x", lck->lck_rw_shared_count);
1772 ret = lck_rw_done(lck);
1773
1774 if (ret != LCK_RW_TYPE_SHARED)
1775 panic("lck_rw_unlock_shared(): lock %p held in mode: %d", lck, ret);
1776 }
1777
1778
1779 /*
1780 * Routine: lck_rw_unlock_exclusive
1781 */
1782 void
1783 lck_rw_unlock_exclusive(
1784 lck_rw_t *lck)
1785 {
1786 lck_rw_type_t ret;
1787
1788 assertf(lck->lck_rw_owner == current_thread(), "state=0x%x, owner=%p", lck->lck_rw_data, lck->lck_rw_owner);
1789 ret = lck_rw_done(lck);
1790
1791 if (ret != LCK_RW_TYPE_EXCLUSIVE)
1792 panic("lck_rw_unlock_exclusive(): lock %p held in mode: %d", lck, ret);
1793 }
1794
1795
1796 /*
1797 * Routine: lck_rw_lock_exclusive_gen
1798 */
1799 static void
1800 lck_rw_lock_exclusive_gen(
1801 lck_rw_t *lock)
1802 {
1803 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock);
1804 lck_rw_word_t word;
1805 int slept = 0;
1806 boolean_t gotlock = 0;
1807 boolean_t not_shared_or_upgrade = 0;
1808 wait_result_t res = 0;
1809 boolean_t istate;
1810
1811 #if CONFIG_DTRACE
1812 boolean_t dtrace_ls_initialized = FALSE;
1813 boolean_t dtrace_rwl_excl_spin, dtrace_rwl_excl_block, dtrace_ls_enabled = FALSE;
1814 uint64_t wait_interval = 0;
1815 int readers_at_sleep = 0;
1816 #endif
1817
1818 /*
1819 * Try to acquire the lck_rw_want_excl bit.
1820 */
1821 while (!lck_rw_grab(lock, LCK_RW_GRAB_WANT, FALSE)) {
1822
1823 #if CONFIG_DTRACE
1824 if (dtrace_ls_initialized == FALSE) {
1825 dtrace_ls_initialized = TRUE;
1826 dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
1827 dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
1828 dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
1829 if (dtrace_ls_enabled) {
1830 /*
1831 * Either sleeping or spinning is happening,
1832 * start a timing of our delay interval now.
1833 */
1834 readers_at_sleep = lock->lck_rw_shared_count;
1835 wait_interval = mach_absolute_time();
1836 }
1837 }
1838 #endif
1839
1840 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
1841
1842 gotlock = lck_rw_grab(lock, LCK_RW_GRAB_WANT, TRUE);
1843
1844 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_END, trace_lck, 0, 0, gotlock, 0);
1845
1846 if (gotlock)
1847 break;
1848 /*
1849 * if we get here, the deadline has expired w/o us
1850 * being able to grab the lock exclusively
1851 * check to see if we're allowed to do a thread_block
1852 */
1853 word.data = ordered_load_rw(lock);
1854 if (word.can_sleep) {
1855
1856 istate = lck_interlock_lock(lock);
1857 word.data = ordered_load_rw(lock);
1858
1859 if (word.want_excl) {
1860
1861 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
1862
1863 word.w_waiting = 1;
1864 ordered_store_rw(lock, word.data);
1865
1866 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite);
1867 res = assert_wait(LCK_RW_WRITER_EVENT(lock),
1868 THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
1869 lck_interlock_unlock(lock, istate);
1870
1871 if (res == THREAD_WAITING) {
1872 res = thread_block(THREAD_CONTINUE_NULL);
1873 slept++;
1874 }
1875 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_END, trace_lck, res, slept, 0, 0);
1876 } else {
1877 word.want_excl = 1;
1878 ordered_store_rw(lock, word.data);
1879 lck_interlock_unlock(lock, istate);
1880 break;
1881 }
1882 }
1883 }
1884 /*
1885 * Wait for readers (and upgrades) to finish...
1886 */
1887 while (!lck_rw_drain_status(lock, LCK_RW_SHARED_MASK | LCK_RW_WANT_UPGRADE, FALSE)) {
1888
1889 #if CONFIG_DTRACE
1890 /*
1891 * Either sleeping or spinning is happening, start
1892 * a timing of our delay interval now. If we set it
1893 * to -1 we don't have accurate data so we cannot later
1894 * decide to record a dtrace spin or sleep event.
1895 */
1896 if (dtrace_ls_initialized == FALSE) {
1897 dtrace_ls_initialized = TRUE;
1898 dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
1899 dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
1900 dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
1901 if (dtrace_ls_enabled) {
1902 /*
1903 * Either sleeping or spinning is happening,
1904 * start a timing of our delay interval now.
1905 */
1906 readers_at_sleep = lock->lck_rw_shared_count;
1907 wait_interval = mach_absolute_time();
1908 }
1909 }
1910 #endif
1911
1912 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
1913
1914 not_shared_or_upgrade = lck_rw_drain_status(lock, LCK_RW_SHARED_MASK | LCK_RW_WANT_UPGRADE, TRUE);
1915
1916 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_END, trace_lck, 0, 0, not_shared_or_upgrade, 0);
1917
1918 if (not_shared_or_upgrade)
1919 break;
1920 /*
1921 * if we get here, the deadline has expired w/o us
1922 * being able to grab the lock exclusively
1923 * check to see if we're allowed to do a thread_block
1924 */
1925 word.data = ordered_load_rw(lock);
1926 if (word.can_sleep) {
1927
1928 istate = lck_interlock_lock(lock);
1929 word.data = ordered_load_rw(lock);
1930
1931 if (word.shared_count != 0 || word.want_upgrade) {
1932 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
1933
1934 word.w_waiting = 1;
1935 ordered_store_rw(lock, word.data);
1936
1937 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite);
1938 res = assert_wait(LCK_RW_WRITER_EVENT(lock),
1939 THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
1940 lck_interlock_unlock(lock, istate);
1941
1942 if (res == THREAD_WAITING) {
1943 res = thread_block(THREAD_CONTINUE_NULL);
1944 slept++;
1945 }
1946 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_END, trace_lck, res, slept, 0, 0);
1947 } else {
1948 lck_interlock_unlock(lock, istate);
1949 /*
1950 * must own the lock now, since we checked for
1951 * readers or upgrade owner behind the interlock
1952 * no need for a call to 'lck_rw_drain_status'
1953 */
1954 break;
1955 }
1956 }
1957 }
1958
1959 #if CONFIG_DTRACE
1960 /*
1961 * Decide what latencies we suffered that are Dtrace events.
1962 * If we have set wait_interval, then we either spun or slept.
1963 * At least we get out from under the interlock before we record
1964 * which is the best we can do here to minimize the impact
1965 * of the tracing.
1966 * If dtrace_ls_enabled was never set, dtrace was not enabled when we
1967 * started sleeping/spinning so we don't record this event.
1968 */
1969 if (dtrace_ls_enabled == TRUE) {
1970 if (slept == 0) {
1971 LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_EXCL_SPIN, lock,
1972 mach_absolute_time() - wait_interval, 1);
1973 } else {
1974 /*
1975 * For the blocking case, we also record if when we blocked
1976 * it was held for read or write, and how many readers.
1977 * Notice that above we recorded this before we dropped
1978 * the interlock so the count is accurate.
1979 */
1980 LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_EXCL_BLOCK, lock,
1981 mach_absolute_time() - wait_interval, 1,
1982 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1983 }
1984 }
1985 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, 1);
1986 #endif /* CONFIG_DTRACE */
1987 }
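
/*
 * Summary of the exclusive slow path above: acquisition happens in two
 * phases.  The first loop spins (and, for sleepable locks, blocks) until
 * this thread owns the lck_rw_want_excl bit; the second loop then waits
 * for the shared_count to drain and for any pending upgrade to clear.
 * Only after both phases complete does the caller hold the lock
 * exclusive.
 *
 * A minimal caller-side sketch (illustrative only; my_lck is an assumed
 * lck_rw_t initialized elsewhere with lck_rw_init()):
 *
 *	lck_rw_lock_exclusive(my_lck);	// may enter the slow path above
 *	... modify the protected state ...
 *	lck_rw_done(my_lck);		// returns LCK_RW_TYPE_EXCLUSIVE
 */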
1988
1989 /*
1990 * Routine: lck_rw_done
1991 */
1992
1993 lck_rw_type_t lck_rw_done(lck_rw_t *lock)
1994 {
1995 uint32_t data, prev;
1996 boolean_t once = FALSE;
1997
1998 for ( ; ; ) {
1999 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_release_smp);
2000 if (data & LCK_RW_INTERLOCK) { /* wait for interlock to clear */
2001 #if __SMP__
2002 atomic_exchange_abort();
2003 lck_rw_interlock_spin(lock);
2004 continue;
2005 #else
2006 panic("lck_rw_done(): Interlock locked (%p): %x", lock, data);
2007 #endif // __SMP__
2008 }
2009 if (data & LCK_RW_SHARED_MASK) { /* lock is held shared */
2010 assertf(lock->lck_rw_owner == THREAD_NULL, "state=0x%x, owner=%p", lock->lck_rw_data, lock->lck_rw_owner);
2011 data -= LCK_RW_SHARED_READER;
2012 if ((data & LCK_RW_SHARED_MASK) == 0) /* if reader count has now gone to 0, check for waiters */
2013 goto check_waiters;
2014 } else { /* if reader count == 0, must be exclusive lock */
2015 if (data & LCK_RW_WANT_UPGRADE) {
2016 data &= ~(LCK_RW_WANT_UPGRADE);
2017 } else {
2018 if (data & LCK_RW_WANT_EXCL)
2019 data &= ~(LCK_RW_WANT_EXCL);
2020 else /* lock is not 'owned', panic */
2021 panic("Releasing non-exclusive RW lock without a reader refcount!");
2022 }
2023 if (!once) {
2024 // Only check for holder and clear it once
2025 assertf(lock->lck_rw_owner == current_thread(), "state=0x%x, owner=%p", lock->lck_rw_data, lock->lck_rw_owner);
2026 ordered_store_rw_owner(lock, THREAD_NULL);
2027 once = TRUE;
2028 }
2029 check_waiters:
2030 /*
2031 * test the original values to match what
2032 * lck_rw_done_gen is going to do to determine
2033 * which wakeups need to happen...
2034 *
2035 * if !(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting)
2036 */
2037 if (prev & LCK_RW_W_WAITING) {
2038 data &= ~(LCK_RW_W_WAITING);
2039 if ((prev & LCK_RW_PRIV_EXCL) == 0)
2040 data &= ~(LCK_RW_R_WAITING);
2041 } else
2042 data &= ~(LCK_RW_R_WAITING);
2043 }
2044 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_release_smp))
2045 break;
2046 cpu_pause();
2047 }
2048 return lck_rw_done_gen(lock, prev);
2049 }
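
/*
 * The loop above is the lock-free update pattern used throughout this
 * file: atomic_exchange_begin32() snapshots the 32-bit state word into
 * 'prev', the new value is computed locally in 'data', and
 * atomic_exchange_complete32() publishes it only if the word has not
 * changed in the meantime; otherwise (or when the interlock bit is
 * observed) the loop retries.  The pre-exchange snapshot is then handed
 * to lck_rw_done_gen(), which uses it to decide which waiters to wake
 * and whether the hold being released was shared or exclusive.
 *
 * Typical read-side call site (illustrative; my_lck is an assumed,
 * already-initialized lck_rw_t):
 *
 *	lck_rw_lock_shared(my_lck);
 *	... read the protected state ...
 *	(void) lck_rw_done(my_lck);	// LCK_RW_TYPE_SHARED in this case
 */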
2050
2051 /*
2052 * Routine: lck_rw_done_gen
2053 *
2054 * called from lck_rw_done() above...
2055 * prior_lock_state is the value in the 1st
2056 * word of the lock at the time of a successful
2057 * atomic compare and exchange with the new value...
2058 * it represents the state of the lock before we
2059 * decremented the rw_shared_count or cleared either
2060 * rw_want_upgrade or rw_want_excl and
2061 * the lck_x_waiting bits... since lck_rw_done()
2062 * has already changed the state atomically,
2063 * we just need to decide if we should
2064 * wake up anyone and what value to return... we do
2065 * this by examining the state of the lock before
2066 * we changed it
2067 */
2068 static lck_rw_type_t
2069 lck_rw_done_gen(
2070 lck_rw_t *lck,
2071 uint32_t prior_lock_state)
2072 {
2073 lck_rw_word_t fake_lck;
2074 lck_rw_type_t lock_type;
2075 thread_t thread;
2076 uint32_t rwlock_count;
2077
2078 /*
2079 * prior_lock_state is a snapshot of the 1st word of the
2080 * lock in question... we copy it into a local lck_rw_word_t
2081 * and carefully not access anything beyond what's defined
2082 * in the first word of a lck_rw_t
2083 */
2084 fake_lck.data = prior_lock_state;
2085
2086 if (fake_lck.shared_count <= 1) {
2087 if (fake_lck.w_waiting)
2088 thread_wakeup(LCK_RW_WRITER_EVENT(lck));
2089
2090 if (!(fake_lck.priv_excl && fake_lck.w_waiting) && fake_lck.r_waiting)
2091 thread_wakeup(LCK_RW_READER_EVENT(lck));
2092 }
2093 if (fake_lck.shared_count)
2094 lock_type = LCK_RW_TYPE_SHARED;
2095 else
2096 lock_type = LCK_RW_TYPE_EXCLUSIVE;
2097
2098 /* Check if dropping the lock means that we need to unpromote */
2099 thread = current_thread();
2100 rwlock_count = thread->rwlock_count--;
2101 #if MACH_LDEBUG
2102 if (rwlock_count == 0)
2103 panic("rw lock count underflow for thread %p", thread);
2104 #endif
2105 if ((rwlock_count == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
2106 /* sched_flags checked without lock, but will be rechecked while clearing */
2107 lck_rw_clear_promotion(thread, unslide_for_kdebug(lck));
2108 }
2109 #if CONFIG_DTRACE
2110 LOCKSTAT_RECORD(LS_LCK_RW_DONE_RELEASE, lck, lock_type == LCK_RW_TYPE_SHARED ? 0 : 1);
2111 #endif
2112 return lock_type;
2113 }
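
/*
 * Worked example of the snapshot decoding above (values illustrative):
 * if prior_lock_state had shared_count == 1, w_waiting == 1 and
 * priv_excl == 1, the caller just dropped the last read hold, so the
 * waiting writer is woken, waiting readers are deliberately left asleep
 * (writer priority), and LCK_RW_TYPE_SHARED is returned because the
 * hold that was released was a shared one.
 */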
2114
2115 /*
2116 * Routine: lck_rw_lock_shared_gen
2117 * Function:
2118 * Fast path code has determined that this lock
2119 * is held exclusively... this is where we spin/block
2120 * until we can acquire the lock in the shared mode
2121 */
2122 static void
2123 lck_rw_lock_shared_gen(
2124 lck_rw_t *lck)
2125 {
2126 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
2127 lck_rw_word_t word;
2128 boolean_t gotlock = 0;
2129 int slept = 0;
2130 wait_result_t res = 0;
2131 boolean_t istate;
2132
2133 #if CONFIG_DTRACE
2134 uint64_t wait_interval = 0;
2135 int readers_at_sleep = 0;
2136 boolean_t dtrace_ls_initialized = FALSE;
2137 boolean_t dtrace_rwl_shared_spin, dtrace_rwl_shared_block, dtrace_ls_enabled = FALSE;
2138 #endif /* CONFIG_DTRACE */
2139
2140 while (!lck_rw_grab(lck, LCK_RW_GRAB_SHARED, FALSE)) {
2141
2142 #if CONFIG_DTRACE
2143 if (dtrace_ls_initialized == FALSE) {
2144 dtrace_ls_initialized = TRUE;
2145 dtrace_rwl_shared_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_SPIN] != 0);
2146 dtrace_rwl_shared_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_BLOCK] != 0);
2147 dtrace_ls_enabled = dtrace_rwl_shared_spin || dtrace_rwl_shared_block;
2148 if (dtrace_ls_enabled) {
2149 /*
2150 * Either sleeping or spinning is happening,
2151 * start a timing of our delay interval now.
2152 */
2153 readers_at_sleep = lck->lck_rw_shared_count;
2154 wait_interval = mach_absolute_time();
2155 }
2156 }
2157 #endif
2158
2159 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_START,
2160 trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, 0, 0);
2161
2162 gotlock = lck_rw_grab(lck, LCK_RW_GRAB_SHARED, TRUE);
2163
2164 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_END,
2165 trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, gotlock, 0);
2166
2167 if (gotlock)
2168 break;
2169 /*
2170 * if we get here, the deadline has expired w/o us
2171 * being able to grab the lock for read
2172 * check to see if we're allowed to do a thread_block
2173 */
2174 if (lck->lck_rw_can_sleep) {
2175
2176 istate = lck_interlock_lock(lck);
2177
2178 word.data = ordered_load_rw(lck);
2179 if ((word.want_excl || word.want_upgrade) &&
2180 ((word.shared_count == 0) || word.priv_excl)) {
2181
2182 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_START,
2183 trace_lck, word.want_excl, word.want_upgrade, 0, 0);
2184
2185 word.r_waiting = 1;
2186 ordered_store_rw(lck, word.data);
2187
2188 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockRead);
2189 res = assert_wait(LCK_RW_READER_EVENT(lck),
2190 THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
2191 lck_interlock_unlock(lck, istate);
2192
2193 if (res == THREAD_WAITING) {
2194 res = thread_block(THREAD_CONTINUE_NULL);
2195 slept++;
2196 }
2197 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_END,
2198 trace_lck, res, slept, 0, 0);
2199 } else {
2200 word.shared_count++;
2201 ordered_store_rw(lck, word.data);
2202 lck_interlock_unlock(lck, istate);
2203 break;
2204 }
2205 }
2206 }
2207
2208 #if CONFIG_DTRACE
2209 if (dtrace_ls_enabled == TRUE) {
2210 if (slept == 0) {
2211 LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_SPIN, lck, mach_absolute_time() - wait_interval, 0);
2212 } else {
2213 LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_BLOCK, lck,
2214 mach_absolute_time() - wait_interval, 0,
2215 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
2216 }
2217 }
2218 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lck, 0);
2219 #endif /* CONFIG_DTRACE */
2220 }
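
/*
 * Note on the blocking condition above: a would-be reader sleeps only
 * when a writer or upgrader is pending AND either no readers currently
 * hold the lock or the lock enforces writer priority (priv_excl).
 * Otherwise the reader simply bumps shared_count under the interlock
 * and proceeds, which is what lets read-mostly workloads keep making
 * progress on non-priv_excl locks even with a writer queued.
 */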
2221
2222
2223 void
2224 lck_rw_assert(
2225 lck_rw_t *lck,
2226 unsigned int type)
2227 {
2228 switch (type) {
2229 case LCK_RW_ASSERT_SHARED:
2230 if ((lck->lck_rw_shared_count != 0) &&
2231 (lck->lck_rw_owner == THREAD_NULL)) {
2232 return;
2233 }
2234 break;
2235 case LCK_RW_ASSERT_EXCLUSIVE:
2236 if ((lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
2237 (lck->lck_rw_shared_count == 0) &&
2238 (lck->lck_rw_owner == current_thread())) {
2239 return;
2240 }
2241 break;
2242 case LCK_RW_ASSERT_HELD:
2243 if (lck->lck_rw_shared_count != 0)
2244 return; // Held shared
2245 if ((lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
2246 (lck->lck_rw_owner == current_thread())) {
2247 return; // Held exclusive
2248 }
2249 break;
2250 case LCK_RW_ASSERT_NOTHELD:
2251 if ((lck->lck_rw_shared_count == 0) &&
2252 !(lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
2253 (lck->lck_rw_owner == THREAD_NULL)) {
2254 return;
2255 }
2256 break;
2257 default:
2258 break;
2259 }
2260 panic("rw lock (%p)%s held (mode=%u)", lck, (type == LCK_RW_ASSERT_NOTHELD ? "" : " not"), type);
2261 }
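
/*
 * A minimal usage sketch for the assertion above (illustrative only;
 * my_data and my_lck are assumed names, not part of this file):
 *
 *	void
 *	my_data_update(struct my_data *d)
 *	{
 *		lck_rw_assert(&d->my_lck, LCK_RW_ASSERT_EXCLUSIVE);
 *		... caller must already hold the lock for writing ...
 *	}
 */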
2262
2263
2264 /*
2265 * Routine: kdp_lck_rw_lock_is_acquired_exclusive
2266 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
2267 */
2268 boolean_t
2269 kdp_lck_rw_lock_is_acquired_exclusive(lck_rw_t *lck) {
2270 if (not_in_kdp) {
2271 panic("panic: rw lock exclusive check done outside of kernel debugger");
2272 }
2273 return ((lck->lck_rw_want_upgrade || lck->lck_rw_want_excl) && (lck->lck_rw_shared_count == 0)) ? TRUE : FALSE;
2274 }
2275
2276 /*
2277 * The C portion of the mutex package. These routines are only invoked
2278 * if the optimized assembler routines can't do the work.
2279 */
2280
2281 /*
2282 * Forward declaration
2283 */
2284
2285 void
2286 lck_mtx_ext_init(
2287 lck_mtx_ext_t * lck,
2288 lck_grp_t * grp,
2289 lck_attr_t * attr);
2290
2291 /*
2292 * Routine: lck_mtx_alloc_init
2293 */
2294 lck_mtx_t *
2295 lck_mtx_alloc_init(
2296 lck_grp_t * grp,
2297 lck_attr_t * attr)
2298 {
2299 lck_mtx_t *lck;
2300
2301 if ((lck = (lck_mtx_t *) kalloc(sizeof(lck_mtx_t))) != 0)
2302 lck_mtx_init(lck, grp, attr);
2303
2304 return (lck);
2305 }
2306
2307 /*
2308 * Routine: lck_mtx_free
2309 */
2310 void
2311 lck_mtx_free(
2312 lck_mtx_t * lck,
2313 lck_grp_t * grp)
2314 {
2315 lck_mtx_destroy(lck, grp);
2316 kfree((void *) lck, sizeof(lck_mtx_t));
2317 }
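
/*
 * A minimal lifecycle sketch for the two routines above (illustrative
 * only; my_grp is an assumed lck_grp_t created elsewhere, e.g. with
 * lck_grp_alloc_init()):
 *
 *	lck_mtx_t *m = lck_mtx_alloc_init(my_grp, LCK_ATTR_NULL);
 *	lck_mtx_lock(m);
 *	... critical section ...
 *	lck_mtx_unlock(m);
 *	lck_mtx_free(m, my_grp);	// lck_mtx_destroy() + kfree()
 */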
2318
2319 /*
2320 * Routine: lck_mtx_init
2321 */
2322 void
2323 lck_mtx_init(
2324 lck_mtx_t * lck,
2325 lck_grp_t * grp,
2326 lck_attr_t * attr)
2327 {
2328 #ifdef BER_XXX
2329 lck_mtx_ext_t *lck_ext;
2330 #endif
2331 lck_attr_t *lck_attr;
2332
2333 if (attr != LCK_ATTR_NULL)
2334 lck_attr = attr;
2335 else
2336 lck_attr = &LockDefaultLckAttr;
2337
2338 #ifdef BER_XXX
2339 if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
2340 if ((lck_ext = (lck_mtx_ext_t *) kalloc(sizeof(lck_mtx_ext_t))) != 0) {
2341 lck_mtx_ext_init(lck_ext, grp, lck_attr);
2342 lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
2343 lck->lck_mtx_ptr = lck_ext;
2344 lck->lck_mtx_type = LCK_MTX_TYPE;
2345 }
2346 } else
2347 #endif
2348 {
2349 lck->lck_mtx_ptr = NULL; // Clear any padding in the union fields below
2350 lck->lck_mtx_waiters = 0;
2351 lck->lck_mtx_pri = 0;
2352 lck->lck_mtx_type = LCK_MTX_TYPE;
2353 ordered_store_mtx(lck, 0);
2354 }
2355 lck_grp_reference(grp);
2356 lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
2357 }
2358
2359 /*
2360 * Routine: lck_mtx_init_ext
2361 */
2362 void
2363 lck_mtx_init_ext(
2364 lck_mtx_t * lck,
2365 lck_mtx_ext_t * lck_ext,
2366 lck_grp_t * grp,
2367 lck_attr_t * attr)
2368 {
2369 lck_attr_t *lck_attr;
2370
2371 if (attr != LCK_ATTR_NULL)
2372 lck_attr = attr;
2373 else
2374 lck_attr = &LockDefaultLckAttr;
2375
2376 if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
2377 lck_mtx_ext_init(lck_ext, grp, lck_attr);
2378 lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
2379 lck->lck_mtx_ptr = lck_ext;
2380 lck->lck_mtx_type = LCK_MTX_TYPE;
2381 } else {
2382 lck->lck_mtx_waiters = 0;
2383 lck->lck_mtx_pri = 0;
2384 lck->lck_mtx_type = LCK_MTX_TYPE;
2385 ordered_store_mtx(lck, 0);
2386 }
2387 lck_grp_reference(grp);
2388 lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
2389 }
2390
2391 /*
2392 * Routine: lck_mtx_ext_init
2393 */
2394 void
2395 lck_mtx_ext_init(
2396 lck_mtx_ext_t * lck,
2397 lck_grp_t * grp,
2398 lck_attr_t * attr)
2399 {
2400 bzero((void *) lck, sizeof(lck_mtx_ext_t));
2401
2402 lck->lck_mtx.lck_mtx_type = LCK_MTX_TYPE;
2403
2404 if ((attr->lck_attr_val) & LCK_ATTR_DEBUG) {
2405 lck->lck_mtx_deb.type = MUTEX_TAG;
2406 lck->lck_mtx_attr |= LCK_MTX_ATTR_DEBUG;
2407 }
2408 lck->lck_mtx_grp = grp;
2409
2410 if (grp->lck_grp_attr & LCK_GRP_ATTR_STAT)
2411 lck->lck_mtx_attr |= LCK_MTX_ATTR_STAT;
2412 }
2413
2414 /* The slow versions */
2415 static void lck_mtx_lock_contended(lck_mtx_t *lock, thread_t thread, boolean_t interlocked);
2416 static boolean_t lck_mtx_try_lock_contended(lck_mtx_t *lock, thread_t thread);
2417 static void lck_mtx_unlock_contended(lck_mtx_t *lock, thread_t thread, boolean_t interlocked);
2418
2419 /*
2420 * Routine: lck_mtx_verify
2421 *
2422 * Verify that a mutex is valid
2423 */
2424 static inline void
2425 lck_mtx_verify(lck_mtx_t *lock)
2426 {
2427 if (lock->lck_mtx_type != LCK_MTX_TYPE)
2428 panic("Invalid mutex %p", lock);
2429 #if DEVELOPMENT || DEBUG
2430 if (lock->lck_mtx_tag == LCK_MTX_TAG_DESTROYED)
2431 panic("Mutex destroyed %p", lock);
2432 #endif /* DEVELOPMENT || DEBUG */
2433 }
2434
2435 /*
2436 * Routine: lck_mtx_check_preemption
2437 *
2438 * Verify preemption is enabled when attempting to acquire a mutex.
2439 */
2440
2441 static inline void
2442 lck_mtx_check_preemption(lck_mtx_t *lock)
2443 {
2444 #if DEVELOPMENT || DEBUG
2445 int pl = get_preemption_level();
2446
2447 if (pl != 0)
2448 panic("Attempt to take mutex with preemption disabled. Lock=%p, level=%d", lock, pl);
2449 #else
2450 (void)lock;
2451 #endif
2452 }
2453
2454 /*
2455 * Routine: lck_mtx_lock
2456 */
2457 void
2458 lck_mtx_lock(lck_mtx_t *lock)
2459 {
2460 thread_t thread;
2461
2462 lck_mtx_verify(lock);
2463 lck_mtx_check_preemption(lock);
2464 thread = current_thread();
2465 if (atomic_compare_exchange(&lock->lck_mtx_data, 0, LCK_MTX_THREAD_TO_STATE(thread),
2466 memory_order_acquire_smp, FALSE)) {
2467 #if CONFIG_DTRACE
2468 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, lock, 0);
2469 #endif /* CONFIG_DTRACE */
2470 return;
2471 }
2472 lck_mtx_lock_contended(lock, thread, FALSE);
2473 }
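
/*
 * Fast path note: an uncontended mutex is taken with a single
 * compare-and-exchange that swings lck_mtx_data from 0 to the owning
 * thread's state value (LCK_MTX_THREAD_TO_STATE(thread)) with acquire
 * semantics on SMP.  The interlock, the waiters bit and the wait/wakeup
 * machinery are touched only in the contended path below.
 */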
2474
2475 /*
2476  * This is the slow version of mutex locking.
2477 */
2478 static void NOINLINE
2479 lck_mtx_lock_contended(lck_mtx_t *lock, thread_t thread, boolean_t interlocked)
2480 {
2481 thread_t holding_thread;
2482 uintptr_t state;
2483 int waiters;
2484
2485 if (interlocked)
2486 goto interlock_held;
2487
2488 /* TODO: short-duration spin for on-core contention <rdar://problem/10234625> */
2489
2490 /* Loop waiting until I see that the mutex is unowned */
2491 for ( ; ; ) {
2492 interlock_lock(lock);
2493 interlock_held:
2494 state = ordered_load_mtx(lock);
2495 holding_thread = LCK_MTX_STATE_TO_THREAD(state);
2496 if (holding_thread == NULL)
2497 break;
2498 ordered_store_mtx(lock, (state | LCK_ILOCK | ARM_LCK_WAITERS)); // Set waiters bit and wait
2499 lck_mtx_lock_wait(lock, holding_thread);
2500 /* returns interlock unlocked */
2501 }
2502
2503 /* Hooray, I'm the new owner! */
2504 waiters = lck_mtx_lock_acquire(lock);
2505 state = LCK_MTX_THREAD_TO_STATE(thread);
2506 if (waiters != 0)
2507 state |= ARM_LCK_WAITERS;
2508 #if __SMP__
2509 state |= LCK_ILOCK; // Preserve interlock
2510 ordered_store_mtx(lock, state); // Set ownership
2511 interlock_unlock(lock); // Release interlock, enable preemption
2512 #else
2513 ordered_store_mtx(lock, state); // Set ownership
2514 enable_preemption();
2515 #endif
2516 load_memory_barrier();
2517
2518 #if CONFIG_DTRACE
2519 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, lock, 0);
2520 #endif /* CONFIG_DTRACE */
2521 }
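
/*
 * State-word layout assumed by the stores above (see the LCK_MTX_*
 * macros): the owner is encoded from the thread pointer, whose
 * alignment leaves the low-order bits free for the LCK_ILOCK and
 * ARM_LCK_WAITERS flags, so a single ordered_store_mtx() publishes the
 * new owner together with the interlock and waiters information.
 */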
2522
2523 /*
2524 * Common code for mutex locking as spinlock
2525 */
2526 static inline void
2527 lck_mtx_lock_spin_internal(lck_mtx_t *lock, boolean_t allow_held_as_mutex)
2528 {
2529 uintptr_t state;
2530
2531 interlock_lock(lock);
2532 state = ordered_load_mtx(lock);
2533 if (LCK_MTX_STATE_TO_THREAD(state)) {
2534 if (allow_held_as_mutex)
2535 lck_mtx_lock_contended(lock, current_thread(), TRUE);
2536 else
2537 // "Always" variants can never block. If the lock is held and blocking is not allowed
2538 // then someone is mixing always and non-always calls on the same lock, which is
2539 // forbidden.
2540 panic("Attempting to block on a lock taken as spin-always %p", lock);
2541 return;
2542 }
2543 state &= ARM_LCK_WAITERS; // Preserve waiters bit
2544 state |= (LCK_MTX_SPIN_TAG | LCK_ILOCK); // Add spin tag and maintain interlock
2545 ordered_store_mtx(lock, state);
2546 load_memory_barrier();
2547
2548 #if CONFIG_DTRACE
2549 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN_ACQUIRE, lock, 0);
2550 #endif /* CONFIG_DTRACE */
2551 }
2552
2553 /*
2554 * Routine: lck_mtx_lock_spin
2555 */
2556 void
2557 lck_mtx_lock_spin(lck_mtx_t *lock)
2558 {
2559 lck_mtx_check_preemption(lock);
2560 lck_mtx_lock_spin_internal(lock, TRUE);
2561 }
2562
2563 /*
2564 * Routine: lck_mtx_lock_spin_always
2565 */
2566 void
2567 lck_mtx_lock_spin_always(lck_mtx_t *lock)
2568 {
2569 lck_mtx_lock_spin_internal(lock, FALSE);
2570 }
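
/*
 * The two entry points above differ only in allow_held_as_mutex: the
 * plain spin variant may fall back to blocking (via
 * lck_mtx_lock_contended) when the lock is currently held as a full
 * mutex, while the "always" variant treats that situation as a caller
 * bug and panics.  A spin-mode hold is released with lck_mtx_unlock()
 * (whose slow path recognizes the spin tag), or first promoted with
 * lck_mtx_convert_spin() if the holder needs to block.
 */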
2571
2572 /*
2573 * Routine: lck_mtx_try_lock
2574 */
2575 boolean_t
2576 lck_mtx_try_lock(lck_mtx_t *lock)
2577 {
2578 thread_t thread = current_thread();
2579
2580 lck_mtx_verify(lock);
2581 if (atomic_compare_exchange(&lock->lck_mtx_data, 0, LCK_MTX_THREAD_TO_STATE(thread),
2582 memory_order_acquire_smp, FALSE)) {
2583 #if CONFIG_DTRACE
2584 LOCKSTAT_RECORD(LS_LCK_MTX_TRY_LOCK_ACQUIRE, lock, 0);
2585 #endif /* CONFIG_DTRACE */
2586 return TRUE;
2587 }
2588 return lck_mtx_try_lock_contended(lock, thread);
2589 }
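
/*
 * Typical try-lock pattern (illustrative; my_mtx is an assumed,
 * already-initialized lck_mtx_t):
 *
 *	if (lck_mtx_try_lock(my_mtx)) {
 *		... short, non-blocking critical section ...
 *		lck_mtx_unlock(my_mtx);
 *	} else {
 *		... back off or defer the work; do not spin-wait here ...
 *	}
 */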
2590
2591 static boolean_t NOINLINE
2592 lck_mtx_try_lock_contended(lck_mtx_t *lock, thread_t thread)
2593 {
2594 thread_t holding_thread;
2595 uintptr_t state;
2596 int waiters;
2597
2598 #if __SMP__
2599 interlock_lock(lock);
2600 state = ordered_load_mtx(lock);
2601 holding_thread = LCK_MTX_STATE_TO_THREAD(state);
2602 if (holding_thread) {
2603 interlock_unlock(lock);
2604 return FALSE;
2605 }
2606 #else
2607 disable_preemption_for_thread(thread);
2608 state = ordered_load_mtx(lock);
2609 if (state & LCK_ILOCK)
2610 panic("Unexpected interlock set (%p)", lock);
2611 holding_thread = LCK_MTX_STATE_TO_THREAD(state);
2612 if (holding_thread) {
2613 enable_preemption();
2614 return FALSE;
2615 }
2616 state |= LCK_ILOCK;
2617 ordered_store_mtx(lock, state);
2618 #endif // __SMP__
2619 waiters = lck_mtx_lock_acquire(lock);
2620 state = LCK_MTX_THREAD_TO_STATE(thread);
2621 if (waiters != 0)
2622 state |= ARM_LCK_WAITERS;
2623 #if __SMP__
2624 state |= LCK_ILOCK; // Preserve interlock
2625 ordered_store_mtx(lock, state); // Set ownership
2626 interlock_unlock(lock); // Release interlock, enable preemption
2627 #else
2628 ordered_store_mtx(lock, state); // Set ownership
2629 enable_preemption();
2630 #endif
2631 load_memory_barrier();
2632 return TRUE;
2633 }
2634
2635 static inline boolean_t
2636 lck_mtx_try_lock_spin_internal(lck_mtx_t *lock, boolean_t allow_held_as_mutex)
2637 {
2638 uintptr_t state;
2639
2640 if (!interlock_try(lock))
2641 return FALSE;
2642 state = ordered_load_mtx(lock);
2643 if (LCK_MTX_STATE_TO_THREAD(state)) {
2644 // Lock is held as mutex
2645 if (allow_held_as_mutex)
2646 interlock_unlock(lock);
2647 else
2648 // "Always" variants can never block. If the lock is held as a normal mutex
2649 // then someone is mixing always and non-always calls on the same lock, which is
2650 // forbidden.
2651 panic("Spin-mutex held as full mutex %p", lock);
2652 return FALSE;
2653 }
2654 state &= ARM_LCK_WAITERS; // Preserve waiters bit
2655 state |= (LCK_MTX_SPIN_TAG | LCK_ILOCK); // Add spin tag and maintain interlock
2656 ordered_store_mtx(lock, state);
2657 load_memory_barrier();
2658
2659 #if CONFIG_DTRACE
2660 LOCKSTAT_RECORD(LS_LCK_MTX_TRY_SPIN_LOCK_ACQUIRE, lock, 0);
2661 #endif /* CONFIG_DTRACE */
2662 return TRUE;
2663 }
2664
2665 /*
2666 * Routine: lck_mtx_try_lock_spin
2667 */
2668 boolean_t
2669 lck_mtx_try_lock_spin(lck_mtx_t *lock)
2670 {
2671 return lck_mtx_try_lock_spin_internal(lock, TRUE);
2672 }
2673
2674 /*
2675 * Routine: lck_mtx_try_lock_spin_always
2676 */
2677 boolean_t
2678 lck_mtx_try_lock_spin_always(lck_mtx_t *lock)
2679 {
2680 return lck_mtx_try_lock_spin_internal(lock, FALSE);
2681 }
2682
2683
2684
2685 /*
2686 * Routine: lck_mtx_unlock
2687 */
2688 void
2689 lck_mtx_unlock(lck_mtx_t *lock)
2690 {
2691 thread_t thread = current_thread();
2692 uintptr_t state;
2693 boolean_t ilk_held = FALSE;
2694
2695 lck_mtx_verify(lock);
2696
2697 state = ordered_load_mtx(lock);
2698 if (state & LCK_ILOCK) {
2699 if (LCK_MTX_STATE_TO_THREAD(state) == (thread_t)LCK_MTX_SPIN_TAG)
2700 ilk_held = TRUE; // Interlock is held by (presumably) this thread
2701 goto slow_case;
2702 }
2703 // Locked as a mutex
2704 if (atomic_compare_exchange(&lock->lck_mtx_data, LCK_MTX_THREAD_TO_STATE(thread), 0,
2705 memory_order_release_smp, FALSE)) {
2706 #if CONFIG_DTRACE
2707 LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, lock, 0);
2708 #endif /* CONFIG_DTRACE */
2709 return;
2710 }
2711 slow_case:
2712 lck_mtx_unlock_contended(lock, thread, ilk_held);
2713 }
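
/*
 * Unlock fast path, the mirror image of lck_mtx_lock(): if the state
 * word still contains exactly this thread's value (no interlock, no
 * waiters bit), a single release compare-and-exchange back to 0
 * completes the unlock.  The contended path below runs when waiters
 * must be woken or when the lock is being dropped from spin mode
 * (LCK_MTX_SPIN_TAG with the interlock held).
 */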
2714
2715 static void NOINLINE
2716 lck_mtx_unlock_contended(lck_mtx_t *lock, thread_t thread, boolean_t ilk_held)
2717 {
2718 uintptr_t state;
2719
2720 if (ilk_held) {
2721 state = ordered_load_mtx(lock);
2722 } else {
2723 #if __SMP__
2724 interlock_lock(lock);
2725 state = ordered_load_mtx(lock);
2726 if (thread != LCK_MTX_STATE_TO_THREAD(state))
2727 panic("lck_mtx_unlock(): Attempt to release lock not owned by thread (%p)", lock);
2728 #else
2729 disable_preemption_for_thread(thread);
2730 state = ordered_load_mtx(lock);
2731 if (state & LCK_ILOCK)
2732 panic("lck_mtx_unlock(): Unexpected interlock set (%p)", lock);
2733 if (thread != LCK_MTX_STATE_TO_THREAD(state))
2734 panic("lck_mtx_unlock(): Attempt to release lock not owned by thread (%p)", lock);
2735 state |= LCK_ILOCK;
2736 ordered_store_mtx(lock, state);
2737 #endif
2738 if (state & ARM_LCK_WAITERS) {
2739 lck_mtx_unlock_wakeup(lock, thread);
2740 state = ordered_load_mtx(lock);
2741 } else {
2742 assertf(lock->lck_mtx_pri == 0, "pri=0x%x", lock->lck_mtx_pri);
2743 }
2744 }
2745 state &= ARM_LCK_WAITERS; /* Clear state, retain waiters bit */
2746 #if __SMP__
2747 state |= LCK_ILOCK;
2748 ordered_store_mtx(lock, state);
2749 interlock_unlock(lock);
2750 #else
2751 ordered_store_mtx(lock, state);
2752 enable_preemption();
2753 #endif
2754
2755 #if CONFIG_DTRACE
2756 LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, lock, 0);
2757 #endif /* CONFIG_DTRACE */
2758 }
2759
2760 /*
2761 * Routine: lck_mtx_assert
2762 */
2763 void
2764 lck_mtx_assert(lck_mtx_t *lock, unsigned int type)
2765 {
2766 thread_t thread, holder;
2767 uintptr_t state;
2768
2769 state = ordered_load_mtx(lock);
2770 holder = LCK_MTX_STATE_TO_THREAD(state);
2771 if (holder == (thread_t)LCK_MTX_SPIN_TAG) {
2772 // Lock is held in spin mode, owner is unknown.
2773 return; // Punt
2774 }
2775 thread = current_thread();
2776 if (type == LCK_MTX_ASSERT_OWNED) {
2777 if (thread != holder)
2778 panic("lck_mtx_assert(): mutex (%p) owned", lock);
2779 } else if (type == LCK_MTX_ASSERT_NOTOWNED) {
2780 if (thread == holder)
2781 panic("lck_mtx_assert(): mutex (%p) not owned", lock);
2782 } else
2783 panic("lck_mtx_assert(): invalid arg (%u)", type);
2784 }
2785
2786 /*
2787 * Routine: lck_mtx_ilk_unlock
2788 */
2789 boolean_t
2790 lck_mtx_ilk_unlock(lck_mtx_t *lock)
2791 {
2792 interlock_unlock(lock);
2793 return TRUE;
2794 }
2795
2796 /*
2797 * Routine: lck_mtx_convert_spin
2798 *
2799 * Convert a mutex held for spin into a held full mutex
2800 */
2801 void
2802 lck_mtx_convert_spin(lck_mtx_t *lock)
2803 {
2804 thread_t thread = current_thread();
2805 uintptr_t state;
2806 int waiters;
2807
2808 state = ordered_load_mtx(lock);
2809 if (LCK_MTX_STATE_TO_THREAD(state) == thread)
2810 return; // Already owned as mutex, return
2811 if ((state & LCK_ILOCK) == 0 || (LCK_MTX_STATE_TO_THREAD(state) != (thread_t)LCK_MTX_SPIN_TAG))
2812 panic("lck_mtx_convert_spin: Not held as spinlock (%p)", lock);
2813 state &= ~(LCK_MTX_THREAD_MASK); // Clear the spin tag
2814 ordered_store_mtx(lock, state);
2815 waiters = lck_mtx_lock_acquire(lock); // Acquire to manage priority boosts
2816 state = LCK_MTX_THREAD_TO_STATE(thread);
2817 if (waiters != 0)
2818 state |= ARM_LCK_WAITERS;
2819 #if __SMP__
2820 state |= LCK_ILOCK;
2821 ordered_store_mtx(lock, state); // Set ownership
2822 interlock_unlock(lock); // Release interlock, enable preemption
2823 #else
2824 ordered_store_mtx(lock, state); // Set ownership
2825 enable_preemption();
2826 #endif
2827 }
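
/*
 * A minimal sketch of the spin-then-convert pattern served by the
 * routine above (illustrative; my_mtx and the condition are assumed):
 *
 *	lck_mtx_lock_spin(my_mtx);	// cheap, non-blocking hold
 *	if (must_block_or_do_long_work) {
 *		lck_mtx_convert_spin(my_mtx);	// now a full mutex hold
 *		... may block safely from here on ...
 *	}
 *	lck_mtx_unlock(my_mtx);
 */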
2828
2829
2830 /*
2831 * Routine: lck_mtx_destroy
2832 */
2833 void
2834 lck_mtx_destroy(
2835 lck_mtx_t * lck,
2836 lck_grp_t * grp)
2837 {
2838 if (lck->lck_mtx_type != LCK_MTX_TYPE)
2839 panic("Destroying invalid mutex %p", lck);
2840 if (lck->lck_mtx_tag == LCK_MTX_TAG_DESTROYED)
2841 panic("Destroying previously destroyed lock %p", lck);
2842 lck_mtx_assert(lck, LCK_MTX_ASSERT_NOTOWNED);
2843 lck->lck_mtx_tag = LCK_MTX_TAG_DESTROYED;
2844 lck_grp_lckcnt_decr(grp, LCK_TYPE_MTX);
2845 lck_grp_deallocate(grp);
2846 return;
2847 }
2848
2849 /*
2850 * Routine: lck_spin_assert
2851 */
2852 void
2853 lck_spin_assert(lck_spin_t *lock, unsigned int type)
2854 {
2855 thread_t thread, holder;
2856 uintptr_t state;
2857
2858 if (lock->type != LCK_SPIN_TYPE)
2859 panic("Invalid spinlock %p", lock);
2860
2861 state = lock->lck_spin_data;
2862 holder = (thread_t)(state & ~LCK_ILOCK);
2863 thread = current_thread();
2864 if (type == LCK_ASSERT_OWNED) {
2865 if (holder == 0)
2866 panic("Lock not owned %p = %lx", lock, state);
2867 if (holder != thread)
2868 panic("Lock not owned by current thread %p = %lx", lock, state);
2869 if ((state & LCK_ILOCK) == 0)
2870 panic("Lock bit not set %p = %lx", lock, state);
2871 } else if (type == LCK_ASSERT_NOTOWNED) {
2872 if (holder != 0) {
2873 if (holder == thread)
2874 panic("Lock owned by current thread %p = %lx", lock, state);
2875 else
2876 panic("Lock %p owned by thread %p", lock, holder);
2877 }
2878 if (state & LCK_ILOCK)
2879 panic("Lock bit set %p = %lx", lock, state);
2880 } else
2881 panic("lck_spin_assert(): invalid arg (%u)", type);
2882 }
2883
2884 boolean_t
2885 lck_rw_lock_yield_shared(lck_rw_t *lck, boolean_t force_yield)
2886 {
2887 lck_rw_word_t word;
2888
2889 lck_rw_assert(lck, LCK_RW_ASSERT_SHARED);
2890
2891 word.data = ordered_load_rw(lck);
2892 if (word.want_excl || word.want_upgrade || force_yield) {
2893 lck_rw_unlock_shared(lck);
2894 mutex_pause(2);
2895 lck_rw_lock_shared(lck);
2896 return TRUE;
2897 }
2898
2899 return FALSE;
2900 }
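
/*
 * Intended use of the yield above: a long read-side scan drops and
 * retakes its shared hold whenever a writer or upgrader is queued (or
 * unconditionally when force_yield is TRUE), so one long reader cannot
 * starve writers.  Sketch (illustrative; my_lck and the iteration are
 * assumed):
 *
 *	lck_rw_lock_shared(my_lck);
 *	for (... each element ...) {
 *		... examine element ...
 *		if (lck_rw_lock_yield_shared(my_lck, FALSE)) {
 *			... lock was dropped and re-acquired; revalidate ...
 *		}
 *	}
 *	lck_rw_unlock_shared(my_lck);
 */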
2901
2902 /*
2903 * Routine: kdp_lck_mtx_lock_spin_is_acquired
2904 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
2905 */
2906 boolean_t
2907 kdp_lck_mtx_lock_spin_is_acquired(lck_mtx_t *lck)
2908 {
2909 uintptr_t state;
2910
2911 if (not_in_kdp) {
2912 panic("panic: spinlock acquired check done outside of kernel debugger");
2913 }
2914 state = ordered_load_mtx(lck);
2915 if (state == LCK_MTX_TAG_DESTROYED)
2916 return FALSE;
2917 if (LCK_MTX_STATE_TO_THREAD(state) || (state & LCK_ILOCK))
2918 return TRUE;
2919 return FALSE;
2920 }
2921
2922 void
2923 kdp_lck_mtx_find_owner(__unused struct waitq * waitq, event64_t event, thread_waitinfo_t * waitinfo)
2924 {
2925 lck_mtx_t * mutex = LCK_EVENT_TO_MUTEX(event);
2926 waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(mutex);
2927 uintptr_t state = ordered_load_mtx(mutex);
2928 thread_t holder = LCK_MTX_STATE_TO_THREAD(state);
2929 if ((uintptr_t)holder == (uintptr_t)LCK_MTX_SPIN_TAG) {
2930 waitinfo->owner = STACKSHOT_WAITOWNER_MTXSPIN;
2931 } else {
2932 assertf(state != (uintptr_t)LCK_MTX_TAG_DESTROYED, "state=0x%llx", (uint64_t)state);
2933 assertf(state != (uintptr_t)LCK_MTX_TAG_INDIRECT, "state=0x%llx", (uint64_t)state);
2934 waitinfo->owner = thread_tid(holder);
2935 }
2936 }
2937
2938 void
2939 kdp_rwlck_find_owner(__unused struct waitq * waitq, event64_t event, thread_waitinfo_t * waitinfo)
2940 {
2941 lck_rw_t *rwlck = NULL;
2942 switch (waitinfo->wait_type) {
2943 case kThreadWaitKernelRWLockRead:
2944 rwlck = READ_EVENT_TO_RWLOCK(event);
2945 break;
2946 case kThreadWaitKernelRWLockWrite:
2947 case kThreadWaitKernelRWLockUpgrade:
2948 rwlck = WRITE_EVENT_TO_RWLOCK(event);
2949 break;
2950 default:
2951 panic("%s was called with an invalid blocking type", __FUNCTION__);
2952 break;
2953 }
2954 waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(rwlck);
2955 waitinfo->owner = thread_tid(rwlck->lck_rw_owner);
2956 }