1 /*
2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 * File: kern/lock.c
58 * Author: Avadis Tevanian, Jr., Michael Wayne Young
59 * Date: 1985
60 *
61 * Locking primitives implementation
62 */
63
64 #include <mach_ldebug.h>
65
66 #include <kern/locks.h>
67 #include <kern/kalloc.h>
68 #include <kern/misc_protos.h>
69 #include <kern/thread.h>
70 #include <kern/processor.h>
71 #include <kern/cpu_data.h>
72 #include <kern/cpu_number.h>
73 #include <kern/sched_prim.h>
74 #include <kern/xpr.h>
75 #include <kern/debug.h>
76 #include <string.h>
77
78 #include <i386/machine_routines.h> /* machine_timeout_suspended() */
79 #include <machine/atomic.h>
80 #include <machine/machine_cpu.h>
81 #include <i386/mp.h>
82
83 #include <sys/kdebug.h>
84 #include <mach/branch_predicates.h>
85
86 /*
87 * We need only enough declarations from the BSD-side to be able to
88 * test if our probe is active, and to call __dtrace_probe(). Setting
89 * NEED_DTRACE_DEFS gets a local copy of those definitions pulled in.
90 */
91 #if CONFIG_DTRACE
92 #define NEED_DTRACE_DEFS
93 #include <../bsd/sys/lockstat.h>
94
95 #define DTRACE_RW_SHARED 0x0 //reader
96 #define DTRACE_RW_EXCL 0x1 //writer
97 #define DTRACE_NO_FLAG 0x0 //not applicable
98
99 #endif
100
101 #define LCK_RW_LCK_EXCLUSIVE_CODE 0x100
102 #define LCK_RW_LCK_EXCLUSIVE1_CODE 0x101
103 #define LCK_RW_LCK_SHARED_CODE 0x102
104 #define LCK_RW_LCK_SH_TO_EX_CODE 0x103
105 #define LCK_RW_LCK_SH_TO_EX1_CODE 0x104
106 #define LCK_RW_LCK_EX_TO_SH_CODE 0x105
107
108 #define LCK_RW_LCK_EX_WRITER_SPIN_CODE 0x106
109 #define LCK_RW_LCK_EX_WRITER_WAIT_CODE 0x107
110 #define LCK_RW_LCK_EX_READER_SPIN_CODE 0x108
111 #define LCK_RW_LCK_EX_READER_WAIT_CODE 0x109
112 #define LCK_RW_LCK_SHARED_SPIN_CODE 0x110
113 #define LCK_RW_LCK_SHARED_WAIT_CODE 0x111
114 #define LCK_RW_LCK_SH_TO_EX_SPIN_CODE 0x112
115 #define LCK_RW_LCK_SH_TO_EX_WAIT_CODE 0x113
116
117
118 #define ANY_LOCK_DEBUG (USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG)
119
120 unsigned int LcksOpts=0;
121
122 #if DEVELOPMENT || DEBUG
123 unsigned int LckDisablePreemptCheck = 0;
124 #endif
125
126 /* Forwards */
127
128 #if USLOCK_DEBUG
129 /*
130 * Perform simple lock checks.
131 */
132 int uslock_check = 1;
133 int max_lock_loops = 100000000;
134 decl_simple_lock_data(extern , printf_lock)
135 decl_simple_lock_data(extern , panic_lock)
136 #endif /* USLOCK_DEBUG */
137
138 extern unsigned int not_in_kdp;
139
140 /*
141 * We often want to know the addresses of the callers
142 * of the various lock routines. However, this information
143 * is only used for debugging and statistics.
144 */
145 typedef void *pc_t;
146 #define INVALID_PC ((void *) VM_MAX_KERNEL_ADDRESS)
147 #define INVALID_THREAD ((void *) VM_MAX_KERNEL_ADDRESS)
148 #if ANY_LOCK_DEBUG
149 #define OBTAIN_PC(pc) ((pc) = GET_RETURN_PC())
150 #define DECL_PC(pc) pc_t pc;
151 #else /* ANY_LOCK_DEBUG */
152 #define DECL_PC(pc)
153 #ifdef lint
154 /*
155 * Eliminate lint complaints about unused local pc variables.
156 */
157 #define OBTAIN_PC(pc) ++pc
158 #else /* lint */
159 #define OBTAIN_PC(pc)
160 #endif /* lint */
161 #endif /* ANY_LOCK_DEBUG */
162
163 // Enforce program order of loads and stores.
164 #define ordered_load(target) _Generic( (target),\
165 uint32_t* : __c11_atomic_load((_Atomic uint32_t* )(target), memory_order_relaxed), \
166 uintptr_t*: __c11_atomic_load((_Atomic uintptr_t*)(target), memory_order_relaxed) )
167 #define ordered_store(target, value) _Generic( (target),\
168 uint32_t* : __c11_atomic_store((_Atomic uint32_t* )(target), (value), memory_order_relaxed), \
169 uintptr_t*: __c11_atomic_store((_Atomic uintptr_t*)(target), (value), memory_order_relaxed) )
170
171 /*
172 * atomic exchange API is a low level abstraction of the operations
173 * to atomically read, modify, and write a pointer. This abstraction works
174 * for both Intel and ARMv8.1 compare and exchange atomic instructions as
175 * well as the ARM exclusive instructions.
176 *
177 * atomic_exchange_begin() - begin exchange and retrieve current value
178 * atomic_exchange_complete() - conclude an exchange
179 * atomic_exchange_abort() - cancel an exchange started with atomic_exchange_begin()
180 */
181 static uint32_t
182 atomic_exchange_begin32(uint32_t *target, uint32_t *previous, enum memory_order ord)
183 {
184 uint32_t val;
185
186 (void)ord; // Memory order not used
187 val = __c11_atomic_load((_Atomic uint32_t *)target, memory_order_relaxed);
188 *previous = val;
189 return val;
190 }
191
192 static boolean_t
193 atomic_exchange_complete32(uint32_t *target, uint32_t previous, uint32_t newval, enum memory_order ord)
194 {
195 return __c11_atomic_compare_exchange_strong((_Atomic uint32_t *)target, &previous, newval, ord, memory_order_relaxed);
196 }
197
198 static void
199 atomic_exchange_abort(void) { }
200
201 static boolean_t
202 atomic_test_and_set32(uint32_t *target, uint32_t test_mask, uint32_t set_mask, enum memory_order ord, boolean_t wait)
203 {
204 uint32_t value, prev;
205
206 for ( ; ; ) {
207 value = atomic_exchange_begin32(target, &prev, ord);
208 if (value & test_mask) {
209 if (wait)
210 cpu_pause();
211 else
212 atomic_exchange_abort();
213 return FALSE;
214 }
215 value |= set_mask;
216 if (atomic_exchange_complete32(target, prev, value, ord))
217 return TRUE;
218 }
219 }
220
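/*
 * Illustrative sketch (not part of the original sources): how a caller
 * uses the begin/complete/abort protocol described above.  The routine
 * below atomically sets 'flag_bit' in *word unless 'busy_bit' is already
 * set; the word and both bit names are assumptions made up for this
 * example.
 */
static __unused boolean_t
example_atomic_set_flag(uint32_t *word, uint32_t busy_bit, uint32_t flag_bit)
{
	uint32_t	data, prev;

	for ( ; ; ) {
		data = atomic_exchange_begin32(word, &prev, memory_order_relaxed);
		if (data & busy_bit) {
			atomic_exchange_abort();	/* leave the word untouched */
			return FALSE;
		}
		data |= flag_bit;
		if (atomic_exchange_complete32(word, prev, data, memory_order_relaxed))
			return TRUE;
		cpu_pause();				/* lost the race... reread and retry */
	}
}
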
221 /*
222 * Portable lock package implementation of usimple_locks.
223 */
224
225 #if USLOCK_DEBUG
226 #define USLDBG(stmt) stmt
227 void usld_lock_init(usimple_lock_t, unsigned short);
228 void usld_lock_pre(usimple_lock_t, pc_t);
229 void usld_lock_post(usimple_lock_t, pc_t);
230 void usld_unlock(usimple_lock_t, pc_t);
231 void usld_lock_try_pre(usimple_lock_t, pc_t);
232 void usld_lock_try_post(usimple_lock_t, pc_t);
233 int usld_lock_common_checks(usimple_lock_t, char *);
234 #else /* USLOCK_DEBUG */
235 #define USLDBG(stmt)
236 #endif /* USLOCK_DEBUG */
237
238
239 /*
240 * Forward definitions
241 */
242
243 static void lck_rw_lock_shared_gen(lck_rw_t *lck);
244 static void lck_rw_lock_exclusive_gen(lck_rw_t *lck);
245 static boolean_t lck_rw_lock_shared_to_exclusive_success(lck_rw_t *lck);
246 static boolean_t lck_rw_lock_shared_to_exclusive_failure(lck_rw_t *lck, uint32_t prior_lock_state);
247 static void lck_rw_lock_exclusive_to_shared_gen(lck_rw_t *lck, uint32_t prior_lock_state);
248 static lck_rw_type_t lck_rw_done_gen(lck_rw_t *lck, uint32_t prior_lock_state);
249 void lck_rw_clear_promotions_x86(thread_t thread);
250 static boolean_t lck_rw_held_read_or_upgrade(lck_rw_t *lock);
251 static boolean_t lck_rw_grab_want(lck_rw_t *lock);
252 static boolean_t lck_rw_grab_shared(lck_rw_t *lock);
253
254 /*
255 * Routine: lck_spin_alloc_init
256 */
257 lck_spin_t *
258 lck_spin_alloc_init(
259 lck_grp_t *grp,
260 lck_attr_t *attr)
261 {
262 lck_spin_t *lck;
263
264 if ((lck = (lck_spin_t *)kalloc(sizeof(lck_spin_t))) != 0)
265 lck_spin_init(lck, grp, attr);
266
267 return(lck);
268 }
269
270 /*
271 * Routine: lck_spin_free
272 */
273 void
274 lck_spin_free(
275 lck_spin_t *lck,
276 lck_grp_t *grp)
277 {
278 lck_spin_destroy(lck, grp);
279 kfree(lck, sizeof(lck_spin_t));
280 }
281
282 /*
283 * Routine: lck_spin_init
284 */
285 void
286 lck_spin_init(
287 lck_spin_t *lck,
288 lck_grp_t *grp,
289 __unused lck_attr_t *attr)
290 {
291 usimple_lock_init((usimple_lock_t) lck, 0);
292 lck_grp_reference(grp);
293 lck_grp_lckcnt_incr(grp, LCK_TYPE_SPIN);
294 }
295
296 /*
297 * Routine: lck_spin_destroy
298 */
299 void
300 lck_spin_destroy(
301 lck_spin_t *lck,
302 lck_grp_t *grp)
303 {
304 if (lck->interlock == LCK_SPIN_TAG_DESTROYED)
305 return;
306 lck->interlock = LCK_SPIN_TAG_DESTROYED;
307 lck_grp_lckcnt_decr(grp, LCK_TYPE_SPIN);
308 lck_grp_deallocate(grp);
309 return;
310 }
311
312 /*
313 * Routine: lck_spin_lock
314 */
315 void
316 lck_spin_lock(
317 lck_spin_t *lck)
318 {
319 usimple_lock((usimple_lock_t) lck);
320 }
321
322 /*
323 * Routine: lck_spin_unlock
324 */
325 void
326 lck_spin_unlock(
327 lck_spin_t *lck)
328 {
329 usimple_unlock((usimple_lock_t) lck);
330 }
331
332
333 /*
334 * Routine: lck_spin_try_lock
335 */
336 boolean_t
337 lck_spin_try_lock(
338 lck_spin_t *lck)
339 {
340 boolean_t lrval = (boolean_t)usimple_lock_try((usimple_lock_t) lck);
341 #if DEVELOPMENT || DEBUG
342 if (lrval) {
343 pltrace(FALSE);
344 }
345 #endif
346 return(lrval);
347 }
348
349 /*
350 * Routine: lck_spin_assert
351 */
352 void
353 lck_spin_assert(lck_spin_t *lock, unsigned int type)
354 {
355 thread_t thread, holder;
356 uintptr_t state;
357
358 if (__improbable(type != LCK_ASSERT_OWNED && type != LCK_ASSERT_NOTOWNED)) {
359 panic("lck_spin_assert(): invalid arg (%u)", type);
360 }
361
362 state = lock->interlock;
363 holder = (thread_t)state;
364 thread = current_thread();
365 if (type == LCK_ASSERT_OWNED) {
366 if (__improbable(holder == THREAD_NULL)) {
367 panic("Lock not owned %p = %lx", lock, state);
368 }
369 if (__improbable(holder != thread)) {
370 panic("Lock not owned by current thread %p = %lx", lock, state);
371 }
372 } else if (type == LCK_ASSERT_NOTOWNED) {
373 if (__improbable(holder != THREAD_NULL)) {
374 if (holder == thread) {
375 panic("Lock owned by current thread %p = %lx", lock, state);
376 } else {
377 panic("Lock %p owned by thread %p", lock, holder);
378 }
379 }
380 }
381 }
382
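/*
 * Illustrative sketch (not part of the original sources): typical use of
 * the lck_spin_* interface defined above.  The group name and the guarded
 * counter are assumptions made up for this example.
 */
static __unused void
example_spin_lock_usage(void)
{
	static uint32_t	example_counter;	/* hypothetical shared state */
	lck_grp_t	*grp;
	lck_spin_t	*lock;

	grp = lck_grp_alloc_init("example-spin", LCK_GRP_ATTR_NULL);
	lock = lck_spin_alloc_init(grp, LCK_ATTR_NULL);

	/* critical section... preemption stays disabled while the lock is held */
	lck_spin_lock(lock);
	lck_spin_assert(lock, LCK_ASSERT_OWNED);
	example_counter++;
	lck_spin_unlock(lock);

	/* non-blocking attempt */
	if (lck_spin_try_lock(lock)) {
		example_counter++;
		lck_spin_unlock(lock);
	}

	lck_spin_free(lock, grp);
	lck_grp_free(grp);
}
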
383 /*
384 * Routine: kdp_lck_spin_is_acquired
385 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
386 * Returns: TRUE if lock is acquired.
387 */
388 boolean_t
389 kdp_lck_spin_is_acquired(lck_spin_t *lck) {
390 if (not_in_kdp) {
391 panic("panic: spinlock acquired check done outside of kernel debugger");
392 }
393 return (lck->interlock != 0)? TRUE : FALSE;
394 }
395
396 /*
397 * Initialize a usimple_lock.
398 *
399 * No change in preemption state.
400 */
401 void
402 usimple_lock_init(
403 usimple_lock_t l,
404 __unused unsigned short tag)
405 {
406 #ifndef MACHINE_SIMPLE_LOCK
407 USLDBG(usld_lock_init(l, tag));
408 hw_lock_init(&l->interlock);
409 #else
410 simple_lock_init((simple_lock_t)l,tag);
411 #endif
412 }
413
414 volatile uint32_t spinlock_owner_cpu = ~0;
415 volatile usimple_lock_t spinlock_timed_out;
416
417 uint32_t spinlock_timeout_NMI(uintptr_t thread_addr) {
418 uint32_t i;
419
420 for (i = 0; i < real_ncpus; i++) {
421 if ((cpu_data_ptr[i] != NULL) && ((uintptr_t)cpu_data_ptr[i]->cpu_active_thread == thread_addr)) {
422 spinlock_owner_cpu = i;
423 if ((uint32_t) cpu_number() != i) {
424 /* Cause NMI and panic on the owner's cpu */
425 NMIPI_panic(cpu_to_cpumask(i), SPINLOCK_TIMEOUT);
426 }
427 break;
428 }
429 }
430
431 return spinlock_owner_cpu;
432 }
433
434 /*
435 * Acquire a usimple_lock.
436 *
437 * Returns with preemption disabled. Note
438 * that the hw_lock routines are responsible for
439 * maintaining preemption state.
440 */
441 void
442 usimple_lock(
443 usimple_lock_t l)
444 {
445 #ifndef MACHINE_SIMPLE_LOCK
446 DECL_PC(pc);
447
448 OBTAIN_PC(pc);
449 USLDBG(usld_lock_pre(l, pc));
450
451 if(__improbable(hw_lock_to(&l->interlock, LockTimeOutTSC) == 0)) {
452 boolean_t uslock_acquired = FALSE;
453 while (machine_timeout_suspended()) {
454 enable_preemption();
455 if ((uslock_acquired = hw_lock_to(&l->interlock, LockTimeOutTSC)))
456 break;
457 }
458
459 if (uslock_acquired == FALSE) {
460 uint32_t lock_cpu;
461 uintptr_t lowner = (uintptr_t)l->interlock.lock_data;
462 spinlock_timed_out = l;
463 lock_cpu = spinlock_timeout_NMI(lowner);
464 panic("Spinlock acquisition timed out: lock=%p, lock owner thread=0x%lx, current_thread: %p, lock owner active on CPU 0x%x, current owner: 0x%lx, time: %llu",
465 l, lowner, current_thread(), lock_cpu, (uintptr_t)l->interlock.lock_data, mach_absolute_time());
466 }
467 }
468 #if DEVELOPMENT || DEBUG
469 pltrace(FALSE);
470 #endif
471
472 USLDBG(usld_lock_post(l, pc));
473 #else
474 simple_lock((simple_lock_t)l);
475 #endif
476 #if CONFIG_DTRACE
477 LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE, l, 0);
478 #endif
479 }
480
481
482 /*
483 * Release a usimple_lock.
484 *
485 * Returns with preemption enabled. Note
486 * that the hw_lock routines are responsible for
487 * maintaining preemption state.
488 */
489 void
490 usimple_unlock(
491 usimple_lock_t l)
492 {
493 #ifndef MACHINE_SIMPLE_LOCK
494 DECL_PC(pc);
495
496 OBTAIN_PC(pc);
497 USLDBG(usld_unlock(l, pc));
498 #if DEVELOPMENT || DEBUG
499 pltrace(TRUE);
500 #endif
501 hw_lock_unlock(&l->interlock);
502 #else
503 simple_unlock_rwmb((simple_lock_t)l);
504 #endif
505 }
506
507
508 /*
509 * Conditionally acquire a usimple_lock.
510 *
511 * On success, returns with preemption disabled.
512 * On failure, returns with preemption in the same state
513 * as when first invoked. Note that the hw_lock routines
514 * are responsible for maintaining preemption state.
515 *
516 * XXX No stats are gathered on a miss; I preserved this
517 * behavior from the original assembly-language code, but
518 * doesn't it make sense to log misses? XXX
519 */
520 unsigned int
521 usimple_lock_try(
522 usimple_lock_t l)
523 {
524 #ifndef MACHINE_SIMPLE_LOCK
525 unsigned int success;
526 DECL_PC(pc);
527
528 OBTAIN_PC(pc);
529 USLDBG(usld_lock_try_pre(l, pc));
530 if ((success = hw_lock_try(&l->interlock))) {
531 #if DEVELOPMENT || DEBUG
532 pltrace(FALSE);
533 #endif
534 USLDBG(usld_lock_try_post(l, pc));
535 }
536 return success;
537 #else
538 return(simple_lock_try((simple_lock_t)l));
539 #endif
540 }
541
542 /*
543 * Acquire a usimple_lock while polling for pending TLB flushes
544 * and spinning on a lock.
545 *
546 */
547 void
548 usimple_lock_try_lock_loop(usimple_lock_t l)
549 {
550 boolean_t istate = ml_get_interrupts_enabled();
551 while (!simple_lock_try((l))) {
552 if (!istate)
553 handle_pending_TLB_flushes();
554 cpu_pause();
555 }
556 }
557
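/*
 * Illustrative sketch (not part of the original sources): direct use of
 * the usimple_lock interface, showing the preemption contract documented
 * above (acquisition returns with preemption disabled, release re-enables
 * it).  Most kernel code uses the lck_spin_* wrappers instead; the lock
 * and counter below are made up for this example.
 */
static usimple_lock_data_t	example_uslock;		/* hypothetical lock */
static uint32_t			example_uslock_count;

static __unused void
example_usimple_lock_usage(void)
{
	usimple_lock_init(&example_uslock, 0);

	usimple_lock(&example_uslock);		/* returns with preemption disabled */
	example_uslock_count++;
	usimple_unlock(&example_uslock);	/* preemption re-enabled */

	if (usimple_lock_try(&example_uslock)) {	/* conditional acquisition */
		example_uslock_count++;
		usimple_unlock(&example_uslock);
	}
}
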
558 #if USLOCK_DEBUG
559 /*
560 * States of a usimple_lock. The default when initializing
561 * a usimple_lock is setting it up for debug checking.
562 */
563 #define USLOCK_CHECKED 0x0001 /* lock is being checked */
564 #define USLOCK_TAKEN 0x0002 /* lock has been taken */
565 #define USLOCK_INIT 0xBAA0 /* lock has been initialized */
566 #define USLOCK_INITIALIZED (USLOCK_INIT|USLOCK_CHECKED)
567 #define USLOCK_CHECKING(l) (uslock_check && \
568 ((l)->debug.state & USLOCK_CHECKED))
569
570 /*
571 * Trace activities of a particularly interesting lock.
572 */
573 void usl_trace(usimple_lock_t, int, pc_t, const char *);
574
575
576 /*
577 * Initialize the debugging information contained
578 * in a usimple_lock.
579 */
580 void
581 usld_lock_init(
582 usimple_lock_t l,
583 __unused unsigned short tag)
584 {
585 if (l == USIMPLE_LOCK_NULL)
586 panic("lock initialization: null lock pointer");
587 l->lock_type = USLOCK_TAG;
588 l->debug.state = uslock_check ? USLOCK_INITIALIZED : 0;
589 l->debug.lock_cpu = l->debug.unlock_cpu = 0;
590 l->debug.lock_pc = l->debug.unlock_pc = INVALID_PC;
591 l->debug.lock_thread = l->debug.unlock_thread = INVALID_THREAD;
592 l->debug.duration[0] = l->debug.duration[1] = 0;
596 }
597
598
599 /*
600 * These checks apply to all usimple_locks, not just
601 * those with USLOCK_CHECKED turned on.
602 */
603 int
604 usld_lock_common_checks(
605 usimple_lock_t l,
606 char *caller)
607 {
608 if (l == USIMPLE_LOCK_NULL)
609 panic("%s: null lock pointer", caller);
610 if (l->lock_type != USLOCK_TAG)
611 panic("%s: %p is not a usimple lock, 0x%x", caller, l, l->lock_type);
612 if (!(l->debug.state & USLOCK_INIT))
613 panic("%s: %p is not an initialized lock, 0x%x", caller, l, l->debug.state);
614 return USLOCK_CHECKING(l);
615 }
616
617
618 /*
619 * Debug checks on a usimple_lock just before attempting
620 * to acquire it.
621 */
622 /* ARGSUSED */
623 void
624 usld_lock_pre(
625 usimple_lock_t l,
626 pc_t pc)
627 {
628 char caller[] = "usimple_lock";
629
630
631 if (!usld_lock_common_checks(l, caller))
632 return;
633
634 /*
635 	 * Note that we have a weird case where we are getting a lock when we are
636 * in the process of putting the system to sleep. We are running with no
637 * current threads, therefore we can't tell if we are trying to retake a lock
638 * we have or someone on the other processor has it. Therefore we just
639 * ignore this test if the locking thread is 0.
640 */
641
642 if ((l->debug.state & USLOCK_TAKEN) && l->debug.lock_thread &&
643 l->debug.lock_thread == (void *) current_thread()) {
644 printf("%s: lock %p already locked (at %p) by",
645 caller, l, l->debug.lock_pc);
646 printf(" current thread %p (new attempt at pc %p)\n",
647 l->debug.lock_thread, pc);
648 panic("%s", caller);
649 }
650 mp_disable_preemption();
651 usl_trace(l, cpu_number(), pc, caller);
652 mp_enable_preemption();
653 }
654
655
656 /*
657 * Debug checks on a usimple_lock just after acquiring it.
658 *
659 * Pre-emption has been disabled at this point,
660 * so we are safe in using cpu_number.
661 */
662 void
663 usld_lock_post(
664 usimple_lock_t l,
665 pc_t pc)
666 {
667 int mycpu;
668 char caller[] = "successful usimple_lock";
669
670
671 if (!usld_lock_common_checks(l, caller))
672 return;
673
674 if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED))
675 panic("%s: lock %p became uninitialized",
676 caller, l);
677 if ((l->debug.state & USLOCK_TAKEN))
678 panic("%s: lock 0x%p became TAKEN by someone else",
679 caller, l);
680
681 mycpu = cpu_number();
682 l->debug.lock_thread = (void *)current_thread();
683 l->debug.state |= USLOCK_TAKEN;
684 l->debug.lock_pc = pc;
685 l->debug.lock_cpu = mycpu;
686
687 usl_trace(l, mycpu, pc, caller);
688 }
689
690
691 /*
692 * Debug checks on a usimple_lock just before
693 * releasing it. Note that the caller has not
694 * yet released the hardware lock.
695 *
696 * Preemption is still disabled, so there's
697 * no problem using cpu_number.
698 */
699 void
700 usld_unlock(
701 usimple_lock_t l,
702 pc_t pc)
703 {
704 int mycpu;
705 char caller[] = "usimple_unlock";
706
707
708 if (!usld_lock_common_checks(l, caller))
709 return;
710
711 mycpu = cpu_number();
712
713 if (!(l->debug.state & USLOCK_TAKEN))
714 panic("%s: lock 0x%p hasn't been taken",
715 caller, l);
716 if (l->debug.lock_thread != (void *) current_thread())
717 panic("%s: unlocking lock 0x%p, owned by thread %p",
718 caller, l, l->debug.lock_thread);
719 if (l->debug.lock_cpu != mycpu) {
720 printf("%s: unlocking lock 0x%p on cpu 0x%x",
721 caller, l, mycpu);
722 printf(" (acquired on cpu 0x%x)\n", l->debug.lock_cpu);
723 panic("%s", caller);
724 }
725 usl_trace(l, mycpu, pc, caller);
726
727 l->debug.unlock_thread = l->debug.lock_thread;
728 	l->debug.lock_thread = INVALID_THREAD;
729 l->debug.state &= ~USLOCK_TAKEN;
730 l->debug.unlock_pc = pc;
731 l->debug.unlock_cpu = mycpu;
732 }
733
734
735 /*
736 * Debug checks on a usimple_lock just before
737 * attempting to acquire it.
738 *
739 * Preemption isn't guaranteed to be disabled.
740 */
741 void
742 usld_lock_try_pre(
743 usimple_lock_t l,
744 pc_t pc)
745 {
746 char caller[] = "usimple_lock_try";
747
748 if (!usld_lock_common_checks(l, caller))
749 return;
750 mp_disable_preemption();
751 usl_trace(l, cpu_number(), pc, caller);
752 mp_enable_preemption();
753 }
754
755
756 /*
757 * Debug checks on a usimple_lock just after
758 * successfully attempting to acquire it.
759 *
760 * Preemption has been disabled by the
761 * lock acquisition attempt, so it's safe
762 * to use cpu_number.
763 */
764 void
765 usld_lock_try_post(
766 usimple_lock_t l,
767 pc_t pc)
768 {
769 int mycpu;
770 char caller[] = "successful usimple_lock_try";
771
772 if (!usld_lock_common_checks(l, caller))
773 return;
774
775 if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED))
776 panic("%s: lock 0x%p became uninitialized",
777 caller, l);
778 if ((l->debug.state & USLOCK_TAKEN))
779 panic("%s: lock 0x%p became TAKEN by someone else",
780 caller, l);
781
782 mycpu = cpu_number();
783 l->debug.lock_thread = (void *) current_thread();
784 l->debug.state |= USLOCK_TAKEN;
785 l->debug.lock_pc = pc;
786 l->debug.lock_cpu = mycpu;
787
788 usl_trace(l, mycpu, pc, caller);
789 }
790
791
792 /*
793 * For very special cases, set traced_lock to point to a
794 * specific lock of interest. The result is a series of
795 * XPRs showing lock operations on that lock. The lock_seq
796 * value is used to show the order of those operations.
797 */
798 usimple_lock_t traced_lock;
799 unsigned int lock_seq;
800
801 void
802 usl_trace(
803 usimple_lock_t l,
804 int mycpu,
805 pc_t pc,
806 const char * op_name)
807 {
808 if (traced_lock == l) {
809 XPR(XPR_SLOCK,
810 "seq %d, cpu %d, %s @ %x\n",
811 (uintptr_t) lock_seq, (uintptr_t) mycpu,
812 (uintptr_t) op_name, (uintptr_t) pc, 0);
813 lock_seq++;
814 }
815 }
816
817
818 #endif /* USLOCK_DEBUG */
819
820 /*
821 * Routine: lck_rw_alloc_init
822 */
823 lck_rw_t *
824 lck_rw_alloc_init(
825 lck_grp_t *grp,
826 lck_attr_t *attr) {
827 lck_rw_t *lck;
828
829 if ((lck = (lck_rw_t *)kalloc(sizeof(lck_rw_t))) != 0) {
830 bzero(lck, sizeof(lck_rw_t));
831 lck_rw_init(lck, grp, attr);
832 }
833
834 return(lck);
835 }
836
837 /*
838 * Routine: lck_rw_free
839 */
840 void
841 lck_rw_free(
842 lck_rw_t *lck,
843 lck_grp_t *grp) {
844 lck_rw_destroy(lck, grp);
845 kfree(lck, sizeof(lck_rw_t));
846 }
847
848 /*
849 * Routine: lck_rw_init
850 */
851 void
852 lck_rw_init(
853 lck_rw_t *lck,
854 lck_grp_t *grp,
855 lck_attr_t *attr)
856 {
857 lck_attr_t *lck_attr = (attr != LCK_ATTR_NULL) ?
858 attr : &LockDefaultLckAttr;
859
860 hw_lock_byte_init(&lck->lck_rw_interlock);
861 lck->lck_rw_want_write = FALSE;
862 lck->lck_rw_want_upgrade = FALSE;
863 lck->lck_rw_shared_count = 0;
864 lck->lck_rw_can_sleep = TRUE;
865 lck->lck_r_waiting = lck->lck_w_waiting = 0;
866 lck->lck_rw_tag = 0;
867 lck->lck_rw_priv_excl = ((lck_attr->lck_attr_val &
868 LCK_ATTR_RW_SHARED_PRIORITY) == 0);
869
870 lck_grp_reference(grp);
871 lck_grp_lckcnt_incr(grp, LCK_TYPE_RW);
872 }
873
874 /*
875 * Routine: lck_rw_destroy
876 */
877 void
878 lck_rw_destroy(
879 lck_rw_t *lck,
880 lck_grp_t *grp)
881 {
882 if (lck->lck_rw_tag == LCK_RW_TAG_DESTROYED)
883 return;
884 #if MACH_LDEBUG
885 lck_rw_assert(lck, LCK_RW_ASSERT_NOTHELD);
886 #endif
887 lck->lck_rw_tag = LCK_RW_TAG_DESTROYED;
888 lck_grp_lckcnt_decr(grp, LCK_TYPE_RW);
889 lck_grp_deallocate(grp);
890 return;
891 }
892
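/*
 * Illustrative sketch (not part of the original sources): the common
 * lifecycle of a lck_rw_t using the routines above and the lock/unlock
 * entry points implemented later in this file.  The group name and the
 * protected value are assumptions made up for this example.
 */
static __unused void
example_rw_lock_usage(void)
{
	static uint32_t	example_value;		/* hypothetical shared state */
	lck_grp_t	*grp;
	lck_rw_t	*lock;

	grp = lck_grp_alloc_init("example-rw", LCK_GRP_ATTR_NULL);
	lock = lck_rw_alloc_init(grp, LCK_ATTR_NULL);

	/* readers may hold the lock concurrently */
	lck_rw_lock_shared(lock);
	(void) example_value;
	lck_rw_unlock_shared(lock);

	/* a writer holds it exclusively */
	lck_rw_lock_exclusive(lock);
	example_value++;
	lck_rw_unlock_exclusive(lock);

	/* lck_rw_done() releases whichever mode is currently held */
	lck_rw_lock_shared(lock);
	(void) lck_rw_done(lock);

	lck_rw_free(lock, grp);
	lck_grp_free(grp);
}
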
893 /*
894 * Sleep locks. These use the same data structure and algorithm
895 * as the spin locks, but the process sleeps while it is waiting
896 * for the lock. These work on uniprocessor systems.
897 */
898
899 #define DECREMENTER_TIMEOUT 1000000
900
901 /*
902 * We disable interrupts while holding the RW interlock to prevent an
903 * interrupt from exacerbating hold time.
904 * Hence, local helper functions lck_interlock_lock()/lck_interlock_unlock().
905 */
906 static inline boolean_t
907 lck_interlock_lock(lck_rw_t *lck)
908 {
909 boolean_t istate;
910
911 istate = ml_set_interrupts_enabled(FALSE);
912 hw_lock_byte_lock(&lck->lck_rw_interlock);
913 return istate;
914 }
915
916 static inline void
917 lck_interlock_unlock(lck_rw_t *lck, boolean_t istate)
918 {
919 hw_lock_byte_unlock(&lck->lck_rw_interlock);
920 ml_set_interrupts_enabled(istate);
921 }
922
923 /*
924 * This inline is used when busy-waiting for an rw lock.
925 * If interrupts were disabled when the lock primitive was called,
926 * we poll the IPI handler for pending tlb flushes.
927 * XXX This is a hack to avoid deadlocking on the pmap_system_lock.
928 */
929 static inline void
930 lck_rw_lock_pause(boolean_t interrupts_enabled)
931 {
932 if (!interrupts_enabled)
933 handle_pending_TLB_flushes();
934 cpu_pause();
935 }
936
937 static inline boolean_t
938 lck_rw_held_read_or_upgrade(lck_rw_t *lock)
939 {
940 if (ordered_load(&lock->data) & (LCK_RW_SHARED_MASK | LCK_RW_INTERLOCK | LCK_RW_WANT_UPGRADE))
941 return TRUE;
942 return FALSE;
943 }
944
945 /*
946 * compute the deadline to spin against when
947 * waiting for a change of state on a lck_rw_t
948 */
949 static inline uint64_t
950 lck_rw_deadline_for_spin(lck_rw_t *lck)
951 {
952 if (lck->lck_rw_can_sleep) {
953 if (lck->lck_r_waiting || lck->lck_w_waiting || lck->lck_rw_shared_count > machine_info.max_cpus) {
954 /*
955 * there are already threads waiting on this lock... this
956 * implies that they have spun beyond their deadlines waiting for
957 * the desired state to show up so we will not bother spinning at this time...
958 * or
959 * the current number of threads sharing this lock exceeds our capacity to run them
960 * concurrently and since all states we're going to spin for require the rw_shared_count
961 * to be at 0, we'll not bother spinning since the latency for this to happen is
962 * unpredictable...
963 */
964 return (mach_absolute_time());
965 }
966 return (mach_absolute_time() + MutexSpin);
967 } else
968 return (mach_absolute_time() + (100000LL * 1000000000LL));
969 }
970
971
972 /*
973 * Spin while interlock is held.
974 */
975
976 static inline void
977 lck_rw_interlock_spin(lck_rw_t *lock)
978 {
979 while (ordered_load(&lock->data) & LCK_RW_INTERLOCK) {
980 cpu_pause();
981 }
982 }
983
984 static boolean_t
985 lck_rw_grab_want(lck_rw_t *lock)
986 {
987 uint32_t data, prev;
988
989 for ( ; ; ) {
990 data = atomic_exchange_begin32(&lock->data, &prev, memory_order_relaxed);
991 if ((data & LCK_RW_INTERLOCK) == 0)
992 break;
993 atomic_exchange_abort();
994 lck_rw_interlock_spin(lock);
995 }
996 if (data & LCK_RW_WANT_WRITE) {
997 atomic_exchange_abort();
998 return FALSE;
999 }
1000 data |= LCK_RW_WANT_WRITE;
1001 return atomic_exchange_complete32(&lock->data, prev, data, memory_order_relaxed);
1002 }
1003
1004 static boolean_t
1005 lck_rw_grab_shared(lck_rw_t *lock)
1006 {
1007 uint32_t data, prev;
1008
1009 for ( ; ; ) {
1010 data = atomic_exchange_begin32(&lock->data, &prev, memory_order_acquire_smp);
1011 if ((data & LCK_RW_INTERLOCK) == 0)
1012 break;
1013 atomic_exchange_abort();
1014 lck_rw_interlock_spin(lock);
1015 }
1016 if (data & (LCK_RW_WANT_WRITE | LCK_RW_WANT_UPGRADE)) {
1017 if (((data & LCK_RW_SHARED_MASK) == 0) || (data & LCK_RW_PRIV_EXCL)) {
1018 atomic_exchange_abort();
1019 return FALSE;
1020 }
1021 }
1022 data += LCK_RW_SHARED_READER;
1023 return atomic_exchange_complete32(&lock->data, prev, data, memory_order_acquire_smp);
1024 }
1025
1026 /*
1027 * Routine: lck_rw_lock_exclusive
1028 */
1029 static void
1030 lck_rw_lock_exclusive_gen(
1031 lck_rw_t *lck)
1032 {
1033 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
1034 uint64_t deadline = 0;
1035 int slept = 0;
1036 int gotlock = 0;
1037 int lockheld = 0;
1038 wait_result_t res = 0;
1039 boolean_t istate = -1;
1040
1041 #if CONFIG_DTRACE
1042 boolean_t dtrace_ls_initialized = FALSE;
1043 boolean_t dtrace_rwl_excl_spin, dtrace_rwl_excl_block, dtrace_ls_enabled= FALSE;
1044 uint64_t wait_interval = 0;
1045 int readers_at_sleep = 0;
1046 #endif
1047
1048 /*
1049 * Try to acquire the lck_rw_want_write bit.
1050 */
1051 while ( !lck_rw_grab_want(lck)) {
1052
1053 #if CONFIG_DTRACE
1054 if (dtrace_ls_initialized == FALSE) {
1055 dtrace_ls_initialized = TRUE;
1056 dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
1057 dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
1058 dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
1059 if (dtrace_ls_enabled) {
1060 /*
1061 * Either sleeping or spinning is happening,
1062 * start a timing of our delay interval now.
1063 */
1064 readers_at_sleep = lck->lck_rw_shared_count;
1065 wait_interval = mach_absolute_time();
1066 }
1067 }
1068 #endif
1069 if (istate == -1)
1070 istate = ml_get_interrupts_enabled();
1071
1072 deadline = lck_rw_deadline_for_spin(lck);
1073
1074 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
1075
1076 while (((gotlock = lck_rw_grab_want(lck)) == 0) && mach_absolute_time() < deadline)
1077 lck_rw_lock_pause(istate);
1078
1079 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_END, trace_lck, 0, 0, gotlock, 0);
1080
1081 if (gotlock)
1082 break;
1083 /*
1084 * if we get here, the deadline has expired w/o us
1085 * being able to grab the lock exclusively
1086 * check to see if we're allowed to do a thread_block
1087 */
1088 if (lck->lck_rw_can_sleep) {
1089
1090 istate = lck_interlock_lock(lck);
1091
1092 if (lck->lck_rw_want_write) {
1093
1094 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
1095
1096 lck->lck_w_waiting = TRUE;
1097
1098 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite);
1099 res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT);
1100 lck_interlock_unlock(lck, istate);
1101
1102 if (res == THREAD_WAITING) {
1103 res = thread_block(THREAD_CONTINUE_NULL);
1104 slept++;
1105 }
1106 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_END, trace_lck, res, slept, 0, 0);
1107 } else {
1108 lck->lck_rw_want_write = TRUE;
1109 lck_interlock_unlock(lck, istate);
1110 break;
1111 }
1112 }
1113 }
1114 /*
1115 * Wait for readers (and upgrades) to finish...
1116 * the test for these conditions must be done simultaneously with
1117 * a check of the interlock not being held since
1118 * the rw_shared_count will drop to 0 first and then want_upgrade
1119 * will be set to 1 in the shared_to_exclusive scenario... those
1120 * adjustments are done behind the interlock and represent an
1121 * atomic change in state and must be considered as such
1122 * however, once we see the read count at 0, the want_upgrade not set
1123 * and the interlock not held, we are safe to proceed
1124 */
1125 while (lck_rw_held_read_or_upgrade(lck)) {
1126
1127 #if CONFIG_DTRACE
1128 /*
1129 * Either sleeping or spinning is happening, start
1130 * a timing of our delay interval now. If we set it
1131 * to -1 we don't have accurate data so we cannot later
1132 * decide to record a dtrace spin or sleep event.
1133 */
1134 if (dtrace_ls_initialized == FALSE) {
1135 dtrace_ls_initialized = TRUE;
1136 dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
1137 dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
1138 dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
1139 if (dtrace_ls_enabled) {
1140 /*
1141 * Either sleeping or spinning is happening,
1142 * start a timing of our delay interval now.
1143 */
1144 readers_at_sleep = lck->lck_rw_shared_count;
1145 wait_interval = mach_absolute_time();
1146 }
1147 }
1148 #endif
1149 if (istate == -1)
1150 istate = ml_get_interrupts_enabled();
1151
1152 deadline = lck_rw_deadline_for_spin(lck);
1153
1154 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
1155
1156 while ((lockheld = lck_rw_held_read_or_upgrade(lck)) && mach_absolute_time() < deadline)
1157 lck_rw_lock_pause(istate);
1158
1159 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_END, trace_lck, 0, 0, lockheld, 0);
1160
1161 if ( !lockheld)
1162 break;
1163 /*
1164 * if we get here, the deadline has expired w/o us
1165 * being able to grab the lock exclusively
1166 * check to see if we're allowed to do a thread_block
1167 */
1168 if (lck->lck_rw_can_sleep) {
1169
1170 istate = lck_interlock_lock(lck);
1171
1172 if (lck->lck_rw_shared_count != 0 || lck->lck_rw_want_upgrade) {
1173 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
1174
1175 lck->lck_w_waiting = TRUE;
1176
1177 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite);
1178 res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT);
1179 lck_interlock_unlock(lck, istate);
1180
1181 if (res == THREAD_WAITING) {
1182 res = thread_block(THREAD_CONTINUE_NULL);
1183 slept++;
1184 }
1185 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_END, trace_lck, res, slept, 0, 0);
1186 } else {
1187 lck_interlock_unlock(lck, istate);
1188 /*
1189 * must own the lock now, since we checked for
1190 * readers or upgrade owner behind the interlock
1191 * no need for a call to 'lck_rw_held_read_or_upgrade'
1192 */
1193 break;
1194 }
1195 }
1196 }
1197
1198 #if CONFIG_DTRACE
1199 /*
1200 * Decide what latencies we suffered that are Dtrace events.
1201 * If we have set wait_interval, then we either spun or slept.
1202 * At least we get out from under the interlock before we record
1203 * which is the best we can do here to minimize the impact
1204 * of the tracing.
1205 * If we have set wait_interval to -1, then dtrace was not enabled when we
1206 * started sleeping/spinning so we don't record this event.
1207 */
1208 if (dtrace_ls_enabled == TRUE) {
1209 if (slept == 0) {
1210 LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_EXCL_SPIN, lck,
1211 mach_absolute_time() - wait_interval, 1);
1212 } else {
1213 /*
1214 			 * For the blocking case, we also record whether the lock was
1215 			 * held for read or write when we blocked, and how many readers.
1216 * Notice that above we recorded this before we dropped
1217 * the interlock so the count is accurate.
1218 */
1219 LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_EXCL_BLOCK, lck,
1220 mach_absolute_time() - wait_interval, 1,
1221 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1222 }
1223 }
1224 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lck, 1);
1225 #endif
1226 }
1227
1228 /*
1229 * Routine: lck_rw_done
1230 */
1231
1232 lck_rw_type_t lck_rw_done(lck_rw_t *lock)
1233 {
1234 uint32_t data, prev;
1235
1236 for ( ; ; ) {
1237 data = atomic_exchange_begin32(&lock->data, &prev, memory_order_release_smp);
1238 if (data & LCK_RW_INTERLOCK) { /* wait for interlock to clear */
1239 atomic_exchange_abort();
1240 lck_rw_interlock_spin(lock);
1241 continue;
1242 }
1243 if (data & LCK_RW_SHARED_MASK) {
1244 data -= LCK_RW_SHARED_READER;
1245 if ((data & LCK_RW_SHARED_MASK) == 0) /* if reader count has now gone to 0, check for waiters */
1246 goto check_waiters;
1247 } else { /* if reader count == 0, must be exclusive lock */
1248 if (data & LCK_RW_WANT_UPGRADE) {
1249 data &= ~(LCK_RW_WANT_UPGRADE);
1250 } else {
1251 if (data & LCK_RW_WANT_WRITE)
1252 data &= ~(LCK_RW_WANT_EXCL);
1253 else /* lock is not 'owned', panic */
1254 panic("Releasing non-exclusive RW lock without a reader refcount!");
1255 }
1256 check_waiters:
1257 if (prev & LCK_RW_W_WAITING) {
1258 data &= ~(LCK_RW_W_WAITING);
1259 if ((prev & LCK_RW_PRIV_EXCL) == 0)
1260 data &= ~(LCK_RW_R_WAITING);
1261 } else
1262 data &= ~(LCK_RW_R_WAITING);
1263 }
1264 if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_release_smp))
1265 break;
1266 cpu_pause();
1267 }
1268 return lck_rw_done_gen(lock, prev);
1269 }
1270
1271 /*
1272 * Routine: lck_rw_done_gen
1273 *
1274 * called from lck_rw_done()
1275 * prior_lock_state is the value in the 1st
1276 * word of the lock at the time of a successful
1277 * atomic compare and exchange with the new value...
1278 * it represents the state of the lock before we
1279 * decremented the rw_shared_count or cleared either
1280 * rw_want_upgrade or rw_want_write and
1281 * the lck_x_waiting bits... since the wrapper
1282 * routine has already changed the state atomically,
1283 * we just need to decide if we should
1284 * wake up anyone and what value to return... we do
1285 * this by examining the state of the lock before
1286 * we changed it
1287 */
1288 static lck_rw_type_t
1289 lck_rw_done_gen(
1290 lck_rw_t *lck,
1291 uint32_t prior_lock_state)
1292 {
1293 lck_rw_t *fake_lck;
1294 lck_rw_type_t lock_type;
1295 thread_t thread;
1296 uint32_t rwlock_count;
1297
1298 /*
1299 	 * prior_lock_state is a snapshot of the 1st word of the
1300 	 * lock in question... we'll fake up a pointer to it
1301 	 * and carefully not access anything beyond what's defined
1302 * in the first word of a lck_rw_t
1303 */
1304 fake_lck = (lck_rw_t *)&prior_lock_state;
1305
1306 if (fake_lck->lck_rw_shared_count <= 1) {
1307 if (fake_lck->lck_w_waiting)
1308 thread_wakeup(RW_LOCK_WRITER_EVENT(lck));
1309
1310 if (!(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting) && fake_lck->lck_r_waiting)
1311 thread_wakeup(RW_LOCK_READER_EVENT(lck));
1312 }
1313 if (fake_lck->lck_rw_shared_count)
1314 lock_type = LCK_RW_TYPE_SHARED;
1315 else
1316 lock_type = LCK_RW_TYPE_EXCLUSIVE;
1317
1318 /* Check if dropping the lock means that we need to unpromote */
1319 thread = current_thread();
1320 rwlock_count = thread->rwlock_count--;
1321 #if MACH_LDEBUG
1322 if (rwlock_count == 0) {
1323 panic("rw lock count underflow for thread %p", thread);
1324 }
1325 #endif
1326 if ((rwlock_count == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
1327 /* sched_flags checked without lock, but will be rechecked while clearing */
1328 lck_rw_clear_promotion(thread);
1329 }
1330
1331 #if CONFIG_DTRACE
1332 LOCKSTAT_RECORD(LS_LCK_RW_DONE_RELEASE, lck, lock_type == LCK_RW_TYPE_SHARED ? 0 : 1);
1333 #endif
1334
1335 return(lock_type);
1336 }
1337
1338
1339 /*
1340 * Routine: lck_rw_unlock
1341 */
1342 void
1343 lck_rw_unlock(
1344 lck_rw_t *lck,
1345 lck_rw_type_t lck_rw_type)
1346 {
1347 if (lck_rw_type == LCK_RW_TYPE_SHARED)
1348 lck_rw_unlock_shared(lck);
1349 else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
1350 lck_rw_unlock_exclusive(lck);
1351 else
1352 panic("lck_rw_unlock(): Invalid RW lock type: %d\n", lck_rw_type);
1353 }
1354
1355
1356 /*
1357 * Routine: lck_rw_unlock_shared
1358 */
1359 void
1360 lck_rw_unlock_shared(
1361 lck_rw_t *lck)
1362 {
1363 lck_rw_type_t ret;
1364
1365 assertf(lck->lck_rw_shared_count > 0, "lck %p has shared_count=0x%x", lck, lck->lck_rw_shared_count);
1366 ret = lck_rw_done(lck);
1367
1368 if (ret != LCK_RW_TYPE_SHARED)
1369 panic("lck_rw_unlock_shared(): lock %p held in mode: %d\n", lck, ret);
1370 }
1371
1372
1373 /*
1374 * Routine: lck_rw_unlock_exclusive
1375 */
1376 void
1377 lck_rw_unlock_exclusive(
1378 lck_rw_t *lck)
1379 {
1380 lck_rw_type_t ret;
1381
1382 ret = lck_rw_done(lck);
1383
1384 if (ret != LCK_RW_TYPE_EXCLUSIVE)
1385 panic("lck_rw_unlock_exclusive(): lock held in mode: %d\n", ret);
1386 }
1387
1388
1389 /*
1390 * Routine: lck_rw_lock
1391 */
1392 void
1393 lck_rw_lock(
1394 lck_rw_t *lck,
1395 lck_rw_type_t lck_rw_type)
1396 {
1397 if (lck_rw_type == LCK_RW_TYPE_SHARED)
1398 lck_rw_lock_shared(lck);
1399 else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
1400 lck_rw_lock_exclusive(lck);
1401 else
1402 panic("lck_rw_lock(): Invalid RW lock type: %x\n", lck_rw_type);
1403 }
1404
1405 /*
1406 * Routine: lck_rw_lock_shared
1407 */
1408 void
1409 lck_rw_lock_shared(lck_rw_t *lock)
1410 {
1411 uint32_t data, prev;
1412
1413 current_thread()->rwlock_count++;
1414 for ( ; ; ) {
1415 data = atomic_exchange_begin32(&lock->data, &prev, memory_order_acquire_smp);
1416 if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK)) {
1417 atomic_exchange_abort();
1418 lck_rw_lock_shared_gen(lock);
1419 break;
1420 }
1421 data += LCK_RW_SHARED_READER;
1422 if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_acquire_smp))
1423 break;
1424 cpu_pause();
1425 }
1426 #if CONFIG_DTRACE
1427 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED);
1428 #endif /* CONFIG_DTRACE */
1429 return;
1430 }
1431
1432 /*
1433 * Routine: lck_rw_lock_shared_gen
1434 * Function:
1435 * assembly fast path code has determined that this lock
1436 * is held exclusively... this is where we spin/block
1437 * until we can acquire the lock in the shared mode
1438 */
1439 static void
1440 lck_rw_lock_shared_gen(
1441 lck_rw_t *lck)
1442 {
1443 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
1444 uint64_t deadline = 0;
1445 int gotlock = 0;
1446 int slept = 0;
1447 wait_result_t res = 0;
1448 boolean_t istate = -1;
1449
1450 #if CONFIG_DTRACE
1451 uint64_t wait_interval = 0;
1452 int readers_at_sleep = 0;
1453 boolean_t dtrace_ls_initialized = FALSE;
1454 boolean_t dtrace_rwl_shared_spin, dtrace_rwl_shared_block, dtrace_ls_enabled = FALSE;
1455 #endif
1456
1457 while ( !lck_rw_grab_shared(lck)) {
1458
1459 #if CONFIG_DTRACE
1460 if (dtrace_ls_initialized == FALSE) {
1461 dtrace_ls_initialized = TRUE;
1462 dtrace_rwl_shared_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_SPIN] != 0);
1463 dtrace_rwl_shared_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_BLOCK] != 0);
1464 dtrace_ls_enabled = dtrace_rwl_shared_spin || dtrace_rwl_shared_block;
1465 if (dtrace_ls_enabled) {
1466 /*
1467 * Either sleeping or spinning is happening,
1468 * start a timing of our delay interval now.
1469 */
1470 readers_at_sleep = lck->lck_rw_shared_count;
1471 wait_interval = mach_absolute_time();
1472 }
1473 }
1474 #endif
1475 if (istate == -1)
1476 istate = ml_get_interrupts_enabled();
1477
1478 deadline = lck_rw_deadline_for_spin(lck);
1479
1480 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_START,
1481 trace_lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, 0, 0);
1482
1483 while (((gotlock = lck_rw_grab_shared(lck)) == 0) && mach_absolute_time() < deadline)
1484 lck_rw_lock_pause(istate);
1485
1486 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_END,
1487 trace_lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, gotlock, 0);
1488
1489 if (gotlock)
1490 break;
1491 /*
1492 * if we get here, the deadline has expired w/o us
1493 * being able to grab the lock for read
1494 * check to see if we're allowed to do a thread_block
1495 */
1496 if (lck->lck_rw_can_sleep) {
1497
1498 istate = lck_interlock_lock(lck);
1499
1500 if ((lck->lck_rw_want_write || lck->lck_rw_want_upgrade) &&
1501 ((lck->lck_rw_shared_count == 0) || lck->lck_rw_priv_excl)) {
1502
1503 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_START,
1504 trace_lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, 0, 0);
1505
1506 lck->lck_r_waiting = TRUE;
1507
1508 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockRead);
1509 res = assert_wait(RW_LOCK_READER_EVENT(lck), THREAD_UNINT);
1510 lck_interlock_unlock(lck, istate);
1511
1512 if (res == THREAD_WAITING) {
1513 res = thread_block(THREAD_CONTINUE_NULL);
1514 slept++;
1515 }
1516 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_END,
1517 trace_lck, res, slept, 0, 0);
1518 } else {
1519 lck->lck_rw_shared_count++;
1520 lck_interlock_unlock(lck, istate);
1521 break;
1522 }
1523 }
1524 }
1525
1526 #if CONFIG_DTRACE
1527 if (dtrace_ls_enabled == TRUE) {
1528 if (slept == 0) {
1529 LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_SPIN, lck, mach_absolute_time() - wait_interval, 0);
1530 } else {
1531 LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_BLOCK, lck,
1532 mach_absolute_time() - wait_interval, 0,
1533 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1534 }
1535 }
1536 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lck, 0);
1537 #endif
1538 }
1539
1540
1541 /*
1542 * Routine: lck_rw_lock_exclusive
1543 */
1544
1545 void
1546 lck_rw_lock_exclusive(lck_rw_t *lock)
1547 {
1548 current_thread()->rwlock_count++;
1549 if (atomic_test_and_set32(&lock->data,
1550 (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK),
1551 LCK_RW_WANT_EXCL, memory_order_acquire_smp, FALSE)) {
1552 #if CONFIG_DTRACE
1553 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
1554 #endif /* CONFIG_DTRACE */
1555 } else
1556 lck_rw_lock_exclusive_gen(lock);
1557 }
1558
1559
1560 /*
1561 * Routine: lck_rw_lock_shared_to_exclusive
1562 */
1563
1564 boolean_t
1565 lck_rw_lock_shared_to_exclusive(lck_rw_t *lock)
1566 {
1567 uint32_t data, prev;
1568
1569 for ( ; ; ) {
1570 data = atomic_exchange_begin32(&lock->data, &prev, memory_order_acquire_smp);
1571 if (data & LCK_RW_INTERLOCK) {
1572 atomic_exchange_abort();
1573 lck_rw_interlock_spin(lock);
1574 continue;
1575 }
1576 if (data & LCK_RW_WANT_UPGRADE) {
1577 data -= LCK_RW_SHARED_READER;
1578 if ((data & LCK_RW_SHARED_MASK) == 0) /* we were the last reader */
1579 data &= ~(LCK_RW_W_WAITING); /* so clear the wait indicator */
1580 if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_acquire_smp))
1581 return lck_rw_lock_shared_to_exclusive_failure(lock, prev);
1582 } else {
1583 data |= LCK_RW_WANT_UPGRADE; /* ask for WANT_UPGRADE */
1584 data -= LCK_RW_SHARED_READER; /* and shed our read count */
1585 if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_acquire_smp))
1586 break;
1587 }
1588 cpu_pause();
1589 }
1590 /* we now own the WANT_UPGRADE */
1591 if (data & LCK_RW_SHARED_MASK) /* check to see if all of the readers are drained */
1592 lck_rw_lock_shared_to_exclusive_success(lock); /* if not, we need to go wait */
1593 #if CONFIG_DTRACE
1594 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lock, 0);
1595 #endif
1596 return TRUE;
1597 }
1598
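/*
 * Illustrative sketch (not part of the original sources): the shared-to-
 * exclusive upgrade pattern served by the routine above.  Note the failure
 * semantics... when lck_rw_lock_shared_to_exclusive() returns FALSE the
 * read hold has already been dropped, so the caller must reacquire the
 * lock and revalidate anything it observed under the shared hold.  The
 * lock pointer is assumed to be supplied by the caller.
 */
static __unused void
example_rw_upgrade(lck_rw_t *lock)
{
	lck_rw_lock_shared(lock);

	/* ... decide, while holding the lock shared, that an update is needed ... */

	if (lck_rw_lock_shared_to_exclusive(lock)) {
		/* upgrade succeeded... we now hold the lock exclusively */
		lck_rw_unlock_exclusive(lock);
	} else {
		/* upgrade failed and the shared hold is gone... start over */
		lck_rw_lock_exclusive(lock);
		lck_rw_unlock_exclusive(lock);
	}
}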
1599
1600 /*
1601 * Routine: lck_rw_lock_shared_to_exclusive_failure
1602 * Function:
1603 * assembly fast path code has already dropped our read
1604 * count and determined that someone else owns 'lck_rw_want_upgrade'
1605 * if 'lck_rw_shared_count' == 0, it's also already dropped 'lck_w_waiting'
1606 * all we need to do here is determine if a wakeup is needed
1607 */
1608 static boolean_t
1609 lck_rw_lock_shared_to_exclusive_failure(
1610 lck_rw_t *lck,
1611 uint32_t prior_lock_state)
1612 {
1613 lck_rw_t *fake_lck;
1614 thread_t thread = current_thread();
1615 uint32_t rwlock_count;
1616
1617 /* Check if dropping the lock means that we need to unpromote */
1618 rwlock_count = thread->rwlock_count--;
1619 #if MACH_LDEBUG
1620 if (rwlock_count == 0) {
1621 panic("rw lock count underflow for thread %p", thread);
1622 }
1623 #endif
1624 fake_lck = (lck_rw_t *)&prior_lock_state;
1625
1626 if (fake_lck->lck_w_waiting && fake_lck->lck_rw_shared_count == 1) {
1627 /*
1628 * Someone else has requested upgrade.
1629 * Since we've released the read lock, wake
1630 * him up if he's blocked waiting
1631 */
1632 thread_wakeup(RW_LOCK_WRITER_EVENT(lck));
1633 }
1634
1635 if ((rwlock_count == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
1636 /* sched_flags checked without lock, but will be rechecked while clearing */
1637 lck_rw_clear_promotion(thread);
1638 }
1639
1640 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_NONE,
1641 VM_KERNEL_UNSLIDE_OR_PERM(lck), lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0);
1642
1643 return (FALSE);
1644 }
1645
1646
1647 /*
1648 * Routine: lck_rw_lock_shared_to_exclusive_success
1649 * Function:
1650 * assembly fast path code has already dropped our read
1651 * count and successfully acquired 'lck_rw_want_upgrade'
1652 * we just need to wait for the rest of the readers to drain
1653 * and then we can return as the exclusive holder of this lock
1654 */
1655 static boolean_t
1656 lck_rw_lock_shared_to_exclusive_success(
1657 lck_rw_t *lck)
1658 {
1659 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
1660 uint64_t deadline = 0;
1661 int slept = 0;
1662 int still_shared = 0;
1663 wait_result_t res;
1664 boolean_t istate = -1;
1665
1666 #if CONFIG_DTRACE
1667 uint64_t wait_interval = 0;
1668 int readers_at_sleep = 0;
1669 boolean_t dtrace_ls_initialized = FALSE;
1670 boolean_t dtrace_rwl_shared_to_excl_spin, dtrace_rwl_shared_to_excl_block, dtrace_ls_enabled = FALSE;
1671 #endif
1672
1673 while (lck->lck_rw_shared_count != 0) {
1674
1675 #if CONFIG_DTRACE
1676 if (dtrace_ls_initialized == FALSE) {
1677 dtrace_ls_initialized = TRUE;
1678 dtrace_rwl_shared_to_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN] != 0);
1679 dtrace_rwl_shared_to_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK] != 0);
1680 dtrace_ls_enabled = dtrace_rwl_shared_to_excl_spin || dtrace_rwl_shared_to_excl_block;
1681 if (dtrace_ls_enabled) {
1682 /*
1683 * Either sleeping or spinning is happening,
1684 * start a timing of our delay interval now.
1685 */
1686 readers_at_sleep = lck->lck_rw_shared_count;
1687 wait_interval = mach_absolute_time();
1688 }
1689 }
1690 #endif
1691 if (istate == -1)
1692 istate = ml_get_interrupts_enabled();
1693
1694 deadline = lck_rw_deadline_for_spin(lck);
1695
1696 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_START,
1697 trace_lck, lck->lck_rw_shared_count, 0, 0, 0);
1698
1699 while ((still_shared = lck->lck_rw_shared_count) && mach_absolute_time() < deadline)
1700 lck_rw_lock_pause(istate);
1701
1702 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_END,
1703 trace_lck, lck->lck_rw_shared_count, 0, 0, 0);
1704
1705 if ( !still_shared)
1706 break;
1707 /*
1708 * if we get here, the deadline has expired w/o
1709 * the rw_shared_count having drained to 0
1710 * check to see if we're allowed to do a thread_block
1711 */
1712 if (lck->lck_rw_can_sleep) {
1713
1714 istate = lck_interlock_lock(lck);
1715
1716 if (lck->lck_rw_shared_count != 0) {
1717 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_START,
1718 trace_lck, lck->lck_rw_shared_count, 0, 0, 0);
1719
1720 lck->lck_w_waiting = TRUE;
1721
1722 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockUpgrade);
1723 res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT);
1724 lck_interlock_unlock(lck, istate);
1725
1726 if (res == THREAD_WAITING) {
1727 res = thread_block(THREAD_CONTINUE_NULL);
1728 slept++;
1729 }
1730 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_END,
1731 trace_lck, res, slept, 0, 0);
1732 } else {
1733 lck_interlock_unlock(lck, istate);
1734 break;
1735 }
1736 }
1737 }
1738 #if CONFIG_DTRACE
1739 /*
1740 * We infer whether we took the sleep/spin path above by checking readers_at_sleep.
1741 */
1742 if (dtrace_ls_enabled == TRUE) {
1743 if (slept == 0) {
1744 LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN, lck, mach_absolute_time() - wait_interval, 0);
1745 } else {
1746 LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK, lck,
1747 mach_absolute_time() - wait_interval, 1,
1748 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1749 }
1750 }
1751 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lck, 1);
1752 #endif
1753 return (TRUE);
1754 }
1755
1756 /*
1757 * Routine: lck_rw_lock_exclusive_to_shared
1758 */
1759
1760 void lck_rw_lock_exclusive_to_shared(lck_rw_t *lock)
1761 {
1762 uint32_t data, prev;
1763
1764 for ( ; ; ) {
1765 data = atomic_exchange_begin32(&lock->data, &prev, memory_order_release_smp);
1766 if (data & LCK_RW_INTERLOCK) {
1767 atomic_exchange_abort();
1768 lck_rw_interlock_spin(lock); /* wait for interlock to clear */
1769 continue;
1770 }
1771 data += LCK_RW_SHARED_READER;
1772 if (data & LCK_RW_WANT_UPGRADE)
1773 data &= ~(LCK_RW_WANT_UPGRADE);
1774 else
1775 data &= ~(LCK_RW_WANT_EXCL);
1776 if (!((prev & LCK_RW_W_WAITING) && (prev & LCK_RW_PRIV_EXCL)))
1777 data &= ~(LCK_RW_W_WAITING);
1778 if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_release_smp))
1779 break;
1780 cpu_pause();
1781 }
1782 return lck_rw_lock_exclusive_to_shared_gen(lock, prev);
1783 }
1784
1785
1786 /*
1787 * Routine: lck_rw_lock_exclusive_to_shared_gen
1788 * Function:
1789 * assembly fast path has already dropped
1790 * our exclusive state and bumped lck_rw_shared_count
1791 * all we need to do here is determine if anyone
1792 * needs to be awakened.
1793 */
1794 static void
1795 lck_rw_lock_exclusive_to_shared_gen(
1796 lck_rw_t *lck,
1797 uint32_t prior_lock_state)
1798 {
1799 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
1800 lck_rw_t *fake_lck;
1801
1802 fake_lck = (lck_rw_t *)&prior_lock_state;
1803
1804 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_START,
1805 trace_lck, fake_lck->lck_rw_want_write, fake_lck->lck_rw_want_upgrade, 0, 0);
1806
1807 /*
1808 * don't wake up anyone waiting to take the lock exclusively
1809 * since we hold a read count... when the read count drops to 0,
1810 * the writers will be woken.
1811 *
1812 * wake up any waiting readers if we don't have any writers waiting,
1813 * or the lock is NOT marked as rw_priv_excl (writers have privilege)
1814 */
1815 if (!(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting) && fake_lck->lck_r_waiting)
1816 thread_wakeup(RW_LOCK_READER_EVENT(lck));
1817
1818 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_END,
1819 trace_lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, lck->lck_rw_shared_count, 0);
1820
1821 #if CONFIG_DTRACE
1822 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_TO_SHARED_DOWNGRADE, lck, 0);
1823 #endif
1824 }
1825
1826
1827 /*
1828 * Routine: lck_rw_try_lock
1829 */
1830 boolean_t
1831 lck_rw_try_lock(
1832 lck_rw_t *lck,
1833 lck_rw_type_t lck_rw_type)
1834 {
1835 if (lck_rw_type == LCK_RW_TYPE_SHARED)
1836 return(lck_rw_try_lock_shared(lck));
1837 else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
1838 return(lck_rw_try_lock_exclusive(lck));
1839 else
1840 panic("lck_rw_try_lock(): Invalid rw lock type: %x\n", lck_rw_type);
1841 return(FALSE);
1842 }
1843
1844 /*
1845 * Routine: lck_rw_try_lock_shared
1846 */
1847
1848 boolean_t lck_rw_try_lock_shared(lck_rw_t *lock)
1849 {
1850 uint32_t data, prev;
1851
1852 for ( ; ; ) {
1853 data = atomic_exchange_begin32(&lock->data, &prev, memory_order_acquire_smp);
1854 if (data & LCK_RW_INTERLOCK) {
1855 atomic_exchange_abort();
1856 lck_rw_interlock_spin(lock);
1857 continue;
1858 }
1859 if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) {
1860 atomic_exchange_abort();
1861 return FALSE; /* lock is busy */
1862 }
1863 data += LCK_RW_SHARED_READER; /* Increment reader refcount */
1864 if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_acquire_smp))
1865 break;
1866 cpu_pause();
1867 }
1868 current_thread()->rwlock_count++;
1869 /* There is a 3 instr window where preemption may not notice rwlock_count after cmpxchg */
1870 #if CONFIG_DTRACE
1871 LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED);
1872 #endif /* CONFIG_DTRACE */
1873 return TRUE;
1874 }
1875
1876
1877 /*
1878 * Routine: lck_rw_try_lock_exclusive
1879 */
1880
1881 boolean_t lck_rw_try_lock_exclusive(lck_rw_t *lock)
1882 {
1883 uint32_t data, prev;
1884
1885 for ( ; ; ) {
1886 data = atomic_exchange_begin32(&lock->data, &prev, memory_order_acquire_smp);
1887 if (data & LCK_RW_INTERLOCK) {
1888 atomic_exchange_abort();
1889 lck_rw_interlock_spin(lock);
1890 continue;
1891 }
1892 if (data & (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) {
1893 atomic_exchange_abort();
1894 return FALSE; /* can't get it */
1895 }
1896 data |= LCK_RW_WANT_EXCL;
1897 if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_acquire_smp))
1898 break;
1899 cpu_pause();
1900 }
1901
1902 current_thread()->rwlock_count++;
1903 #if CONFIG_DTRACE
1904 LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
1905 #endif /* CONFIG_DTRACE */
1906 return TRUE;
1907 }
1908
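/*
 * Usage sketch (illustrative; my_lck is hypothetical): a non-blocking
 * writer that defers its work when the lock is busy.
 *
 *	if (lck_rw_try_lock_exclusive(my_lck)) {
 *		... update the protected state ...
 *		lck_rw_unlock_exclusive(my_lck);
 *	} else {
 *		... lock busy: queue the update rather than block ...
 *	}
 */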
1909
1910 void
1911 lck_rw_assert(
1912 lck_rw_t *lck,
1913 unsigned int type)
1914 {
1915 switch (type) {
1916 case LCK_RW_ASSERT_SHARED:
1917 if (lck->lck_rw_shared_count != 0) {
1918 return;
1919 }
1920 break;
1921 case LCK_RW_ASSERT_EXCLUSIVE:
1922 if ((lck->lck_rw_want_write ||
1923 lck->lck_rw_want_upgrade) &&
1924 lck->lck_rw_shared_count == 0) {
1925 return;
1926 }
1927 break;
1928 case LCK_RW_ASSERT_HELD:
1929 if (lck->lck_rw_want_write ||
1930 lck->lck_rw_want_upgrade ||
1931 lck->lck_rw_shared_count != 0) {
1932 return;
1933 }
1934 break;
1935 case LCK_RW_ASSERT_NOTHELD:
1936 if (!(lck->lck_rw_want_write ||
1937 lck->lck_rw_want_upgrade ||
1938 lck->lck_rw_shared_count != 0)) {
1939 return;
1940 }
1941 break;
1942 default:
1943 break;
1944 }
1945
1946 panic("rw lock (%p)%s held (mode=%u), first word %08x\n", lck, (type == LCK_RW_ASSERT_NOTHELD ? "" : " not"), type, *(uint32_t *)lck);
1947 }
1948
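/*
 * Usage sketch (illustrative; function and field names are hypothetical):
 * routines whose callers must hold the lock commonly assert that at entry.
 *
 *	static void
 *	my_obj_update_locked(my_obj_t *obj)
 *	{
 *		lck_rw_assert(&obj->obj_lock, LCK_RW_ASSERT_EXCLUSIVE);
 *		... safe to modify obj here ...
 *	}
 */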
1949 /* On return to userspace, this routine is called if the rwlock_count is somehow imbalanced */
1950 void
1951 lck_rw_clear_promotions_x86(thread_t thread)
1952 {
1953 #if MACH_LDEBUG
1954 /* It's fatal to leave a RW lock locked and return to userspace */
1955 panic("%u rw lock(s) held on return to userspace for thread %p", thread->rwlock_count, thread);
1956 #else
1957 /* Paper over the issue */
1958 thread->rwlock_count = 0;
1959 lck_rw_clear_promotion(thread);
1960 #endif
1961 }
1962
1963 boolean_t
1964 lck_rw_lock_yield_shared(lck_rw_t *lck, boolean_t force_yield)
1965 {
1966 lck_rw_assert(lck, LCK_RW_ASSERT_SHARED);
1967
1968 if (lck->lck_rw_want_write || lck->lck_rw_want_upgrade || force_yield) {
1969 lck_rw_unlock_shared(lck);
1970 mutex_pause(2);
1971 lck_rw_lock_shared(lck);
1972 return TRUE;
1973 }
1974
1975 return FALSE;
1976 }
1977
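/*
 * Usage sketch (illustrative; names are hypothetical): a long scan holding
 * the lock shared can periodically offer it to queued writers.  If the
 * routine returns TRUE the lock was dropped and re-taken, so any state
 * derived under the lock must be revalidated.
 *
 *	lck_rw_lock_shared(my_lck);
 *	for (obj = my_list_first; obj != NULL; obj = obj->next) {
 *		... examine obj ...
 *		if (lck_rw_lock_yield_shared(my_lck, FALSE)) {
 *			... lock was briefly released: revalidate obj/list ...
 *		}
 *	}
 *	lck_rw_unlock_shared(my_lck);
 */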
1978 /*
1979 * Routine: kdp_lck_rw_lock_is_acquired_exclusive
1980 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
1981 */
1982 boolean_t
1983 kdp_lck_rw_lock_is_acquired_exclusive(lck_rw_t *lck) {
1984 if (not_in_kdp) {
1985 panic("panic: rw lock exclusive check done outside of kernel debugger");
1986 }
1987 return ((lck->lck_rw_want_upgrade || lck->lck_rw_want_write) && (lck->lck_rw_shared_count == 0)) ? TRUE : FALSE;
1988 }
1989
1990
1991 #ifdef MUTEX_ZONE
1992 extern zone_t lck_mtx_zone;
1993 #endif
1994 /*
1995 * Routine: lck_mtx_alloc_init
1996 */
1997 lck_mtx_t *
1998 lck_mtx_alloc_init(
1999 lck_grp_t *grp,
2000 lck_attr_t *attr)
2001 {
2002 lck_mtx_t *lck;
2003 #ifdef MUTEX_ZONE
2004 if ((lck = (lck_mtx_t *)zalloc(lck_mtx_zone)) != 0)
2005 lck_mtx_init(lck, grp, attr);
2006 #else
2007 if ((lck = (lck_mtx_t *)kalloc(sizeof(lck_mtx_t))) != 0)
2008 lck_mtx_init(lck, grp, attr);
2009 #endif
2010 return(lck);
2011 }
2012
2013 /*
2014 * Routine: lck_mtx_free
2015 */
2016 void
2017 lck_mtx_free(
2018 lck_mtx_t *lck,
2019 lck_grp_t *grp)
2020 {
2021 lck_mtx_destroy(lck, grp);
2022 #ifdef MUTEX_ZONE
2023 zfree(lck_mtx_zone, lck);
2024 #else
2025 kfree(lck, sizeof(lck_mtx_t));
2026 #endif
2027 }
2028
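/*
 * Usage sketch (illustrative; group and lock names are hypothetical):
 * typical lifecycle of a dynamically allocated mutex.
 *
 *	lck_grp_t *my_grp = lck_grp_alloc_init("my_subsystem", LCK_GRP_ATTR_NULL);
 *	lck_mtx_t *my_mtx = lck_mtx_alloc_init(my_grp, LCK_ATTR_NULL);
 *
 *	lck_mtx_lock(my_mtx);
 *	... critical section ...
 *	lck_mtx_unlock(my_mtx);
 *
 *	lck_mtx_free(my_mtx, my_grp);	// destroys the lock and frees its storage
 *	lck_grp_free(my_grp);
 */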
2029 /*
2030 * Routine: lck_mtx_ext_init
2031 */
2032 static void
2033 lck_mtx_ext_init(
2034 lck_mtx_ext_t *lck,
2035 lck_grp_t *grp,
2036 lck_attr_t *attr)
2037 {
2038 bzero((void *)lck, sizeof(lck_mtx_ext_t));
2039
2040 if ((attr->lck_attr_val) & LCK_ATTR_DEBUG) {
2041 lck->lck_mtx_deb.type = MUTEX_TAG;
2042 lck->lck_mtx_attr |= LCK_MTX_ATTR_DEBUG;
2043 }
2044
2045 lck->lck_mtx_grp = grp;
2046
2047 if (grp->lck_grp_attr & LCK_GRP_ATTR_STAT)
2048 lck->lck_mtx_attr |= LCK_MTX_ATTR_STAT;
2049
2050 lck->lck_mtx.lck_mtx_is_ext = 1;
2051 lck->lck_mtx.lck_mtx_pad32 = 0xFFFFFFFF;
2052 }
2053
2054 /*
2055 * Routine: lck_mtx_init
2056 */
2057 void
2058 lck_mtx_init(
2059 lck_mtx_t *lck,
2060 lck_grp_t *grp,
2061 lck_attr_t *attr)
2062 {
2063 lck_mtx_ext_t *lck_ext;
2064 lck_attr_t *lck_attr;
2065
2066 if (attr != LCK_ATTR_NULL)
2067 lck_attr = attr;
2068 else
2069 lck_attr = &LockDefaultLckAttr;
2070
2071 if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
2072 if ((lck_ext = (lck_mtx_ext_t *)kalloc(sizeof(lck_mtx_ext_t))) != 0) {
2073 lck_mtx_ext_init(lck_ext, grp, lck_attr);
2074 lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
2075 lck->lck_mtx_ptr = lck_ext;
2076 }
2077 } else {
2078 lck->lck_mtx_owner = 0;
2079 lck->lck_mtx_state = 0;
2080 }
2081 lck->lck_mtx_pad32 = 0xFFFFFFFF;
2082 lck_grp_reference(grp);
2083 lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
2084 }
2085
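/*
 * Usage sketch (illustrative; structure and names are hypothetical): a mutex
 * embedded in a larger object is initialized in place with lck_mtx_init()
 * and torn down with lck_mtx_destroy(), which does not free the lck_mtx_t
 * storage itself.
 *
 *	struct my_obj {
 *		lck_mtx_t	mo_lock;
 *		...
 *	};
 *
 *	lck_mtx_init(&obj->mo_lock, my_grp, LCK_ATTR_NULL);
 *	...
 *	lck_mtx_destroy(&obj->mo_lock, my_grp);
 */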
2086 /*
2087 * Routine: lck_mtx_init_ext
2088 */
2089 void
2090 lck_mtx_init_ext(
2091 lck_mtx_t *lck,
2092 lck_mtx_ext_t *lck_ext,
2093 lck_grp_t *grp,
2094 lck_attr_t *attr)
2095 {
2096 lck_attr_t *lck_attr;
2097
2098 if (attr != LCK_ATTR_NULL)
2099 lck_attr = attr;
2100 else
2101 lck_attr = &LockDefaultLckAttr;
2102
2103 if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
2104 lck_mtx_ext_init(lck_ext, grp, lck_attr);
2105 lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
2106 lck->lck_mtx_ptr = lck_ext;
2107 } else {
2108 lck->lck_mtx_owner = 0;
2109 lck->lck_mtx_state = 0;
2110 }
2111 lck->lck_mtx_pad32 = 0xFFFFFFFF;
2112
2113 lck_grp_reference(grp);
2114 lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
2115 }
2116
2117 /*
2118 * Routine: lck_mtx_destroy
2119 */
2120 void
2121 lck_mtx_destroy(
2122 lck_mtx_t *lck,
2123 lck_grp_t *grp)
2124 {
2125 boolean_t lck_is_indirect;
2126
2127 if (lck->lck_mtx_tag == LCK_MTX_TAG_DESTROYED)
2128 return;
2129 #if MACH_LDEBUG
2130 lck_mtx_assert(lck, LCK_MTX_ASSERT_NOTOWNED);
2131 #endif
2132 lck_is_indirect = (lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT);
2133
2134 lck_mtx_lock_mark_destroyed(lck);
2135
2136 if (lck_is_indirect)
2137 kfree(lck->lck_mtx_ptr, sizeof(lck_mtx_ext_t));
2138 lck_grp_lckcnt_decr(grp, LCK_TYPE_MTX);
2139 lck_grp_deallocate(grp);
2140 return;
2141 }
2142
2143
2144 #define LCK_MTX_LCK_WAIT_CODE 0x20
2145 #define LCK_MTX_LCK_WAKEUP_CODE 0x21
2146 #define LCK_MTX_LCK_SPIN_CODE 0x22
2147 #define LCK_MTX_LCK_ACQUIRE_CODE 0x23
2148 #define LCK_MTX_LCK_DEMOTE_CODE 0x24
2149
2150
2151 /*
2152 * Routine: lck_mtx_unlock_wakeup_x86
2153 *
2154 * Invoked on unlock when there is
2155 * contention (i.e. the assembly routine sees that
2156 * mutex->lck_mtx_waiters != 0 or
2157 * mutex->lck_mtx_promoted != 0)...
2158 *
2159 * neither the mutex nor the interlock is held
2160 */
2161 void
2162 lck_mtx_unlock_wakeup_x86 (
2163 lck_mtx_t *mutex,
2164 int prior_lock_state)
2165 {
2166 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(mutex);
2167 lck_mtx_t fake_lck;
2168
2169 /*
2170 * prior_lock_state is a snapshot of the 2nd word of the
2171 * lock in question... we'll fake up a lock with the bits
2172 * copied into place and carefully not access anything
2173 * beyond what's defined in the second word of a lck_mtx_t
2174 */
2175 fake_lck.lck_mtx_state = prior_lock_state;
2176
2177 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_START,
2178 trace_lck, fake_lck.lck_mtx_promoted, fake_lck.lck_mtx_waiters, fake_lck.lck_mtx_pri, 0);
2179
2180 if (__probable(fake_lck.lck_mtx_waiters)) {
2181 if (fake_lck.lck_mtx_waiters > 1)
2182 thread_wakeup_one_with_pri(LCK_MTX_EVENT(mutex), fake_lck.lck_mtx_pri);
2183 else
2184 thread_wakeup_one(LCK_MTX_EVENT(mutex));
2185 }
2186
2187 if (__improbable(fake_lck.lck_mtx_promoted)) {
2188 thread_t thread = current_thread();
2189
2190
2191 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_DEMOTE_CODE) | DBG_FUNC_NONE,
2192 thread_tid(thread), thread->promotions, thread->sched_flags & TH_SFLAG_PROMOTED, 0, 0);
2193
2194 if (thread->promotions > 0) {
2195 spl_t s = splsched();
2196
2197 thread_lock(thread);
2198
2199 if (--thread->promotions == 0 && (thread->sched_flags & TH_SFLAG_PROMOTED)) {
2200
2201 thread->sched_flags &= ~TH_SFLAG_PROMOTED;
2202
2203 if (thread->sched_flags & TH_SFLAG_RW_PROMOTED) {
2204 /* Thread still has a RW lock promotion */
2205 } else if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
2206 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_DEMOTE) | DBG_FUNC_NONE,
2207 thread->sched_pri, DEPRESSPRI, 0, trace_lck, 0);
2208
2209 set_sched_pri(thread, DEPRESSPRI);
2210 }
2211 else {
2212 if (thread->base_pri < thread->sched_pri) {
2213 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_DEMOTE) | DBG_FUNC_NONE,
2214 thread->sched_pri, thread->base_pri, 0, trace_lck, 0);
2215
2216 thread_recompute_sched_pri(thread, FALSE);
2217 }
2218 }
2219 }
2220 thread_unlock(thread);
2221 splx(s);
2222 }
2223 }
2224 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_END,
2225 trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);
2226 }
2227
2228
2229 /*
2230 * Routine: lck_mtx_lock_acquire_x86
2231 *
2232 * Invoked on acquiring the mutex when there is
2233 * contention (i.e. the assembly routine sees that
2234 * mutex->lck_mtx_waiters != 0 or
2235 * thread->was_promoted_on_wakeup != 0)...
2236 *
2237 * mutex is owned... interlock is held... preemption is disabled
2238 */
2239 void
2240 lck_mtx_lock_acquire_x86(
2241 lck_mtx_t *mutex)
2242 {
2243 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(mutex);
2244 thread_t thread;
2245 integer_t priority;
2246 spl_t s;
2247
2248 thread = (thread_t)mutex->lck_mtx_owner; /* faster than current_thread()...we own the mutex */
2249
2250 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_ACQUIRE_CODE) | DBG_FUNC_START,
2251 trace_lck, thread->was_promoted_on_wakeup, mutex->lck_mtx_waiters, mutex->lck_mtx_pri, 0);
2252
2253 if (mutex->lck_mtx_waiters)
2254 priority = mutex->lck_mtx_pri;
2255 else
2256 priority = 0;
2257
2258 if (thread->sched_pri < priority || thread->was_promoted_on_wakeup) {
2259
2260 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_PROMOTE) | DBG_FUNC_NONE,
2261 thread->sched_pri, priority, thread->was_promoted_on_wakeup, trace_lck, 0);
2262
2263 s = splsched();
2264 thread_lock(thread);
2265
2266 if (thread->sched_pri < priority) {
2267 /* Do not promote past promotion ceiling */
2268 assert(priority <= MAXPRI_PROMOTE);
2269 set_sched_pri(thread, priority);
2270 }
2271 if (mutex->lck_mtx_promoted == 0) {
2272 mutex->lck_mtx_promoted = 1;
2273
2274 thread->promotions++;
2275 thread->sched_flags |= TH_SFLAG_PROMOTED;
2276 }
2277 thread->was_promoted_on_wakeup = 0;
2278
2279 thread_unlock(thread);
2280 splx(s);
2281 }
2282 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_ACQUIRE_CODE) | DBG_FUNC_END,
2283 trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);
2284 }
2285
2286
2287 static int
2288 lck_mtx_interlock_try_lock(lck_mtx_t *mutex, boolean_t *istate)
2289 {
2290 int retval;
2291
2292 *istate = ml_set_interrupts_enabled(FALSE);
2293 retval = lck_mtx_ilk_try_lock(mutex);
2294
2295 if (retval == 0)
2296 ml_set_interrupts_enabled(*istate);
2297
2298 return retval;
2299 }
2300
2301 static void
2302 lck_mtx_interlock_unlock(lck_mtx_t *mutex, boolean_t istate)
2303 {
2304 lck_mtx_ilk_unlock(mutex);
2305 ml_set_interrupts_enabled(istate);
2306 }
2307
2308
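/*
 * These two helpers bracket a short, interrupt-disabled peek under the
 * mutex interlock; the adaptive-spin path below uses them roughly like
 * this (a sketch of the existing pattern, not additional logic):
 *
 *	boolean_t istate;
 *	if (lck_mtx_interlock_try_lock(mutex, &istate)) {
 *		... examine mutex->lck_mtx_owner while the interlock is held ...
 *		lck_mtx_interlock_unlock(mutex, istate);
 *	}
 *	// on failure the helper has already restored the interrupt state
 */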
2309 /*
2310 * Routine: lck_mtx_lock_spinwait_x86
2311 *
2312 * Invoked trying to acquire a mutex when there is contention but
2313 * the holder is running on another processor. We spin for up to a maximum
2314 * time waiting for the lock to be released.
2315 *
2316 * Called with the interlock unlocked.
2317 * returns 0 if mutex acquired
2318 * returns 1 if we spun
2319 * returns 2 if we didn't spin due to the holder not running
2320 */
2321 int
2322 lck_mtx_lock_spinwait_x86(
2323 lck_mtx_t *mutex)
2324 {
2325 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(mutex);
2326 thread_t holder;
2327 uint64_t overall_deadline;
2328 uint64_t check_owner_deadline;
2329 uint64_t cur_time;
2330 int retval = 1;
2331 int loopcount = 0;
2332
2333 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_START,
2334 trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner), mutex->lck_mtx_waiters, 0, 0);
2335
2336 cur_time = mach_absolute_time();
2337 overall_deadline = cur_time + MutexSpin;
2338 check_owner_deadline = cur_time;
2339
2340 /*
2341 * Spin while:
2342 * - mutex is locked, and
2343 * - it's locked as a spin lock, and
2344 * - owner is running on another processor, and
2345 * - owner (processor) is not idling, and
2346 * - we haven't spun for long enough.
2347 */
2348 do {
2349 if (__probable(lck_mtx_lock_grab_mutex(mutex))) {
2350 retval = 0;
2351 break;
2352 }
2353 cur_time = mach_absolute_time();
2354
2355 if (cur_time >= overall_deadline)
2356 break;
2357
2358 if (cur_time >= check_owner_deadline && mutex->lck_mtx_owner) {
2359 boolean_t istate;
2360
2361 if (lck_mtx_interlock_try_lock(mutex, &istate)) {
2362
2363 if ((holder = (thread_t) mutex->lck_mtx_owner) != NULL) {
2364
2365 if ( !(holder->machine.specFlags & OnProc) ||
2366 (holder->state & TH_IDLE)) {
2367
2368 lck_mtx_interlock_unlock(mutex, istate);
2369
2370 if (loopcount == 0)
2371 retval = 2;
2372 break;
2373 }
2374 }
2375 lck_mtx_interlock_unlock(mutex, istate);
2376
2377 check_owner_deadline = cur_time + (MutexSpin / 4);
2378 }
2379 }
2380 cpu_pause();
2381
2382 loopcount++;
2383
2384 } while (TRUE);
2385
2386 #if CONFIG_DTRACE
2387 /*
2388 * overall_deadline already records when the spin began (overall_deadline - MutexSpin).
2389 * If DTrace is active, we compute backwards from it to determine how
2390 * long we actually spun.
2391 *
2392 * Note that we record a different probe id depending on whether
2393 * this is a direct or indirect mutex. This allows us to
2394 * penalize only lock groups that have debug/stats enabled
2395 * with dtrace processing if desired.
2396 */
2397 if (__probable(mutex->lck_mtx_is_ext == 0)) {
2398 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN, mutex,
2399 mach_absolute_time() - (overall_deadline - MutexSpin));
2400 } else {
2401 LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_SPIN, mutex,
2402 mach_absolute_time() - (overall_deadline - MutexSpin));
2403 }
2404 /* The lockstat acquire event is recorded by the assembly code beneath us. */
2405 #endif
2406
2407 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_END,
2408 trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner), mutex->lck_mtx_waiters, retval, 0);
2409
2410 return retval;
2411 }
2412
2413
2414
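/*
 * Rough sketch (an assumption about the slow-path caller, which lives
 * outside this file) of how the three return values above are consumed:
 *
 *	switch (lck_mtx_lock_spinwait_x86(mutex)) {
 *	case 0:		// grabbed the mutex while spinning; done
 *		break;
 *	case 1:		// spun up to the deadline without success
 *	case 2:		// didn't spin: holder not running (or idling)
 *		// take the interlock and block via lck_mtx_lock_wait_x86()
 *		break;
 *	}
 */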
2415 /*
2416 * Routine: lck_mtx_lock_wait_x86
2417 *
2418 * Invoked in order to wait on contention.
2419 *
2420 * Called with the interlock locked and
2421 * preemption disabled...
2422 * returns it unlocked and with preemption enabled
2423 */
2424 void
2425 lck_mtx_lock_wait_x86 (
2426 lck_mtx_t *mutex)
2427 {
2428 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(mutex);
2429 thread_t self = current_thread();
2430 thread_t holder;
2431 integer_t priority;
2432 spl_t s;
2433 #if CONFIG_DTRACE
2434 uint64_t sleep_start = 0;
2435
2436 if (lockstat_probemap[LS_LCK_MTX_LOCK_BLOCK] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_BLOCK]) {
2437 sleep_start = mach_absolute_time();
2438 }
2439 #endif
2440 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START,
2441 trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner), mutex->lck_mtx_waiters, mutex->lck_mtx_pri, 0);
2442
2443 priority = self->sched_pri;
2444
2445 if (priority < self->base_pri)
2446 priority = self->base_pri;
2447 if (priority < BASEPRI_DEFAULT)
2448 priority = BASEPRI_DEFAULT;
2449
2450 /* Do not promote past promotion ceiling */
2451 priority = MIN(priority, MAXPRI_PROMOTE);
2452
2453 if (mutex->lck_mtx_waiters == 0 || priority > mutex->lck_mtx_pri)
2454 mutex->lck_mtx_pri = priority;
2455 mutex->lck_mtx_waiters++;
2456
2457 if ( (holder = (thread_t)mutex->lck_mtx_owner) &&
2458 holder->sched_pri < mutex->lck_mtx_pri ) {
2459 s = splsched();
2460 thread_lock(holder);
2461
2462 /* holder priority may have been bumped by another thread
2463 * before thread_lock was taken
2464 */
2465 if (holder->sched_pri < mutex->lck_mtx_pri) {
2466 KERNEL_DEBUG_CONSTANT(
2467 MACHDBG_CODE(DBG_MACH_SCHED, MACH_PROMOTE) | DBG_FUNC_NONE,
2468 holder->sched_pri, priority, thread_tid(holder), trace_lck, 0);
2469 /* Assert that we're not altering the priority of a
2470 * thread above the MAXPRI_PROMOTE band
2471 */
2472 assert(holder->sched_pri < MAXPRI_PROMOTE);
2473 set_sched_pri(holder, priority);
2474
2475 if (mutex->lck_mtx_promoted == 0) {
2476 holder->promotions++;
2477 holder->sched_flags |= TH_SFLAG_PROMOTED;
2478
2479 mutex->lck_mtx_promoted = 1;
2480 }
2481 }
2482 thread_unlock(holder);
2483 splx(s);
2484 }
2485 thread_set_pending_block_hint(self, kThreadWaitKernelMutex);
2486 assert_wait(LCK_MTX_EVENT(mutex), THREAD_UNINT);
2487
2488 lck_mtx_ilk_unlock(mutex);
2489
2490 thread_block(THREAD_CONTINUE_NULL);
2491
2492 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_END,
2493 trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner), mutex->lck_mtx_waiters, mutex->lck_mtx_pri, 0);
2494
2495 #if CONFIG_DTRACE
2496 /*
2497 * Record the DTrace lockstat probe for blocking; block time is
2498 * measured from when this routine was entered.
2499 */
2500 if (sleep_start) {
2501 if (mutex->lck_mtx_is_ext == 0) {
2502 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_BLOCK, mutex,
2503 mach_absolute_time() - sleep_start);
2504 } else {
2505 LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_BLOCK, mutex,
2506 mach_absolute_time() - sleep_start);
2507 }
2508 }
2509 #endif
2510 }
2511
2512 /*
2513 * Routine: kdp_lck_mtx_lock_spin_is_acquired
2514 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
2515 * Returns: TRUE if lock is acquired.
2516 */
2517 boolean_t
2518 kdp_lck_mtx_lock_spin_is_acquired(lck_mtx_t *lck)
2519 {
2520 if (not_in_kdp) {
2521 panic("panic: kdp_lck_mtx_lock_spin_is_acquired called outside of kernel debugger");
2522 }
2523
2524 if (lck->lck_mtx_ilocked || lck->lck_mtx_mlocked) {
2525 return TRUE;
2526 }
2527
2528 return FALSE;
2529 }
2530
2531 void
2532 kdp_lck_mtx_find_owner(__unused struct waitq * waitq, event64_t event, thread_waitinfo_t * waitinfo)
2533 {
2534 lck_mtx_t * mutex = LCK_EVENT_TO_MUTEX(event);
2535 waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(mutex);
2536 thread_t holder = (thread_t)mutex->lck_mtx_owner;
2537 waitinfo->owner = thread_tid(holder);
2538 }
2539
2540 void
2541 kdp_rwlck_find_owner(__unused struct waitq * waitq, event64_t event, thread_waitinfo_t * waitinfo)
2542 {
2543 lck_rw_t *rwlck = NULL;
2544 switch(waitinfo->wait_type) {
2545 case kThreadWaitKernelRWLockRead:
2546 rwlck = READ_EVENT_TO_RWLOCK(event);
2547 break;
2548 case kThreadWaitKernelRWLockWrite:
2549 case kThreadWaitKernelRWLockUpgrade:
2550 rwlck = WRITE_EVENT_TO_RWLOCK(event);
2551 break;
2552 default:
2553 panic("%s was called with an invalid blocking type", __FUNCTION__);
2554 break;
2555 }
2556 waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(rwlck);
2557 waitinfo->owner = 0;
2558 }