91447636 1/*
cb323159 2 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
91447636 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
0a7de745 5 *
2d21ac55 6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
0a7de745 14 *
2d21ac55 15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
0a7de745 17 *
2d21ac55 18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
0a7de745 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
91447636 27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
0a7de745 31/*
91447636 32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
0a7de745 35 *
91447636 36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
0a7de745 41 *
91447636 42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
0a7de745 45 *
91447636 46 * Carnegie Mellon requests users of this software to return to
0a7de745 47 *
91447636 48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
0a7de745 52 *
91447636 53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 * File: kern/lock.c
58 * Author: Avadis Tevanian, Jr., Michael Wayne Young
59 * Date: 1985
60 *
61 * Locking primitives implementation
62 */
63
d9a64523 64#define LOCK_PRIVATE 1
65
91447636 66#include <mach_ldebug.h>
67
0a7de745 68#include <kern/lock_stat.h>
91447636 69#include <kern/locks.h>
70#include <kern/kalloc.h>
71#include <kern/misc_protos.h>
72#include <kern/thread.h>
73#include <kern/processor.h>
74#include <kern/cpu_data.h>
75#include <kern/cpu_number.h>
76#include <kern/sched_prim.h>
91447636 77#include <kern/debug.h>
78#include <string.h>
79
060df5ea 80#include <i386/machine_routines.h> /* machine_timeout_suspended() */
5ba3f43e 81#include <machine/atomic.h>
b0d623f7 82#include <machine/machine_cpu.h>
060df5ea 83#include <i386/mp.h>
d9a64523 84#include <machine/atomic.h>
91447636 85#include <sys/kdebug.h>
d9a64523 86#include <i386/locks_i386_inlines.h>
91447636 87
cb323159 88#if CONFIG_DTRACE
89#define DTRACE_RW_SHARED 0x0 //reader
90#define DTRACE_RW_EXCL 0x1 //writer
91#define DTRACE_NO_FLAG 0x0 //not applicable
0a7de745 92#endif /* CONFIG_DTRACE */
2d21ac55 93
cb323159 94#define LCK_RW_LCK_EXCLUSIVE_CODE 0x100
95#define LCK_RW_LCK_EXCLUSIVE1_CODE 0x101
96#define LCK_RW_LCK_SHARED_CODE 0x102
97#define LCK_RW_LCK_SH_TO_EX_CODE 0x103
98#define LCK_RW_LCK_SH_TO_EX1_CODE 0x104
99#define LCK_RW_LCK_EX_TO_SH_CODE 0x105
91447636 100
cb323159 101#define LCK_RW_LCK_EX_WRITER_SPIN_CODE 0x106
102#define LCK_RW_LCK_EX_WRITER_WAIT_CODE 0x107
103#define LCK_RW_LCK_EX_READER_SPIN_CODE 0x108
104#define LCK_RW_LCK_EX_READER_WAIT_CODE 0x109
105#define LCK_RW_LCK_SHARED_SPIN_CODE 0x110
106#define LCK_RW_LCK_SHARED_WAIT_CODE 0x111
107#define LCK_RW_LCK_SH_TO_EX_SPIN_CODE 0x112
108#define LCK_RW_LCK_SH_TO_EX_WAIT_CODE 0x113
b0d623f7 109
91447636 110
cb323159 111#define ANY_LOCK_DEBUG (USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG)
91447636 112
cb323159 113unsigned int LcksOpts=0;
91447636 114
5ba3f43e
A
115#if DEVELOPMENT || DEBUG
116unsigned int LckDisablePreemptCheck = 0;
117#endif
118
91447636
A
119/* Forwards */
120
cb323159 121#if USLOCK_DEBUG
91447636
A
122/*
123 * Perform simple lock checks.
124 */
cb323159
A
125int uslock_check = 1;
126int max_lock_loops = 100000000;
127decl_simple_lock_data(extern , printf_lock);
128decl_simple_lock_data(extern , panic_lock);
129#endif /* USLOCK_DEBUG */
91447636 130
fe8ab488 131extern unsigned int not_in_kdp;
91447636
A
132
133/*
134 * We often want to know the addresses of the callers
135 * of the various lock routines. However, this information
136 * is only used for debugging and statistics.
137 */
cb323159
A
138typedef void *pc_t;
139#define INVALID_PC ((void *) VM_MAX_KERNEL_ADDRESS)
140#define INVALID_THREAD ((void *) VM_MAX_KERNEL_ADDRESS)
141#if ANY_LOCK_DEBUG
142#define OBTAIN_PC(pc) ((pc) = GET_RETURN_PC())
143#define DECL_PC(pc) pc_t pc;
144#else /* ANY_LOCK_DEBUG */
91447636 145#define DECL_PC(pc)
cb323159 146#ifdef lint
91447636
A
147/*
148 * Eliminate lint complaints about unused local pc variables.
149 */
cb323159
A
150#define OBTAIN_PC(pc) ++pc
151#else /* lint */
152#define OBTAIN_PC(pc)
153#endif /* lint */
 154#endif /* ANY_LOCK_DEBUG */
91447636 155
5ba3f43e
A
156/*
157 * atomic exchange API is a low level abstraction of the operations
158 * to atomically read, modify, and write a pointer. This abstraction works
159 * for both Intel and ARMv8.1 compare and exchange atomic instructions as
160 * well as the ARM exclusive instructions.
161 *
162 * atomic_exchange_begin() - begin exchange and retrieve current value
163 * atomic_exchange_complete() - conclude an exchange
164 * atomic_exchange_abort() - cancel an exchange started with atomic_exchange_begin()
165 */
166static uint32_t
167atomic_exchange_begin32(uint32_t *target, uint32_t *previous, enum memory_order ord)
168{
cb323159 169 uint32_t val;
5ba3f43e 170
cb323159
A
171 (void)ord; // Memory order not used
172 val = os_atomic_load(target, relaxed);
5ba3f43e
A
173 *previous = val;
174 return val;
175}
176
177static boolean_t
178atomic_exchange_complete32(uint32_t *target, uint32_t previous, uint32_t newval, enum memory_order ord)
179{
180 return __c11_atomic_compare_exchange_strong((_Atomic uint32_t *)target, &previous, newval, ord, memory_order_relaxed);
181}
182
183static void
cb323159 184atomic_exchange_abort(void) { }
5ba3f43e
A
185
186static boolean_t
187atomic_test_and_set32(uint32_t *target, uint32_t test_mask, uint32_t set_mask, enum memory_order ord, boolean_t wait)
188{
cb323159 189 uint32_t value, prev;
5ba3f43e 190
cb323159 191 for ( ; ; ) {
5ba3f43e
A
192 value = atomic_exchange_begin32(target, &prev, ord);
193 if (value & test_mask) {
cb323159 194 if (wait)
5ba3f43e 195 cpu_pause();
cb323159 196 else
5ba3f43e
A
197 atomic_exchange_abort();
198 return FALSE;
199 }
200 value |= set_mask;
cb323159 201 if (atomic_exchange_complete32(target, prev, value, ord))
5ba3f43e
A
202 return TRUE;
203 }
204}
91447636 205
cb323159
A
206inline boolean_t
207hw_atomic_test_and_set32(uint32_t *target, uint32_t test_mask, uint32_t set_mask, enum memory_order ord, boolean_t wait)
208{
209 return atomic_test_and_set32(target, test_mask, set_mask, ord, wait);
210}
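/*
 * A minimal user-space sketch of the begin/complete/abort pattern above,
 * written against C11 <stdatomic.h>. The helper names, the BUSY/WANT bit
 * values and try_set_want() are illustrative only; they are not kernel
 * interfaces.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define BUSY_BIT 0x1u
#define WANT_BIT 0x2u

static uint32_t
exchange_begin32(_Atomic uint32_t *target, uint32_t *previous)
{
	uint32_t val = atomic_load_explicit(target, memory_order_relaxed);
	*previous = val;
	return val;
}

static bool
exchange_complete32(_Atomic uint32_t *target, uint32_t previous, uint32_t newval)
{
	return atomic_compare_exchange_strong_explicit(target, &previous, newval,
	    memory_order_acq_rel, memory_order_relaxed);
}

static void
exchange_abort(void)
{
	/* nothing to undo: no store was published between begin and abort */
}

/* Set WANT_BIT unless BUSY_BIT is already set, retrying if the word moves. */
static bool
try_set_want(_Atomic uint32_t *word)
{
	uint32_t value, prev;

	for (;;) {
		value = exchange_begin32(word, &prev);
		if (value & BUSY_BIT) {
			exchange_abort();               /* owned by someone else */
			return false;
		}
		if (exchange_complete32(word, prev, value | WANT_BIT)) {
			return true;                    /* our update was published */
		}
		/* the word changed between begin and complete; re-read and retry */
	}
}

int
main(void)
{
	_Atomic uint32_t word = 0;

	printf("clear word:   %d\n", try_set_want(&word));      /* 1 */
	atomic_fetch_or(&word, BUSY_BIT);
	printf("busy bit set: %d\n", try_set_want(&word));      /* 0 */
	return 0;
}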
211
91447636
A
212/*
213 * Portable lock package implementation of usimple_locks.
214 */
215
cb323159
A
216#if USLOCK_DEBUG
217#define USLDBG(stmt) stmt
218void usld_lock_init(usimple_lock_t, unsigned short);
219void usld_lock_pre(usimple_lock_t, pc_t);
220void usld_lock_post(usimple_lock_t, pc_t);
221void usld_unlock(usimple_lock_t, pc_t);
222void usld_lock_try_pre(usimple_lock_t, pc_t);
223void usld_lock_try_post(usimple_lock_t, pc_t);
224int usld_lock_common_checks(usimple_lock_t, char *);
225#else /* USLOCK_DEBUG */
226#define USLDBG(stmt)
227#endif /* USLOCK_DEBUG */
91447636 228
2d21ac55
A
229/*
230 * Forward definitions
231 */
232
5ba3f43e
A
233static void lck_rw_lock_shared_gen(lck_rw_t *lck);
234static void lck_rw_lock_exclusive_gen(lck_rw_t *lck);
235static boolean_t lck_rw_lock_shared_to_exclusive_success(lck_rw_t *lck);
236static boolean_t lck_rw_lock_shared_to_exclusive_failure(lck_rw_t *lck, uint32_t prior_lock_state);
237static void lck_rw_lock_exclusive_to_shared_gen(lck_rw_t *lck, uint32_t prior_lock_state);
238static lck_rw_type_t lck_rw_done_gen(lck_rw_t *lck, uint32_t prior_lock_state);
39236c6e 239void lck_rw_clear_promotions_x86(thread_t thread);
5ba3f43e
A
240static boolean_t lck_rw_held_read_or_upgrade(lck_rw_t *lock);
241static boolean_t lck_rw_grab_want(lck_rw_t *lock);
242static boolean_t lck_rw_grab_shared(lck_rw_t *lock);
cb323159 243static void lck_mtx_unlock_wakeup_tail(lck_mtx_t *mutex, uint32_t state, boolean_t indirect);
d9a64523
A
244static void lck_mtx_interlock_lock(lck_mtx_t *mutex, uint32_t *new_state);
245static void lck_mtx_interlock_lock_clear_flags(lck_mtx_t *mutex, uint32_t and_flags, uint32_t *new_state);
246static int lck_mtx_interlock_try_lock(lck_mtx_t *mutex, uint32_t *new_state);
247static int lck_mtx_interlock_try_lock_set_flags(lck_mtx_t *mutex, uint32_t or_flags, uint32_t *new_state);
248static boolean_t lck_mtx_lock_wait_interlock_to_clear(lck_mtx_t *lock, uint32_t *new_state);
249static boolean_t lck_mtx_try_lock_wait_interlock_to_clear(lck_mtx_t *lock, uint32_t *new_state);
250
39236c6e 251
91447636
A
252/*
253 * Routine: lck_spin_alloc_init
254 */
255lck_spin_t *
256lck_spin_alloc_init(
cb323159
A
257 lck_grp_t *grp,
258 lck_attr_t *attr)
91447636 259{
cb323159 260 lck_spin_t *lck;
91447636 261
cb323159 262 if ((lck = (lck_spin_t *)kalloc(sizeof(lck_spin_t))) != 0)
91447636
A
263 lck_spin_init(lck, grp, attr);
264
cb323159 265 return(lck);
91447636
A
266}
267
268/*
269 * Routine: lck_spin_free
270 */
271void
272lck_spin_free(
cb323159
A
273 lck_spin_t *lck,
274 lck_grp_t *grp)
91447636
A
275{
276 lck_spin_destroy(lck, grp);
277 kfree(lck, sizeof(lck_spin_t));
278}
279
280/*
281 * Routine: lck_spin_init
282 */
283void
284lck_spin_init(
cb323159
A
285 lck_spin_t *lck,
286 lck_grp_t *grp,
287 __unused lck_attr_t *attr)
91447636
A
288{
289 usimple_lock_init((usimple_lock_t) lck, 0);
cb323159
A
290 if (grp) {
291 lck_grp_reference(grp);
292 lck_grp_lckcnt_incr(grp, LCK_TYPE_SPIN);
293 }
91447636
A
294}
295
296/*
297 * Routine: lck_spin_destroy
298 */
299void
300lck_spin_destroy(
cb323159
A
301 lck_spin_t *lck,
302 lck_grp_t *grp)
91447636 303{
cb323159 304 if (lck->interlock == LCK_SPIN_TAG_DESTROYED)
91447636 305 return;
b0d623f7 306 lck->interlock = LCK_SPIN_TAG_DESTROYED;
cb323159
A
307 if (grp) {
308 lck_grp_lckcnt_decr(grp, LCK_TYPE_SPIN);
309 lck_grp_deallocate(grp);
310 }
91447636
A
311 return;
312}
313
314/*
315 * Routine: lck_spin_lock
316 */
0a7de745
A
317void
318lck_spin_lock_grp(
cb323159
A
319 lck_spin_t *lck,
320 lck_grp_t *grp)
0a7de745
A
321{
322#pragma unused(grp)
323 usimple_lock((usimple_lock_t) lck, grp);
324}
325
91447636
A
326void
327lck_spin_lock(
cb323159 328 lck_spin_t *lck)
91447636 329{
0a7de745 330 usimple_lock((usimple_lock_t) lck, NULL);
91447636
A
331}
332
333/*
334 * Routine: lck_spin_unlock
335 */
336void
337lck_spin_unlock(
cb323159 338 lck_spin_t *lck)
91447636
A
339{
340 usimple_unlock((usimple_lock_t) lck);
341}
342
0a7de745
A
343boolean_t
344lck_spin_try_lock_grp(
cb323159
A
345 lck_spin_t *lck,
346 lck_grp_t *grp)
0a7de745
A
347{
348#pragma unused(grp)
349 boolean_t lrval = (boolean_t)usimple_lock_try((usimple_lock_t) lck, grp);
cb323159 350#if DEVELOPMENT || DEBUG
0a7de745
A
351 if (lrval) {
352 pltrace(FALSE);
353 }
354#endif
cb323159 355 return(lrval);
0a7de745
A
356}
357
91447636
A
358
359/*
360 * Routine: lck_spin_try_lock
361 */
362boolean_t
363lck_spin_try_lock(
cb323159 364 lck_spin_t *lck)
91447636 365{
0a7de745 366 boolean_t lrval = (boolean_t)usimple_lock_try((usimple_lock_t) lck, LCK_GRP_NULL);
cb323159 367#if DEVELOPMENT || DEBUG
39037602
A
368 if (lrval) {
369 pltrace(FALSE);
370 }
371#endif
cb323159 372 return(lrval);
39037602
A
373}
374
375/*
376 * Routine: lck_spin_assert
377 */
378void
379lck_spin_assert(lck_spin_t *lock, unsigned int type)
380{
381 thread_t thread, holder;
382 uintptr_t state;
383
384 if (__improbable(type != LCK_ASSERT_OWNED && type != LCK_ASSERT_NOTOWNED)) {
385 panic("lck_spin_assert(): invalid arg (%u)", type);
386 }
387
388 state = lock->interlock;
389 holder = (thread_t)state;
390 thread = current_thread();
391 if (type == LCK_ASSERT_OWNED) {
392 if (__improbable(holder == THREAD_NULL)) {
393 panic("Lock not owned %p = %lx", lock, state);
394 }
395 if (__improbable(holder != thread)) {
396 panic("Lock not owned by current thread %p = %lx", lock, state);
397 }
398 } else if (type == LCK_ASSERT_NOTOWNED) {
399 if (__improbable(holder != THREAD_NULL)) {
400 if (holder == thread) {
401 panic("Lock owned by current thread %p = %lx", lock, state);
39037602
A
402 }
403 }
404 }
91447636
A
405}
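/*
 * Sketch of typical client usage of the spin lock KPIs above from a kernel
 * extension: one lck_grp_t per subsystem, locks allocated from it, and
 * lck_spin_assert() used to document locking assumptions. The "mysub" names
 * are placeholders.
 */
#include <kern/locks.h>

static lck_grp_t  *mysub_lck_grp;
static lck_spin_t *mysub_lock;
static int         mysub_counter;       /* protected by mysub_lock */

static void
mysub_init(void)
{
	mysub_lck_grp = lck_grp_alloc_init("mysub", LCK_GRP_ATTR_NULL);
	mysub_lock    = lck_spin_alloc_init(mysub_lck_grp, LCK_ATTR_NULL);
}

static void
mysub_bump(void)
{
	lck_spin_lock(mysub_lock);                      /* returns with preemption disabled */
	lck_spin_assert(mysub_lock, LCK_ASSERT_OWNED);  /* cheap sanity check */
	mysub_counter++;
	lck_spin_unlock(mysub_lock);
}

static void
mysub_fini(void)
{
	lck_spin_free(mysub_lock, mysub_lck_grp);
	lck_grp_free(mysub_lck_grp);
}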
406
fe8ab488 407/*
3e170ce0 408 * Routine: kdp_lck_spin_is_acquired
fe8ab488
A
409 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
410 * Returns: TRUE if lock is acquired.
411 */
412boolean_t
cb323159 413kdp_lck_spin_is_acquired(lck_spin_t *lck) {
fe8ab488
A
414 if (not_in_kdp) {
415 panic("panic: spinlock acquired check done outside of kernel debugger");
416 }
417 return (lck->interlock != 0)? TRUE : FALSE;
418}
419
91447636
A
420/*
421 * Initialize a usimple_lock.
422 *
423 * No change in preemption state.
424 */
425void
426usimple_lock_init(
cb323159
A
427 usimple_lock_t l,
428 __unused unsigned short tag)
91447636 429{
cb323159 430#ifndef MACHINE_SIMPLE_LOCK
91447636
A
431 USLDBG(usld_lock_init(l, tag));
432 hw_lock_init(&l->interlock);
433#else
cb323159 434 simple_lock_init((simple_lock_t)l,tag);
91447636
A
435#endif
436}
437
060df5ea
A
438volatile uint32_t spinlock_owner_cpu = ~0;
439volatile usimple_lock_t spinlock_timed_out;
440
cb323159 441uint32_t spinlock_timeout_NMI(uintptr_t thread_addr) {
060df5ea
A
442 uint32_t i;
443
444 for (i = 0; i < real_ncpus; i++) {
a39ff7e2 445 if ((cpu_data_ptr[i] != NULL) && ((uintptr_t)cpu_data_ptr[i]->cpu_active_thread == thread_addr)) {
060df5ea 446 spinlock_owner_cpu = i;
5ba3f43e
A
447 if ((uint32_t) cpu_number() != i) {
448 /* Cause NMI and panic on the owner's cpu */
449 NMIPI_panic(cpu_to_cpumask(i), SPINLOCK_TIMEOUT);
450 }
060df5ea
A
451 break;
452 }
453 }
454
455 return spinlock_owner_cpu;
456}
91447636
A
457
458/*
459 * Acquire a usimple_lock.
460 *
461 * Returns with preemption disabled. Note
462 * that the hw_lock routines are responsible for
463 * maintaining preemption state.
464 */
465void
0a7de745 466(usimple_lock)(
cb323159 467 usimple_lock_t l
0a7de745 468 LCK_GRP_ARG(lck_grp_t *grp))
91447636 469{
cb323159 470#ifndef MACHINE_SIMPLE_LOCK
2d21ac55 471 DECL_PC(pc);
91447636 472
b0d623f7 473 OBTAIN_PC(pc);
91447636 474 USLDBG(usld_lock_pre(l, pc));
6d2010ae 475
cb323159 476 if(__improbable(hw_lock_to(&l->interlock, LockTimeOutTSC, grp) == 0)) {
b0d623f7 477 boolean_t uslock_acquired = FALSE;
060df5ea
A
478 while (machine_timeout_suspended()) {
479 enable_preemption();
cb323159 480 if ((uslock_acquired = hw_lock_to(&l->interlock, LockTimeOutTSC, grp)))
060df5ea 481 break;
6d2010ae
A
482 }
483
060df5ea
A
484 if (uslock_acquired == FALSE) {
485 uint32_t lock_cpu;
7ddcb079 486 uintptr_t lowner = (uintptr_t)l->interlock.lock_data;
060df5ea 487 spinlock_timed_out = l;
7ddcb079 488 lock_cpu = spinlock_timeout_NMI(lowner);
5ba3f43e 489 panic("Spinlock acquisition timed out: lock=%p, lock owner thread=0x%lx, current_thread: %p, lock owner active on CPU 0x%x, current owner: 0x%lx, time: %llu",
cb323159 490 l, lowner, current_thread(), lock_cpu, (uintptr_t)l->interlock.lock_data, mach_absolute_time());
060df5ea 491 }
b0d623f7 492 }
39037602 493#if DEVELOPMENT || DEBUG
cb323159 494 pltrace(FALSE);
39037602
A
495#endif
496
91447636
A
497 USLDBG(usld_lock_post(l, pc));
498#else
0a7de745 499 simple_lock((simple_lock_t)l, grp);
91447636 500#endif
5ba3f43e 501#if CONFIG_DTRACE
0a7de745 502 LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE, l, 0, (uintptr_t)LCK_GRP_PROBEARG(grp));
5ba3f43e 503#endif
91447636
A
504}
505
506
507/*
508 * Release a usimple_lock.
509 *
510 * Returns with preemption enabled. Note
511 * that the hw_lock routines are responsible for
512 * maintaining preemption state.
513 */
514void
515usimple_unlock(
cb323159 516 usimple_lock_t l)
91447636 517{
cb323159 518#ifndef MACHINE_SIMPLE_LOCK
91447636
A
519 DECL_PC(pc);
520
b0d623f7 521 OBTAIN_PC(pc);
91447636 522 USLDBG(usld_unlock(l, pc));
39037602 523#if DEVELOPMENT || DEBUG
cb323159 524 pltrace(TRUE);
39037602 525#endif
91447636
A
526 hw_lock_unlock(&l->interlock);
527#else
528 simple_unlock_rwmb((simple_lock_t)l);
529#endif
530}
531
532
533/*
534 * Conditionally acquire a usimple_lock.
535 *
536 * On success, returns with preemption disabled.
537 * On failure, returns with preemption in the same state
538 * as when first invoked. Note that the hw_lock routines
539 * are responsible for maintaining preemption state.
540 *
541 * XXX No stats are gathered on a miss; I preserved this
542 * behavior from the original assembly-language code, but
543 * doesn't it make sense to log misses? XXX
544 */
545unsigned int
546usimple_lock_try(
cb323159 547 usimple_lock_t l,
0a7de745 548 lck_grp_t *grp)
91447636 549{
cb323159
A
550#ifndef MACHINE_SIMPLE_LOCK
551 unsigned int success;
2d21ac55 552 DECL_PC(pc);
91447636 553
b0d623f7 554 OBTAIN_PC(pc);
91447636 555 USLDBG(usld_lock_try_pre(l, pc));
0a7de745 556 if ((success = hw_lock_try(&l->interlock, grp))) {
39037602
A
557#if DEVELOPMENT || DEBUG
558 pltrace(FALSE);
559#endif
cb323159 560 USLDBG(usld_lock_try_post(l, pc));
91447636
A
561 }
562 return success;
563#else
cb323159 564 return(simple_lock_try((simple_lock_t)l, grp));
91447636
A
565#endif
566}
567
39037602 568/*
cb323159 569 * Acquire a usimple_lock while polling for pending cpu signals
39037602
A
570 * and spinning on a lock.
571 *
572 */
cb323159
A
573unsigned int
574(usimple_lock_try_lock_mp_signal_safe_loop_deadline)(usimple_lock_t l,
575 uint64_t deadline
576 LCK_GRP_ARG(lck_grp_t *grp))
39037602
A
577{
578 boolean_t istate = ml_get_interrupts_enabled();
cb323159
A
579
580 if (deadline < mach_absolute_time()) {
581 return 0;
582 }
583
0a7de745 584 while (!simple_lock_try(l, grp)) {
cb323159
A
585 if (!istate)
586 cpu_signal_handler(NULL);
587
588 if (deadline < mach_absolute_time()) {
589 return 0;
0a7de745 590 }
cb323159 591
39037602
A
592 cpu_pause();
593 }
cb323159
A
594
595 return 1;
596}
597
598void
599(usimple_lock_try_lock_loop)(usimple_lock_t l
600 LCK_GRP_ARG(lck_grp_t *grp))
601{
602 usimple_lock_try_lock_mp_signal_safe_loop_deadline(l, ULLONG_MAX, grp);
39037602
A
603}
604
cb323159
A
605unsigned int
606(usimple_lock_try_lock_mp_signal_safe_loop_duration)(usimple_lock_t l,
607 uint64_t duration
608 LCK_GRP_ARG(lck_grp_t *grp))
609{
610 uint64_t deadline;
611 uint64_t base_at = mach_absolute_time();
612 uint64_t duration_at;
613
614 nanoseconds_to_absolutetime(duration, &duration_at);
615 deadline = base_at + duration_at;
616 if (deadline < base_at) {
617 /* deadline has overflowed, make it saturate */
618 deadline = ULLONG_MAX;
619 }
620
621 return usimple_lock_try_lock_mp_signal_safe_loop_deadline(l, deadline, grp);
622}
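/*
 * The duration variant above turns a relative timeout into an absolute
 * deadline and saturates on overflow. A standalone restatement of that
 * arithmetic (unsigned addition wraps in C, so the "< base" test detects
 * the overflow):
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t
saturating_deadline(uint64_t base, uint64_t duration)
{
	uint64_t deadline = base + duration;

	if (deadline < base) {          /* wrapped around: clamp, don't wrap */
		deadline = UINT64_MAX;
	}
	return deadline;
}

int
main(void)
{
	printf("%" PRIu64 "\n", saturating_deadline(100, 50));              /* 150 */
	printf("%" PRIu64 "\n", saturating_deadline(UINT64_MAX - 10, 50));  /* UINT64_MAX */
	return 0;
}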
623
624#if USLOCK_DEBUG
91447636
A
625/*
626 * States of a usimple_lock. The default when initializing
627 * a usimple_lock is setting it up for debug checking.
628 */
cb323159
A
629#define USLOCK_CHECKED 0x0001 /* lock is being checked */
630#define USLOCK_TAKEN 0x0002 /* lock has been taken */
631#define USLOCK_INIT 0xBAA0 /* lock has been initialized */
632#define USLOCK_INITIALIZED (USLOCK_INIT|USLOCK_CHECKED)
633#define USLOCK_CHECKING(l) (uslock_check && \
634 ((l)->debug.state & USLOCK_CHECKED))
91447636
A
635
636/*
637 * Initialize the debugging information contained
638 * in a usimple_lock.
639 */
640void
641usld_lock_init(
cb323159
A
642 usimple_lock_t l,
643 __unused unsigned short tag)
91447636 644{
cb323159 645 if (l == USIMPLE_LOCK_NULL)
91447636
A
646 panic("lock initialization: null lock pointer");
647 l->lock_type = USLOCK_TAG;
648 l->debug.state = uslock_check ? USLOCK_INITIALIZED : 0;
649 l->debug.lock_cpu = l->debug.unlock_cpu = 0;
650 l->debug.lock_pc = l->debug.unlock_pc = INVALID_PC;
651 l->debug.lock_thread = l->debug.unlock_thread = INVALID_THREAD;
652 l->debug.duration[0] = l->debug.duration[1] = 0;
653 l->debug.unlock_cpu = l->debug.unlock_cpu = 0;
654 l->debug.unlock_pc = l->debug.unlock_pc = INVALID_PC;
655 l->debug.unlock_thread = l->debug.unlock_thread = INVALID_THREAD;
656}
657
658
659/*
660 * These checks apply to all usimple_locks, not just
661 * those with USLOCK_CHECKED turned on.
662 */
663int
664usld_lock_common_checks(
cb323159
A
665 usimple_lock_t l,
666 char *caller)
91447636 667{
cb323159 668 if (l == USIMPLE_LOCK_NULL)
91447636 669 panic("%s: null lock pointer", caller);
cb323159 670 if (l->lock_type != USLOCK_TAG)
ebb1b9f4 671 panic("%s: %p is not a usimple lock, 0x%x", caller, l, l->lock_type);
cb323159 672 if (!(l->debug.state & USLOCK_INIT))
ebb1b9f4 673 panic("%s: %p is not an initialized lock, 0x%x", caller, l, l->debug.state);
91447636
A
674 return USLOCK_CHECKING(l);
675}
676
677
678/*
679 * Debug checks on a usimple_lock just before attempting
680 * to acquire it.
681 */
682/* ARGSUSED */
683void
684usld_lock_pre(
cb323159
A
685 usimple_lock_t l,
686 pc_t pc)
91447636 687{
cb323159 688 char caller[] = "usimple_lock";
91447636
A
689
690
cb323159 691 if (!usld_lock_common_checks(l, caller))
91447636
A
692 return;
693
694/*
 695 * Note that we have a weird case where we are getting a lock when we are
696 * in the process of putting the system to sleep. We are running with no
697 * current threads, therefore we can't tell if we are trying to retake a lock
698 * we have or someone on the other processor has it. Therefore we just
699 * ignore this test if the locking thread is 0.
700 */
701
702 if ((l->debug.state & USLOCK_TAKEN) && l->debug.lock_thread &&
703 l->debug.lock_thread == (void *) current_thread()) {
2d21ac55 704 printf("%s: lock %p already locked (at %p) by",
cb323159 705 caller, l, l->debug.lock_pc);
2d21ac55 706 printf(" current thread %p (new attempt at pc %p)\n",
cb323159 707 l->debug.lock_thread, pc);
2d21ac55 708 panic("%s", caller);
91447636
A
709 }
710 mp_disable_preemption();
91447636
A
711 mp_enable_preemption();
712}
713
714
715/*
716 * Debug checks on a usimple_lock just after acquiring it.
717 *
718 * Pre-emption has been disabled at this point,
719 * so we are safe in using cpu_number.
720 */
721void
722usld_lock_post(
cb323159
A
723 usimple_lock_t l,
724 pc_t pc)
91447636 725{
cb323159
A
726 int mycpu;
727 char caller[] = "successful usimple_lock";
91447636
A
728
729
cb323159 730 if (!usld_lock_common_checks(l, caller))
91447636
A
731 return;
732
cb323159 733 if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED))
b0d623f7 734 panic("%s: lock %p became uninitialized",
cb323159
A
735 caller, l);
736 if ((l->debug.state & USLOCK_TAKEN))
b0d623f7 737 panic("%s: lock 0x%p became TAKEN by someone else",
cb323159 738 caller, l);
91447636
A
739
740 mycpu = cpu_number();
741 l->debug.lock_thread = (void *)current_thread();
742 l->debug.state |= USLOCK_TAKEN;
743 l->debug.lock_pc = pc;
744 l->debug.lock_cpu = mycpu;
91447636
A
745}
746
747
748/*
749 * Debug checks on a usimple_lock just before
750 * releasing it. Note that the caller has not
751 * yet released the hardware lock.
752 *
753 * Preemption is still disabled, so there's
754 * no problem using cpu_number.
755 */
756void
757usld_unlock(
cb323159
A
758 usimple_lock_t l,
759 pc_t pc)
91447636 760{
cb323159
A
761 int mycpu;
762 char caller[] = "usimple_unlock";
91447636
A
763
764
cb323159 765 if (!usld_lock_common_checks(l, caller))
91447636
A
766 return;
767
768 mycpu = cpu_number();
769
cb323159 770 if (!(l->debug.state & USLOCK_TAKEN))
b0d623f7 771 panic("%s: lock 0x%p hasn't been taken",
cb323159
A
772 caller, l);
773 if (l->debug.lock_thread != (void *) current_thread())
b0d623f7 774 panic("%s: unlocking lock 0x%p, owned by thread %p",
cb323159 775 caller, l, l->debug.lock_thread);
91447636 776 if (l->debug.lock_cpu != mycpu) {
b0d623f7 777 printf("%s: unlocking lock 0x%p on cpu 0x%x",
cb323159 778 caller, l, mycpu);
91447636 779 printf(" (acquired on cpu 0x%x)\n", l->debug.lock_cpu);
2d21ac55 780 panic("%s", caller);
91447636 781 }
91447636
A
782
783 l->debug.unlock_thread = l->debug.lock_thread;
784 l->debug.lock_thread = INVALID_PC;
785 l->debug.state &= ~USLOCK_TAKEN;
786 l->debug.unlock_pc = pc;
787 l->debug.unlock_cpu = mycpu;
788}
789
790
791/*
792 * Debug checks on a usimple_lock just before
793 * attempting to acquire it.
794 *
795 * Preemption isn't guaranteed to be disabled.
796 */
797void
798usld_lock_try_pre(
cb323159
A
799 usimple_lock_t l,
800 __unused pc_t pc)
91447636 801{
cb323159 802 char caller[] = "usimple_lock_try";
91447636 803
cb323159 804 if (!usld_lock_common_checks(l, caller))
91447636 805 return;
91447636
A
806}
807
808
809/*
810 * Debug checks on a usimple_lock just after
811 * successfully attempting to acquire it.
812 *
813 * Preemption has been disabled by the
814 * lock acquisition attempt, so it's safe
815 * to use cpu_number.
816 */
817void
818usld_lock_try_post(
cb323159
A
819 usimple_lock_t l,
820 pc_t pc)
91447636 821{
cb323159
A
822 int mycpu;
823 char caller[] = "successful usimple_lock_try";
91447636 824
cb323159 825 if (!usld_lock_common_checks(l, caller))
91447636
A
826 return;
827
cb323159 828 if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED))
b0d623f7 829 panic("%s: lock 0x%p became uninitialized",
cb323159
A
830 caller, l);
831 if ((l->debug.state & USLOCK_TAKEN))
b0d623f7 832 panic("%s: lock 0x%p became TAKEN by someone else",
cb323159 833 caller, l);
91447636
A
834
835 mycpu = cpu_number();
836 l->debug.lock_thread = (void *) current_thread();
837 l->debug.state |= USLOCK_TAKEN;
838 l->debug.lock_pc = pc;
839 l->debug.lock_cpu = mycpu;
91447636 840}
cb323159 841#endif /* USLOCK_DEBUG */
91447636 842
91447636
A
843/*
844 * Routine: lck_rw_alloc_init
845 */
846lck_rw_t *
847lck_rw_alloc_init(
cb323159
A
848 lck_grp_t *grp,
849 lck_attr_t *attr) {
850 lck_rw_t *lck;
91447636 851
b0d623f7
A
852 if ((lck = (lck_rw_t *)kalloc(sizeof(lck_rw_t))) != 0) {
853 bzero(lck, sizeof(lck_rw_t));
91447636 854 lck_rw_init(lck, grp, attr);
b0d623f7
A
855 }
856
cb323159 857 return(lck);
91447636
A
858}
859
860/*
861 * Routine: lck_rw_free
862 */
863void
864lck_rw_free(
cb323159
A
865 lck_rw_t *lck,
866 lck_grp_t *grp) {
91447636
A
867 lck_rw_destroy(lck, grp);
868 kfree(lck, sizeof(lck_rw_t));
869}
870
871/*
872 * Routine: lck_rw_init
873 */
874void
875lck_rw_init(
cb323159
A
876 lck_rw_t *lck,
877 lck_grp_t *grp,
878 lck_attr_t *attr)
0c530ab8 879{
cb323159
A
880 lck_attr_t *lck_attr = (attr != LCK_ATTR_NULL) ?
881 attr : &LockDefaultLckAttr;
91447636 882
2d21ac55
A
883 hw_lock_byte_init(&lck->lck_rw_interlock);
884 lck->lck_rw_want_write = FALSE;
885 lck->lck_rw_want_upgrade = FALSE;
886 lck->lck_rw_shared_count = 0;
887 lck->lck_rw_can_sleep = TRUE;
b0d623f7 888 lck->lck_r_waiting = lck->lck_w_waiting = 0;
91447636 889 lck->lck_rw_tag = 0;
2d21ac55 890 lck->lck_rw_priv_excl = ((lck_attr->lck_attr_val &
cb323159 891 LCK_ATTR_RW_SHARED_PRIORITY) == 0);
91447636
A
892
893 lck_grp_reference(grp);
894 lck_grp_lckcnt_incr(grp, LCK_TYPE_RW);
895}
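/*
 * Sketch of typical read-mostly client usage of the lck_rw_t KPIs: many
 * concurrent readers take the lock shared, the rare writer takes it
 * exclusive. The "mytab" names are placeholders.
 */
#include <kern/locks.h>

static lck_grp_t *mytab_lck_grp;
static lck_rw_t  *mytab_lock;
static int        mytab_value;          /* read often, written rarely */

static void
mytab_init(void)
{
	mytab_lck_grp = lck_grp_alloc_init("mytab", LCK_GRP_ATTR_NULL);
	mytab_lock    = lck_rw_alloc_init(mytab_lck_grp, LCK_ATTR_NULL);
}

static int
mytab_read(void)
{
	int v;

	lck_rw_lock_shared(mytab_lock);         /* readers can run concurrently */
	v = mytab_value;
	lck_rw_unlock_shared(mytab_lock);
	return v;
}

static void
mytab_write(int v)
{
	lck_rw_lock_exclusive(mytab_lock);      /* waits out readers and writers */
	mytab_value = v;
	lck_rw_unlock_exclusive(mytab_lock);
}

static void
mytab_fini(void)
{
	lck_rw_free(mytab_lock, mytab_lck_grp);
	lck_grp_free(mytab_lck_grp);
}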
896
897/*
898 * Routine: lck_rw_destroy
899 */
900void
901lck_rw_destroy(
cb323159
A
902 lck_rw_t *lck,
903 lck_grp_t *grp)
b0d623f7 904{
cb323159 905 if (lck->lck_rw_tag == LCK_RW_TAG_DESTROYED)
91447636 906 return;
39236c6e
A
907#if MACH_LDEBUG
908 lck_rw_assert(lck, LCK_RW_ASSERT_NOTHELD);
909#endif
91447636
A
910 lck->lck_rw_tag = LCK_RW_TAG_DESTROYED;
911 lck_grp_lckcnt_decr(grp, LCK_TYPE_RW);
912 lck_grp_deallocate(grp);
913 return;
914}
915
916/*
917 * Sleep locks. These use the same data structure and algorithm
918 * as the spin locks, but the process sleeps while it is waiting
919 * for the lock. These work on uniprocessor systems.
920 */
921
922#define DECREMENTER_TIMEOUT 1000000
923
91447636 924/*
6d2010ae
A
925 * We disable interrupts while holding the RW interlock to prevent an
926 * interrupt from exacerbating hold time.
91447636
A
927 * Hence, local helper functions lck_interlock_lock()/lck_interlock_unlock().
928 */
5ba3f43e 929static inline boolean_t
91447636
A
930lck_interlock_lock(lck_rw_t *lck)
931{
cb323159 932 boolean_t istate;
91447636 933
0a7de745 934 istate = ml_set_interrupts_enabled(FALSE);
2d21ac55 935 hw_lock_byte_lock(&lck->lck_rw_interlock);
91447636
A
936 return istate;
937}
938
5ba3f43e 939static inline void
91447636 940lck_interlock_unlock(lck_rw_t *lck, boolean_t istate)
0a7de745 941{
2d21ac55 942 hw_lock_byte_unlock(&lck->lck_rw_interlock);
91447636
A
943 ml_set_interrupts_enabled(istate);
944}
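/*
 * The interlock helpers above capture the caller's interrupt state, keep
 * interrupts off for the whole (short) critical section, and then restore
 * the saved state rather than unconditionally re-enabling. A sketch of the
 * same pattern; my_byte_lock is a hypothetical interlock byte, and the
 * hw_lock_byte_lock()/hw_lock_byte_unlock() prototypes are assumed to take
 * a volatile uint8_t *.
 */
static volatile uint8_t my_byte_lock;

static void
update_under_interlock(lck_rw_t *lck)
{
	boolean_t istate;

	istate = ml_set_interrupts_enabled(FALSE);      /* remember caller's state */
	hw_lock_byte_lock(&my_byte_lock);

	/* ... short, bounded update of lck's state goes here ... */
	(void)lck;

	hw_lock_byte_unlock(&my_byte_lock);
	ml_set_interrupts_enabled(istate);              /* restore, don't force TRUE */
}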
945
0c530ab8
A
946/*
947 * This inline is used when busy-waiting for an rw lock.
948 * If interrupts were disabled when the lock primitive was called,
949 * we poll the IPI handler for pending tlb flushes.
950 * XXX This is a hack to avoid deadlocking on the pmap_system_lock.
951 */
952static inline void
953lck_rw_lock_pause(boolean_t interrupts_enabled)
954{
cb323159 955 if (!interrupts_enabled)
0c530ab8
A
956 handle_pending_TLB_flushes();
957 cpu_pause();
958}
959
5ba3f43e
A
960static inline boolean_t
961lck_rw_held_read_or_upgrade(lck_rw_t *lock)
962{
cb323159 963 if (ordered_load(&lock->data) & (LCK_RW_SHARED_MASK | LCK_RW_INTERLOCK | LCK_RW_WANT_UPGRADE))
5ba3f43e
A
964 return TRUE;
965 return FALSE;
966}
b0d623f7
A
967
968/*
969 * compute the deadline to spin against when
970 * waiting for a change of state on a lck_rw_t
971 */
972static inline uint64_t
973lck_rw_deadline_for_spin(lck_rw_t *lck)
974{
975 if (lck->lck_rw_can_sleep) {
976 if (lck->lck_r_waiting || lck->lck_w_waiting || lck->lck_rw_shared_count > machine_info.max_cpus) {
977 /*
978 * there are already threads waiting on this lock... this
cb323159 979 * implies that they have spun beyond their deadlines waiting for
b0d623f7
A
980 * the desired state to show up so we will not bother spinning at this time...
981 * or
982 * the current number of threads sharing this lock exceeds our capacity to run them
983 * concurrently and since all states we're going to spin for require the rw_shared_count
984 * to be at 0, we'll not bother spinning since the latency for this to happen is
985 * unpredictable...
986 */
cb323159 987 return (mach_absolute_time());
b0d623f7 988 }
cb323159
A
989 return (mach_absolute_time() + MutexSpin);
990 } else
991 return (mach_absolute_time() + (100000LL * 1000000000LL));
b0d623f7
A
992}
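/*
 * A pure-function restatement of the spin-deadline policy above; now_abs and
 * spin_window stand in for mach_absolute_time() and MutexSpin.
 */
static uint64_t
spin_deadline(boolean_t can_sleep, boolean_t have_waiters, uint32_t shared_count,
    uint32_t max_cpus, uint64_t now_abs, uint64_t spin_window)
{
	if (!can_sleep) {
		/* can't block: pick a deadline far enough out to spin "forever" */
		return now_abs + (100000LL * 1000000000LL);
	}
	if (have_waiters || shared_count > max_cpus) {
		/*
		 * others already spun past their deadlines, or there are too many
		 * readers for the count to drain soon: don't bother spinning at all
		 */
		return now_abs;
	}
	return now_abs + spin_window;
}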
993
994
5ba3f43e
A
995/*
996 * Spin while interlock is held.
997 */
998
999static inline void
1000lck_rw_interlock_spin(lck_rw_t *lock)
1001{
1002 while (ordered_load(&lock->data) & LCK_RW_INTERLOCK) {
1003 cpu_pause();
1004 }
1005}
1006
1007static boolean_t
1008lck_rw_grab_want(lck_rw_t *lock)
1009{
cb323159 1010 uint32_t data, prev;
5ba3f43e 1011
cb323159 1012 for ( ; ; ) {
5ba3f43e 1013 data = atomic_exchange_begin32(&lock->data, &prev, memory_order_relaxed);
cb323159 1014 if ((data & LCK_RW_INTERLOCK) == 0)
5ba3f43e
A
1015 break;
1016 atomic_exchange_abort();
1017 lck_rw_interlock_spin(lock);
1018 }
1019 if (data & LCK_RW_WANT_WRITE) {
1020 atomic_exchange_abort();
1021 return FALSE;
1022 }
1023 data |= LCK_RW_WANT_WRITE;
1024 return atomic_exchange_complete32(&lock->data, prev, data, memory_order_relaxed);
1025}
1026
1027static boolean_t
1028lck_rw_grab_shared(lck_rw_t *lock)
1029{
cb323159 1030 uint32_t data, prev;
5ba3f43e 1031
cb323159 1032 for ( ; ; ) {
5ba3f43e 1033 data = atomic_exchange_begin32(&lock->data, &prev, memory_order_acquire_smp);
cb323159 1034 if ((data & LCK_RW_INTERLOCK) == 0)
5ba3f43e
A
1035 break;
1036 atomic_exchange_abort();
1037 lck_rw_interlock_spin(lock);
1038 }
1039 if (data & (LCK_RW_WANT_WRITE | LCK_RW_WANT_UPGRADE)) {
1040 if (((data & LCK_RW_SHARED_MASK) == 0) || (data & LCK_RW_PRIV_EXCL)) {
1041 atomic_exchange_abort();
1042 return FALSE;
1043 }
1044 }
1045 data += LCK_RW_SHARED_READER;
1046 return atomic_exchange_complete32(&lock->data, prev, data, memory_order_acquire_smp);
1047}
1048
91447636
A
1049/*
1050 * Routine: lck_rw_lock_exclusive
1051 */
5ba3f43e 1052static void
b0d623f7 1053lck_rw_lock_exclusive_gen(
cb323159 1054 lck_rw_t *lck)
91447636 1055{
cb323159
A
1056 __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(lck);
1057 uint64_t deadline = 0;
1058 int slept = 0;
1059 int gotlock = 0;
1060 int lockheld = 0;
1061 wait_result_t res = 0;
1062 boolean_t istate = -1;
91447636 1063
cb323159 1064#if CONFIG_DTRACE
b0d623f7 1065 boolean_t dtrace_ls_initialized = FALSE;
cb323159 1066 boolean_t dtrace_rwl_excl_spin, dtrace_rwl_excl_block, dtrace_ls_enabled= FALSE;
b0d623f7
A
1067 uint64_t wait_interval = 0;
1068 int readers_at_sleep = 0;
2d21ac55 1069#endif
91447636 1070
91447636 1071 /*
2d21ac55 1072 * Try to acquire the lck_rw_want_write bit.
91447636 1073 */
cb323159
A
1074 while ( !lck_rw_grab_want(lck)) {
1075
1076#if CONFIG_DTRACE
b0d623f7
A
1077 if (dtrace_ls_initialized == FALSE) {
1078 dtrace_ls_initialized = TRUE;
1079 dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
1080 dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
1081 dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
1082 if (dtrace_ls_enabled) {
1083 /*
1084 * Either sleeping or spinning is happening,
1085 * start a timing of our delay interval now.
1086 */
1087 readers_at_sleep = lck->lck_rw_shared_count;
1088 wait_interval = mach_absolute_time();
1089 }
91447636 1090 }
2d21ac55 1091#endif
cb323159 1092 if (istate == -1)
b0d623f7 1093 istate = ml_get_interrupts_enabled();
91447636 1094
b0d623f7
A
1095 deadline = lck_rw_deadline_for_spin(lck);
1096
3e170ce0 1097 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
0a7de745 1098
cb323159 1099 while (((gotlock = lck_rw_grab_want(lck)) == 0) && mach_absolute_time() < deadline)
b0d623f7
A
1100 lck_rw_lock_pause(istate);
1101
3e170ce0 1102 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_END, trace_lck, 0, 0, gotlock, 0);
b0d623f7 1103
cb323159 1104 if (gotlock)
b0d623f7
A
1105 break;
1106 /*
1107 * if we get here, the deadline has expired w/o us
1108 * being able to grab the lock exclusively
1109 * check to see if we're allowed to do a thread_block
1110 */
1111 if (lck->lck_rw_can_sleep) {
cb323159 1112
91447636 1113 istate = lck_interlock_lock(lck);
91447636 1114
b0d623f7 1115 if (lck->lck_rw_want_write) {
cb323159 1116
3e170ce0 1117 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
91447636 1118
b0d623f7 1119 lck->lck_w_waiting = TRUE;
91447636 1120
813fb2f6 1121 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite);
d9a64523 1122 res = assert_wait(RW_LOCK_WRITER_EVENT(lck),
cb323159 1123 THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
b0d623f7 1124 lck_interlock_unlock(lck, istate);
91447636 1125
b0d623f7
A
1126 if (res == THREAD_WAITING) {
1127 res = thread_block(THREAD_CONTINUE_NULL);
1128 slept++;
1129 }
3e170ce0 1130 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_END, trace_lck, res, slept, 0, 0);
b0d623f7
A
1131 } else {
1132 lck->lck_rw_want_write = TRUE;
1133 lck_interlock_unlock(lck, istate);
1134 break;
1135 }
1136 }
1137 }
1138 /*
1139 * Wait for readers (and upgrades) to finish...
1140 * the test for these conditions must be done simultaneously with
1141 * a check of the interlock not being held since
1142 * the rw_shared_count will drop to 0 first and then want_upgrade
1143 * will be set to 1 in the shared_to_exclusive scenario... those
1144 * adjustments are done behind the interlock and represent an
1145 * atomic change in state and must be considered as such
1146 * however, once we see the read count at 0, the want_upgrade not set
1147 * and the interlock not held, we are safe to proceed
1148 */
1149 while (lck_rw_held_read_or_upgrade(lck)) {
cb323159
A
1150
1151#if CONFIG_DTRACE
2d21ac55
A
1152 /*
1153 * Either sleeping or spinning is happening, start
1154 * a timing of our delay interval now. If we set it
1155 * to -1 we don't have accurate data so we cannot later
1156 * decide to record a dtrace spin or sleep event.
1157 */
b0d623f7
A
1158 if (dtrace_ls_initialized == FALSE) {
1159 dtrace_ls_initialized = TRUE;
1160 dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
1161 dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
1162 dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
1163 if (dtrace_ls_enabled) {
1164 /*
1165 * Either sleeping or spinning is happening,
1166 * start a timing of our delay interval now.
1167 */
1168 readers_at_sleep = lck->lck_rw_shared_count;
1169 wait_interval = mach_absolute_time();
1170 }
2d21ac55
A
1171 }
1172#endif
cb323159 1173 if (istate == -1)
b0d623f7
A
1174 istate = ml_get_interrupts_enabled();
1175
1176 deadline = lck_rw_deadline_for_spin(lck);
1177
3e170ce0 1178 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
b0d623f7 1179
cb323159 1180 while ((lockheld = lck_rw_held_read_or_upgrade(lck)) && mach_absolute_time() < deadline)
b0d623f7
A
1181 lck_rw_lock_pause(istate);
1182
3e170ce0 1183 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_END, trace_lck, 0, 0, lockheld, 0);
b0d623f7 1184
cb323159 1185 if ( !lockheld)
b0d623f7
A
1186 break;
1187 /*
1188 * if we get here, the deadline has expired w/o us
1189 * being able to grab the lock exclusively
1190 * check to see if we're allowed to do a thread_block
1191 */
1192 if (lck->lck_rw_can_sleep) {
cb323159 1193
91447636 1194 istate = lck_interlock_lock(lck);
91447636 1195
b0d623f7 1196 if (lck->lck_rw_shared_count != 0 || lck->lck_rw_want_upgrade) {
3e170ce0 1197 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
b0d623f7
A
1198
1199 lck->lck_w_waiting = TRUE;
1200
813fb2f6 1201 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite);
d9a64523 1202 res = assert_wait(RW_LOCK_WRITER_EVENT(lck),
cb323159 1203 THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
91447636 1204 lck_interlock_unlock(lck, istate);
b0d623f7
A
1205
1206 if (res == THREAD_WAITING) {
1207 res = thread_block(THREAD_CONTINUE_NULL);
1208 slept++;
1209 }
3e170ce0 1210 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_END, trace_lck, res, slept, 0, 0);
b0d623f7
A
1211 } else {
1212 lck_interlock_unlock(lck, istate);
1213 /*
1214 * must own the lock now, since we checked for
1215 * readers or upgrade owner behind the interlock
1216 * no need for a call to 'lck_rw_held_read_or_upgrade'
1217 */
1218 break;
91447636
A
1219 }
1220 }
91447636
A
1221 }
1222
cb323159 1223#if CONFIG_DTRACE
2d21ac55
A
1224 /*
1225 * Decide what latencies we suffered that are Dtrace events.
1226 * If we have set wait_interval, then we either spun or slept.
1227 * At least we get out from under the interlock before we record
1228 * which is the best we can do here to minimize the impact
1229 * of the tracing.
1230 * If we have set wait_interval to -1, then dtrace was not enabled when we
1231 * started sleeping/spinning so we don't record this event.
1232 */
b0d623f7 1233 if (dtrace_ls_enabled == TRUE) {
2d21ac55 1234 if (slept == 0) {
0a7de745 1235 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_SPIN, lck,
2d21ac55
A
1236 mach_absolute_time() - wait_interval, 1);
1237 } else {
1238 /*
1239 * For the blocking case, we also record if when we blocked
1240 * it was held for read or write, and how many readers.
1241 * Notice that above we recorded this before we dropped
1242 * the interlock so the count is accurate.
1243 */
0a7de745 1244 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_BLOCK, lck,
2d21ac55
A
1245 mach_absolute_time() - wait_interval, 1,
1246 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1247 }
1248 }
1249 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lck, 1);
1250#endif
91447636
A
1251}
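/*
 * The _gen path above is a spin-then-block acquisition: spin until a
 * deadline in the hope the owner releases soon, then take the interlock,
 * publish a "waiting" flag and sleep. A compressed user-space sketch of the
 * same shape using pthreads; the mutex and condition variable stand in for
 * the interlock and assert_wait()/thread_block(), and the names are
 * illustrative.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>
#include <time.h>

struct waitable_flag {
	pthread_mutex_t mtx;
	pthread_cond_t  cv;
	bool            busy;
};

static uint64_t
now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000000ull + (uint64_t)ts.tv_nsec;
}

static void
acquire_flag(struct waitable_flag *f, uint64_t spin_ns)
{
	uint64_t deadline = now_ns() + spin_ns;

	/* phase 1: spin, hoping the holder is about to release */
	while (now_ns() < deadline) {
		pthread_mutex_lock(&f->mtx);
		if (!f->busy) {
			f->busy = true;
			pthread_mutex_unlock(&f->mtx);
			return;
		}
		pthread_mutex_unlock(&f->mtx);
	}
	/* phase 2: deadline expired, block until a release wakes us */
	pthread_mutex_lock(&f->mtx);
	while (f->busy) {
		pthread_cond_wait(&f->cv, &f->mtx);
	}
	f->busy = true;
	pthread_mutex_unlock(&f->mtx);
}

static void
release_flag(struct waitable_flag *f)
{
	pthread_mutex_lock(&f->mtx);
	f->busy = false;
	pthread_cond_signal(&f->cv);    /* wake a waiter, like thread_wakeup() */
	pthread_mutex_unlock(&f->mtx);
}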
1252
5ba3f43e
A
1253/*
1254 * Routine: lck_rw_done
1255 */
1256
cb323159 1257lck_rw_type_t lck_rw_done(lck_rw_t *lock)
5ba3f43e 1258{
cb323159 1259 uint32_t data, prev;
5ba3f43e 1260
cb323159 1261 for ( ; ; ) {
5ba3f43e 1262 data = atomic_exchange_begin32(&lock->data, &prev, memory_order_release_smp);
cb323159 1263 if (data & LCK_RW_INTERLOCK) { /* wait for interlock to clear */
5ba3f43e
A
1264 atomic_exchange_abort();
1265 lck_rw_interlock_spin(lock);
1266 continue;
1267 }
1268 if (data & LCK_RW_SHARED_MASK) {
1269 data -= LCK_RW_SHARED_READER;
cb323159 1270 if ((data & LCK_RW_SHARED_MASK) == 0) /* if reader count has now gone to 0, check for waiters */
5ba3f43e 1271 goto check_waiters;
cb323159 1272 } else { /* if reader count == 0, must be exclusive lock */
5ba3f43e
A
1273 if (data & LCK_RW_WANT_UPGRADE) {
1274 data &= ~(LCK_RW_WANT_UPGRADE);
1275 } else {
cb323159 1276 if (data & LCK_RW_WANT_WRITE)
5ba3f43e 1277 data &= ~(LCK_RW_WANT_EXCL);
cb323159 1278 else /* lock is not 'owned', panic */
5ba3f43e
A
1279 panic("Releasing non-exclusive RW lock without a reader refcount!");
1280 }
1281check_waiters:
1282 if (prev & LCK_RW_W_WAITING) {
1283 data &= ~(LCK_RW_W_WAITING);
cb323159 1284 if ((prev & LCK_RW_PRIV_EXCL) == 0)
5ba3f43e 1285 data &= ~(LCK_RW_R_WAITING);
cb323159 1286 } else
5ba3f43e
A
1287 data &= ~(LCK_RW_R_WAITING);
1288 }
cb323159 1289 if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_release_smp))
5ba3f43e
A
1290 break;
1291 cpu_pause();
1292 }
1293 return lck_rw_done_gen(lock, prev);
1294}
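/*
 * The release path above decodes a single 32-bit word. Restated over an
 * explicit (illustrative) struct, the decision is simply: a non-zero reader
 * count means the caller held the lock shared; otherwise whichever of
 * want_upgrade / want_excl is set says how it was held exclusive.
 */
struct rw_state_sketch {
	uint32_t  shared_count;
	boolean_t want_upgrade;
	boolean_t want_excl;
};

static lck_rw_type_t
rw_release_sketch(struct rw_state_sketch *s)
{
	if (s->shared_count != 0) {             /* we were one of the readers */
		s->shared_count--;
		return LCK_RW_TYPE_SHARED;
	}
	if (s->want_upgrade) {                  /* exclusive via an upgrade */
		s->want_upgrade = FALSE;
		return LCK_RW_TYPE_EXCLUSIVE;
	}
	if (s->want_excl) {                     /* exclusive via lock_exclusive */
		s->want_excl = FALSE;
		return LCK_RW_TYPE_EXCLUSIVE;
	}
	panic("rw_release_sketch: lock not held");      /* matches the panic above */
}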
91447636
A
1295
1296/*
2d21ac55 1297 * Routine: lck_rw_done_gen
b0d623f7 1298 *
5ba3f43e 1299 * called from lck_rw_done()
b0d623f7 1300 * prior_lock_state is the value in the 1st
cb323159 1301 * word of the lock at the time of a successful
b0d623f7 1302 * atomic compare and exchange with the new value...
cb323159 1303 * it represents the state of the lock before we
b0d623f7 1304 * decremented the rw_shared_count or cleared either
cb323159 1305 * rw_want_upgrade or rw_want_write and
b0d623f7 1306 * the lck_x_waiting bits... since the wrapper
cb323159 1307 * routine has already changed the state atomically,
b0d623f7
A
1308 * we just need to decide if we should
1309 * wake up anyone and what value to return... we do
1310 * this by examining the state of the lock before
1311 * we changed it
91447636 1312 */
5ba3f43e 1313static lck_rw_type_t
2d21ac55 1314lck_rw_done_gen(
0a7de745
A
1315 lck_rw_t *lck,
1316 uint32_t prior_lock_state)
91447636 1317{
0a7de745
A
1318 lck_rw_t *fake_lck;
1319 lck_rw_type_t lock_type;
1320 thread_t thread;
1321 uint32_t rwlock_count;
39236c6e 1322
0a7de745
A
1323 thread = current_thread();
1324 rwlock_count = thread->rwlock_count--;
b0d623f7 1325 fake_lck = (lck_rw_t *)&prior_lock_state;
91447636 1326
0a7de745
A
1327 if (lck->lck_rw_can_sleep) {
1328 /*
1329 * prior_lock state is a snapshot of the 1st word of the
1330 * lock in question... we'll fake up a pointer to it
1331 * and carefully not access anything beyond whats defined
1332 * in the first word of a lck_rw_t
1333 */
91447636 1334
0a7de745
A
1335 if (fake_lck->lck_rw_shared_count <= 1) {
1336 if (fake_lck->lck_w_waiting) {
1337 thread_wakeup(RW_LOCK_WRITER_EVENT(lck));
1338 }
2d21ac55 1339
0a7de745
A
1340 if (!(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting) && fake_lck->lck_r_waiting) {
1341 thread_wakeup(RW_LOCK_READER_EVENT(lck));
1342 }
1343 }
fe8ab488 1344#if MACH_LDEBUG
0a7de745
A
1345 if (rwlock_count == 0) {
1346 panic("rw lock count underflow for thread %p", thread);
1347 }
fe8ab488 1348#endif
0a7de745
A
1349 /* Check if dropping the lock means that we need to unpromote */
1350
1351 if ((rwlock_count == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
1352 /* sched_flags checked without lock, but will be rechecked while clearing */
1353 lck_rw_clear_promotion(thread, unslide_for_kdebug(lck));
1354 }
1355 }
1356 if (fake_lck->lck_rw_shared_count) {
1357 lock_type = LCK_RW_TYPE_SHARED;
1358 } else {
1359 lock_type = LCK_RW_TYPE_EXCLUSIVE;
fe8ab488
A
1360 }
1361
2d21ac55 1362#if CONFIG_DTRACE
b0d623f7 1363 LOCKSTAT_RECORD(LS_LCK_RW_DONE_RELEASE, lck, lock_type == LCK_RW_TYPE_SHARED ? 0 : 1);
2d21ac55
A
1364#endif
1365
0a7de745 1366 return lock_type;
91447636
A
1367}
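/*
 * The fake_lck trick used above: the prior value of the lock's first 32-bit
 * word is reinterpreted through the lock's layout so the pre-release state
 * (waiter bits, reader count) can be examined after the atomic update has
 * already been published. A standalone sketch using a union, which sidesteps
 * the aliasing questions raised by the pointer cast; the bitfield layout
 * here is purely illustrative, the real one is lck_rw_t in i386/locks.h.
 */
#include <stdint.h>
#include <stdio.h>

struct rw_word_sketch {
	uint32_t shared_count:16,
	         interlock:1,
	         priv_excl:1,
	         want_upgrade:1,
	         want_excl:1,
	         r_waiting:1,
	         w_waiting:1,
	         pad:10;
};

union rw_snapshot {
	uint32_t              raw;
	struct rw_word_sketch bits;
};

static void
report_prior_state(uint32_t prior_lock_state)
{
	union rw_snapshot snap = { .raw = prior_lock_state };

	if (snap.bits.w_waiting) {
		printf("a writer was waiting: wake it\n");
	}
	if (snap.bits.r_waiting && !(snap.bits.priv_excl && snap.bits.w_waiting)) {
		printf("readers were waiting and may now run: wake them\n");
	}
}

int
main(void)
{
	report_prior_state(0);          /* nothing waiting: no wakeups */
	return 0;
}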
1368
1369
91447636
A
1370/*
1371 * Routine: lck_rw_unlock
1372 */
1373void
1374lck_rw_unlock(
cb323159
A
1375 lck_rw_t *lck,
1376 lck_rw_type_t lck_rw_type)
91447636 1377{
cb323159 1378 if (lck_rw_type == LCK_RW_TYPE_SHARED)
91447636 1379 lck_rw_unlock_shared(lck);
cb323159 1380 else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
91447636 1381 lck_rw_unlock_exclusive(lck);
cb323159 1382 else
91447636
A
1383 panic("lck_rw_unlock(): Invalid RW lock type: %d\n", lck_rw_type);
1384}
1385
1386
1387/*
1388 * Routine: lck_rw_unlock_shared
1389 */
1390void
1391lck_rw_unlock_shared(
cb323159 1392 lck_rw_t *lck)
91447636 1393{
cb323159 1394 lck_rw_type_t ret;
91447636 1395
a39ff7e2 1396 assertf(lck->lck_rw_shared_count > 0, "lck %p has shared_count=0x%x", lck, lck->lck_rw_shared_count);
91447636
A
1397 ret = lck_rw_done(lck);
1398
cb323159 1399 if (ret != LCK_RW_TYPE_SHARED)
39037602 1400 panic("lck_rw_unlock_shared(): lock %p held in mode: %d\n", lck, ret);
91447636
A
1401}
1402
1403
1404/*
1405 * Routine: lck_rw_unlock_exclusive
1406 */
1407void
1408lck_rw_unlock_exclusive(
cb323159 1409 lck_rw_t *lck)
91447636 1410{
cb323159 1411 lck_rw_type_t ret;
91447636
A
1412
1413 ret = lck_rw_done(lck);
1414
cb323159 1415 if (ret != LCK_RW_TYPE_EXCLUSIVE)
91447636
A
1416 panic("lck_rw_unlock_exclusive(): lock held in mode: %d\n", ret);
1417}
1418
1419
1420/*
1421 * Routine: lck_rw_lock
1422 */
1423void
1424lck_rw_lock(
cb323159
A
1425 lck_rw_t *lck,
1426 lck_rw_type_t lck_rw_type)
91447636 1427{
cb323159 1428 if (lck_rw_type == LCK_RW_TYPE_SHARED)
91447636 1429 lck_rw_lock_shared(lck);
cb323159 1430 else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
91447636 1431 lck_rw_lock_exclusive(lck);
cb323159 1432 else
91447636
A
1433 panic("lck_rw_lock(): Invalid RW lock type: %x\n", lck_rw_type);
1434}
1435
5ba3f43e
A
1436/*
1437 * Routine: lck_rw_lock_shared
1438 */
1439void
1440lck_rw_lock_shared(lck_rw_t *lock)
1441{
cb323159 1442 uint32_t data, prev;
5ba3f43e
A
1443
1444 current_thread()->rwlock_count++;
cb323159 1445 for ( ; ; ) {
5ba3f43e
A
1446 data = atomic_exchange_begin32(&lock->data, &prev, memory_order_acquire_smp);
1447 if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK)) {
1448 atomic_exchange_abort();
0a7de745
A
1449 if (lock->lck_rw_can_sleep) {
1450 lck_rw_lock_shared_gen(lock);
1451 } else {
1452 cpu_pause();
1453 continue;
1454 }
5ba3f43e
A
1455 break;
1456 }
1457 data += LCK_RW_SHARED_READER;
cb323159 1458 if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_acquire_smp))
5ba3f43e
A
1459 break;
1460 cpu_pause();
1461 }
cb323159 1462#if CONFIG_DTRACE
5ba3f43e 1463 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED);
cb323159 1464#endif /* CONFIG_DTRACE */
5ba3f43e
A
1465 return;
1466}
91447636
A
1467
1468/*
2d21ac55 1469 * Routine: lck_rw_lock_shared_gen
b0d623f7
A
1470 * Function:
1471 * assembly fast path code has determined that this lock
1472 * is held exclusively... this is where we spin/block
1473 * until we can acquire the lock in the shared mode
91447636 1474 */
5ba3f43e 1475static void
2d21ac55 1476lck_rw_lock_shared_gen(
cb323159 1477 lck_rw_t *lck)
91447636 1478{
cb323159
A
1479 __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(lck);
1480 uint64_t deadline = 0;
1481 int gotlock = 0;
1482 int slept = 0;
1483 wait_result_t res = 0;
1484 boolean_t istate = -1;
3e170ce0 1485
cb323159 1486#if CONFIG_DTRACE
2d21ac55 1487 uint64_t wait_interval = 0;
b0d623f7
A
1488 int readers_at_sleep = 0;
1489 boolean_t dtrace_ls_initialized = FALSE;
1490 boolean_t dtrace_rwl_shared_spin, dtrace_rwl_shared_block, dtrace_ls_enabled = FALSE;
2d21ac55 1491#endif
91447636 1492
cb323159
A
1493 while ( !lck_rw_grab_shared(lck)) {
1494
1495#if CONFIG_DTRACE
b0d623f7
A
1496 if (dtrace_ls_initialized == FALSE) {
1497 dtrace_ls_initialized = TRUE;
1498 dtrace_rwl_shared_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_SPIN] != 0);
1499 dtrace_rwl_shared_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_BLOCK] != 0);
1500 dtrace_ls_enabled = dtrace_rwl_shared_spin || dtrace_rwl_shared_block;
1501 if (dtrace_ls_enabled) {
1502 /*
1503 * Either sleeping or spinning is happening,
1504 * start a timing of our delay interval now.
1505 */
1506 readers_at_sleep = lck->lck_rw_shared_count;
1507 wait_interval = mach_absolute_time();
1508 }
1509 }
2d21ac55 1510#endif
cb323159 1511 if (istate == -1)
b0d623f7 1512 istate = ml_get_interrupts_enabled();
91447636 1513
b0d623f7 1514 deadline = lck_rw_deadline_for_spin(lck);
0c530ab8 1515
b0d623f7 1516 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_START,
cb323159 1517 trace_lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, 0, 0);
91447636 1518
cb323159 1519 while (((gotlock = lck_rw_grab_shared(lck)) == 0) && mach_absolute_time() < deadline)
b0d623f7
A
1520 lck_rw_lock_pause(istate);
1521
1522 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_END,
cb323159 1523 trace_lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, gotlock, 0);
b0d623f7 1524
cb323159 1525 if (gotlock)
b0d623f7
A
1526 break;
1527 /*
1528 * if we get here, the deadline has expired w/o us
1529 * being able to grab the lock for read
1530 * check to see if we're allowed to do a thread_block
1531 */
1532 if (lck->lck_rw_can_sleep) {
cb323159 1533
91447636 1534 istate = lck_interlock_lock(lck);
91447636 1535
b0d623f7
A
1536 if ((lck->lck_rw_want_write || lck->lck_rw_want_upgrade) &&
1537 ((lck->lck_rw_shared_count == 0) || lck->lck_rw_priv_excl)) {
cb323159 1538
b0d623f7 1539 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_START,
cb323159 1540 trace_lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, 0, 0);
b0d623f7
A
1541
1542 lck->lck_r_waiting = TRUE;
1543
813fb2f6 1544 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockRead);
d9a64523 1545 res = assert_wait(RW_LOCK_READER_EVENT(lck),
cb323159 1546 THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
91447636 1547 lck_interlock_unlock(lck, istate);
b0d623f7
A
1548
1549 if (res == THREAD_WAITING) {
1550 res = thread_block(THREAD_CONTINUE_NULL);
1551 slept++;
1552 }
1553 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_END,
cb323159 1554 trace_lck, res, slept, 0, 0);
b0d623f7
A
1555 } else {
1556 lck->lck_rw_shared_count++;
1557 lck_interlock_unlock(lck, istate);
1558 break;
91447636
A
1559 }
1560 }
91447636
A
1561 }
1562
cb323159 1563#if CONFIG_DTRACE
b0d623f7 1564 if (dtrace_ls_enabled == TRUE) {
2d21ac55 1565 if (slept == 0) {
0a7de745 1566 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_SPIN, lck, mach_absolute_time() - wait_interval, 0);
2d21ac55 1567 } else {
0a7de745 1568 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_BLOCK, lck,
2d21ac55
A
1569 mach_absolute_time() - wait_interval, 0,
1570 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1571 }
1572 }
1573 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lck, 0);
1574#endif
91447636
A
1575}
1576
1577
5ba3f43e
A
1578/*
1579 * Routine: lck_rw_lock_exclusive
1580 */
1581
1582void
1583lck_rw_lock_exclusive(lck_rw_t *lock)
1584{
1585 current_thread()->rwlock_count++;
1586 if (atomic_test_and_set32(&lock->data,
cb323159
A
1587 (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK),
1588 LCK_RW_WANT_EXCL, memory_order_acquire_smp, FALSE)) {
1589#if CONFIG_DTRACE
5ba3f43e 1590 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
cb323159
A
1591#endif /* CONFIG_DTRACE */
1592 } else
5ba3f43e
A
1593 lck_rw_lock_exclusive_gen(lock);
1594}
1595
1596
1597/*
1598 * Routine: lck_rw_lock_shared_to_exclusive
cb323159
A
1599 *
1600 * False returned upon failure, in this case the shared lock is dropped.
5ba3f43e
A
1601 */
1602
1603boolean_t
1604lck_rw_lock_shared_to_exclusive(lck_rw_t *lock)
1605{
cb323159 1606 uint32_t data, prev;
5ba3f43e 1607
cb323159 1608 for ( ; ; ) {
5ba3f43e
A
1609 data = atomic_exchange_begin32(&lock->data, &prev, memory_order_acquire_smp);
1610 if (data & LCK_RW_INTERLOCK) {
1611 atomic_exchange_abort();
1612 lck_rw_interlock_spin(lock);
1613 continue;
1614 }
1615 if (data & LCK_RW_WANT_UPGRADE) {
1616 data -= LCK_RW_SHARED_READER;
cb323159
A
1617 if ((data & LCK_RW_SHARED_MASK) == 0) /* we were the last reader */
1618 data &= ~(LCK_RW_W_WAITING); /* so clear the wait indicator */
1619 if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_acquire_smp))
5ba3f43e
A
1620 return lck_rw_lock_shared_to_exclusive_failure(lock, prev);
1621 } else {
cb323159
A
1622 data |= LCK_RW_WANT_UPGRADE; /* ask for WANT_UPGRADE */
1623 data -= LCK_RW_SHARED_READER; /* and shed our read count */
1624 if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_acquire_smp))
5ba3f43e
A
1625 break;
1626 }
1627 cpu_pause();
1628 }
cb323159
A
1629 /* we now own the WANT_UPGRADE */
1630 if (data & LCK_RW_SHARED_MASK) /* check to see if all of the readers are drained */
1631 lck_rw_lock_shared_to_exclusive_success(lock); /* if not, we need to go wait */
1632#if CONFIG_DTRACE
5ba3f43e
A
1633 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lock, 0);
1634#endif
1635 return TRUE;
1636}
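/*
 * Typical caller pattern for the upgrade above: if the upgrade fails, the
 * shared hold has already been dropped, so the caller must take the lock
 * exclusive from scratch and re-validate whatever it read under the shared
 * hold. update_entry() and its locking discipline are illustrative.
 */
#include <kern/locks.h>

static void
update_entry(lck_rw_t *lock)
{
	lck_rw_lock_shared(lock);

	/* ... inspect state, decide an update is needed ... */

	if (!lck_rw_lock_shared_to_exclusive(lock)) {
		/* the read hold is gone; another upgrader won the race */
		lck_rw_lock_exclusive(lock);
		/* ... re-check: state may have changed while unlocked ... */
	}

	/* ... perform the update with the lock held exclusive ... */

	lck_rw_unlock_exclusive(lock);
}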
1637
1638
91447636 1639/*
b0d623f7 1640 * Routine: lck_rw_lock_shared_to_exclusive_failure
91447636 1641 * Function:
b0d623f7
A
1642 * assembly fast path code has already dropped our read
1643 * count and determined that someone else owns 'lck_rw_want_upgrade'
 1644 * if 'lck_rw_shared_count' == 0, it's also already dropped 'lck_w_waiting'
1645 * all we need to do here is determine if a wakeup is needed
91447636 1646 */
5ba3f43e 1647static boolean_t
b0d623f7 1648lck_rw_lock_shared_to_exclusive_failure(
cb323159
A
1649 lck_rw_t *lck,
1650 uint32_t prior_lock_state)
91447636 1651{
cb323159
A
1652 lck_rw_t *fake_lck;
1653 thread_t thread = current_thread();
1654 uint32_t rwlock_count;
39236c6e
A
1655
1656 /* Check if dropping the lock means that we need to unpromote */
1657 rwlock_count = thread->rwlock_count--;
1658#if MACH_LDEBUG
1659 if (rwlock_count == 0) {
1660 panic("rw lock count underflow for thread %p", thread);
1661 }
1662#endif
b0d623f7 1663 fake_lck = (lck_rw_t *)&prior_lock_state;
91447636 1664
b0d623f7 1665 if (fake_lck->lck_w_waiting && fake_lck->lck_rw_shared_count == 1) {
91447636
A
1666 /*
1667 * Someone else has requested upgrade.
b0d623f7
A
1668 * Since we've released the read lock, wake
1669 * him up if he's blocked waiting
91447636 1670 */
b0d623f7
A
1671 thread_wakeup(RW_LOCK_WRITER_EVENT(lck));
1672 }
5ba3f43e
A
1673
1674 if ((rwlock_count == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
1675 /* sched_flags checked without lock, but will be rechecked while clearing */
d9a64523 1676 lck_rw_clear_promotion(thread, unslide_for_kdebug(lck));
5ba3f43e
A
1677 }
1678
b0d623f7 1679 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_NONE,
cb323159 1680 VM_KERNEL_UNSLIDE_OR_PERM(lck), lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0);
91447636 1681
cb323159 1682 return (FALSE);
b0d623f7 1683}
91447636 1684
91447636 1685
b0d623f7
A
1686/*
 1687 * Routine: lck_rw_lock_shared_to_exclusive_success
1688 * Function:
1689 * assembly fast path code has already dropped our read
1690 * count and successfully acquired 'lck_rw_want_upgrade'
1691 * we just need to wait for the rest of the readers to drain
1692 * and then we can return as the exclusive holder of this lock
1693 */
5ba3f43e 1694static boolean_t
b0d623f7 1695lck_rw_lock_shared_to_exclusive_success(
cb323159 1696 lck_rw_t *lck)
b0d623f7 1697{
cb323159
A
1698 __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(lck);
1699 uint64_t deadline = 0;
1700 int slept = 0;
1701 int still_shared = 0;
1702 wait_result_t res;
1703 boolean_t istate = -1;
91447636 1704
cb323159 1705#if CONFIG_DTRACE
b0d623f7
A
1706 uint64_t wait_interval = 0;
1707 int readers_at_sleep = 0;
1708 boolean_t dtrace_ls_initialized = FALSE;
1709 boolean_t dtrace_rwl_shared_to_excl_spin, dtrace_rwl_shared_to_excl_block, dtrace_ls_enabled = FALSE;
1710#endif
91447636 1711
2d21ac55 1712 while (lck->lck_rw_shared_count != 0) {
cb323159
A
1713
1714#if CONFIG_DTRACE
b0d623f7
A
1715 if (dtrace_ls_initialized == FALSE) {
1716 dtrace_ls_initialized = TRUE;
1717 dtrace_rwl_shared_to_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN] != 0);
1718 dtrace_rwl_shared_to_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK] != 0);
1719 dtrace_ls_enabled = dtrace_rwl_shared_to_excl_spin || dtrace_rwl_shared_to_excl_block;
1720 if (dtrace_ls_enabled) {
1721 /*
 1722 * Either sleeping or spinning is about to happen,
 1723 * so start timing our delay interval now.
1724 */
1725 readers_at_sleep = lck->lck_rw_shared_count;
1726 wait_interval = mach_absolute_time();
1727 }
2d21ac55
A
1728 }
1729#endif
cb323159 1730 if (istate == -1)
b0d623f7
A
1731 istate = ml_get_interrupts_enabled();
1732
1733 deadline = lck_rw_deadline_for_spin(lck);
1734
1735 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_START,
cb323159 1736 trace_lck, lck->lck_rw_shared_count, 0, 0, 0);
b0d623f7 1737
cb323159 1738 while ((still_shared = lck->lck_rw_shared_count) && mach_absolute_time() < deadline)
b0d623f7
A
1739 lck_rw_lock_pause(istate);
1740
1741 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_END,
cb323159 1742 trace_lck, lck->lck_rw_shared_count, 0, 0, 0);
b0d623f7 1743
cb323159 1744 if ( !still_shared)
b0d623f7
A
1745 break;
1746 /*
1747 * if we get here, the deadline has expired w/o
 1748 * the rw_shared_count having drained to 0;
 1749 * check to see if we're allowed to do a thread_block
1750 */
1751 if (lck->lck_rw_can_sleep) {
cb323159 1752
91447636 1753 istate = lck_interlock_lock(lck);
0a7de745 1754
b0d623f7
A
1755 if (lck->lck_rw_shared_count != 0) {
1756 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_START,
cb323159 1757 trace_lck, lck->lck_rw_shared_count, 0, 0, 0);
b0d623f7
A
1758
1759 lck->lck_w_waiting = TRUE;
91447636 1760
813fb2f6 1761 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockUpgrade);
d9a64523 1762 res = assert_wait(RW_LOCK_WRITER_EVENT(lck),
cb323159 1763 THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
91447636 1764 lck_interlock_unlock(lck, istate);
b0d623f7
A
1765
1766 if (res == THREAD_WAITING) {
1767 res = thread_block(THREAD_CONTINUE_NULL);
1768 slept++;
1769 }
1770 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_END,
cb323159 1771 trace_lck, res, slept, 0, 0);
b0d623f7
A
1772 } else {
1773 lck_interlock_unlock(lck, istate);
1774 break;
91447636
A
1775 }
1776 }
91447636 1777 }
cb323159 1778#if CONFIG_DTRACE
2d21ac55
A
1779 /*
1780 * We infer whether we took the sleep/spin path above by checking readers_at_sleep.
1781 */
b0d623f7 1782 if (dtrace_ls_enabled == TRUE) {
2d21ac55 1783 if (slept == 0) {
0a7de745 1784 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN, lck, mach_absolute_time() - wait_interval, 0);
2d21ac55 1785 } else {
0a7de745 1786 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK, lck,
2d21ac55
A
1787 mach_absolute_time() - wait_interval, 1,
1788 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1789 }
1790 }
2d21ac55
A
1791 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lck, 1);
1792#endif
cb323159 1793 return (TRUE);
91447636
A
1794}
1795
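/*
 * Editorial sketch (not part of the blamed file): typical caller-side use of
 * the upgrade path above.  If lck_rw_lock_shared_to_exclusive() returns FALSE,
 * the failure path has already dropped the read hold entirely, so the caller
 * must re-acquire and re-validate.  'example_upgrade_and_update' and 'my_lck'
 * are hypothetical names.
 */
static void
example_upgrade_and_update(lck_rw_t *my_lck)
{
	lck_rw_lock_shared(my_lck);

	if (!lck_rw_lock_shared_to_exclusive(my_lck)) {
		/* upgrade failed: the lock is no longer held at all */
		lck_rw_lock_exclusive(my_lck);
		/* state may have changed while unlocked; re-validate before updating */
	}
	/* ... perform the update as the exclusive holder ... */
	lck_rw_unlock_exclusive(my_lck);
}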
5ba3f43e
A
1796/*
1797 * Routine: lck_rw_lock_exclusive_to_shared
1798 */
1799
cb323159 1800void lck_rw_lock_exclusive_to_shared(lck_rw_t *lock)
5ba3f43e 1801{
cb323159 1802 uint32_t data, prev;
5ba3f43e 1803
cb323159 1804 for ( ; ; ) {
5ba3f43e
A
1805 data = atomic_exchange_begin32(&lock->data, &prev, memory_order_release_smp);
1806 if (data & LCK_RW_INTERLOCK) {
1807 atomic_exchange_abort();
cb323159 1808 lck_rw_interlock_spin(lock); /* wait for interlock to clear */
5ba3f43e
A
1809 continue;
1810 }
1811 data += LCK_RW_SHARED_READER;
cb323159 1812 if (data & LCK_RW_WANT_UPGRADE)
5ba3f43e 1813 data &= ~(LCK_RW_WANT_UPGRADE);
cb323159 1814 else
5ba3f43e 1815 data &= ~(LCK_RW_WANT_EXCL);
cb323159 1816 if (!((prev & LCK_RW_W_WAITING) && (prev & LCK_RW_PRIV_EXCL)))
5ba3f43e 1817 data &= ~(LCK_RW_W_WAITING);
cb323159 1818 if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_release_smp))
5ba3f43e
A
1819 break;
1820 cpu_pause();
1821 }
1822 return lck_rw_lock_exclusive_to_shared_gen(lock, prev);
1823}
1824
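/*
 * Editorial sketch: the downgrade above lets a writer publish an update and
 * then keep reading without any window in which the lock is unheld.
 * 'example_publish_then_read' is a hypothetical helper.
 */
static void
example_publish_then_read(lck_rw_t *my_lck)
{
	lck_rw_lock_exclusive(my_lck);
	/* ... modify the protected state ... */
	lck_rw_lock_exclusive_to_shared(my_lck);
	/* ... continue reading the state just written, now as a reader ... */
	lck_rw_unlock_shared(my_lck);
}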
b0d623f7 1825
91447636 1826/*
5ba3f43e 1827 * Routine: lck_rw_lock_exclusive_to_shared_gen
cb323159 1828 * Function:
b0d623f7
A
1829 * assembly fast path has already dropped
 1830 * our exclusive state and bumped lck_rw_shared_count;
 1831 * all we need to do here is determine if anyone
1832 * needs to be awakened.
91447636 1833 */
5ba3f43e 1834static void
b0d623f7 1835lck_rw_lock_exclusive_to_shared_gen(
cb323159
A
1836 lck_rw_t *lck,
1837 uint32_t prior_lock_state)
91447636 1838{
cb323159
A
1839 __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(lck);
1840 lck_rw_t *fake_lck;
91447636 1841
b0d623f7 1842 fake_lck = (lck_rw_t *)&prior_lock_state;
91447636 1843
b0d623f7 1844 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_START,
cb323159 1845 trace_lck, fake_lck->lck_rw_want_write, fake_lck->lck_rw_want_upgrade, 0, 0);
91447636 1846
b0d623f7
A
1847 /*
1848 * don't wake up anyone waiting to take the lock exclusively
1849 * since we hold a read count... when the read count drops to 0,
1850 * the writers will be woken.
1851 *
1852 * wake up any waiting readers if we don't have any writers waiting,
1853 * or the lock is NOT marked as rw_priv_excl (writers have privilege)
1854 */
cb323159 1855 if (!(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting) && fake_lck->lck_r_waiting)
2d21ac55 1856 thread_wakeup(RW_LOCK_READER_EVENT(lck));
91447636
A
1857
1858 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_END,
cb323159 1859 trace_lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, lck->lck_rw_shared_count, 0);
91447636 1860
2d21ac55
A
1861#if CONFIG_DTRACE
1862 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_TO_SHARED_DOWNGRADE, lck, 0);
1863#endif
91447636
A
1864}
1865
1866
1867/*
1868 * Routine: lck_rw_try_lock
1869 */
1870boolean_t
1871lck_rw_try_lock(
cb323159
A
1872 lck_rw_t *lck,
1873 lck_rw_type_t lck_rw_type)
1874{
1875 if (lck_rw_type == LCK_RW_TYPE_SHARED)
1876 return(lck_rw_try_lock_shared(lck));
1877 else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
1878 return(lck_rw_try_lock_exclusive(lck));
1879 else
91447636 1880 panic("lck_rw_try_lock(): Invalid rw lock type: %x\n", lck_rw_type);
cb323159 1881 return(FALSE);
91447636
A
1882}
1883
5ba3f43e
A
1884/*
1885 * Routine: lck_rw_try_lock_shared
1886 */
1887
cb323159 1888boolean_t lck_rw_try_lock_shared(lck_rw_t *lock)
5ba3f43e 1889{
cb323159 1890 uint32_t data, prev;
5ba3f43e 1891
cb323159 1892 for ( ; ; ) {
5ba3f43e
A
1893 data = atomic_exchange_begin32(&lock->data, &prev, memory_order_acquire_smp);
1894 if (data & LCK_RW_INTERLOCK) {
1895 atomic_exchange_abort();
1896 lck_rw_interlock_spin(lock);
1897 continue;
1898 }
1899 if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) {
1900 atomic_exchange_abort();
cb323159 1901 return FALSE; /* lock is busy */
5ba3f43e 1902 }
cb323159
A
1903 data += LCK_RW_SHARED_READER; /* Increment reader refcount */
1904 if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_acquire_smp))
5ba3f43e
A
1905 break;
1906 cpu_pause();
1907 }
1908 current_thread()->rwlock_count++;
1909 /* There is a 3 instr window where preemption may not notice rwlock_count after cmpxchg */
cb323159 1910#if CONFIG_DTRACE
5ba3f43e 1911 LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED);
cb323159 1912#endif /* CONFIG_DTRACE */
5ba3f43e
A
1913 return TRUE;
1914}
1915
1916
1917/*
1918 * Routine: lck_rw_try_lock_exclusive
1919 */
1920
cb323159 1921boolean_t lck_rw_try_lock_exclusive(lck_rw_t *lock)
5ba3f43e 1922{
cb323159 1923 uint32_t data, prev;
5ba3f43e 1924
cb323159 1925 for ( ; ; ) {
5ba3f43e
A
1926 data = atomic_exchange_begin32(&lock->data, &prev, memory_order_acquire_smp);
1927 if (data & LCK_RW_INTERLOCK) {
1928 atomic_exchange_abort();
1929 lck_rw_interlock_spin(lock);
1930 continue;
1931 }
1932 if (data & (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) {
1933 atomic_exchange_abort();
cb323159 1934 return FALSE; /* can't get it */
5ba3f43e
A
1935 }
1936 data |= LCK_RW_WANT_EXCL;
cb323159 1937 if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_acquire_smp))
5ba3f43e
A
1938 break;
1939 cpu_pause();
1940 }
1941
1942 current_thread()->rwlock_count++;
cb323159 1943#if CONFIG_DTRACE
5ba3f43e 1944 LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
cb323159 1945#endif /* CONFIG_DTRACE */
5ba3f43e
A
1946 return TRUE;
1947}
1948
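/*
 * Editorial sketch: non-blocking acquisition with a fallback, built on the
 * try-lock paths above.  'example_try_update' is a hypothetical helper.
 */
static boolean_t
example_try_update(lck_rw_t *my_lck)
{
	if (!lck_rw_try_lock_exclusive(my_lck)) {
		/* busy: readers present or another writer/upgrader holds it */
		return FALSE;
	}
	/* ... update the protected state ... */
	lck_rw_unlock_exclusive(my_lck);
	return TRUE;
}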
91447636 1949
2d21ac55
A
1950void
1951lck_rw_assert(
cb323159
A
1952 lck_rw_t *lck,
1953 unsigned int type)
2d21ac55
A
1954{
1955 switch (type) {
1956 case LCK_RW_ASSERT_SHARED:
1957 if (lck->lck_rw_shared_count != 0) {
1958 return;
1959 }
1960 break;
1961 case LCK_RW_ASSERT_EXCLUSIVE:
1962 if ((lck->lck_rw_want_write ||
cb323159 1963 lck->lck_rw_want_upgrade) &&
2d21ac55
A
1964 lck->lck_rw_shared_count == 0) {
1965 return;
1966 }
1967 break;
1968 case LCK_RW_ASSERT_HELD:
1969 if (lck->lck_rw_want_write ||
1970 lck->lck_rw_want_upgrade ||
1971 lck->lck_rw_shared_count != 0) {
1972 return;
1973 }
1974 break;
39236c6e
A
1975 case LCK_RW_ASSERT_NOTHELD:
1976 if (!(lck->lck_rw_want_write ||
cb323159
A
1977 lck->lck_rw_want_upgrade ||
1978 lck->lck_rw_shared_count != 0)) {
39236c6e
A
1979 return;
1980 }
1981 break;
2d21ac55
A
1982 default:
1983 break;
1984 }
1985
39236c6e
A
1986 panic("rw lock (%p)%s held (mode=%u), first word %08x\n", lck, (type == LCK_RW_ASSERT_NOTHELD ? "" : " not"), type, *(uint32_t *)lck);
1987}
1988
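/*
 * Editorial sketch: the assert types handled above are typically used to
 * document and enforce a function's locking contract.  Hypothetical helper.
 */
static void
example_requires_exclusive(lck_rw_t *my_lck)
{
	lck_rw_assert(my_lck, LCK_RW_ASSERT_EXCLUSIVE);
	/* ... code that relies on being the exclusive holder ... */
}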
1989/* On return to userspace, this routine is called if the rwlock_count is somehow imbalanced */
cb323159
A
1990#if MACH_LDEBUG
1991__dead2
1992#endif
39236c6e
A
1993void
1994lck_rw_clear_promotions_x86(thread_t thread)
1995{
1996#if MACH_LDEBUG
1997 /* It's fatal to leave a RW lock locked and return to userspace */
1998 panic("%u rw lock(s) held on return to userspace for thread %p", thread->rwlock_count, thread);
1999#else
2000 /* Paper over the issue */
2001 thread->rwlock_count = 0;
d9a64523 2002 lck_rw_clear_promotion(thread, 0);
39236c6e 2003#endif
2d21ac55
A
2004}
2005
5ba3f43e
A
2006boolean_t
2007lck_rw_lock_yield_shared(lck_rw_t *lck, boolean_t force_yield)
2008{
2009 lck_rw_assert(lck, LCK_RW_ASSERT_SHARED);
2010
2011 if (lck->lck_rw_want_write || lck->lck_rw_want_upgrade || force_yield) {
2012 lck_rw_unlock_shared(lck);
2013 mutex_pause(2);
2014 lck_rw_lock_shared(lck);
2015 return TRUE;
2016 }
2017
2018 return FALSE;
2019}
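/*
 * Editorial sketch: using lck_rw_lock_yield_shared() from a long shared-hold
 * loop so pending writers are not starved.  A TRUE return means the lock was
 * dropped and re-taken, so any cached state must be re-validated.
 * 'example_long_scan' and 'nchunks' are hypothetical.
 */
static void
example_long_scan(lck_rw_t *my_lck, int nchunks)
{
	int i;

	lck_rw_lock_shared(my_lck);
	for (i = 0; i < nchunks; i++) {
		/* ... examine one chunk of the protected structure ... */
		if (lck_rw_lock_yield_shared(my_lck, FALSE)) {
			/* lock was released and re-acquired: re-validate cached state */
		}
	}
	lck_rw_unlock_shared(my_lck);
}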
39236c6e 2020
3e170ce0
A
2021/*
2022 * Routine: kdp_lck_rw_lock_is_acquired_exclusive
2023 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
2024 */
2025boolean_t
cb323159 2026kdp_lck_rw_lock_is_acquired_exclusive(lck_rw_t *lck) {
3e170ce0
A
2027 if (not_in_kdp) {
 2028 panic("rw lock exclusive check done outside of kernel debugger");
2029 }
2030 return ((lck->lck_rw_want_upgrade || lck->lck_rw_want_write) && (lck->lck_rw_shared_count == 0)) ? TRUE : FALSE;
2031}
2032
d9a64523
A
2033/*
2034 * Slow path routines for lck_mtx locking and unlocking functions.
2035 *
2036 * These functions were previously implemented in x86 assembly,
 2037 * and some optimizations are in place in this C code to obtain compiled code
 2038 * as performant and compact as the assembly version.
 2039 *
 2040 * To avoid inlining these functions into the fast path, all functions directly called by
 2041 * the fast paths are marked __attribute__((noinline)). They are also implemented
 2042 * in such a way that the fast path can tail call into them. In this way the return address
 2043 * does not need to be pushed on the caller's stack and stack optimization can happen in the caller.
 2044 *
 2045 * Slow path code is structured so that there are no calls to functions that will return
 2046 * in the context of the caller function, i.e. all functions called are either tail call functions
 2047 * or inline functions. The tail call functions take fewer than six arguments,
 2048 * so that they can be passed in registers and do not need to be pushed on the stack.
 2049 * This allows the compiler to not create a stack frame for these functions.
 2050 *
 2051 * __improbable and __probable are used to compile the slow path code in such a way
 2052 * that the fast path case lands on a sequence of instructions with as few jumps as possible,
 2053 * keeping that case the most optimized even when it falls through to the slow path.
2054 */
2055
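/*
 * Editorial sketch of the structure described above, with hypothetical names:
 * an inlined fast path that tail calls a noinline slow path, so the common
 * uncontended case executes with no extra call frame.  lck_mtx_try_lock() is
 * used here only as a stand-in for the real fast-path acquire.
 */
__attribute__((noinline)) static void
example_lock_slow(lck_mtx_t *lock)
{
	lck_mtx_lock(lock);             /* stand-in for the real contended path */
}

static inline void
example_lock_fast(lck_mtx_t *lock)
{
	if (__probable(lck_mtx_try_lock(lock))) {
		return;                 /* uncontended: acquired without a frame */
	}
	return example_lock_slow(lock); /* tail call into the slow path */
}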
2056/*
2057 * Intel lock invariants:
2058 *
2059 * lck_mtx_waiters: contains the count of threads currently in the mutex waitqueue
d9a64523
A
2060 *
 2061 * The lock owner is promoted to the max priority of all its waiters only if it
 2062 * was at a lower priority when it acquired the lock, or was already the owner when a waiter waited.
2063 * Max priority is capped at MAXPRI_PROMOTE.
2064 *
2065 * The last waiter will not be promoted as it is woken up, but the last
2066 * lock owner may not have been the last thread to have been woken up depending on the
2067 * luck of the draw. Therefore a last-owner may still have the promoted-on-wakeup
2068 * flag set.
2069 *
2070 * TODO: Figure out an algorithm for stopping a lock holder which is already at the right
2071 * priority from dropping priority in the future without having to take thread lock
2072 * on acquire.
2073 */
3e170ce0 2074
cb323159 2075#ifdef MUTEX_ZONE
6d2010ae
A
2076extern zone_t lck_mtx_zone;
2077#endif
d9a64523 2078
91447636
A
2079/*
2080 * Routine: lck_mtx_alloc_init
2081 */
2082lck_mtx_t *
2083lck_mtx_alloc_init(
cb323159
A
2084 lck_grp_t *grp,
2085 lck_attr_t *attr)
91447636 2086{
cb323159
A
2087 lck_mtx_t *lck;
2088#ifdef MUTEX_ZONE
2089 if ((lck = (lck_mtx_t *)zalloc(lck_mtx_zone)) != 0)
6d2010ae
A
2090 lck_mtx_init(lck, grp, attr);
2091#else
cb323159 2092 if ((lck = (lck_mtx_t *)kalloc(sizeof(lck_mtx_t))) != 0)
91447636 2093 lck_mtx_init(lck, grp, attr);
0a7de745 2094#endif
cb323159 2095 return(lck);
91447636
A
2096}
2097
2098/*
2099 * Routine: lck_mtx_free
2100 */
2101void
2102lck_mtx_free(
cb323159
A
2103 lck_mtx_t *lck,
2104 lck_grp_t *grp)
91447636
A
2105{
2106 lck_mtx_destroy(lck, grp);
cb323159 2107#ifdef MUTEX_ZONE
6d2010ae
A
2108 zfree(lck_mtx_zone, lck);
2109#else
91447636 2110 kfree(lck, sizeof(lck_mtx_t));
6d2010ae 2111#endif
91447636
A
2112}
2113
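/*
 * Editorial sketch: the usual lifetime of a dynamically allocated mutex using
 * the routines above.  'my_grp' is a hypothetical lock group.
 */
static void
example_mutex_lifetime(lck_grp_t *my_grp)
{
	lck_mtx_t *mtx = lck_mtx_alloc_init(my_grp, LCK_ATTR_NULL);

	lck_mtx_lock(mtx);
	/* ... critical section ... */
	lck_mtx_unlock(mtx);

	lck_mtx_free(mtx, my_grp);
}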
2114/*
2115 * Routine: lck_mtx_ext_init
2116 */
2117static void
2118lck_mtx_ext_init(
cb323159
A
2119 lck_mtx_ext_t *lck,
2120 lck_grp_t *grp,
2121 lck_attr_t *attr)
91447636 2122{
2d21ac55 2123 bzero((void *)lck, sizeof(lck_mtx_ext_t));
91447636
A
2124
2125 if ((attr->lck_attr_val) & LCK_ATTR_DEBUG) {
91447636
A
2126 lck->lck_mtx_deb.type = MUTEX_TAG;
2127 lck->lck_mtx_attr |= LCK_MTX_ATTR_DEBUG;
2128 }
2129
2130 lck->lck_mtx_grp = grp;
2d21ac55 2131
cb323159 2132 if (grp->lck_grp_attr & LCK_GRP_ATTR_STAT)
6d2010ae 2133 lck->lck_mtx_attr |= LCK_MTX_ATTR_STAT;
b0d623f7 2134
6d2010ae 2135 lck->lck_mtx.lck_mtx_is_ext = 1;
39037602 2136 lck->lck_mtx.lck_mtx_pad32 = 0xFFFFFFFF;
91447636
A
2137}
2138
2139/*
2140 * Routine: lck_mtx_init
2141 */
2142void
2143lck_mtx_init(
cb323159
A
2144 lck_mtx_t *lck,
2145 lck_grp_t *grp,
2146 lck_attr_t *attr)
91447636 2147{
cb323159
A
2148 lck_mtx_ext_t *lck_ext;
2149 lck_attr_t *lck_attr;
2d21ac55 2150
cb323159 2151 if (attr != LCK_ATTR_NULL)
2d21ac55 2152 lck_attr = attr;
cb323159 2153 else
2d21ac55 2154 lck_attr = &LockDefaultLckAttr;
91447636 2155
2d21ac55 2156 if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
91447636 2157 if ((lck_ext = (lck_mtx_ext_t *)kalloc(sizeof(lck_mtx_ext_t))) != 0) {
0a7de745 2158 lck_mtx_ext_init(lck_ext, grp, lck_attr);
91447636
A
2159 lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
2160 lck->lck_mtx_ptr = lck_ext;
2161 }
2162 } else {
b0d623f7 2163 lck->lck_mtx_owner = 0;
6d2010ae 2164 lck->lck_mtx_state = 0;
91447636 2165 }
39037602 2166 lck->lck_mtx_pad32 = 0xFFFFFFFF;
91447636
A
2167 lck_grp_reference(grp);
2168 lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
2169}
2170
2d21ac55
A
2171/*
2172 * Routine: lck_mtx_init_ext
2173 */
2174void
2175lck_mtx_init_ext(
cb323159
A
2176 lck_mtx_t *lck,
2177 lck_mtx_ext_t *lck_ext,
2178 lck_grp_t *grp,
2179 lck_attr_t *attr)
2d21ac55 2180{
cb323159 2181 lck_attr_t *lck_attr;
2d21ac55 2182
cb323159 2183 if (attr != LCK_ATTR_NULL)
2d21ac55 2184 lck_attr = attr;
cb323159 2185 else
2d21ac55
A
2186 lck_attr = &LockDefaultLckAttr;
2187
2188 if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
2189 lck_mtx_ext_init(lck_ext, grp, lck_attr);
2190 lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
2191 lck->lck_mtx_ptr = lck_ext;
2192 } else {
b0d623f7 2193 lck->lck_mtx_owner = 0;
6d2010ae 2194 lck->lck_mtx_state = 0;
2d21ac55 2195 }
39037602 2196 lck->lck_mtx_pad32 = 0xFFFFFFFF;
6d2010ae 2197
2d21ac55
A
2198 lck_grp_reference(grp);
2199 lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
2200}
2201
d9a64523
A
2202static void
2203lck_mtx_lock_mark_destroyed(
2204 lck_mtx_t *mutex,
2205 boolean_t indirect)
2206{
2207 uint32_t state;
2208
2209 if (indirect) {
2210 /* convert to destroyed state */
2211 ordered_store_mtx_state_release(mutex, LCK_MTX_TAG_DESTROYED);
2212 return;
2213 }
2214
2215 state = ordered_load_mtx_state(mutex);
2216 lck_mtx_interlock_lock(mutex, &state);
2217
2218 ordered_store_mtx_state_release(mutex, LCK_MTX_TAG_DESTROYED);
2219
2220 enable_preemption();
2221}
2222
91447636
A
2223/*
2224 * Routine: lck_mtx_destroy
2225 */
2226void
2227lck_mtx_destroy(
cb323159
A
2228 lck_mtx_t *lck,
2229 lck_grp_t *grp)
91447636 2230{
d9a64523 2231 boolean_t indirect;
0a7de745 2232
cb323159 2233 if (lck->lck_mtx_tag == LCK_MTX_TAG_DESTROYED)
91447636 2234 return;
39236c6e
A
2235#if MACH_LDEBUG
2236 lck_mtx_assert(lck, LCK_MTX_ASSERT_NOTOWNED);
2237#endif
d9a64523 2238 indirect = (lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT);
b0d623f7 2239
d9a64523 2240 lck_mtx_lock_mark_destroyed(lck, indirect);
b0d623f7 2241
cb323159 2242 if (indirect)
91447636
A
2243 kfree(lck->lck_mtx_ptr, sizeof(lck_mtx_ext_t));
2244 lck_grp_lckcnt_decr(grp, LCK_TYPE_MTX);
2245 lck_grp_deallocate(grp);
2246 return;
2247}
2248
b0d623f7 2249
d9a64523
A
2250#if DEVELOPMENT | DEBUG
2251__attribute__((noinline))
2252void
2253lck_mtx_owner_check_panic(
2254 lck_mtx_t *lock)
2255{
2256 thread_t owner = (thread_t)lock->lck_mtx_owner;
2257 panic("Mutex unlock attempted from non-owner thread. Owner=%p lock=%p", owner, lock);
2258}
2259#endif
2260
2261__attribute__((always_inline))
2262static boolean_t
2263get_indirect_mutex(
2264 lck_mtx_t **lock,
cb323159 2265 uint32_t *state)
d9a64523
A
2266{
2267 *lock = &((*lock)->lck_mtx_ptr->lck_mtx);
2268 *state = ordered_load_mtx_state(*lock);
2269 return TRUE;
2270}
2271
2272/*
cb323159 2273 * Routine: lck_mtx_unlock_slow
d9a64523
A
2274 *
2275 * Unlocks a mutex held by current thread.
2276 *
cb323159 2277 * It will wake up waiters if necessary.
d9a64523
A
2278 *
2279 * Interlock can be held.
2280 */
2281__attribute__((noinline))
2282void
2283lck_mtx_unlock_slow(
cb323159 2284 lck_mtx_t *lock)
d9a64523 2285{
cb323159
A
2286 thread_t thread;
2287 uint32_t state, prev;
2288 boolean_t indirect = FALSE;
d9a64523
A
2289
2290 state = ordered_load_mtx_state(lock);
2291
2292 /* Is this an indirect mutex? */
2293 if (__improbable(state == LCK_MTX_TAG_INDIRECT)) {
2294 indirect = get_indirect_mutex(&lock, &state);
2295 }
2296
2297 thread = current_thread();
2298
2299#if DEVELOPMENT | DEBUG
2300 thread_t owner = (thread_t)lock->lck_mtx_owner;
cb323159
A
2301 if(__improbable(owner != thread))
2302 lck_mtx_owner_check_panic(lock);
d9a64523
A
2303#endif
2304
2305 /* check if it is held as a spinlock */
cb323159 2306 if (__improbable((state & LCK_MTX_MLOCKED_MSK) == 0))
d9a64523
A
2307 goto unlock;
2308
2309 lck_mtx_interlock_lock_clear_flags(lock, LCK_MTX_MLOCKED_MSK, &state);
2310
2311unlock:
2312 /* preemption disabled, interlock held and mutex not held */
2313
2314 /* clear owner */
2315 ordered_store_mtx_owner(lock, 0);
2316 /* keep original state in prev for later evaluation */
2317 prev = state;
d9a64523 2318
cb323159 2319 if (__improbable(state & LCK_MTX_WAITERS_MSK)) {
0a7de745 2320#if MACH_LDEBUG
cb323159
A
2321 if (thread)
2322 thread->mutex_count--;
2323#endif
2324 return lck_mtx_unlock_wakeup_tail(lock, state, indirect);
0a7de745 2325 }
d9a64523 2326
cb323159
A
2327 /* release interlock, promotion and clear spin flag */
2328 state &= (~(LCK_MTX_ILOCKED_MSK | LCK_MTX_SPIN_MSK));
2329 ordered_store_mtx_state_release(lock, state); /* since I own the interlock, I don't need an atomic update */
2330
2331#if MACH_LDEBUG
2332 /* perform lock statistics after drop to prevent delay */
2333 if (thread)
2334 thread->mutex_count--; /* lock statistic */
2335#endif /* MACH_LDEBUG */
d9a64523
A
2336
2337 /* re-enable preemption */
2338 lck_mtx_unlock_finish_inline(lock, FALSE);
2339
2340 return;
2341}
2342
cb323159
A
2343#define LCK_MTX_LCK_WAIT_CODE 0x20
2344#define LCK_MTX_LCK_WAKEUP_CODE 0x21
2345#define LCK_MTX_LCK_SPIN_CODE 0x22
2346#define LCK_MTX_LCK_ACQUIRE_CODE 0x23
2347#define LCK_MTX_LCK_DEMOTE_CODE 0x24
b0d623f7 2348
b0d623f7 2349/*
d9a64523 2350 * Routine: lck_mtx_unlock_wakeup_tail
b0d623f7 2351 *
d9a64523
A
2352 * Invoked on unlock when there is
2353 * contention, i.e. the assembly routine sees
cb323159 2354 * that mutex->lck_mtx_waiters != 0
b0d623f7 2355 *
6d2010ae 2356 * neither the mutex or interlock is held
d9a64523
A
2357 *
2358 * Note that this routine might not be called if there are pending
2359 * waiters which have previously been woken up, and they didn't
2360 * end up boosting the old owner.
2361 *
2362 * assembly routine previously did the following to mutex:
2363 * (after saving the state in prior_lock_state)
d9a64523
A
2364 * decremented lck_mtx_waiters if nonzero
2365 *
2366 * This function needs to be called as a tail call
2367 * to optimize the compiled code.
b0d623f7 2368 */
d9a64523
A
2369__attribute__((noinline))
2370static void
cb323159
A
2371lck_mtx_unlock_wakeup_tail (
2372 lck_mtx_t *mutex,
2373 uint32_t state,
2374 boolean_t indirect)
b0d623f7 2375{
cb323159 2376 struct turnstile *ts;
6d2010ae 2377
cb323159
A
2378 __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(mutex);
2379 kern_return_t did_wake;
6d2010ae
A
2380
2381 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_START,
cb323159 2382 trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);
b0d623f7 2383
cb323159 2384 ts = turnstile_prepare((uintptr_t)mutex, NULL, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
d9a64523 2385
cb323159
A
2386 if (mutex->lck_mtx_waiters > 1) {
 2387 /* WAITQ_PROMOTE_ON_WAKE will call turnstile_update_inheritor on the woken-up thread */
2388 did_wake = waitq_wakeup64_one(&ts->ts_waitq, CAST_EVENT64_T(LCK_MTX_EVENT(mutex)), THREAD_AWAKENED, WAITQ_PROMOTE_ON_WAKE);
2389 } else {
2390 did_wake = waitq_wakeup64_one(&ts->ts_waitq, CAST_EVENT64_T(LCK_MTX_EVENT(mutex)), THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
2391 turnstile_update_inheritor(ts, NULL, TURNSTILE_IMMEDIATE_UPDATE);
6d2010ae 2392 }
cb323159 2393 assert(did_wake == KERN_SUCCESS);
b0d623f7 2394
cb323159
A
2395 turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
2396 turnstile_complete((uintptr_t)mutex, NULL, NULL, TURNSTILE_KERNEL_MUTEX);
b0d623f7 2397
cb323159
A
2398 state -= LCK_MTX_WAITER;
2399 state &= (~(LCK_MTX_SPIN_MSK | LCK_MTX_ILOCKED_MSK));
2400 ordered_store_mtx_state_release(mutex, state);
b0d623f7 2401
cb323159 2402 assert(current_thread()->turnstile != NULL);
b0d623f7 2403
cb323159 2404 turnstile_cleanup();
d9a64523 2405
6d2010ae 2406 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_END,
cb323159 2407 trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);
b0d623f7 2408
d9a64523
A
2409 lck_mtx_unlock_finish_inline(mutex, indirect);
2410}
b0d623f7
A
2411
2412/*
cb323159 2413 * Routine: lck_mtx_lock_acquire_x86
b0d623f7
A
2414 *
2415 * Invoked on acquiring the mutex when there is
6d2010ae 2416 * contention (i.e. the assembly routine sees that
cb323159 2417 * that mutex->lck_mtx_waiters != 0
6d2010ae
A
2418 *
2419 * mutex is owned... interlock is held... preemption is disabled
b0d623f7 2420 */
d9a64523
A
2421__attribute__((always_inline))
2422static void
2423lck_mtx_lock_acquire_inline(
cb323159
A
2424 lck_mtx_t *mutex,
2425 struct turnstile *ts)
b0d623f7 2426{
cb323159 2427 __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(mutex);
b0d623f7 2428
6d2010ae 2429 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_ACQUIRE_CODE) | DBG_FUNC_START,
cb323159 2430 trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);
b0d623f7 2431
d9a64523 2432 thread_t thread = (thread_t)mutex->lck_mtx_owner; /* faster than current_thread() */
cb323159 2433 assert(thread->waiting_for_mutex == NULL);
b0d623f7 2434
cb323159
A
2435 if (mutex->lck_mtx_waiters > 0) {
2436 if (ts == NULL) {
2437 ts = turnstile_prepare((uintptr_t)mutex, NULL, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
0a7de745 2438 }
d9a64523 2439
cb323159
A
2440 turnstile_update_inheritor(ts, thread, (TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD));
2441 turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
2442 }
d9a64523 2443
cb323159
A
2444 if (ts != NULL) {
2445 turnstile_complete((uintptr_t)mutex, NULL, NULL, TURNSTILE_KERNEL_MUTEX);
2446 }
d9a64523 2447
cb323159 2448 assert(current_thread()->turnstile != NULL);
d9a64523 2449
6d2010ae 2450 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_ACQUIRE_CODE) | DBG_FUNC_END,
cb323159 2451 trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);
b0d623f7
A
2452}
2453
d9a64523
A
2454void
2455lck_mtx_lock_acquire_x86(
cb323159 2456 lck_mtx_t *mutex)
d9a64523 2457{
cb323159 2458 return lck_mtx_lock_acquire_inline(mutex, NULL);
d9a64523
A
2459}
2460
2461/*
2462 * Tail call helpers for lock functions that perform
2463 * lck_mtx_lock_acquire followed by the caller's finish routine, to optimize
2464 * the caller's compiled code.
2465 */
b0d623f7 2466
d9a64523
A
2467__attribute__((noinline))
2468static void
2469lck_mtx_lock_acquire_tail(
cb323159
A
2470 lck_mtx_t *mutex,
2471 boolean_t indirect,
2472 struct turnstile *ts)
d9a64523 2473{
cb323159
A
2474 lck_mtx_lock_acquire_inline(mutex, ts);
2475 lck_mtx_lock_finish_inline_with_cleanup(mutex, ordered_load_mtx_state(mutex), indirect);
d9a64523
A
2476}
2477
2478__attribute__((noinline))
2479static boolean_t
2480lck_mtx_try_lock_acquire_tail(
cb323159 2481 lck_mtx_t *mutex)
d9a64523 2482{
cb323159 2483 lck_mtx_lock_acquire_inline(mutex, NULL);
d9a64523
A
2484 lck_mtx_try_lock_finish_inline(mutex, ordered_load_mtx_state(mutex));
2485
2486 return TRUE;
2487}
2488
2489__attribute__((noinline))
2490static void
2491lck_mtx_convert_spin_acquire_tail(
cb323159 2492 lck_mtx_t *mutex)
d9a64523 2493{
cb323159 2494 lck_mtx_lock_acquire_inline(mutex, NULL);
d9a64523
A
2495 lck_mtx_convert_spin_finish_inline(mutex, ordered_load_mtx_state(mutex));
2496}
2497
2498boolean_t
2499lck_mtx_ilk_unlock(
2500 lck_mtx_t *mutex)
2501{
2502 lck_mtx_ilk_unlock_inline(mutex, ordered_load_mtx_state(mutex));
2503 return TRUE;
2504}
2505
2506static inline void
2507lck_mtx_interlock_lock_set_and_clear_flags(
2508 lck_mtx_t *mutex,
2509 uint32_t xor_flags,
2510 uint32_t and_flags,
2511 uint32_t *new_state)
3e170ce0 2512{
d9a64523
A
2513 uint32_t state, prev;
2514 state = *new_state;
2515
cb323159 2516 for ( ; ; ) {
d9a64523
A
2517 /* have to wait for interlock to clear */
2518 while (__improbable(state & (LCK_MTX_ILOCKED_MSK | xor_flags))) {
2519 cpu_pause();
2520 state = ordered_load_mtx_state(mutex);
2521 }
2522 prev = state; /* prev contains snapshot for exchange */
2523 state |= LCK_MTX_ILOCKED_MSK | xor_flags; /* pick up interlock */
cb323159 2524 state &= ~and_flags; /* clear flags */
d9a64523
A
2525
2526 disable_preemption();
cb323159 2527 if (os_atomic_cmpxchg(&mutex->lck_mtx_state, prev, state, acquire))
d9a64523
A
2528 break;
2529 enable_preemption();
2530 cpu_pause();
2531 state = ordered_load_mtx_state(mutex);
2532 }
2533 *new_state = state;
2534 return;
2535}
2536
2537static inline void
2538lck_mtx_interlock_lock_clear_flags(
2539 lck_mtx_t *mutex,
2540 uint32_t and_flags,
2541 uint32_t *new_state)
2542{
2543 return lck_mtx_interlock_lock_set_and_clear_flags(mutex, 0, and_flags, new_state);
2544}
2545
2546static inline void
2547lck_mtx_interlock_lock(
2548 lck_mtx_t *mutex,
2549 uint32_t *new_state)
2550{
2551 return lck_mtx_interlock_lock_set_and_clear_flags(mutex, 0, 0, new_state);
2552}
2553
2554static inline int
2555lck_mtx_interlock_try_lock_set_flags(
2556 lck_mtx_t *mutex,
2557 uint32_t or_flags,
2558 uint32_t *new_state)
2559{
2560 uint32_t state, prev;
2561 state = *new_state;
2562
2563 /* have to wait for interlock to clear */
2564 if (state & (LCK_MTX_ILOCKED_MSK | or_flags)) {
2565 return 0;
2566 }
cb323159
A
2567 prev = state; /* prev contains snapshot for exchange */
2568 state |= LCK_MTX_ILOCKED_MSK | or_flags; /* pick up interlock */
d9a64523 2569 disable_preemption();
cb323159
A
2570 if (os_atomic_cmpxchg(&mutex->lck_mtx_state, prev, state, acquire)) {
2571 *new_state = state;
2572 return 1;
d9a64523
A
2573 }
2574
2575 enable_preemption();
2576 return 0;
2577}
2578
2579static inline int
2580lck_mtx_interlock_try_lock(
2581 lck_mtx_t *mutex,
2582 uint32_t *new_state)
2583{
2584 return lck_mtx_interlock_try_lock_set_flags(mutex, 0, new_state);
2585}
2586
2587static inline int
2588lck_mtx_interlock_try_lock_disable_interrupts(
2589 lck_mtx_t *mutex,
2590 boolean_t *istate)
2591{
cb323159 2592 uint32_t state;
3e170ce0
A
2593
2594 *istate = ml_set_interrupts_enabled(FALSE);
d9a64523 2595 state = ordered_load_mtx_state(mutex);
3e170ce0 2596
d9a64523
A
2597 if (lck_mtx_interlock_try_lock(mutex, &state)) {
2598 return 1;
2599 } else {
3e170ce0 2600 ml_set_interrupts_enabled(*istate);
d9a64523
A
2601 return 0;
2602 }
3e170ce0
A
2603}
2604
d9a64523
A
2605static inline void
2606lck_mtx_interlock_unlock_enable_interrupts(
2607 lck_mtx_t *mutex,
2608 boolean_t istate)
2609{
3e170ce0
A
2610 lck_mtx_ilk_unlock(mutex);
2611 ml_set_interrupts_enabled(istate);
2612}
2613
d9a64523
A
2614__attribute__((noinline))
2615static void
2616lck_mtx_lock_contended(
2617 lck_mtx_t *lock,
2618 boolean_t indirect,
2619 boolean_t *first_miss)
2620{
2621 lck_mtx_spinwait_ret_type_t ret;
2622 uint32_t state;
2623 thread_t thread;
cb323159 2624 struct turnstile *ts = NULL;
d9a64523
A
2625
2626try_again:
2627
2628 if (indirect) {
0a7de745 2629 lck_grp_mtx_update_miss((struct _lck_mtx_ext_*)lock, first_miss);
d9a64523
A
2630 }
2631
2632 ret = lck_mtx_lock_spinwait_x86(lock);
2633 state = ordered_load_mtx_state(lock);
2634 switch (ret) {
2635 case LCK_MTX_SPINWAIT_NO_SPIN:
2636 /*
2637 * owner not on core, lck_mtx_lock_spinwait_x86 didn't even
2638 * try to spin.
2639 */
2640 if (indirect) {
0a7de745 2641 lck_grp_mtx_update_direct_wait((struct _lck_mtx_ext_*)lock);
d9a64523
A
2642 }
2643
cb323159 2644 /* just fall through to case LCK_MTX_SPINWAIT_SPUN */
d9a64523
A
2645 case LCK_MTX_SPINWAIT_SPUN:
2646 /*
2647 * mutex not acquired but lck_mtx_lock_spinwait_x86 tried to spin
2648 * interlock not held
2649 */
2650 lck_mtx_interlock_lock(lock, &state);
2651 assert(state & LCK_MTX_ILOCKED_MSK);
2652
2653 if (state & LCK_MTX_MLOCKED_MSK) {
2654 if (indirect) {
0a7de745 2655 lck_grp_mtx_update_wait((struct _lck_mtx_ext_*)lock, first_miss);
d9a64523 2656 }
cb323159 2657 lck_mtx_lock_wait_x86(lock, &ts);
d9a64523
A
2658 /*
2659 * interlock is not held here.
2660 */
2661 goto try_again;
2662 } else {
cb323159 2663
d9a64523
A
2664 /* grab the mutex */
2665 state |= LCK_MTX_MLOCKED_MSK;
2666 ordered_store_mtx_state_release(lock, state);
2667 thread = current_thread();
2668 ordered_store_mtx_owner(lock, (uintptr_t)thread);
2669#if MACH_LDEBUG
2670 if (thread) {
2671 thread->mutex_count++;
2672 }
2673#endif /* MACH_LDEBUG */
2674 }
2675
2676 break;
2677 case LCK_MTX_SPINWAIT_ACQUIRED:
2678 /*
2679 * mutex has been acquired by lck_mtx_lock_spinwait_x86
2680 * interlock is held and preemption disabled
2681 * owner is set and mutex marked as locked
2682 * statistics updated too
2683 */
2684 break;
2685 default:
2686 panic("lck_mtx_lock_spinwait_x86 returned %d for mutex %p\n", ret, lock);
2687 }
2688
2689 /*
2690 * interlock is already acquired here
2691 */
2692
2693 /* mutex has been acquired */
2694 thread = (thread_t)lock->lck_mtx_owner;
cb323159
A
2695 if (state & LCK_MTX_WAITERS_MSK) {
2696 /*
2697 * lck_mtx_lock_acquire_tail will call
2698 * turnstile_complete.
2699 */
2700 return lck_mtx_lock_acquire_tail(lock, indirect, ts);
d9a64523
A
2701 }
2702
cb323159
A
2703 if (ts != NULL) {
2704 turnstile_complete((uintptr_t)lock, NULL, NULL, TURNSTILE_KERNEL_MUTEX);
2705 }
2706
2707 assert(current_thread()->turnstile != NULL);
2708
d9a64523 2709 /* release the interlock */
cb323159 2710 lck_mtx_lock_finish_inline_with_cleanup(lock, ordered_load_mtx_state(lock), indirect);
d9a64523
A
2711}
2712
2713/*
2714 * Helper noinline functions for calling
2715 * panic to optimize compiled code.
2716 */
2717
cb323159 2718__attribute__((noinline)) __abortlike
d9a64523
A
2719static void
2720lck_mtx_destroyed(
2721 lck_mtx_t *lock)
2722{
2723 panic("trying to interlock destroyed mutex (%p)", lock);
2724}
2725
2726__attribute__((noinline))
2727static boolean_t
2728lck_mtx_try_destroyed(
2729 lck_mtx_t *lock)
2730{
2731 panic("trying to interlock destroyed mutex (%p)", lock);
2732 return FALSE;
2733}
2734
2735__attribute__((always_inline))
2736static boolean_t
2737lck_mtx_lock_wait_interlock_to_clear(
2738 lck_mtx_t *lock,
2739 uint32_t* new_state)
2740{
2741 uint32_t state;
2742
cb323159 2743 for ( ; ; ) {
d9a64523
A
2744 cpu_pause();
2745 state = ordered_load_mtx_state(lock);
2746 if (!(state & (LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK))) {
2747 *new_state = state;
2748 return TRUE;
2749 }
2750 if (state & LCK_MTX_MLOCKED_MSK) {
2751 /* if it is held as mutex, just fail */
2752 return FALSE;
2753 }
2754 }
2755}
2756
2757__attribute__((always_inline))
2758static boolean_t
2759lck_mtx_try_lock_wait_interlock_to_clear(
2760 lck_mtx_t *lock,
2761 uint32_t* new_state)
2762{
2763 uint32_t state;
2764
cb323159 2765 for ( ; ; ) {
d9a64523
A
2766 cpu_pause();
2767 state = ordered_load_mtx_state(lock);
2768 if (state & (LCK_MTX_MLOCKED_MSK | LCK_MTX_SPIN_MSK)) {
2769 /* if it is held as mutex or spin, just fail */
2770 return FALSE;
2771 }
2772 if (!(state & LCK_MTX_ILOCKED_MSK)) {
2773 *new_state = state;
2774 return TRUE;
2775 }
2776 }
2777}
2778
2779/*
2780 * Routine: lck_mtx_lock_slow
2781 *
2782 * Locks a mutex for current thread.
2783 * If the lock is contended this function might
2784 * sleep.
2785 *
2786 * Called with interlock not held.
2787 */
2788__attribute__((noinline))
2789void
2790lck_mtx_lock_slow(
2791 lck_mtx_t *lock)
2792{
cb323159
A
2793 boolean_t indirect = FALSE;
2794 uint32_t state;
2795 int first_miss = 0;
d9a64523
A
2796
2797 state = ordered_load_mtx_state(lock);
2798
2799 /* is the interlock or mutex held */
2800 if (__improbable(state & ((LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK)))) {
2801 /*
2802 * Note: both LCK_MTX_TAG_DESTROYED and LCK_MTX_TAG_INDIRECT
2803 * have LCK_MTX_ILOCKED_MSK and LCK_MTX_MLOCKED_MSK
2804 * set in state (state == lck_mtx_tag)
2805 */
2806
2807
2808 /* is the mutex already held and not indirect */
cb323159 2809 if (__improbable(!(state & LCK_MTX_ILOCKED_MSK))){
d9a64523
A
2810 /* no, must have been the mutex */
2811 return lck_mtx_lock_contended(lock, indirect, &first_miss);
2812 }
2813
2814 /* check to see if it is marked destroyed */
2815 if (__improbable(state == LCK_MTX_TAG_DESTROYED)) {
cb323159 2816 lck_mtx_destroyed(lock);
d9a64523
A
2817 }
2818
2819 /* Is this an indirect mutex? */
2820 if (__improbable(state == LCK_MTX_TAG_INDIRECT)) {
2821 indirect = get_indirect_mutex(&lock, &state);
2822
2823 first_miss = 0;
0a7de745 2824 lck_grp_mtx_update_held((struct _lck_mtx_ext_*)lock);
d9a64523
A
2825
2826 if (state & LCK_MTX_SPIN_MSK) {
cb323159 2827 /* LCK_MTX_SPIN_MSK was set, so LCK_MTX_ILOCKED_MSK must also be present */
d9a64523 2828 assert(state & LCK_MTX_ILOCKED_MSK);
0a7de745 2829 lck_grp_mtx_update_miss((struct _lck_mtx_ext_*)lock, &first_miss);
d9a64523
A
2830 }
2831 }
2832
2833 if (!lck_mtx_lock_wait_interlock_to_clear(lock, &state)) {
2834 return lck_mtx_lock_contended(lock, indirect, &first_miss);
2835 }
2836 }
2837
2838 /* no - can't be INDIRECT, DESTROYED or locked */
2839 while (__improbable(!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_MLOCKED_MSK, &state))) {
2840 if (!lck_mtx_lock_wait_interlock_to_clear(lock, &state)) {
2841 return lck_mtx_lock_contended(lock, indirect, &first_miss);
2842 }
2843 }
2844
2845 /* lock and interlock acquired */
2846
2847 thread_t thread = current_thread();
2848 /* record owner of mutex */
2849 ordered_store_mtx_owner(lock, (uintptr_t)thread);
2850
2851#if MACH_LDEBUG
2852 if (thread) {
cb323159 2853 thread->mutex_count++; /* lock statistic */
d9a64523
A
2854 }
2855#endif
2856 /*
2857 * Check if there are waiters to
2858 * inherit their priority.
2859 */
2860 if (__improbable(state & LCK_MTX_WAITERS_MSK)) {
cb323159 2861 return lck_mtx_lock_acquire_tail(lock, indirect, NULL);
d9a64523
A
2862 }
2863
2864 /* release the interlock */
2865 lck_mtx_lock_finish_inline(lock, ordered_load_mtx_state(lock), indirect);
2866
2867 return;
2868}
2869
2870__attribute__((noinline))
2871boolean_t
2872lck_mtx_try_lock_slow(
2873 lck_mtx_t *lock)
2874{
2875 boolean_t indirect = FALSE;
2876 uint32_t state;
2877 int first_miss = 0;
2878
2879 state = ordered_load_mtx_state(lock);
2880
2881 /* is the interlock or mutex held */
2882 if (__improbable(state & ((LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK)))) {
2883 /*
2884 * Note: both LCK_MTX_TAG_DESTROYED and LCK_MTX_TAG_INDIRECT
2885 * have LCK_MTX_ILOCKED_MSK and LCK_MTX_MLOCKED_MSK
2886 * set in state (state == lck_mtx_tag)
2887 */
2888
2889 /* is the mutex already held and not indirect */
cb323159 2890 if (__improbable(!(state & LCK_MTX_ILOCKED_MSK))){
d9a64523
A
2891 return FALSE;
2892 }
2893
2894 /* check to see if it is marked destroyed */
2895 if (__improbable(state == LCK_MTX_TAG_DESTROYED)) {
cb323159 2896 lck_mtx_try_destroyed(lock);
d9a64523
A
2897 }
2898
2899 /* Is this an indirect mutex? */
2900 if (__improbable(state == LCK_MTX_TAG_INDIRECT)) {
2901 indirect = get_indirect_mutex(&lock, &state);
2902
2903 first_miss = 0;
0a7de745 2904 lck_grp_mtx_update_held((struct _lck_mtx_ext_*)lock);
d9a64523
A
2905 }
2906
2907 if (!lck_mtx_try_lock_wait_interlock_to_clear(lock, &state)) {
cb323159 2908 if (indirect)
0a7de745 2909 lck_grp_mtx_update_miss((struct _lck_mtx_ext_*)lock, &first_miss);
d9a64523
A
2910 return FALSE;
2911 }
2912 }
2913
2914 /* no - can't be INDIRECT, DESTROYED or locked */
2915 while (__improbable(!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_MLOCKED_MSK, &state))) {
2916 if (!lck_mtx_try_lock_wait_interlock_to_clear(lock, &state)) {
cb323159 2917 if (indirect)
0a7de745 2918 lck_grp_mtx_update_miss((struct _lck_mtx_ext_*)lock, &first_miss);
d9a64523
A
2919 return FALSE;
2920 }
2921 }
2922
2923 /* lock and interlock acquired */
2924
2925 thread_t thread = current_thread();
2926 /* record owner of mutex */
2927 ordered_store_mtx_owner(lock, (uintptr_t)thread);
2928
2929#if MACH_LDEBUG
2930 if (thread) {
cb323159 2931 thread->mutex_count++; /* lock statistic */
d9a64523
A
2932 }
2933#endif
2934 /*
2935 * Check if there are waiters to
2936 * inherit their priority.
2937 */
2938 if (__improbable(state & LCK_MTX_WAITERS_MSK)) {
2939 return lck_mtx_try_lock_acquire_tail(lock);
2940 }
2941
2942 /* release the interlock */
2943 lck_mtx_try_lock_finish_inline(lock, ordered_load_mtx_state(lock));
2944
2945 return TRUE;
cb323159 2946
d9a64523
A
2947}
2948
2949__attribute__((noinline))
2950void
2951lck_mtx_lock_spin_slow(
cb323159 2952 lck_mtx_t *lock)
d9a64523
A
2953{
2954 boolean_t indirect = FALSE;
2955 uint32_t state;
2956 int first_miss = 0;
2957
2958 state = ordered_load_mtx_state(lock);
2959
2960 /* is the interlock or mutex held */
2961 if (__improbable(state & ((LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK)))) {
2962 /*
2963 * Note: both LCK_MTX_TAG_DESTROYED and LCK_MTX_TAG_INDIRECT
2964 * have LCK_MTX_ILOCKED_MSK and LCK_MTX_MLOCKED_MSK
2965 * set in state (state == lck_mtx_tag)
2966 */
2967
2968
2969 /* is the mutex already held and not indirect */
cb323159 2970 if (__improbable(!(state & LCK_MTX_ILOCKED_MSK))){
d9a64523
A
2971 /* no, must have been the mutex */
2972 return lck_mtx_lock_contended(lock, indirect, &first_miss);
2973 }
2974
2975 /* check to see if it is marked destroyed */
2976 if (__improbable(state == LCK_MTX_TAG_DESTROYED)) {
cb323159 2977 lck_mtx_destroyed(lock);
d9a64523
A
2978 }
2979
2980 /* Is this an indirect mutex? */
2981 if (__improbable(state == LCK_MTX_TAG_INDIRECT)) {
2982 indirect = get_indirect_mutex(&lock, &state);
2983
2984 first_miss = 0;
0a7de745 2985 lck_grp_mtx_update_held((struct _lck_mtx_ext_*)lock);
d9a64523
A
2986
2987 if (state & LCK_MTX_SPIN_MSK) {
cb323159 2988 /* LCK_MTX_SPIN_MSK was set, so LCK_MTX_ILOCKED_MSK must also be present */
d9a64523 2989 assert(state & LCK_MTX_ILOCKED_MSK);
0a7de745 2990 lck_grp_mtx_update_miss((struct _lck_mtx_ext_*)lock, &first_miss);
d9a64523
A
2991 }
2992 }
2993
2994 if (!lck_mtx_lock_wait_interlock_to_clear(lock, &state)) {
2995 return lck_mtx_lock_contended(lock, indirect, &first_miss);
2996 }
2997 }
2998
2999 /* no - can't be INDIRECT, DESTROYED or locked */
cb323159 3000 while (__improbable(!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_SPIN_MSK, &state) )) {
d9a64523
A
3001 if (!lck_mtx_lock_wait_interlock_to_clear(lock, &state)) {
3002 return lck_mtx_lock_contended(lock, indirect, &first_miss);
3003 }
3004 }
3005
3006 /* lock as spinlock and interlock acquired */
3007
3008 thread_t thread = current_thread();
3009 /* record owner of mutex */
3010 ordered_store_mtx_owner(lock, (uintptr_t)thread);
3011
3012#if MACH_LDEBUG
3013 if (thread) {
3014 thread->mutex_count++; /* lock statistic */
3015 }
3016#endif
3017
cb323159 3018#if CONFIG_DTRACE
d9a64523
A
3019 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN_ACQUIRE, lock, 0);
3020#endif
3021 /* return with the interlock held and preemption disabled */
3022 return;
3023}
3024
3025__attribute__((noinline))
3026boolean_t
3027lck_mtx_try_lock_spin_slow(
3028 lck_mtx_t *lock)
3029{
3030 boolean_t indirect = FALSE;
3031 uint32_t state;
3032 int first_miss = 0;
3033
3034 state = ordered_load_mtx_state(lock);
3035
3036 /* is the interlock or mutex held */
3037 if (__improbable(state & ((LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK)))) {
3038 /*
3039 * Note: both LCK_MTX_TAG_DESTROYED and LCK_MTX_TAG_INDIRECT
3040 * have LCK_MTX_ILOCKED_MSK and LCK_MTX_MLOCKED_MSK
3041 * set in state (state == lck_mtx_tag)
3042 */
3043
3044 /* is the mutex already held and not indirect */
cb323159 3045 if (__improbable(!(state & LCK_MTX_ILOCKED_MSK))){
d9a64523
A
3046 return FALSE;
3047 }
3048
3049 /* check to see if it is marked destroyed */
3050 if (__improbable(state == LCK_MTX_TAG_DESTROYED)) {
cb323159 3051 lck_mtx_try_destroyed(lock);
d9a64523
A
3052 }
3053
3054 /* Is this an indirect mutex? */
3055 if (__improbable(state == LCK_MTX_TAG_INDIRECT)) {
3056 indirect = get_indirect_mutex(&lock, &state);
3057
3058 first_miss = 0;
0a7de745 3059 lck_grp_mtx_update_held((struct _lck_mtx_ext_*)lock);
d9a64523
A
3060 }
3061
3062 if (!lck_mtx_try_lock_wait_interlock_to_clear(lock, &state)) {
cb323159 3063 if (indirect)
0a7de745 3064 lck_grp_mtx_update_miss((struct _lck_mtx_ext_*)lock, &first_miss);
d9a64523
A
3065 return FALSE;
3066 }
3067 }
3068
3069 /* no - can't be INDIRECT, DESTROYED or locked */
3070 while (__improbable(!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_SPIN_MSK, &state))) {
3071 if (!lck_mtx_try_lock_wait_interlock_to_clear(lock, &state)) {
cb323159 3072 if (indirect)
0a7de745 3073 lck_grp_mtx_update_miss((struct _lck_mtx_ext_*)lock, &first_miss);
d9a64523
A
3074 return FALSE;
3075 }
3076 }
3077
3078 /* lock and interlock acquired */
3079
3080 thread_t thread = current_thread();
3081 /* record owner of mutex */
3082 ordered_store_mtx_owner(lock, (uintptr_t)thread);
3083
3084#if MACH_LDEBUG
3085 if (thread) {
cb323159 3086 thread->mutex_count++; /* lock statistic */
d9a64523
A
3087 }
3088#endif
3089
3090#if CONFIG_DTRACE
3091 LOCKSTAT_RECORD(LS_LCK_MTX_TRY_SPIN_LOCK_ACQUIRE, lock, 0);
3092#endif
3093 return TRUE;
cb323159 3094
d9a64523
A
3095}
3096
3097__attribute__((noinline))
3098void
3099lck_mtx_convert_spin(
cb323159 3100 lck_mtx_t *lock)
d9a64523
A
3101{
3102 uint32_t state;
3103
3104 state = ordered_load_mtx_state(lock);
3105
3106 /* Is this an indirect mutex? */
3107 if (__improbable(state == LCK_MTX_TAG_INDIRECT)) {
3108 /* If so, take indirection */
3109 get_indirect_mutex(&lock, &state);
3110 }
3111
3112 assertf((thread_t)lock->lck_mtx_owner == current_thread(), "lock %p not owned by thread %p (current owner %p)", lock, current_thread(), (thread_t)lock->lck_mtx_owner );
3113
3114 if (__improbable(state & LCK_MTX_MLOCKED_MSK)) {
3115 /* already owned as a mutex, just return */
3116 return;
3117 }
3118
3119 assert(get_preemption_level() > 0);
3120 assert(state & LCK_MTX_ILOCKED_MSK);
3121 assert(state & LCK_MTX_SPIN_MSK);
3122
3123 /*
3124 * Check if there are waiters to
3125 * inherit their priority.
3126 */
3127 if (__improbable(state & LCK_MTX_WAITERS_MSK)) {
3128 return lck_mtx_convert_spin_acquire_tail(lock);
3129 }
3130
3131 lck_mtx_convert_spin_finish_inline(lock, ordered_load_mtx_state(lock));
3132
3133 return;
3134}
3135
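/*
 * Editorial sketch: take the mutex as a spin lock for a quick check, then
 * convert it (as implemented above) only when the longer, possibly blocking
 * work is actually needed.  Hypothetical helper and flag.
 */
static void
example_check_then_convert(lck_mtx_t *mtx, boolean_t need_long_work)
{
	lck_mtx_lock_spin(mtx);
	if (need_long_work) {
		lck_mtx_convert_spin(mtx);      /* now held as a full mutex; may block */
		/* ... potentially blocking work ... */
	}
	/* lck_mtx_unlock() handles both the spin-held and mutex-held cases */
	lck_mtx_unlock(mtx);
}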
3136static inline boolean_t
3137lck_mtx_lock_grab_mutex(
cb323159 3138 lck_mtx_t *lock)
d9a64523
A
3139{
3140 uint32_t state;
3141
3142 state = ordered_load_mtx_state(lock);
3143
3144 if (!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_MLOCKED_MSK, &state)) {
3145 return FALSE;
3146 }
3147
3148 /* lock and interlock acquired */
3149
3150 thread_t thread = current_thread();
3151 /* record owner of mutex */
3152 ordered_store_mtx_owner(lock, (uintptr_t)thread);
3153
3154#if MACH_LDEBUG
3155 if (thread) {
cb323159 3156 thread->mutex_count++; /* lock statistic */
d9a64523
A
3157 }
3158#endif
3159 return TRUE;
3160}
3161
3162__attribute__((noinline))
3163void
3164lck_mtx_assert(
cb323159
A
3165 lck_mtx_t *lock,
3166 unsigned int type)
d9a64523
A
3167{
3168 thread_t thread, owner;
3169 uint32_t state;
3170
3171 thread = current_thread();
3172 state = ordered_load_mtx_state(lock);
3173
3174 if (state == LCK_MTX_TAG_INDIRECT) {
3175 get_indirect_mutex(&lock, &state);
3176 }
3177
3178 owner = (thread_t)lock->lck_mtx_owner;
3179
3180 if (type == LCK_MTX_ASSERT_OWNED) {
cb323159 3181 if (owner != thread || !(state & (LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK)))
d9a64523
A
3182 panic("mutex (%p) not owned\n", lock);
3183 } else {
cb323159
A
3184 assert (type == LCK_MTX_ASSERT_NOTOWNED);
3185 if (owner == thread)
d9a64523
A
3186 panic("mutex (%p) owned\n", lock);
3187 }
3188}
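/*
 * Editorial sketch: typical use of lck_mtx_assert() above to enforce a
 * caller-documented locking requirement.  Hypothetical helper.
 */
static void
example_requires_mutex_held(lck_mtx_t *mtx)
{
	lck_mtx_assert(mtx, LCK_MTX_ASSERT_OWNED);
	/* ... code that must run with 'mtx' held ... */
}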
b0d623f7 3189
91447636 3190/*
cb323159 3191 * Routine: lck_mtx_lock_spinwait_x86
0c530ab8
A
3192 *
3193 * Invoked trying to acquire a mutex when there is contention but
3194 * the holder is running on another processor. We spin for up to a maximum
3195 * time waiting for the lock to be released.
3196 *
3197 * Called with the interlock unlocked.
d9a64523
A
3198 * returns LCK_MTX_SPINWAIT_ACQUIRED if mutex acquired
3199 * returns LCK_MTX_SPINWAIT_SPUN if we spun
3200 * returns LCK_MTX_SPINWAIT_NO_SPIN if we didn't spin due to the holder not running
0c530ab8 3201 */
d9a64523
A
3202__attribute__((noinline))
3203lck_mtx_spinwait_ret_type_t
b0d623f7 3204lck_mtx_lock_spinwait_x86(
cb323159 3205 lck_mtx_t *mutex)
0c530ab8 3206{
cb323159
A
3207 __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(mutex);
3208 thread_t holder;
3209 uint64_t overall_deadline;
3210 uint64_t check_owner_deadline;
3211 uint64_t cur_time;
3212 lck_mtx_spinwait_ret_type_t retval = LCK_MTX_SPINWAIT_SPUN;
3213 int loopcount = 0;
0c530ab8 3214
6d2010ae 3215 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_START,
cb323159 3216 trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner), mutex->lck_mtx_waiters, 0, 0);
0c530ab8 3217
3e170ce0
A
3218 cur_time = mach_absolute_time();
3219 overall_deadline = cur_time + MutexSpin;
3220 check_owner_deadline = cur_time;
b0d623f7 3221
0c530ab8
A
3222 /*
3223 * Spin while:
3224 * - mutex is locked, and
b0d623f7 3225 * - its locked as a spin lock, and
0c530ab8 3226 * - owner is running on another processor, and
2d21ac55 3227 * - owner (processor) is not idling, and
0c530ab8
A
3228 * - we haven't spun for long enough.
3229 */
b0d623f7 3230 do {
6d2010ae 3231 if (__probable(lck_mtx_lock_grab_mutex(mutex))) {
d9a64523 3232 retval = LCK_MTX_SPINWAIT_ACQUIRED;
b0d623f7 3233 break;
2d21ac55 3234 }
3e170ce0 3235 cur_time = mach_absolute_time();
b0d623f7 3236
cb323159 3237 if (cur_time >= overall_deadline)
3e170ce0
A
3238 break;
3239
3240 if (cur_time >= check_owner_deadline && mutex->lck_mtx_owner) {
cb323159 3241 boolean_t istate;
3e170ce0 3242
d9a64523
A
3243 /*
3244 * We will repeatedly peek at the state of the lock while spinning,
3245 * and we will acquire the interlock to do so.
3246 * The thread that will unlock the mutex will also need to acquire
 3247 * the interlock, and we want to avoid slowing it down.
 3248 * To avoid taking an interrupt while holding the interlock,
 3249 * which would increase the time we hold it, we
 3250 * will try to acquire the interlock with interrupts disabled.
 3251 * This is safe because it is a "try_lock": if we can't acquire
 3252 * the interlock we re-enable interrupts and fail, so it is
 3253 * ok to call it even if the interlock was already held.
cb323159 3254 */
d9a64523 3255 if (lck_mtx_interlock_try_lock_disable_interrupts(mutex, &istate)) {
cb323159 3256
3e170ce0 3257 if ((holder = (thread_t) mutex->lck_mtx_owner) != NULL) {
cb323159
A
3258
3259 if ( !(holder->machine.specFlags & OnProc) ||
3260 (holder->state & TH_IDLE)) {
3261
d9a64523 3262 lck_mtx_interlock_unlock_enable_interrupts(mutex, istate);
3e170ce0 3263
cb323159 3264 if (loopcount == 0)
d9a64523 3265 retval = LCK_MTX_SPINWAIT_NO_SPIN;
3e170ce0
A
3266 break;
3267 }
3268 }
d9a64523 3269 lck_mtx_interlock_unlock_enable_interrupts(mutex, istate);
3e170ce0
A
3270
3271 check_owner_deadline = cur_time + (MutexSpin / 4);
b0d623f7
A
3272 }
3273 }
3274 cpu_pause();
3275
3276 loopcount++;
cb323159 3277
3e170ce0 3278 } while (TRUE);
b0d623f7 3279
cb323159 3280#if CONFIG_DTRACE
2d21ac55 3281 /*
3e170ce0 3282 * We've already kept a count via overall_deadline of how long we spun.
2d21ac55
A
3283 * If dtrace is active, then we compute backwards to decide how
3284 * long we spun.
3285 *
3286 * Note that we record a different probe id depending on whether
cb323159 3287 * this is a direct or indirect mutex. This allows us to
2d21ac55
A
3288 * penalize only lock groups that have debug/stats enabled
3289 * with dtrace processing if desired.
3290 */
6d2010ae 3291 if (__probable(mutex->lck_mtx_is_ext == 0)) {
b0d623f7 3292 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN, mutex,
cb323159 3293 mach_absolute_time() - (overall_deadline - MutexSpin));
2d21ac55 3294 } else {
b0d623f7 3295 LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_SPIN, mutex,
cb323159 3296 mach_absolute_time() - (overall_deadline - MutexSpin));
2d21ac55
A
3297 }
3298 /* The lockstat acquire event is recorded by the assembly code beneath us. */
3299#endif
b0d623f7 3300
6d2010ae 3301 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_END,
cb323159 3302 trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner), mutex->lck_mtx_waiters, retval, 0);
b0d623f7
A
3303
3304 return retval;
0c530ab8
A
3305}
3306
b0d623f7
A
3307
3308
0c530ab8 3309/*
cb323159 3310 * Routine: lck_mtx_lock_wait_x86
b0d623f7
A
3311 *
3312 * Invoked in order to wait on contention.
3313 *
3314 * Called with the interlock locked and
d9a64523 3315 * preemption disabled...
6d2010ae 3316 * returns it unlocked and with preemption enabled
d9a64523
A
3317 *
3318 * lck_mtx_waiters is 1:1 with a wakeup needing to occur.
3319 * A runnable waiter can exist between wait and acquire
3320 * without a waiters count being set.
3321 * This allows us to never make a spurious wakeup call.
3322 *
3323 * Priority:
3324 * This avoids taking the thread lock if the owning thread is the same priority.
3325 * This optimizes the case of same-priority threads contending on a lock.
3326 * However, that allows the owning thread to drop in priority while holding the lock,
3327 * because there is no state that the priority change can notice that
3328 * says that the targeted thread holds a contended mutex.
3329 *
3330 * One possible solution: priority changes could look for some atomic tag
3331 * on the thread saying 'holding contended lock', and then set up a promotion.
3332 * Needs a story for dropping that promotion - the last contended unlock
3333 * has to notice that this has happened.
0c530ab8 3334 */
d9a64523 3335__attribute__((noinline))
0c530ab8 3336void
cb323159
A
3337lck_mtx_lock_wait_x86 (
3338 lck_mtx_t *mutex,
3339 struct turnstile **ts)
0c530ab8 3340{
cb323159
A
3341 thread_t self = current_thread();
3342
3343#if CONFIG_DTRACE
d9a64523 3344 uint64_t sleep_start = 0;
b0d623f7
A
3345
3346 if (lockstat_probemap[LS_LCK_MTX_LOCK_BLOCK] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_BLOCK]) {
3347 sleep_start = mach_absolute_time();
3348 }
3349#endif
d9a64523
A
3350 __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(mutex);
3351
6d2010ae 3352 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START,
cb323159
A
3353 trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner),
3354 mutex->lck_mtx_waiters, 0, 0);
b0d623f7 3355
cb323159
A
3356 assert(self->waiting_for_mutex == NULL);
3357 self->waiting_for_mutex = mutex;
3358 mutex->lck_mtx_waiters++;
39236c6e 3359
d9a64523 3360 thread_t holder = (thread_t)mutex->lck_mtx_owner;
d9a64523
A
3361 assert(holder != NULL);
3362
3363 /*
cb323159
A
 3364 * lck_mtx_lock_wait_x86 might be called in a loop. Call turnstile_prepare just once and reuse
 3365 * the same turnstile while looping; the matching turnstile_complete will be called
 3366 * by lck_mtx_lock_contended when the lock is finally acquired.
d9a64523 3367 */
cb323159
A
3368 if (*ts == NULL) {
3369 *ts = turnstile_prepare((uintptr_t)mutex, NULL, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
b0d623f7 3370 }
d9a64523 3371
cb323159 3372 struct turnstile *turnstile = *ts;
813fb2f6 3373 thread_set_pending_block_hint(self, kThreadWaitKernelMutex);
cb323159
A
3374 turnstile_update_inheritor(turnstile, holder, (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));
3375
3376 waitq_assert_wait64(&turnstile->ts_waitq, CAST_EVENT64_T(LCK_MTX_EVENT(mutex)), THREAD_UNINT | THREAD_WAIT_NOREPORT_USER, TIMEOUT_WAIT_FOREVER);
b0d623f7
A
3377
3378 lck_mtx_ilk_unlock(mutex);
3379
cb323159
A
3380 turnstile_update_inheritor_complete(turnstile, TURNSTILE_INTERLOCK_NOT_HELD);
3381
b0d623f7
A
3382 thread_block(THREAD_CONTINUE_NULL);
3383
d9a64523
A
3384 self->waiting_for_mutex = NULL;
3385
6d2010ae 3386 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_END,
cb323159
A
3387 trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner),
3388 mutex->lck_mtx_waiters, 0, 0);
b0d623f7 3389
cb323159 3390#if CONFIG_DTRACE
b0d623f7
A
3391 /*
3392 * Record the Dtrace lockstat probe for blocking, block time
3393 * measured from when we were entered.
3394 */
3395 if (sleep_start) {
6d2010ae 3396 if (mutex->lck_mtx_is_ext == 0) {
b0d623f7
A
3397 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_BLOCK, mutex,
3398 mach_absolute_time() - sleep_start);
3399 } else {
3400 LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_BLOCK, mutex,
3401 mach_absolute_time() - sleep_start);
3402 }
3403 }
3404#endif
0c530ab8 3405}
3e170ce0
A
3406
3407/*
3408 * Routine: kdp_lck_mtx_lock_spin_is_acquired
3409 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
3410 * Returns: TRUE if lock is acquired.
3411 */
3412boolean_t
cb323159 3413kdp_lck_mtx_lock_spin_is_acquired(lck_mtx_t *lck)
3e170ce0
A
3414{
3415 if (not_in_kdp) {
 3416 panic("kdp_lck_mtx_lock_spin_is_acquired called outside of kernel debugger");
3417 }
3418
39037602 3419 if (lck->lck_mtx_ilocked || lck->lck_mtx_mlocked) {
3e170ce0
A
3420 return TRUE;
3421 }
3422
3423 return FALSE;
3424}
3425
813fb2f6
A
3426void
3427kdp_lck_mtx_find_owner(__unused struct waitq * waitq, event64_t event, thread_waitinfo_t * waitinfo)
3428{
3429 lck_mtx_t * mutex = LCK_EVENT_TO_MUTEX(event);
3430 waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(mutex);
3431 thread_t holder = (thread_t)mutex->lck_mtx_owner;
3432 waitinfo->owner = thread_tid(holder);
3433}
3434
3435void
3436kdp_rwlck_find_owner(__unused struct waitq * waitq, event64_t event, thread_waitinfo_t * waitinfo)
3437{
3438 lck_rw_t *rwlck = NULL;
cb323159
A
3439 switch(waitinfo->wait_type) {
3440 case kThreadWaitKernelRWLockRead:
3441 rwlck = READ_EVENT_TO_RWLOCK(event);
3442 break;
3443 case kThreadWaitKernelRWLockWrite:
3444 case kThreadWaitKernelRWLockUpgrade:
3445 rwlck = WRITE_EVENT_TO_RWLOCK(event);
3446 break;
3447 default:
3448 panic("%s was called with an invalid blocking type", __FUNCTION__);
3449 break;
813fb2f6
A
3450 }
3451 waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(rwlck);
3452 waitinfo->owner = 0;
3453}