]> git.saurik.com Git - apple/xnu.git/blame - osfmk/arm/locks_arm.c
xnu-6153.141.1.tar.gz
[apple/xnu.git] / osfmk / arm / locks_arm.c
CommitLineData
5ba3f43e 1/*
cb323159 2 * Copyright (c) 2007-2018 Apple Inc. All rights reserved.
5ba3f43e
A
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System Copyright (c) 1991,1990,1989,1988,1987 Carnegie
33 * Mellon University All Rights Reserved.
0a7de745 34 *
5ba3f43e
A
35 * Permission to use, copy, modify and distribute this software and its
36 * documentation is hereby granted, provided that both the copyright notice
37 * and this permission notice appear in all copies of the software,
38 * derivative works or modified versions, and any portions thereof, and that
39 * both notices appear in supporting documentation.
0a7de745 40 *
5ba3f43e
A
41 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.
42 * CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES
43 * WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
0a7de745 44 *
5ba3f43e 45 * Carnegie Mellon requests users of this software to return to
0a7de745 46 *
5ba3f43e
A
47 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
48 * School of Computer Science Carnegie Mellon University Pittsburgh PA
49 * 15213-3890
0a7de745 50 *
5ba3f43e
A
51 * any improvements or extensions that they make and grant Carnegie Mellon the
52 * rights to redistribute these changes.
53 */
54/*
55 * File: kern/lock.c
56 * Author: Avadis Tevanian, Jr., Michael Wayne Young
57 * Date: 1985
58 *
59 * Locking primitives implementation
60 */
61
5ba3f43e
A
62#define LOCK_PRIVATE 1
63
64#include <mach_ldebug.h>
65
66#include <kern/kalloc.h>
0a7de745 67#include <kern/lock_stat.h>
5ba3f43e
A
68#include <kern/locks.h>
69#include <kern/misc_protos.h>
70#include <kern/thread.h>
71#include <kern/processor.h>
72#include <kern/sched_prim.h>
5ba3f43e
A
73#include <kern/debug.h>
74#include <kern/kcdata.h>
75#include <string.h>
ea3f0419
A
76#include <arm/cpu_internal.h>
77#include <os/hash.h>
78#include <arm/cpu_data.h>
5ba3f43e
A
79
80#include <arm/cpu_data_internal.h>
81#include <arm/proc_reg.h>
82#include <arm/smp.h>
83#include <machine/atomic.h>
84#include <machine/machine_cpu.h>
85
86#include <sys/kdebug.h>
87
0a7de745
A
88#if CONFIG_DTRACE
89#define DTRACE_RW_SHARED 0x0 //reader
90#define DTRACE_RW_EXCL 0x1 //writer
91#define DTRACE_NO_FLAG 0x0 //not applicable
92#endif /* CONFIG_DTRACE */
5ba3f43e 93
0a7de745
A
94#define LCK_RW_LCK_EXCLUSIVE_CODE 0x100
95#define LCK_RW_LCK_EXCLUSIVE1_CODE 0x101
96#define LCK_RW_LCK_SHARED_CODE 0x102
97#define LCK_RW_LCK_SH_TO_EX_CODE 0x103
98#define LCK_RW_LCK_SH_TO_EX1_CODE 0x104
99#define LCK_RW_LCK_EX_TO_SH_CODE 0x105
5ba3f43e
A
100
101
0a7de745 102#define ANY_LOCK_DEBUG (USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG)
5ba3f43e
A
103
104// Panic in tests that check lock usage correctness
105// These are undesirable when in a panic or a debugger is runnning.
106#define LOCK_CORRECTNESS_PANIC() (kernel_debugger_entry_count == 0)
107
108unsigned int LcksOpts = 0;
109
0a7de745
A
110#define ADAPTIVE_SPIN_ENABLE 0x1
111
112#if __SMP__
113int lck_mtx_adaptive_spin_mode = ADAPTIVE_SPIN_ENABLE;
114#else /* __SMP__ */
115int lck_mtx_adaptive_spin_mode = 0;
116#endif /* __SMP__ */
117
118#define SPINWAIT_OWNER_CHECK_COUNT 4
119
120typedef enum {
121 SPINWAIT_ACQUIRED, /* Got the lock. */
122 SPINWAIT_INTERLOCK, /* Got the interlock, no owner, but caller must finish acquiring the lock. */
ea3f0419
A
123 SPINWAIT_DID_SPIN_HIGH_THR, /* Got the interlock, spun, but failed to get the lock. */
124 SPINWAIT_DID_SPIN_OWNER_NOT_CORE, /* Got the interlock, spun, but failed to get the lock. */
125 SPINWAIT_DID_SPIN_NO_WINDOW_CONTENTION, /* Got the interlock, spun, but failed to get the lock. */
126 SPINWAIT_DID_SPIN_SLIDING_THR,/* Got the interlock, spun, but failed to get the lock. */
0a7de745
A
127 SPINWAIT_DID_NOT_SPIN, /* Got the interlock, did not spin. */
128} spinwait_result_t;
129
5ba3f43e
A
130#if CONFIG_DTRACE && __SMP__
131extern uint64_t dtrace_spin_threshold;
132#endif
133
134/* Forwards */
135
5ba3f43e
A
136extern unsigned int not_in_kdp;
137
138/*
139 * We often want to know the addresses of the callers
140 * of the various lock routines. However, this information
141 * is only used for debugging and statistics.
142 */
143typedef void *pc_t;
0a7de745
A
144#define INVALID_PC ((void *) VM_MAX_KERNEL_ADDRESS)
145#define INVALID_THREAD ((void *) VM_MAX_KERNEL_ADDRESS)
5ba3f43e 146
0a7de745 147#ifdef lint
5ba3f43e
A
148/*
149 * Eliminate lint complaints about unused local pc variables.
150 */
0a7de745
A
151#define OBTAIN_PC(pc, l) ++pc
152#else /* lint */
153#define OBTAIN_PC(pc, l)
154#endif /* lint */
5ba3f43e
A
155
156
157/*
158 * Portable lock package implementation of usimple_locks.
159 */
160
5ba3f43e
A
161/*
162 * Owner thread pointer when lock held in spin mode
163 */
164#define LCK_MTX_SPIN_TAG 0xfffffff0
165
166
0a7de745
A
167#define interlock_lock(lock) hw_lock_bit ((hw_lock_bit_t*)(&(lock)->lck_mtx_data), LCK_ILOCK_BIT, LCK_GRP_NULL)
168#define interlock_try(lock) hw_lock_bit_try((hw_lock_bit_t*)(&(lock)->lck_mtx_data), LCK_ILOCK_BIT, LCK_GRP_NULL)
169#define interlock_unlock(lock) hw_unlock_bit ((hw_lock_bit_t*)(&(lock)->lck_mtx_data), LCK_ILOCK_BIT)
170#define lck_rw_ilk_lock(lock) hw_lock_bit ((hw_lock_bit_t*)(&(lock)->lck_rw_tag), LCK_RW_INTERLOCK_BIT, LCK_GRP_NULL)
171#define lck_rw_ilk_unlock(lock) hw_unlock_bit((hw_lock_bit_t*)(&(lock)->lck_rw_tag), LCK_RW_INTERLOCK_BIT)
5ba3f43e 172
cb323159 173#define load_memory_barrier() os_atomic_thread_fence(acquire)
5ba3f43e
A
174
175// Enforce program order of loads and stores.
cb323159
A
176#define ordered_load(target) \
177 os_atomic_load(target, compiler_acq_rel)
178#define ordered_store(target, value) \
179 os_atomic_store(target, value, compiler_acq_rel)
180
181#define ordered_load_mtx(lock) ordered_load(&(lock)->lck_mtx_data)
182#define ordered_store_mtx(lock, value) ordered_store(&(lock)->lck_mtx_data, (value))
183#define ordered_load_rw(lock) ordered_load(&(lock)->lck_rw_data)
184#define ordered_store_rw(lock, value) ordered_store(&(lock)->lck_rw_data, (value))
185#define ordered_load_rw_owner(lock) ordered_load(&(lock)->lck_rw_owner)
186#define ordered_store_rw_owner(lock, value) ordered_store(&(lock)->lck_rw_owner, (value))
187#define ordered_load_hw(lock) ordered_load(&(lock)->lock_data)
188#define ordered_store_hw(lock, value) ordered_store(&(lock)->lock_data, (value))
189#define ordered_load_bit(lock) ordered_load((lock))
190#define ordered_store_bit(lock, value) ordered_store((lock), (value))
5ba3f43e
A
191
192
193// Prevent the compiler from reordering memory operations around this
0a7de745 194#define compiler_memory_fence() __asm__ volatile ("" ::: "memory")
5ba3f43e 195
0a7de745
A
196#define LOCK_PANIC_TIMEOUT 0xc00000
197#define NOINLINE __attribute__((noinline))
5ba3f43e
A
198
199
200#if __arm__
201#define interrupts_disabled(mask) (mask & PSR_INTMASK)
202#else
203#define interrupts_disabled(mask) (mask & DAIF_IRQF)
204#endif
205
206
207#if __arm__
0a7de745
A
208#define enable_fiq() __asm__ volatile ("cpsie f" ::: "memory");
209#define enable_interrupts() __asm__ volatile ("cpsie if" ::: "memory");
5ba3f43e
A
210#endif
211
212/*
213 * Forward declarations
214 */
215
216static void lck_rw_lock_shared_gen(lck_rw_t *lck);
217static void lck_rw_lock_exclusive_gen(lck_rw_t *lck);
218static boolean_t lck_rw_lock_shared_to_exclusive_success(lck_rw_t *lck);
219static boolean_t lck_rw_lock_shared_to_exclusive_failure(lck_rw_t *lck, uint32_t prior_lock_state);
220static void lck_rw_lock_exclusive_to_shared_gen(lck_rw_t *lck, uint32_t prior_lock_state);
221static lck_rw_type_t lck_rw_done_gen(lck_rw_t *lck, uint32_t prior_lock_state);
5ba3f43e
A
222static boolean_t lck_rw_grab(lck_rw_t *lock, int mode, boolean_t wait);
223
224/*
225 * atomic exchange API is a low level abstraction of the operations
226 * to atomically read, modify, and write a pointer. This abstraction works
227 * for both Intel and ARMv8.1 compare and exchange atomic instructions as
228 * well as the ARM exclusive instructions.
229 *
230 * atomic_exchange_begin() - begin exchange and retrieve current value
231 * atomic_exchange_complete() - conclude an exchange
232 * atomic_exchange_abort() - cancel an exchange started with atomic_exchange_begin()
233 */
cb323159
A
234__unused static uint32_t
235load_exclusive32(uint32_t *target, enum memory_order ord)
236{
237 uint32_t value;
238
239#if __arm__
240 if (memory_order_has_release(ord)) {
241 // Pre-load release barrier
242 atomic_thread_fence(memory_order_release);
243 }
244 value = __builtin_arm_ldrex(target);
245#else
246 if (memory_order_has_acquire(ord)) {
247 value = __builtin_arm_ldaex(target); // ldaxr
248 } else {
249 value = __builtin_arm_ldrex(target); // ldxr
250 }
251#endif // __arm__
252 return value;
253}
254
255__unused static boolean_t
256store_exclusive32(uint32_t *target, uint32_t value, enum memory_order ord)
257{
258 boolean_t err;
259
260#if __arm__
261 err = __builtin_arm_strex(value, target);
262 if (memory_order_has_acquire(ord)) {
263 // Post-store acquire barrier
264 atomic_thread_fence(memory_order_acquire);
265 }
266#else
267 if (memory_order_has_release(ord)) {
268 err = __builtin_arm_stlex(value, target); // stlxr
269 } else {
270 err = __builtin_arm_strex(value, target); // stxr
271 }
272#endif // __arm__
273 return !err;
274}
275
5ba3f43e
A
276static uint32_t
277atomic_exchange_begin32(uint32_t *target, uint32_t *previous, enum memory_order ord)
278{
0a7de745 279 uint32_t val;
5ba3f43e 280
cb323159
A
281#if __ARM_ATOMICS_8_1
282 ord = memory_order_relaxed;
283#endif
5ba3f43e
A
284 val = load_exclusive32(target, ord);
285 *previous = val;
286 return val;
287}
288
289static boolean_t
290atomic_exchange_complete32(uint32_t *target, uint32_t previous, uint32_t newval, enum memory_order ord)
291{
cb323159
A
292#if __ARM_ATOMICS_8_1
293 return __c11_atomic_compare_exchange_strong((_Atomic uint32_t *)target, &previous, newval, ord, memory_order_relaxed);
294#else
0a7de745 295 (void)previous; // Previous not needed, monitor is held
5ba3f43e 296 return store_exclusive32(target, newval, ord);
cb323159 297#endif
5ba3f43e
A
298}
299
300static void
301atomic_exchange_abort(void)
302{
cb323159 303 os_atomic_clear_exclusive();
5ba3f43e
A
304}
305
306static boolean_t
307atomic_test_and_set32(uint32_t *target, uint32_t test_mask, uint32_t set_mask, enum memory_order ord, boolean_t wait)
308{
0a7de745 309 uint32_t value, prev;
5ba3f43e 310
0a7de745 311 for (;;) {
5ba3f43e
A
312 value = atomic_exchange_begin32(target, &prev, ord);
313 if (value & test_mask) {
0a7de745
A
314 if (wait) {
315 wait_for_event(); // Wait with monitor held
316 } else {
317 atomic_exchange_abort(); // Clear exclusive monitor
318 }
5ba3f43e
A
319 return FALSE;
320 }
321 value |= set_mask;
0a7de745 322 if (atomic_exchange_complete32(target, prev, value, ord)) {
5ba3f43e 323 return TRUE;
0a7de745 324 }
5ba3f43e
A
325 }
326}
327
cb323159
A
328inline boolean_t
329hw_atomic_test_and_set32(uint32_t *target, uint32_t test_mask, uint32_t set_mask, enum memory_order ord, boolean_t wait)
330{
331 return atomic_test_and_set32(target, test_mask, set_mask, ord, wait);
332}
333
0a7de745
A
334void
335_disable_preemption(void)
5ba3f43e 336{
cb323159
A
337 thread_t thread = current_thread();
338 unsigned int count = thread->machine.preemption_count;
5ba3f43e 339
cb323159
A
340 count += 1;
341 if (__improbable(count == 0)) {
342 panic("Preemption count overflow");
343 }
344
345 os_atomic_store(&thread->machine.preemption_count, count, compiler_acq_rel);
5ba3f43e
A
346}
347
cb323159
A
348/*
349 * This function checks whether an AST_URGENT has been pended.
350 *
351 * It is called once the preemption has been reenabled, which means the thread
352 * may have been preempted right before this was called, and when this function
353 * actually performs the check, we've changed CPU.
354 *
355 * This race is however benign: the point of AST_URGENT is to trigger a context
356 * switch, so if one happened, there's nothing left to check for, and AST_URGENT
357 * was cleared in the process.
358 *
359 * It follows that this check cannot have false negatives, which allows us
360 * to avoid fiddling with interrupt state for the vast majority of cases
361 * when the check will actually be negative.
362 */
363static NOINLINE void
364kernel_preempt_check(thread_t thread)
5ba3f43e 365{
cb323159
A
366 cpu_data_t *cpu_data_ptr;
367 long state;
368
5ba3f43e
A
369#if __arm__
370#define INTERRUPT_MASK PSR_IRQF
0a7de745 371#else // __arm__
5ba3f43e 372#define INTERRUPT_MASK DAIF_IRQF
0a7de745 373#endif // __arm__
5ba3f43e 374
cb323159
A
375 /*
376 * This check is racy and could load from another CPU's pending_ast mask,
377 * but as described above, this can't have false negatives.
378 */
379 cpu_data_ptr = os_atomic_load(&thread->machine.CpuDatap, compiler_acq_rel);
380 if (__probable((cpu_data_ptr->cpu_pending_ast & AST_URGENT) == 0)) {
381 return;
0a7de745 382 }
cb323159
A
383
384 /* If interrupts are masked, we can't take an AST here */
385 state = get_interrupts();
386 if ((state & INTERRUPT_MASK) == 0) {
387 disable_interrupts_noread(); // Disable interrupts
388
389 /*
390 * Reload cpu_data_ptr: a context switch would cause it to change.
391 * Now that interrupts are disabled, this will debounce false positives.
392 */
393 cpu_data_ptr = os_atomic_load(&thread->machine.CpuDatap, compiler_acq_rel);
394 if (thread->machine.CpuDatap->cpu_pending_ast & AST_URGENT) {
5ba3f43e
A
395#if __arm__
396#if __ARM_USER_PROTECT__
cb323159 397 uintptr_t up = arm_user_protect_begin(thread);
0a7de745 398#endif // __ARM_USER_PROTECT__
cb323159 399 enable_fiq();
0a7de745 400#endif // __arm__
cb323159 401 ast_taken_kernel(); // Handle urgent AST
5ba3f43e
A
402#if __arm__
403#if __ARM_USER_PROTECT__
cb323159 404 arm_user_protect_end(thread, up, TRUE);
0a7de745 405#endif // __ARM_USER_PROTECT__
cb323159
A
406 enable_interrupts();
407 return; // Return early on arm only due to FIQ enabling
0a7de745 408#endif // __arm__
0a7de745 409 }
cb323159 410 restore_interrupts(state); // Enable interrupts
5ba3f43e 411 }
5ba3f43e 412}
5ba3f43e
A
413
414void
cb323159 415_enable_preemption(void)
5ba3f43e 416{
cb323159
A
417 thread_t thread = current_thread();
418 unsigned int count = thread->machine.preemption_count;
5ba3f43e 419
cb323159
A
420 if (__improbable(count == 0)) {
421 panic("Preemption count underflow");
0a7de745 422 }
cb323159 423 count -= 1;
5ba3f43e 424
cb323159
A
425 os_atomic_store(&thread->machine.preemption_count, count, compiler_acq_rel);
426 if (count == 0) {
427 kernel_preempt_check(thread);
0a7de745 428 }
d9a64523
A
429}
430
cb323159
A
431int
432get_preemption_level(void)
d9a64523 433{
cb323159 434 return current_thread()->machine.preemption_count;
d9a64523 435}
5ba3f43e
A
436
437/*
438 * Routine: lck_spin_alloc_init
439 */
440lck_spin_t *
441lck_spin_alloc_init(
0a7de745
A
442 lck_grp_t * grp,
443 lck_attr_t * attr)
5ba3f43e
A
444{
445 lck_spin_t *lck;
446
0a7de745 447 if ((lck = (lck_spin_t *) kalloc(sizeof(lck_spin_t))) != 0) {
5ba3f43e 448 lck_spin_init(lck, grp, attr);
0a7de745 449 }
5ba3f43e 450
0a7de745 451 return lck;
5ba3f43e
A
452}
453
454/*
455 * Routine: lck_spin_free
456 */
457void
458lck_spin_free(
0a7de745
A
459 lck_spin_t * lck,
460 lck_grp_t * grp)
5ba3f43e
A
461{
462 lck_spin_destroy(lck, grp);
0a7de745 463 kfree(lck, sizeof(lck_spin_t));
5ba3f43e
A
464}
465
466/*
467 * Routine: lck_spin_init
468 */
469void
470lck_spin_init(
0a7de745
A
471 lck_spin_t * lck,
472 lck_grp_t * grp,
473 __unused lck_attr_t * attr)
5ba3f43e 474{
5ba3f43e 475 lck->type = LCK_SPIN_TYPE;
cb323159
A
476 hw_lock_init(&lck->hwlock);
477 if (grp) {
478 lck_grp_reference(grp);
479 lck_grp_lckcnt_incr(grp, LCK_TYPE_SPIN);
480 }
5ba3f43e
A
481}
482
483/*
484 * arm_usimple_lock is a lck_spin_t without a group or attributes
485 */
486void inline
487arm_usimple_lock_init(simple_lock_t lck, __unused unsigned short initial_value)
488{
489 lck->type = LCK_SPIN_TYPE;
490 hw_lock_init(&lck->hwlock);
5ba3f43e
A
491}
492
493
494/*
495 * Routine: lck_spin_lock
496 */
497void
498lck_spin_lock(lck_spin_t *lock)
499{
0a7de745
A
500#if DEVELOPMENT || DEBUG
501 if (lock->type != LCK_SPIN_TYPE) {
502 panic("Invalid spinlock %p", lock);
503 }
504#endif // DEVELOPMENT || DEBUG
505 hw_lock_lock(&lock->hwlock, LCK_GRP_NULL);
506}
507
508void
509lck_spin_lock_grp(lck_spin_t *lock, lck_grp_t *grp)
510{
511#pragma unused(grp)
512#if DEVELOPMENT || DEBUG
513 if (lock->type != LCK_SPIN_TYPE) {
5ba3f43e 514 panic("Invalid spinlock %p", lock);
0a7de745
A
515 }
516#endif // DEVELOPMENT || DEBUG
517 hw_lock_lock(&lock->hwlock, grp);
5ba3f43e
A
518}
519
d9a64523
A
520/*
521 * Routine: lck_spin_lock_nopreempt
522 */
523void
524lck_spin_lock_nopreempt(lck_spin_t *lock)
525{
0a7de745
A
526#if DEVELOPMENT || DEBUG
527 if (lock->type != LCK_SPIN_TYPE) {
528 panic("Invalid spinlock %p", lock);
529 }
530#endif // DEVELOPMENT || DEBUG
531 hw_lock_lock_nopreempt(&lock->hwlock, LCK_GRP_NULL);
532}
533
534void
535lck_spin_lock_nopreempt_grp(lck_spin_t *lock, lck_grp_t *grp)
536{
537#pragma unused(grp)
538#if DEVELOPMENT || DEBUG
539 if (lock->type != LCK_SPIN_TYPE) {
d9a64523 540 panic("Invalid spinlock %p", lock);
0a7de745
A
541 }
542#endif // DEVELOPMENT || DEBUG
543 hw_lock_lock_nopreempt(&lock->hwlock, grp);
d9a64523
A
544}
545
5ba3f43e
A
546/*
547 * Routine: lck_spin_try_lock
548 */
549int
550lck_spin_try_lock(lck_spin_t *lock)
551{
0a7de745
A
552 return hw_lock_try(&lock->hwlock, LCK_GRP_NULL);
553}
554
555int
556lck_spin_try_lock_grp(lck_spin_t *lock, lck_grp_t *grp)
557{
558#pragma unused(grp)
559 return hw_lock_try(&lock->hwlock, grp);
5ba3f43e
A
560}
561
d9a64523
A
562/*
563 * Routine: lck_spin_try_lock_nopreempt
564 */
565int
566lck_spin_try_lock_nopreempt(lck_spin_t *lock)
567{
0a7de745
A
568 return hw_lock_try_nopreempt(&lock->hwlock, LCK_GRP_NULL);
569}
570
571int
572lck_spin_try_lock_nopreempt_grp(lck_spin_t *lock, lck_grp_t *grp)
573{
574#pragma unused(grp)
575 return hw_lock_try_nopreempt(&lock->hwlock, grp);
d9a64523
A
576}
577
5ba3f43e
A
578/*
579 * Routine: lck_spin_unlock
580 */
581void
582lck_spin_unlock(lck_spin_t *lock)
583{
0a7de745
A
584#if DEVELOPMENT || DEBUG
585 if ((LCK_MTX_STATE_TO_THREAD(lock->lck_spin_data) != current_thread()) && LOCK_CORRECTNESS_PANIC()) {
5ba3f43e 586 panic("Spinlock not owned by thread %p = %lx", lock, lock->lck_spin_data);
0a7de745
A
587 }
588 if (lock->type != LCK_SPIN_TYPE) {
5ba3f43e 589 panic("Invalid spinlock type %p", lock);
0a7de745
A
590 }
591#endif // DEVELOPMENT || DEBUG
5ba3f43e
A
592 hw_lock_unlock(&lock->hwlock);
593}
594
d9a64523
A
595/*
596 * Routine: lck_spin_unlock_nopreempt
597 */
598void
599lck_spin_unlock_nopreempt(lck_spin_t *lock)
600{
0a7de745
A
601#if DEVELOPMENT || DEBUG
602 if ((LCK_MTX_STATE_TO_THREAD(lock->lck_spin_data) != current_thread()) && LOCK_CORRECTNESS_PANIC()) {
d9a64523 603 panic("Spinlock not owned by thread %p = %lx", lock, lock->lck_spin_data);
0a7de745
A
604 }
605 if (lock->type != LCK_SPIN_TYPE) {
d9a64523 606 panic("Invalid spinlock type %p", lock);
0a7de745
A
607 }
608#endif // DEVELOPMENT || DEBUG
d9a64523
A
609 hw_lock_unlock_nopreempt(&lock->hwlock);
610}
611
5ba3f43e
A
612/*
613 * Routine: lck_spin_destroy
614 */
615void
616lck_spin_destroy(
0a7de745
A
617 lck_spin_t * lck,
618 lck_grp_t * grp)
5ba3f43e 619{
0a7de745 620 if (lck->lck_spin_data == LCK_SPIN_TAG_DESTROYED) {
5ba3f43e 621 return;
0a7de745 622 }
5ba3f43e 623 lck->lck_spin_data = LCK_SPIN_TAG_DESTROYED;
cb323159
A
624 if (grp) {
625 lck_grp_lckcnt_decr(grp, LCK_TYPE_SPIN);
626 lck_grp_deallocate(grp);
627 }
5ba3f43e
A
628}
629
630/*
631 * Routine: kdp_lck_spin_is_acquired
632 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
633 */
634boolean_t
0a7de745
A
635kdp_lck_spin_is_acquired(lck_spin_t *lck)
636{
5ba3f43e
A
637 if (not_in_kdp) {
638 panic("panic: spinlock acquired check done outside of kernel debugger");
639 }
640 return ((lck->lck_spin_data & ~LCK_SPIN_TAG_DESTROYED) != 0) ? TRUE:FALSE;
641}
642
643/*
644 * Initialize a usimple_lock.
645 *
646 * No change in preemption state.
647 */
648void
649usimple_lock_init(
0a7de745
A
650 usimple_lock_t l,
651 unsigned short tag)
5ba3f43e 652{
5ba3f43e 653 simple_lock_init((simple_lock_t) l, tag);
5ba3f43e
A
654}
655
656
657/*
658 * Acquire a usimple_lock.
659 *
660 * Returns with preemption disabled. Note
661 * that the hw_lock routines are responsible for
662 * maintaining preemption state.
663 */
664void
0a7de745
A
665(usimple_lock)(
666 usimple_lock_t l
667 LCK_GRP_ARG(lck_grp_t *grp))
5ba3f43e 668{
0a7de745 669 simple_lock((simple_lock_t) l, LCK_GRP_PROBEARG(grp));
5ba3f43e
A
670}
671
672
673extern void sync(void);
674
675/*
676 * Release a usimple_lock.
677 *
678 * Returns with preemption enabled. Note
679 * that the hw_lock routines are responsible for
680 * maintaining preemption state.
681 */
682void
0a7de745
A
683(usimple_unlock)(
684 usimple_lock_t l)
5ba3f43e 685{
0a7de745 686 simple_unlock((simple_lock_t)l);
5ba3f43e
A
687}
688
689
690/*
691 * Conditionally acquire a usimple_lock.
692 *
693 * On success, returns with preemption disabled.
694 * On failure, returns with preemption in the same state
695 * as when first invoked. Note that the hw_lock routines
696 * are responsible for maintaining preemption state.
697 *
698 * XXX No stats are gathered on a miss; I preserved this
699 * behavior from the original assembly-language code, but
700 * doesn't it make sense to log misses? XXX
701 */
0a7de745
A
702unsigned
703int
704(usimple_lock_try)(
705 usimple_lock_t l
706 LCK_GRP_ARG(lck_grp_t *grp))
5ba3f43e 707{
0a7de745 708 return simple_lock_try((simple_lock_t) l, grp);
5ba3f43e
A
709}
710
5ba3f43e
A
711/*
712 * The C portion of the shared/exclusive locks package.
713 */
714
715/*
716 * compute the deadline to spin against when
717 * waiting for a change of state on a lck_rw_t
718 */
0a7de745 719#if __SMP__
5ba3f43e
A
720static inline uint64_t
721lck_rw_deadline_for_spin(lck_rw_t *lck)
722{
0a7de745 723 lck_rw_word_t word;
5ba3f43e
A
724
725 word.data = ordered_load_rw(lck);
726 if (word.can_sleep) {
727 if (word.r_waiting || word.w_waiting || (word.shared_count > machine_info.max_cpus)) {
728 /*
729 * there are already threads waiting on this lock... this
730 * implies that they have spun beyond their deadlines waiting for
731 * the desired state to show up so we will not bother spinning at this time...
732 * or
733 * the current number of threads sharing this lock exceeds our capacity to run them
734 * concurrently and since all states we're going to spin for require the rw_shared_count
735 * to be at 0, we'll not bother spinning since the latency for this to happen is
736 * unpredictable...
737 */
0a7de745 738 return mach_absolute_time();
5ba3f43e 739 }
0a7de745
A
740 return mach_absolute_time() + MutexSpin;
741 } else {
742 return mach_absolute_time() + (100000LL * 1000000000LL);
743 }
5ba3f43e 744}
0a7de745 745#endif // __SMP__
5ba3f43e
A
746
747static boolean_t
748lck_rw_drain_status(lck_rw_t *lock, uint32_t status_mask, boolean_t wait __unused)
749{
0a7de745
A
750#if __SMP__
751 uint64_t deadline = 0;
752 uint32_t data;
5ba3f43e 753
0a7de745 754 if (wait) {
5ba3f43e 755 deadline = lck_rw_deadline_for_spin(lock);
0a7de745 756 }
5ba3f43e 757
0a7de745 758 for (;;) {
5ba3f43e 759 data = load_exclusive32(&lock->lck_rw_data, memory_order_acquire_smp);
0a7de745 760 if ((data & status_mask) == 0) {
5ba3f43e 761 break;
0a7de745
A
762 }
763 if (wait) {
5ba3f43e 764 wait_for_event();
0a7de745 765 } else {
cb323159 766 os_atomic_clear_exclusive();
0a7de745
A
767 }
768 if (!wait || (mach_absolute_time() >= deadline)) {
5ba3f43e 769 return FALSE;
0a7de745 770 }
5ba3f43e 771 }
cb323159 772 os_atomic_clear_exclusive();
5ba3f43e
A
773 return TRUE;
774#else
0a7de745 775 uint32_t data;
5ba3f43e
A
776
777 data = ordered_load_rw(lock);
0a7de745 778 if ((data & status_mask) == 0) {
5ba3f43e 779 return TRUE;
0a7de745 780 } else {
5ba3f43e 781 return FALSE;
0a7de745
A
782 }
783#endif // __SMP__
5ba3f43e
A
784}
785
786/*
787 * Spin while interlock is held.
788 */
789static inline void
790lck_rw_interlock_spin(lck_rw_t *lock)
791{
792#if __SMP__
0a7de745 793 uint32_t data;
5ba3f43e 794
0a7de745 795 for (;;) {
5ba3f43e 796 data = load_exclusive32(&lock->lck_rw_data, memory_order_relaxed);
0a7de745 797 if (data & LCK_RW_INTERLOCK) {
5ba3f43e 798 wait_for_event();
0a7de745 799 } else {
cb323159 800 os_atomic_clear_exclusive();
5ba3f43e
A
801 return;
802 }
803 }
804#else
805 panic("lck_rw_interlock_spin(): Interlock locked %p %x", lock, lock->lck_rw_data);
806#endif
807}
808
809/*
810 * We disable interrupts while holding the RW interlock to prevent an
811 * interrupt from exacerbating hold time.
812 * Hence, local helper functions lck_interlock_lock()/lck_interlock_unlock().
813 */
814static inline boolean_t
815lck_interlock_lock(lck_rw_t *lck)
816{
0a7de745 817 boolean_t istate;
5ba3f43e 818
0a7de745 819 istate = ml_set_interrupts_enabled(FALSE);
5ba3f43e
A
820 lck_rw_ilk_lock(lck);
821 return istate;
822}
823
824static inline void
825lck_interlock_unlock(lck_rw_t *lck, boolean_t istate)
826{
827 lck_rw_ilk_unlock(lck);
828 ml_set_interrupts_enabled(istate);
829}
830
831
0a7de745
A
832#define LCK_RW_GRAB_WANT 0
833#define LCK_RW_GRAB_SHARED 1
5ba3f43e
A
834
835static boolean_t
836lck_rw_grab(lck_rw_t *lock, int mode, boolean_t wait)
837{
0a7de745
A
838 uint64_t deadline = 0;
839 uint32_t data, prev;
840 boolean_t do_exch;
5ba3f43e
A
841
842#if __SMP__
0a7de745 843 if (wait) {
5ba3f43e 844 deadline = lck_rw_deadline_for_spin(lock);
0a7de745 845 }
5ba3f43e 846#else
0a7de745 847 wait = FALSE; // Don't spin on UP systems
5ba3f43e
A
848#endif
849
0a7de745 850 for (;;) {
5ba3f43e
A
851 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
852 if (data & LCK_RW_INTERLOCK) {
853 atomic_exchange_abort();
854 lck_rw_interlock_spin(lock);
855 continue;
856 }
857 do_exch = FALSE;
858 if (mode == LCK_RW_GRAB_WANT) {
859 if ((data & LCK_RW_WANT_EXCL) == 0) {
860 data |= LCK_RW_WANT_EXCL;
861 do_exch = TRUE;
862 }
0a7de745 863 } else { // LCK_RW_GRAB_SHARED
5ba3f43e 864 if (((data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) == 0) ||
0a7de745 865 (((data & LCK_RW_SHARED_MASK)) && ((data & LCK_RW_PRIV_EXCL) == 0))) {
5ba3f43e
A
866 data += LCK_RW_SHARED_READER;
867 do_exch = TRUE;
868 }
869 }
870 if (do_exch) {
0a7de745 871 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
5ba3f43e 872 return TRUE;
0a7de745 873 }
5ba3f43e 874 } else {
0a7de745 875 if (wait) { // Non-waiting
5ba3f43e 876 wait_for_event();
0a7de745 877 } else {
5ba3f43e 878 atomic_exchange_abort();
0a7de745
A
879 }
880 if (!wait || (mach_absolute_time() >= deadline)) {
5ba3f43e 881 return FALSE;
0a7de745 882 }
5ba3f43e
A
883 }
884 }
885}
886
887
888/*
889 * Routine: lck_rw_alloc_init
890 */
891lck_rw_t *
892lck_rw_alloc_init(
0a7de745
A
893 lck_grp_t *grp,
894 lck_attr_t *attr)
5ba3f43e 895{
0a7de745 896 lck_rw_t *lck;
5ba3f43e 897
0a7de745 898 if ((lck = (lck_rw_t *)kalloc(sizeof(lck_rw_t))) != 0) {
5ba3f43e 899 lck_rw_init(lck, grp, attr);
0a7de745 900 }
5ba3f43e
A
901
902 return lck;
903}
904
905/*
906 * Routine: lck_rw_free
907 */
908void
909lck_rw_free(
0a7de745
A
910 lck_rw_t *lck,
911 lck_grp_t *grp)
5ba3f43e
A
912{
913 lck_rw_destroy(lck, grp);
914 kfree(lck, sizeof(lck_rw_t));
915}
916
917/*
918 * Routine: lck_rw_init
919 */
920void
921lck_rw_init(
0a7de745
A
922 lck_rw_t *lck,
923 lck_grp_t *grp,
924 lck_attr_t *attr)
5ba3f43e 925{
0a7de745 926 if (attr == LCK_ATTR_NULL) {
5ba3f43e 927 attr = &LockDefaultLckAttr;
0a7de745 928 }
5ba3f43e
A
929 memset(lck, 0, sizeof(lck_rw_t));
930 lck->lck_rw_can_sleep = TRUE;
0a7de745 931 if ((attr->lck_attr_val & LCK_ATTR_RW_SHARED_PRIORITY) == 0) {
5ba3f43e 932 lck->lck_rw_priv_excl = TRUE;
0a7de745 933 }
5ba3f43e
A
934
935 lck_grp_reference(grp);
936 lck_grp_lckcnt_incr(grp, LCK_TYPE_RW);
937}
938
939
940/*
941 * Routine: lck_rw_destroy
942 */
943void
944lck_rw_destroy(
0a7de745
A
945 lck_rw_t *lck,
946 lck_grp_t *grp)
5ba3f43e 947{
0a7de745 948 if (lck->lck_rw_tag == LCK_RW_TAG_DESTROYED) {
5ba3f43e 949 return;
0a7de745 950 }
5ba3f43e
A
951#if MACH_LDEBUG
952 lck_rw_assert(lck, LCK_RW_ASSERT_NOTHELD);
953#endif
954 lck->lck_rw_tag = LCK_RW_TAG_DESTROYED;
955 lck_grp_lckcnt_decr(grp, LCK_TYPE_RW);
956 lck_grp_deallocate(grp);
957 return;
958}
959
960/*
961 * Routine: lck_rw_lock
962 */
963void
964lck_rw_lock(
0a7de745
A
965 lck_rw_t *lck,
966 lck_rw_type_t lck_rw_type)
5ba3f43e 967{
0a7de745 968 if (lck_rw_type == LCK_RW_TYPE_SHARED) {
5ba3f43e 969 lck_rw_lock_shared(lck);
0a7de745 970 } else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) {
5ba3f43e 971 lck_rw_lock_exclusive(lck);
0a7de745 972 } else {
5ba3f43e 973 panic("lck_rw_lock(): Invalid RW lock type: %x", lck_rw_type);
0a7de745 974 }
5ba3f43e
A
975}
976
977/*
978 * Routine: lck_rw_lock_exclusive
979 */
980void
981lck_rw_lock_exclusive(lck_rw_t *lock)
982{
0a7de745 983 thread_t thread = current_thread();
5ba3f43e
A
984
985 thread->rwlock_count++;
986 if (atomic_test_and_set32(&lock->lck_rw_data,
0a7de745
A
987 (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK),
988 LCK_RW_WANT_EXCL, memory_order_acquire_smp, FALSE)) {
989#if CONFIG_DTRACE
5ba3f43e 990 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
0a7de745
A
991#endif /* CONFIG_DTRACE */
992 } else {
5ba3f43e 993 lck_rw_lock_exclusive_gen(lock);
0a7de745 994 }
5ba3f43e
A
995#if MACH_ASSERT
996 thread_t owner = ordered_load_rw_owner(lock);
997 assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
998#endif
999 ordered_store_rw_owner(lock, thread);
1000}
1001
1002/*
1003 * Routine: lck_rw_lock_shared
1004 */
1005void
1006lck_rw_lock_shared(lck_rw_t *lock)
1007{
0a7de745 1008 uint32_t data, prev;
5ba3f43e
A
1009
1010 current_thread()->rwlock_count++;
0a7de745 1011 for (;;) {
5ba3f43e
A
1012 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
1013 if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK)) {
1014 atomic_exchange_abort();
1015 lck_rw_lock_shared_gen(lock);
1016 break;
1017 }
1018 data += LCK_RW_SHARED_READER;
0a7de745 1019 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
5ba3f43e 1020 break;
0a7de745 1021 }
5ba3f43e
A
1022 cpu_pause();
1023 }
1024#if MACH_ASSERT
1025 thread_t owner = ordered_load_rw_owner(lock);
1026 assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
1027#endif
0a7de745 1028#if CONFIG_DTRACE
5ba3f43e 1029 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED);
0a7de745 1030#endif /* CONFIG_DTRACE */
5ba3f43e
A
1031 return;
1032}
1033
1034/*
1035 * Routine: lck_rw_lock_shared_to_exclusive
cb323159
A
1036 *
1037 * False returned upon failure, in this case the shared lock is dropped.
5ba3f43e
A
1038 */
1039boolean_t
1040lck_rw_lock_shared_to_exclusive(lck_rw_t *lock)
1041{
0a7de745 1042 uint32_t data, prev;
5ba3f43e 1043
0a7de745 1044 for (;;) {
5ba3f43e
A
1045 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
1046 if (data & LCK_RW_INTERLOCK) {
1047 atomic_exchange_abort();
1048 lck_rw_interlock_spin(lock);
1049 continue;
1050 }
1051 if (data & LCK_RW_WANT_UPGRADE) {
1052 data -= LCK_RW_SHARED_READER;
0a7de745
A
1053 if ((data & LCK_RW_SHARED_MASK) == 0) { /* we were the last reader */
1054 data &= ~(LCK_RW_W_WAITING); /* so clear the wait indicator */
1055 }
1056 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
5ba3f43e 1057 return lck_rw_lock_shared_to_exclusive_failure(lock, prev);
0a7de745 1058 }
5ba3f43e 1059 } else {
0a7de745
A
1060 data |= LCK_RW_WANT_UPGRADE; /* ask for WANT_UPGRADE */
1061 data -= LCK_RW_SHARED_READER; /* and shed our read count */
1062 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
5ba3f43e 1063 break;
0a7de745 1064 }
5ba3f43e
A
1065 }
1066 cpu_pause();
1067 }
0a7de745
A
1068 /* we now own the WANT_UPGRADE */
1069 if (data & LCK_RW_SHARED_MASK) { /* check to see if all of the readers are drained */
1070 lck_rw_lock_shared_to_exclusive_success(lock); /* if not, we need to go wait */
1071 }
5ba3f43e
A
1072#if MACH_ASSERT
1073 thread_t owner = ordered_load_rw_owner(lock);
1074 assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
1075#endif
1076 ordered_store_rw_owner(lock, current_thread());
0a7de745 1077#if CONFIG_DTRACE
5ba3f43e 1078 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lock, 0);
0a7de745 1079#endif /* CONFIG_DTRACE */
5ba3f43e
A
1080 return TRUE;
1081}
1082
1083
1084/*
1085 * Routine: lck_rw_lock_shared_to_exclusive_failure
1086 * Function:
1087 * Fast path code has already dropped our read
1088 * count and determined that someone else owns 'lck_rw_want_upgrade'
1089 * if 'lck_rw_shared_count' == 0, its also already dropped 'lck_w_waiting'
1090 * all we need to do here is determine if a wakeup is needed
1091 */
1092static boolean_t
1093lck_rw_lock_shared_to_exclusive_failure(
0a7de745
A
1094 lck_rw_t *lck,
1095 uint32_t prior_lock_state)
5ba3f43e 1096{
0a7de745
A
1097 thread_t thread = current_thread();
1098 uint32_t rwlock_count;
5ba3f43e
A
1099
1100 /* Check if dropping the lock means that we need to unpromote */
1101 rwlock_count = thread->rwlock_count--;
1102#if MACH_LDEBUG
1103 if (rwlock_count == 0) {
1104 panic("rw lock count underflow for thread %p", thread);
1105 }
1106#endif
1107 if ((prior_lock_state & LCK_RW_W_WAITING) &&
0a7de745 1108 ((prior_lock_state & LCK_RW_SHARED_MASK) == LCK_RW_SHARED_READER)) {
5ba3f43e
A
1109 /*
1110 * Someone else has requested upgrade.
1111 * Since we've released the read lock, wake
1112 * him up if he's blocked waiting
1113 */
1114 thread_wakeup(LCK_RW_WRITER_EVENT(lck));
1115 }
1116
1117 if ((rwlock_count == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
1118 /* sched_flags checked without lock, but will be rechecked while clearing */
d9a64523 1119 lck_rw_clear_promotion(thread, unslide_for_kdebug(lck));
5ba3f43e
A
1120 }
1121
1122 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_NONE,
0a7de745 1123 VM_KERNEL_UNSLIDE_OR_PERM(lck), lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0);
5ba3f43e 1124
0a7de745 1125 return FALSE;
5ba3f43e
A
1126}
1127
1128/*
1129 * Routine: lck_rw_lock_shared_to_exclusive_success
1130 * Function:
1131 * assembly fast path code has already dropped our read
1132 * count and successfully acquired 'lck_rw_want_upgrade'
1133 * we just need to wait for the rest of the readers to drain
1134 * and then we can return as the exclusive holder of this lock
1135 */
1136static boolean_t
1137lck_rw_lock_shared_to_exclusive_success(
0a7de745
A
1138 lck_rw_t *lock)
1139{
1140 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock);
1141 int slept = 0;
1142 lck_rw_word_t word;
1143 wait_result_t res;
1144 boolean_t istate;
1145 boolean_t not_shared;
1146
1147#if CONFIG_DTRACE
1148 uint64_t wait_interval = 0;
1149 int readers_at_sleep = 0;
1150 boolean_t dtrace_ls_initialized = FALSE;
1151 boolean_t dtrace_rwl_shared_to_excl_spin, dtrace_rwl_shared_to_excl_block, dtrace_ls_enabled = FALSE;
5ba3f43e
A
1152#endif
1153
1154 while (!lck_rw_drain_status(lock, LCK_RW_SHARED_MASK, FALSE)) {
5ba3f43e 1155 word.data = ordered_load_rw(lock);
0a7de745 1156#if CONFIG_DTRACE
5ba3f43e
A
1157 if (dtrace_ls_initialized == FALSE) {
1158 dtrace_ls_initialized = TRUE;
1159 dtrace_rwl_shared_to_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN] != 0);
1160 dtrace_rwl_shared_to_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK] != 0);
1161 dtrace_ls_enabled = dtrace_rwl_shared_to_excl_spin || dtrace_rwl_shared_to_excl_block;
1162 if (dtrace_ls_enabled) {
1163 /*
1164 * Either sleeping or spinning is happening,
1165 * start a timing of our delay interval now.
1166 */
1167 readers_at_sleep = word.shared_count;
1168 wait_interval = mach_absolute_time();
1169 }
1170 }
1171#endif
1172
1173 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_START,
0a7de745 1174 trace_lck, word.shared_count, 0, 0, 0);
5ba3f43e
A
1175
1176 not_shared = lck_rw_drain_status(lock, LCK_RW_SHARED_MASK, TRUE);
1177
1178 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_END,
0a7de745 1179 trace_lck, lock->lck_rw_shared_count, 0, 0, 0);
5ba3f43e 1180
0a7de745 1181 if (not_shared) {
5ba3f43e 1182 break;
0a7de745 1183 }
5ba3f43e
A
1184
1185 /*
1186 * if we get here, the spin deadline in lck_rw_wait_on_status()
1187 * has expired w/o the rw_shared_count having drained to 0
1188 * check to see if we're allowed to do a thread_block
1189 */
1190 if (word.can_sleep) {
5ba3f43e 1191 istate = lck_interlock_lock(lock);
0a7de745 1192
5ba3f43e
A
1193 word.data = ordered_load_rw(lock);
1194 if (word.shared_count != 0) {
1195 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_START,
0a7de745 1196 trace_lck, word.shared_count, 0, 0, 0);
5ba3f43e
A
1197
1198 word.w_waiting = 1;
1199 ordered_store_rw(lock, word.data);
1200
1201 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockUpgrade);
d9a64523 1202 res = assert_wait(LCK_RW_WRITER_EVENT(lock),
0a7de745 1203 THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
5ba3f43e
A
1204 lck_interlock_unlock(lock, istate);
1205
1206 if (res == THREAD_WAITING) {
1207 res = thread_block(THREAD_CONTINUE_NULL);
1208 slept++;
1209 }
1210 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_END,
0a7de745 1211 trace_lck, res, slept, 0, 0);
5ba3f43e
A
1212 } else {
1213 lck_interlock_unlock(lock, istate);
1214 break;
1215 }
1216 }
1217 }
0a7de745 1218#if CONFIG_DTRACE
5ba3f43e
A
1219 /*
1220 * We infer whether we took the sleep/spin path above by checking readers_at_sleep.
1221 */
1222 if (dtrace_ls_enabled == TRUE) {
1223 if (slept == 0) {
0a7de745 1224 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN, lock, mach_absolute_time() - wait_interval, 0);
5ba3f43e 1225 } else {
0a7de745 1226 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK, lock,
5ba3f43e
A
1227 mach_absolute_time() - wait_interval, 1,
1228 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1229 }
1230 }
1231 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lock, 1);
1232#endif
0a7de745 1233 return TRUE;
5ba3f43e
A
1234}
1235
1236
1237/*
1238 * Routine: lck_rw_lock_exclusive_to_shared
1239 */
1240
0a7de745
A
1241void
1242lck_rw_lock_exclusive_to_shared(lck_rw_t *lock)
5ba3f43e 1243{
0a7de745 1244 uint32_t data, prev;
5ba3f43e
A
1245
1246 assertf(lock->lck_rw_owner == current_thread(), "state=0x%x, owner=%p", lock->lck_rw_data, lock->lck_rw_owner);
1247 ordered_store_rw_owner(lock, THREAD_NULL);
0a7de745 1248 for (;;) {
5ba3f43e
A
1249 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_release_smp);
1250 if (data & LCK_RW_INTERLOCK) {
1251#if __SMP__
1252 atomic_exchange_abort();
0a7de745 1253 lck_rw_interlock_spin(lock); /* wait for interlock to clear */
5ba3f43e
A
1254 continue;
1255#else
1256 panic("lck_rw_lock_exclusive_to_shared(): Interlock locked (%p): %x", lock, data);
1257#endif // __SMP__
1258 }
1259 data += LCK_RW_SHARED_READER;
0a7de745 1260 if (data & LCK_RW_WANT_UPGRADE) {
5ba3f43e 1261 data &= ~(LCK_RW_WANT_UPGRADE);
0a7de745 1262 } else {
5ba3f43e 1263 data &= ~(LCK_RW_WANT_EXCL);
0a7de745
A
1264 }
1265 if (!((prev & LCK_RW_W_WAITING) && (prev & LCK_RW_PRIV_EXCL))) {
5ba3f43e 1266 data &= ~(LCK_RW_W_WAITING);
0a7de745
A
1267 }
1268 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_release_smp)) {
5ba3f43e 1269 break;
0a7de745 1270 }
5ba3f43e
A
1271 cpu_pause();
1272 }
1273 return lck_rw_lock_exclusive_to_shared_gen(lock, prev);
1274}
1275
1276/*
1277 * Routine: lck_rw_lock_exclusive_to_shared_gen
0a7de745 1278 * Function:
5ba3f43e
A
1279 * Fast path has already dropped
1280 * our exclusive state and bumped lck_rw_shared_count
1281 * all we need to do here is determine if anyone
1282 * needs to be awakened.
1283 */
1284static void
1285lck_rw_lock_exclusive_to_shared_gen(
0a7de745
A
1286 lck_rw_t *lck,
1287 uint32_t prior_lock_state)
5ba3f43e 1288{
0a7de745
A
1289 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
1290 lck_rw_word_t fake_lck;
5ba3f43e
A
1291
1292 /*
1293 * prior_lock state is a snapshot of the 1st word of the
1294 * lock in question... we'll fake up a pointer to it
1295 * and carefully not access anything beyond whats defined
1296 * in the first word of a lck_rw_t
1297 */
1298 fake_lck.data = prior_lock_state;
1299
1300 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_START,
0a7de745 1301 trace_lck, fake_lck->want_excl, fake_lck->want_upgrade, 0, 0);
5ba3f43e
A
1302
1303 /*
1304 * don't wake up anyone waiting to take the lock exclusively
1305 * since we hold a read count... when the read count drops to 0,
1306 * the writers will be woken.
1307 *
1308 * wake up any waiting readers if we don't have any writers waiting,
1309 * or the lock is NOT marked as rw_priv_excl (writers have privilege)
1310 */
0a7de745 1311 if (!(fake_lck.priv_excl && fake_lck.w_waiting) && fake_lck.r_waiting) {
5ba3f43e 1312 thread_wakeup(LCK_RW_READER_EVENT(lck));
0a7de745 1313 }
5ba3f43e
A
1314
1315 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_END,
0a7de745 1316 trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, lck->lck_rw_shared_count, 0);
5ba3f43e
A
1317
1318#if CONFIG_DTRACE
1319 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_TO_SHARED_DOWNGRADE, lck, 0);
1320#endif
1321}
1322
1323
1324/*
1325 * Routine: lck_rw_try_lock
1326 */
1327boolean_t
1328lck_rw_try_lock(
0a7de745
A
1329 lck_rw_t *lck,
1330 lck_rw_type_t lck_rw_type)
5ba3f43e 1331{
0a7de745 1332 if (lck_rw_type == LCK_RW_TYPE_SHARED) {
5ba3f43e 1333 return lck_rw_try_lock_shared(lck);
0a7de745 1334 } else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) {
5ba3f43e 1335 return lck_rw_try_lock_exclusive(lck);
0a7de745 1336 } else {
5ba3f43e 1337 panic("lck_rw_try_lock(): Invalid rw lock type: %x", lck_rw_type);
0a7de745 1338 }
5ba3f43e
A
1339 return FALSE;
1340}
1341
1342/*
1343 * Routine: lck_rw_try_lock_shared
1344 */
1345
0a7de745
A
1346boolean_t
1347lck_rw_try_lock_shared(lck_rw_t *lock)
5ba3f43e 1348{
0a7de745 1349 uint32_t data, prev;
5ba3f43e 1350
0a7de745 1351 for (;;) {
5ba3f43e
A
1352 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
1353 if (data & LCK_RW_INTERLOCK) {
1354#if __SMP__
1355 atomic_exchange_abort();
1356 lck_rw_interlock_spin(lock);
1357 continue;
1358#else
1359 panic("lck_rw_try_lock_shared(): Interlock locked (%p): %x", lock, data);
1360#endif
1361 }
1362 if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) {
1363 atomic_exchange_abort();
0a7de745 1364 return FALSE; /* lock is busy */
5ba3f43e 1365 }
0a7de745
A
1366 data += LCK_RW_SHARED_READER; /* Increment reader refcount */
1367 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
5ba3f43e 1368 break;
0a7de745 1369 }
5ba3f43e
A
1370 cpu_pause();
1371 }
1372#if MACH_ASSERT
1373 thread_t owner = ordered_load_rw_owner(lock);
1374 assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
1375#endif
1376 current_thread()->rwlock_count++;
0a7de745 1377#if CONFIG_DTRACE
5ba3f43e 1378 LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED);
0a7de745 1379#endif /* CONFIG_DTRACE */
5ba3f43e
A
1380 return TRUE;
1381}
1382
1383
1384/*
1385 * Routine: lck_rw_try_lock_exclusive
1386 */
1387
0a7de745
A
1388boolean_t
1389lck_rw_try_lock_exclusive(lck_rw_t *lock)
5ba3f43e 1390{
0a7de745
A
1391 uint32_t data, prev;
1392 thread_t thread;
5ba3f43e 1393
0a7de745 1394 for (;;) {
5ba3f43e
A
1395 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
1396 if (data & LCK_RW_INTERLOCK) {
1397#if __SMP__
1398 atomic_exchange_abort();
1399 lck_rw_interlock_spin(lock);
1400 continue;
1401#else
1402 panic("lck_rw_try_lock_exclusive(): Interlock locked (%p): %x", lock, data);
1403#endif
1404 }
1405 if (data & (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) {
1406 atomic_exchange_abort();
1407 return FALSE;
1408 }
1409 data |= LCK_RW_WANT_EXCL;
0a7de745 1410 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
5ba3f43e 1411 break;
0a7de745 1412 }
5ba3f43e
A
1413 cpu_pause();
1414 }
1415 thread = current_thread();
1416 thread->rwlock_count++;
1417#if MACH_ASSERT
1418 thread_t owner = ordered_load_rw_owner(lock);
1419 assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
1420#endif
1421 ordered_store_rw_owner(lock, thread);
0a7de745 1422#if CONFIG_DTRACE
5ba3f43e 1423 LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
0a7de745 1424#endif /* CONFIG_DTRACE */
5ba3f43e
A
1425 return TRUE;
1426}
1427
1428
1429/*
1430 * Routine: lck_rw_unlock
1431 */
1432void
1433lck_rw_unlock(
0a7de745
A
1434 lck_rw_t *lck,
1435 lck_rw_type_t lck_rw_type)
5ba3f43e 1436{
0a7de745 1437 if (lck_rw_type == LCK_RW_TYPE_SHARED) {
5ba3f43e 1438 lck_rw_unlock_shared(lck);
0a7de745 1439 } else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) {
5ba3f43e 1440 lck_rw_unlock_exclusive(lck);
0a7de745 1441 } else {
5ba3f43e 1442 panic("lck_rw_unlock(): Invalid RW lock type: %d", lck_rw_type);
0a7de745 1443 }
5ba3f43e
A
1444}
1445
1446
1447/*
1448 * Routine: lck_rw_unlock_shared
1449 */
1450void
1451lck_rw_unlock_shared(
0a7de745 1452 lck_rw_t *lck)
5ba3f43e 1453{
0a7de745 1454 lck_rw_type_t ret;
5ba3f43e
A
1455
1456 assertf(lck->lck_rw_owner == THREAD_NULL, "state=0x%x, owner=%p", lck->lck_rw_data, lck->lck_rw_owner);
1457 assertf(lck->lck_rw_shared_count > 0, "shared_count=0x%x", lck->lck_rw_shared_count);
1458 ret = lck_rw_done(lck);
1459
0a7de745 1460 if (ret != LCK_RW_TYPE_SHARED) {
5ba3f43e 1461 panic("lck_rw_unlock_shared(): lock %p held in mode: %d", lck, ret);
0a7de745 1462 }
5ba3f43e
A
1463}
1464
1465
1466/*
1467 * Routine: lck_rw_unlock_exclusive
1468 */
1469void
1470lck_rw_unlock_exclusive(
0a7de745 1471 lck_rw_t *lck)
5ba3f43e 1472{
0a7de745 1473 lck_rw_type_t ret;
5ba3f43e
A
1474
1475 assertf(lck->lck_rw_owner == current_thread(), "state=0x%x, owner=%p", lck->lck_rw_data, lck->lck_rw_owner);
1476 ret = lck_rw_done(lck);
1477
0a7de745 1478 if (ret != LCK_RW_TYPE_EXCLUSIVE) {
5ba3f43e 1479 panic("lck_rw_unlock_exclusive(): lock %p held in mode: %d", lck, ret);
0a7de745 1480 }
5ba3f43e
A
1481}
1482
1483
1484/*
1485 * Routine: lck_rw_lock_exclusive_gen
1486 */
1487static void
1488lck_rw_lock_exclusive_gen(
0a7de745 1489 lck_rw_t *lock)
5ba3f43e 1490{
0a7de745
A
1491 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock);
1492 lck_rw_word_t word;
1493 int slept = 0;
1494 boolean_t gotlock = 0;
1495 boolean_t not_shared_or_upgrade = 0;
1496 wait_result_t res = 0;
1497 boolean_t istate;
5ba3f43e 1498
0a7de745 1499#if CONFIG_DTRACE
5ba3f43e 1500 boolean_t dtrace_ls_initialized = FALSE;
0a7de745 1501 boolean_t dtrace_rwl_excl_spin, dtrace_rwl_excl_block, dtrace_ls_enabled = FALSE;
5ba3f43e
A
1502 uint64_t wait_interval = 0;
1503 int readers_at_sleep = 0;
1504#endif
1505
1506 /*
1507 * Try to acquire the lck_rw_want_excl bit.
1508 */
1509 while (!lck_rw_grab(lock, LCK_RW_GRAB_WANT, FALSE)) {
0a7de745 1510#if CONFIG_DTRACE
5ba3f43e
A
1511 if (dtrace_ls_initialized == FALSE) {
1512 dtrace_ls_initialized = TRUE;
1513 dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
1514 dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
1515 dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
1516 if (dtrace_ls_enabled) {
1517 /*
1518 * Either sleeping or spinning is happening,
1519 * start a timing of our delay interval now.
1520 */
1521 readers_at_sleep = lock->lck_rw_shared_count;
1522 wait_interval = mach_absolute_time();
1523 }
1524 }
1525#endif
1526
1527 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
1528
1529 gotlock = lck_rw_grab(lock, LCK_RW_GRAB_WANT, TRUE);
1530
1531 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_END, trace_lck, 0, 0, gotlock, 0);
1532
0a7de745 1533 if (gotlock) {
5ba3f43e 1534 break;
0a7de745 1535 }
5ba3f43e
A
1536 /*
1537 * if we get here, the deadline has expired w/o us
1538 * being able to grab the lock exclusively
1539 * check to see if we're allowed to do a thread_block
1540 */
1541 word.data = ordered_load_rw(lock);
1542 if (word.can_sleep) {
5ba3f43e
A
1543 istate = lck_interlock_lock(lock);
1544 word.data = ordered_load_rw(lock);
1545
1546 if (word.want_excl) {
5ba3f43e
A
1547 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
1548
1549 word.w_waiting = 1;
1550 ordered_store_rw(lock, word.data);
1551
1552 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite);
d9a64523 1553 res = assert_wait(LCK_RW_WRITER_EVENT(lock),
0a7de745 1554 THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
5ba3f43e
A
1555 lck_interlock_unlock(lock, istate);
1556
1557 if (res == THREAD_WAITING) {
1558 res = thread_block(THREAD_CONTINUE_NULL);
1559 slept++;
1560 }
1561 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_END, trace_lck, res, slept, 0, 0);
1562 } else {
1563 word.want_excl = 1;
1564 ordered_store_rw(lock, word.data);
1565 lck_interlock_unlock(lock, istate);
1566 break;
1567 }
1568 }
1569 }
1570 /*
1571 * Wait for readers (and upgrades) to finish...
1572 */
1573 while (!lck_rw_drain_status(lock, LCK_RW_SHARED_MASK | LCK_RW_WANT_UPGRADE, FALSE)) {
0a7de745 1574#if CONFIG_DTRACE
5ba3f43e
A
1575 /*
1576 * Either sleeping or spinning is happening, start
1577 * a timing of our delay interval now. If we set it
1578 * to -1 we don't have accurate data so we cannot later
1579 * decide to record a dtrace spin or sleep event.
1580 */
1581 if (dtrace_ls_initialized == FALSE) {
1582 dtrace_ls_initialized = TRUE;
1583 dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
1584 dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
1585 dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
1586 if (dtrace_ls_enabled) {
1587 /*
1588 * Either sleeping or spinning is happening,
1589 * start a timing of our delay interval now.
1590 */
1591 readers_at_sleep = lock->lck_rw_shared_count;
1592 wait_interval = mach_absolute_time();
1593 }
1594 }
1595#endif
1596
1597 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
1598
1599 not_shared_or_upgrade = lck_rw_drain_status(lock, LCK_RW_SHARED_MASK | LCK_RW_WANT_UPGRADE, TRUE);
1600
1601 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_END, trace_lck, 0, 0, not_shared_or_upgrade, 0);
1602
0a7de745 1603 if (not_shared_or_upgrade) {
5ba3f43e 1604 break;
0a7de745 1605 }
5ba3f43e
A
1606 /*
1607 * if we get here, the deadline has expired w/o us
1608 * being able to grab the lock exclusively
1609 * check to see if we're allowed to do a thread_block
1610 */
1611 word.data = ordered_load_rw(lock);
1612 if (word.can_sleep) {
5ba3f43e
A
1613 istate = lck_interlock_lock(lock);
1614 word.data = ordered_load_rw(lock);
1615
1616 if (word.shared_count != 0 || word.want_upgrade) {
1617 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
1618
1619 word.w_waiting = 1;
1620 ordered_store_rw(lock, word.data);
1621
1622 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite);
d9a64523 1623 res = assert_wait(LCK_RW_WRITER_EVENT(lock),
0a7de745 1624 THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
5ba3f43e
A
1625 lck_interlock_unlock(lock, istate);
1626
1627 if (res == THREAD_WAITING) {
1628 res = thread_block(THREAD_CONTINUE_NULL);
1629 slept++;
1630 }
1631 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_END, trace_lck, res, slept, 0, 0);
1632 } else {
1633 lck_interlock_unlock(lock, istate);
1634 /*
1635 * must own the lock now, since we checked for
1636 * readers or upgrade owner behind the interlock
1637 * no need for a call to 'lck_rw_drain_status'
1638 */
1639 break;
1640 }
1641 }
1642 }
1643
0a7de745 1644#if CONFIG_DTRACE
5ba3f43e
A
1645 /*
1646 * Decide what latencies we suffered that are Dtrace events.
1647 * If we have set wait_interval, then we either spun or slept.
1648 * At least we get out from under the interlock before we record
1649 * which is the best we can do here to minimize the impact
1650 * of the tracing.
1651 * If we have set wait_interval to -1, then dtrace was not enabled when we
1652 * started sleeping/spinning so we don't record this event.
1653 */
1654 if (dtrace_ls_enabled == TRUE) {
1655 if (slept == 0) {
0a7de745 1656 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_SPIN, lock,
5ba3f43e
A
1657 mach_absolute_time() - wait_interval, 1);
1658 } else {
1659 /*
1660 * For the blocking case, we also record if when we blocked
1661 * it was held for read or write, and how many readers.
1662 * Notice that above we recorded this before we dropped
1663 * the interlock so the count is accurate.
1664 */
0a7de745 1665 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_BLOCK, lock,
5ba3f43e
A
1666 mach_absolute_time() - wait_interval, 1,
1667 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1668 }
1669 }
1670 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, 1);
0a7de745 1671#endif /* CONFIG_DTRACE */
5ba3f43e
A
1672}
1673
1674/*
1675 * Routine: lck_rw_done
1676 */
1677
0a7de745
A
1678lck_rw_type_t
1679lck_rw_done(lck_rw_t *lock)
5ba3f43e 1680{
0a7de745
A
1681 uint32_t data, prev;
1682 boolean_t once = FALSE;
5ba3f43e 1683
0a7de745 1684 for (;;) {
5ba3f43e 1685 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_release_smp);
0a7de745 1686 if (data & LCK_RW_INTERLOCK) { /* wait for interlock to clear */
5ba3f43e
A
1687#if __SMP__
1688 atomic_exchange_abort();
1689 lck_rw_interlock_spin(lock);
1690 continue;
1691#else
1692 panic("lck_rw_done(): Interlock locked (%p): %x", lock, data);
1693#endif // __SMP__
1694 }
0a7de745 1695 if (data & LCK_RW_SHARED_MASK) { /* lock is held shared */
5ba3f43e
A
1696 assertf(lock->lck_rw_owner == THREAD_NULL, "state=0x%x, owner=%p", lock->lck_rw_data, lock->lck_rw_owner);
1697 data -= LCK_RW_SHARED_READER;
0a7de745 1698 if ((data & LCK_RW_SHARED_MASK) == 0) { /* if reader count has now gone to 0, check for waiters */
5ba3f43e 1699 goto check_waiters;
0a7de745
A
1700 }
1701 } else { /* if reader count == 0, must be exclusive lock */
5ba3f43e
A
1702 if (data & LCK_RW_WANT_UPGRADE) {
1703 data &= ~(LCK_RW_WANT_UPGRADE);
1704 } else {
0a7de745 1705 if (data & LCK_RW_WANT_EXCL) {
5ba3f43e 1706 data &= ~(LCK_RW_WANT_EXCL);
0a7de745 1707 } else { /* lock is not 'owned', panic */
5ba3f43e 1708 panic("Releasing non-exclusive RW lock without a reader refcount!");
0a7de745 1709 }
5ba3f43e
A
1710 }
1711 if (!once) {
1712 // Only check for holder and clear it once
1713 assertf(lock->lck_rw_owner == current_thread(), "state=0x%x, owner=%p", lock->lck_rw_data, lock->lck_rw_owner);
1714 ordered_store_rw_owner(lock, THREAD_NULL);
1715 once = TRUE;
1716 }
1717check_waiters:
1718 /*
1719 * test the original values to match what
1720 * lck_rw_done_gen is going to do to determine
1721 * which wakeups need to happen...
1722 *
1723 * if !(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting)
1724 */
1725 if (prev & LCK_RW_W_WAITING) {
1726 data &= ~(LCK_RW_W_WAITING);
0a7de745 1727 if ((prev & LCK_RW_PRIV_EXCL) == 0) {
5ba3f43e 1728 data &= ~(LCK_RW_R_WAITING);
0a7de745
A
1729 }
1730 } else {
5ba3f43e 1731 data &= ~(LCK_RW_R_WAITING);
0a7de745 1732 }
5ba3f43e 1733 }
0a7de745 1734 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_release_smp)) {
5ba3f43e 1735 break;
0a7de745 1736 }
5ba3f43e
A
1737 cpu_pause();
1738 }
1739 return lck_rw_done_gen(lock, prev);
1740}
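/*
 * Editorial sketch (not part of the original source): a minimal example of how a
 * caller pairs a shared acquire with lck_rw_done(). The allocation helpers
 * (lck_grp_alloc_init, lck_rw_alloc_init, lck_rw_free) are assumed from the wider
 * lock API and are not defined in this file.
 */
#if 0
static void
example_rw_reader(void)
{
	lck_grp_t *grp = lck_grp_alloc_init("example", LCK_GRP_ATTR_NULL);	/* assumed API */
	lck_rw_t *rw = lck_rw_alloc_init(grp, LCK_ATTR_NULL);			/* assumed API */

	lck_rw_lock_shared(rw);		/* falls back to lck_rw_lock_shared_gen() on contention */
	/* ... read data protected by rw ... */
	(void)lck_rw_done(rw);		/* returns LCK_RW_TYPE_SHARED here */

	lck_rw_free(rw, grp);		/* assumed API */
}
#endif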
1741
1742/*
1743 * Routine: lck_rw_done_gen
1744 *
 1745 * called from lck_rw_done()...
 1746 * prior_lock_state is the value in the 1st
 1747 * word of the lock at the time of a successful
 1748 * atomic compare and exchange with the new value...
 1749 * it represents the state of the lock before we
 1750 * decremented the rw_shared_count or cleared either
 1751 * rw_want_upgrade or rw_want_excl and
 1752 * the lck_x_waiting bits... since the caller
 1753 * has already changed the state atomically,
 1754 * we just need to decide if we should
 1755 * wake up anyone and what value to return... we do
 1756 * this by examining the state of the lock before
 1757 * we changed it
1758 */
1759static lck_rw_type_t
1760lck_rw_done_gen(
0a7de745
A
1761 lck_rw_t *lck,
1762 uint32_t prior_lock_state)
5ba3f43e 1763{
0a7de745
A
1764 lck_rw_word_t fake_lck;
1765 lck_rw_type_t lock_type;
1766 thread_t thread;
1767 uint32_t rwlock_count;
5ba3f43e
A
1768
1769 /*
 1770 * prior_lock_state is a snapshot of the 1st word of the
 1771 * lock in question... we'll fake up a local copy of it
 1772 * and carefully not access anything beyond what's defined
 1773 * in the first word of a lck_rw_t
1774 */
1775 fake_lck.data = prior_lock_state;
1776
1777 if (fake_lck.shared_count <= 1) {
0a7de745 1778 if (fake_lck.w_waiting) {
5ba3f43e 1779 thread_wakeup(LCK_RW_WRITER_EVENT(lck));
0a7de745 1780 }
5ba3f43e 1781
0a7de745 1782 if (!(fake_lck.priv_excl && fake_lck.w_waiting) && fake_lck.r_waiting) {
5ba3f43e 1783 thread_wakeup(LCK_RW_READER_EVENT(lck));
0a7de745 1784 }
5ba3f43e 1785 }
0a7de745 1786 if (fake_lck.shared_count) {
5ba3f43e 1787 lock_type = LCK_RW_TYPE_SHARED;
0a7de745 1788 } else {
5ba3f43e 1789 lock_type = LCK_RW_TYPE_EXCLUSIVE;
0a7de745 1790 }
5ba3f43e
A
1791
1792 /* Check if dropping the lock means that we need to unpromote */
1793 thread = current_thread();
1794 rwlock_count = thread->rwlock_count--;
1795#if MACH_LDEBUG
0a7de745 1796 if (rwlock_count == 0) {
5ba3f43e 1797 panic("rw lock count underflow for thread %p", thread);
0a7de745 1798 }
5ba3f43e
A
1799#endif
1800 if ((rwlock_count == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
1801 /* sched_flags checked without lock, but will be rechecked while clearing */
d9a64523 1802 lck_rw_clear_promotion(thread, unslide_for_kdebug(lck));
5ba3f43e
A
1803 }
1804#if CONFIG_DTRACE
1805 LOCKSTAT_RECORD(LS_LCK_RW_DONE_RELEASE, lck, lock_type == LCK_RW_TYPE_SHARED ? 0 : 1);
1806#endif
1807 return lock_type;
1808}
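/*
 * Editorial note (not part of the original source): a worked example of the wakeup
 * decision above. If prior_lock_state decodes to shared_count == 1, w_waiting == 1,
 * r_waiting == 1 and priv_excl == 1, then shared_count <= 1 so the writer event is
 * woken; because priv_excl && w_waiting, the reader event is not. shared_count is
 * non-zero, so the caller learns it released a shared hold (LCK_RW_TYPE_SHARED).
 */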
1809
1810/*
1811 * Routine: lck_rw_lock_shared_gen
1812 * Function:
1813 * Fast path code has determined that this lock
1814 * is held exclusively... this is where we spin/block
1815 * until we can acquire the lock in the shared mode
1816 */
1817static void
1818lck_rw_lock_shared_gen(
0a7de745 1819 lck_rw_t *lck)
5ba3f43e 1820{
0a7de745
A
1821 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
1822 lck_rw_word_t word;
1823 boolean_t gotlock = 0;
1824 int slept = 0;
1825 wait_result_t res = 0;
1826 boolean_t istate;
5ba3f43e 1827
0a7de745 1828#if CONFIG_DTRACE
5ba3f43e
A
1829 uint64_t wait_interval = 0;
1830 int readers_at_sleep = 0;
1831 boolean_t dtrace_ls_initialized = FALSE;
1832 boolean_t dtrace_rwl_shared_spin, dtrace_rwl_shared_block, dtrace_ls_enabled = FALSE;
1833#endif /* CONFIG_DTRACE */
1834
0a7de745
A
1835 while (!lck_rw_grab(lck, LCK_RW_GRAB_SHARED, FALSE)) {
1836#if CONFIG_DTRACE
5ba3f43e
A
1837 if (dtrace_ls_initialized == FALSE) {
1838 dtrace_ls_initialized = TRUE;
1839 dtrace_rwl_shared_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_SPIN] != 0);
1840 dtrace_rwl_shared_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_BLOCK] != 0);
1841 dtrace_ls_enabled = dtrace_rwl_shared_spin || dtrace_rwl_shared_block;
1842 if (dtrace_ls_enabled) {
1843 /*
1844 * Either sleeping or spinning is happening,
1845 * start a timing of our delay interval now.
1846 */
1847 readers_at_sleep = lck->lck_rw_shared_count;
1848 wait_interval = mach_absolute_time();
1849 }
1850 }
1851#endif
1852
1853 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_START,
0a7de745 1854 trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, 0, 0);
5ba3f43e
A
1855
1856 gotlock = lck_rw_grab(lck, LCK_RW_GRAB_SHARED, TRUE);
1857
1858 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_END,
0a7de745 1859 trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, gotlock, 0);
5ba3f43e 1860
0a7de745 1861 if (gotlock) {
5ba3f43e 1862 break;
0a7de745 1863 }
5ba3f43e
A
1864 /*
1865 * if we get here, the deadline has expired w/o us
 1866 * being able to grab the lock for read;
 1867 * check to see if we're allowed to do a thread_block
1868 */
1869 if (lck->lck_rw_can_sleep) {
5ba3f43e
A
1870 istate = lck_interlock_lock(lck);
1871
1872 word.data = ordered_load_rw(lck);
1873 if ((word.want_excl || word.want_upgrade) &&
1874 ((word.shared_count == 0) || word.priv_excl)) {
5ba3f43e 1875 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_START,
0a7de745 1876 trace_lck, word.want_excl, word.want_upgrade, 0, 0);
5ba3f43e
A
1877
1878 word.r_waiting = 1;
1879 ordered_store_rw(lck, word.data);
1880
1881 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockRead);
d9a64523 1882 res = assert_wait(LCK_RW_READER_EVENT(lck),
0a7de745 1883 THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
5ba3f43e
A
1884 lck_interlock_unlock(lck, istate);
1885
1886 if (res == THREAD_WAITING) {
1887 res = thread_block(THREAD_CONTINUE_NULL);
1888 slept++;
1889 }
1890 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_END,
0a7de745 1891 trace_lck, res, slept, 0, 0);
5ba3f43e
A
1892 } else {
1893 word.shared_count++;
1894 ordered_store_rw(lck, word.data);
1895 lck_interlock_unlock(lck, istate);
1896 break;
1897 }
1898 }
1899 }
1900
0a7de745 1901#if CONFIG_DTRACE
5ba3f43e
A
1902 if (dtrace_ls_enabled == TRUE) {
1903 if (slept == 0) {
0a7de745 1904 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_SPIN, lck, mach_absolute_time() - wait_interval, 0);
5ba3f43e 1905 } else {
0a7de745 1906 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_BLOCK, lck,
5ba3f43e
A
1907 mach_absolute_time() - wait_interval, 0,
1908 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1909 }
1910 }
1911 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lck, 0);
0a7de745 1912#endif /* CONFIG_DTRACE */
5ba3f43e
A
1913}
1914
1915
1916void
1917lck_rw_assert(
0a7de745
A
1918 lck_rw_t *lck,
1919 unsigned int type)
5ba3f43e
A
1920{
1921 switch (type) {
1922 case LCK_RW_ASSERT_SHARED:
1923 if ((lck->lck_rw_shared_count != 0) &&
1924 (lck->lck_rw_owner == THREAD_NULL)) {
1925 return;
1926 }
1927 break;
1928 case LCK_RW_ASSERT_EXCLUSIVE:
1929 if ((lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
0a7de745 1930 (lck->lck_rw_shared_count == 0) &&
5ba3f43e
A
1931 (lck->lck_rw_owner == current_thread())) {
1932 return;
1933 }
1934 break;
1935 case LCK_RW_ASSERT_HELD:
0a7de745
A
1936 if (lck->lck_rw_shared_count != 0) {
1937 return; // Held shared
1938 }
5ba3f43e
A
1939 if ((lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
1940 (lck->lck_rw_owner == current_thread())) {
0a7de745 1941 return; // Held exclusive
5ba3f43e
A
1942 }
1943 break;
1944 case LCK_RW_ASSERT_NOTHELD:
1945 if ((lck->lck_rw_shared_count == 0) &&
0a7de745 1946 !(lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
5ba3f43e
A
1947 (lck->lck_rw_owner == THREAD_NULL)) {
1948 return;
1949 }
1950 break;
1951 default:
1952 break;
1953 }
1954 panic("rw lock (%p)%s held (mode=%u)", lck, (type == LCK_RW_ASSERT_NOTHELD ? "" : " not"), type);
1955}
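/*
 * Editorial sketch (not part of the original source): how a subsystem might use
 * lck_rw_assert() to enforce its locking contract. The structure and field names
 * are hypothetical.
 */
#if 0
struct example_table {
	lck_rw_t	lock;
	int		entries;
};

static void
example_table_insert(struct example_table *t)
{
	/* Caller must hold the table lock exclusively. */
	lck_rw_assert(&t->lock, LCK_RW_ASSERT_EXCLUSIVE);
	t->entries++;
}
#endif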
1956
1957
1958/*
1959 * Routine: kdp_lck_rw_lock_is_acquired_exclusive
1960 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
1961 */
1962boolean_t
0a7de745
A
1963kdp_lck_rw_lock_is_acquired_exclusive(lck_rw_t *lck)
1964{
5ba3f43e
A
1965 if (not_in_kdp) {
1966 panic("panic: rw lock exclusive check done outside of kernel debugger");
1967 }
1968 return ((lck->lck_rw_want_upgrade || lck->lck_rw_want_excl) && (lck->lck_rw_shared_count == 0)) ? TRUE : FALSE;
1969}
1970
1971/*
1972 * The C portion of the mutex package. These routines are only invoked
1973 * if the optimized assembler routines can't do the work.
1974 */
1975
1976/*
1977 * Forward declaration
1978 */
1979
0a7de745 1980void
5ba3f43e 1981lck_mtx_ext_init(
0a7de745
A
1982 lck_mtx_ext_t * lck,
1983 lck_grp_t * grp,
1984 lck_attr_t * attr);
5ba3f43e
A
1985
1986/*
1987 * Routine: lck_mtx_alloc_init
1988 */
1989lck_mtx_t *
1990lck_mtx_alloc_init(
0a7de745
A
1991 lck_grp_t * grp,
1992 lck_attr_t * attr)
5ba3f43e
A
1993{
1994 lck_mtx_t *lck;
1995
0a7de745 1996 if ((lck = (lck_mtx_t *) kalloc(sizeof(lck_mtx_t))) != 0) {
5ba3f43e 1997 lck_mtx_init(lck, grp, attr);
0a7de745 1998 }
5ba3f43e 1999
0a7de745 2000 return lck;
5ba3f43e
A
2001}
2002
2003/*
2004 * Routine: lck_mtx_free
2005 */
2006void
2007lck_mtx_free(
0a7de745
A
2008 lck_mtx_t * lck,
2009 lck_grp_t * grp)
5ba3f43e
A
2010{
2011 lck_mtx_destroy(lck, grp);
0a7de745 2012 kfree(lck, sizeof(lck_mtx_t));
5ba3f43e
A
2013}
2014
2015/*
2016 * Routine: lck_mtx_init
2017 */
2018void
2019lck_mtx_init(
0a7de745
A
2020 lck_mtx_t * lck,
2021 lck_grp_t * grp,
2022 lck_attr_t * attr)
5ba3f43e 2023{
0a7de745 2024#ifdef BER_XXX
5ba3f43e
A
2025 lck_mtx_ext_t *lck_ext;
2026#endif
2027 lck_attr_t *lck_attr;
2028
0a7de745 2029 if (attr != LCK_ATTR_NULL) {
5ba3f43e 2030 lck_attr = attr;
0a7de745 2031 } else {
5ba3f43e 2032 lck_attr = &LockDefaultLckAttr;
0a7de745 2033 }
5ba3f43e 2034
0a7de745 2035#ifdef BER_XXX
5ba3f43e
A
2036 if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
2037 if ((lck_ext = (lck_mtx_ext_t *) kalloc(sizeof(lck_mtx_ext_t))) != 0) {
2038 lck_mtx_ext_init(lck_ext, grp, lck_attr);
2039 lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
2040 lck->lck_mtx_ptr = lck_ext;
2041 lck->lck_mtx_type = LCK_MTX_TYPE;
2042 }
2043 } else
2044#endif
2045 {
0a7de745 2046 lck->lck_mtx_ptr = NULL; // Clear any padding in the union fields below
5ba3f43e 2047 lck->lck_mtx_waiters = 0;
5ba3f43e
A
2048 lck->lck_mtx_type = LCK_MTX_TYPE;
2049 ordered_store_mtx(lck, 0);
2050 }
2051 lck_grp_reference(grp);
2052 lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
2053}
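/*
 * Editorial sketch (not part of the original source): the typical life cycle of a
 * mutex built from the routines above. lck_grp_alloc_init() is assumed from the
 * wider lock-group API; it is not defined in this file.
 */
#if 0
static void
example_mutex_lifecycle(void)
{
	lck_grp_t *grp = lck_grp_alloc_init("example", LCK_GRP_ATTR_NULL);	/* assumed API */
	lck_mtx_t *mtx = lck_mtx_alloc_init(grp, LCK_ATTR_NULL);

	lck_mtx_lock(mtx);		/* fast-path cmpxchg, contended path otherwise */
	/* ... critical section ... */
	lck_mtx_unlock(mtx);

	lck_mtx_free(mtx, grp);		/* calls lck_mtx_destroy() and kfree() */
}
#endif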
2054
2055/*
2056 * Routine: lck_mtx_init_ext
2057 */
2058void
2059lck_mtx_init_ext(
0a7de745
A
2060 lck_mtx_t * lck,
2061 lck_mtx_ext_t * lck_ext,
2062 lck_grp_t * grp,
2063 lck_attr_t * attr)
5ba3f43e
A
2064{
2065 lck_attr_t *lck_attr;
2066
0a7de745 2067 if (attr != LCK_ATTR_NULL) {
5ba3f43e 2068 lck_attr = attr;
0a7de745 2069 } else {
5ba3f43e 2070 lck_attr = &LockDefaultLckAttr;
0a7de745 2071 }
5ba3f43e
A
2072
2073 if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
2074 lck_mtx_ext_init(lck_ext, grp, lck_attr);
2075 lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
2076 lck->lck_mtx_ptr = lck_ext;
2077 lck->lck_mtx_type = LCK_MTX_TYPE;
2078 } else {
2079 lck->lck_mtx_waiters = 0;
5ba3f43e
A
2080 lck->lck_mtx_type = LCK_MTX_TYPE;
2081 ordered_store_mtx(lck, 0);
2082 }
2083 lck_grp_reference(grp);
2084 lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
2085}
2086
2087/*
2088 * Routine: lck_mtx_ext_init
2089 */
2090void
2091lck_mtx_ext_init(
0a7de745
A
2092 lck_mtx_ext_t * lck,
2093 lck_grp_t * grp,
2094 lck_attr_t * attr)
5ba3f43e
A
2095{
2096 bzero((void *) lck, sizeof(lck_mtx_ext_t));
2097
2098 lck->lck_mtx.lck_mtx_type = LCK_MTX_TYPE;
2099
2100 if ((attr->lck_attr_val) & LCK_ATTR_DEBUG) {
2101 lck->lck_mtx_deb.type = MUTEX_TAG;
2102 lck->lck_mtx_attr |= LCK_MTX_ATTR_DEBUG;
2103 }
2104 lck->lck_mtx_grp = grp;
2105
0a7de745 2106 if (grp->lck_grp_attr & LCK_GRP_ATTR_STAT) {
5ba3f43e 2107 lck->lck_mtx_attr |= LCK_MTX_ATTR_STAT;
0a7de745 2108 }
5ba3f43e
A
2109}
2110
2111/* The slow versions */
2112static void lck_mtx_lock_contended(lck_mtx_t *lock, thread_t thread, boolean_t interlocked);
2113static boolean_t lck_mtx_try_lock_contended(lck_mtx_t *lock, thread_t thread);
2114static void lck_mtx_unlock_contended(lck_mtx_t *lock, thread_t thread, boolean_t interlocked);
2115
0a7de745
A
2116/* The adaptive spin function */
2117static spinwait_result_t lck_mtx_lock_contended_spinwait_arm(lck_mtx_t *lock, thread_t thread, boolean_t interlocked);
2118
5ba3f43e
A
2119/*
2120 * Routine: lck_mtx_verify
2121 *
2122 * Verify if a mutex is valid
2123 */
2124static inline void
2125lck_mtx_verify(lck_mtx_t *lock)
2126{
0a7de745 2127 if (lock->lck_mtx_type != LCK_MTX_TYPE) {
5ba3f43e 2128 panic("Invalid mutex %p", lock);
0a7de745
A
2129 }
2130#if DEVELOPMENT || DEBUG
2131 if (lock->lck_mtx_tag == LCK_MTX_TAG_DESTROYED) {
5ba3f43e 2132 panic("Mutex destroyed %p", lock);
0a7de745
A
2133 }
2134#endif /* DEVELOPMENT || DEBUG */
5ba3f43e
A
2135}
2136
2137/*
2138 * Routine: lck_mtx_check_preemption
2139 *
2140 * Verify preemption is enabled when attempting to acquire a mutex.
2141 */
2142
2143static inline void
2144lck_mtx_check_preemption(lck_mtx_t *lock)
2145{
0a7de745 2146#if DEVELOPMENT || DEBUG
5ba3f43e
A
2147 int pl = get_preemption_level();
2148
0a7de745 2149 if (pl != 0) {
5ba3f43e 2150 panic("Attempt to take mutex with preemption disabled. Lock=%p, level=%d", lock, pl);
0a7de745 2151 }
5ba3f43e
A
2152#else
2153 (void)lock;
2154#endif
2155}
2156
2157/*
2158 * Routine: lck_mtx_lock
2159 */
2160void
2161lck_mtx_lock(lck_mtx_t *lock)
2162{
0a7de745 2163 thread_t thread;
5ba3f43e
A
2164
2165 lck_mtx_verify(lock);
2166 lck_mtx_check_preemption(lock);
2167 thread = current_thread();
cb323159
A
2168 if (os_atomic_cmpxchg(&lock->lck_mtx_data,
2169 0, LCK_MTX_THREAD_TO_STATE(thread), acquire)) {
0a7de745 2170#if CONFIG_DTRACE
5ba3f43e
A
2171 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, lock, 0);
2172#endif /* CONFIG_DTRACE */
2173 return;
2174 }
2175 lck_mtx_lock_contended(lock, thread, FALSE);
2176}
2177
2178/*
0a7de745 2179 * This is the slow version of mutex locking.
5ba3f43e
A
2180 */
2181static void NOINLINE
2182lck_mtx_lock_contended(lck_mtx_t *lock, thread_t thread, boolean_t interlocked)
2183{
0a7de745
A
2184 thread_t holding_thread;
2185 uintptr_t state;
2186 int waiters = 0;
2187 spinwait_result_t sw_res;
cb323159 2188 struct turnstile *ts = NULL;
d9a64523
A
2189
2190 /* Loop waiting until I see that the mutex is unowned */
0a7de745
A
2191 for (;;) {
2192 sw_res = lck_mtx_lock_contended_spinwait_arm(lock, thread, interlocked);
2193 interlocked = FALSE;
2194
2195 switch (sw_res) {
2196 case SPINWAIT_ACQUIRED:
cb323159
A
2197 if (ts != NULL) {
2198 interlock_lock(lock);
2199 turnstile_complete((uintptr_t)lock, NULL, NULL, TURNSTILE_KERNEL_MUTEX);
2200 interlock_unlock(lock);
2201 }
0a7de745
A
2202 goto done;
2203 case SPINWAIT_INTERLOCK:
2204 goto set_owner;
2205 default:
2206 break;
2207 }
2208
5ba3f43e
A
2209 state = ordered_load_mtx(lock);
2210 holding_thread = LCK_MTX_STATE_TO_THREAD(state);
0a7de745 2211 if (holding_thread == NULL) {
5ba3f43e 2212 break;
0a7de745 2213 }
5ba3f43e 2214 ordered_store_mtx(lock, (state | LCK_ILOCK | ARM_LCK_WAITERS)); // Set waiters bit and wait
cb323159 2215 lck_mtx_lock_wait(lock, holding_thread, &ts);
d9a64523 2216 /* returns interlock unlocked */
5ba3f43e 2217 }
d9a64523 2218
0a7de745 2219set_owner:
d9a64523 2220 /* Hooray, I'm the new owner! */
0a7de745
A
2221 state = ordered_load_mtx(lock);
2222
2223 if (state & ARM_LCK_WAITERS) {
2224 /* Skip lck_mtx_lock_acquire if there are no waiters. */
cb323159
A
2225 waiters = lck_mtx_lock_acquire(lock, ts);
2226 /*
2227 * lck_mtx_lock_acquire will call
2228 * turnstile_complete
2229 */
2230 } else {
2231 if (ts != NULL) {
2232 turnstile_complete((uintptr_t)lock, NULL, NULL, TURNSTILE_KERNEL_MUTEX);
2233 }
0a7de745
A
2234 }
2235
5ba3f43e 2236 state = LCK_MTX_THREAD_TO_STATE(thread);
0a7de745 2237 if (waiters != 0) {
5ba3f43e 2238 state |= ARM_LCK_WAITERS;
0a7de745 2239 }
5ba3f43e 2240#if __SMP__
0a7de745
A
2241 state |= LCK_ILOCK; // Preserve interlock
2242 ordered_store_mtx(lock, state); // Set ownership
2243 interlock_unlock(lock); // Release interlock, enable preemption
5ba3f43e 2244#else
0a7de745 2245 ordered_store_mtx(lock, state); // Set ownership
5ba3f43e
A
2246 enable_preemption();
2247#endif
0a7de745
A
2248
2249done:
5ba3f43e
A
2250 load_memory_barrier();
2251
cb323159
A
2252 assert(thread->turnstile != NULL);
2253
2254 if (ts != NULL) {
2255 turnstile_cleanup();
2256 }
2257
0a7de745 2258#if CONFIG_DTRACE
5ba3f43e
A
2259 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, lock, 0);
2260#endif /* CONFIG_DTRACE */
2261}
2262
0a7de745
A
2263/*
 2264 * Routine: lck_mtx_lock_contended_spinwait_arm
2265 *
2266 * Invoked trying to acquire a mutex when there is contention but
2267 * the holder is running on another processor. We spin for up to a maximum
2268 * time waiting for the lock to be released.
2269 */
2270static spinwait_result_t
2271lck_mtx_lock_contended_spinwait_arm(lck_mtx_t *lock, thread_t thread, boolean_t interlocked)
2272{
2273 int has_interlock = (int)interlocked;
2274#if __SMP__
2275 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock);
ea3f0419
A
2276 thread_t owner, prev_owner;
2277 uint64_t window_deadline, sliding_deadline, high_deadline;
2278 uint64_t start_time, cur_time, avg_hold_time, bias, delta;
2279 int loopcount = 0;
2280 uint i, prev_owner_cpu;
2281 int total_hold_time_samples, window_hold_time_samples, unfairness;
2282 bool owner_on_core, adjust;
2283 uintptr_t state, new_state, waiters;
2284 spinwait_result_t retval = SPINWAIT_DID_SPIN_HIGH_THR;
0a7de745
A
2285
2286 if (__improbable(!(lck_mtx_adaptive_spin_mode & ADAPTIVE_SPIN_ENABLE))) {
2287 if (!has_interlock) {
2288 interlock_lock(lock);
2289 }
2290
2291 return SPINWAIT_DID_NOT_SPIN;
2292 }
2293
0a7de745
A
 2294 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_START,
 2295     trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(LCK_MTX_STATE_TO_THREAD(ordered_load_mtx(lock))), lock->lck_mtx_waiters, 0, 0);
2296
ea3f0419
A
2297 start_time = mach_absolute_time();
2298 /*
2299 * window_deadline represents the "learning" phase.
 2300 * The thread collects statistics about the lock until
 2301 * window_deadline and then decides whether to spin more
 2302 * or block, according to the concurrency behavior
 2303 * observed.
2304 *
2305 * Every thread can spin at least low_MutexSpin.
2306 */
2307 window_deadline = start_time + low_MutexSpin;
2308 /*
2309 * Sliding_deadline is the adjusted spin deadline
2310 * computed after the "learning" phase.
2311 */
2312 sliding_deadline = window_deadline;
2313 /*
 2314 * High_deadline is a hard deadline. No thread
 2315 * can spin past this deadline.
2316 */
2317 if (high_MutexSpin >= 0) {
2318 high_deadline = start_time + high_MutexSpin;
2319 } else {
2320 high_deadline = start_time + low_MutexSpin * real_ncpus;
0a7de745
A
2321 }
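	/*
	 * Editorial note (not part of the original source): with hypothetical values
	 * low_MutexSpin = 100 and real_ncpus = 4 (abstime units) and high_MutexSpin < 0,
	 * a thread starting at start_time = 1000 gets window_deadline = sliding_deadline
	 * = 1100 and high_deadline = 1400, i.e. at least one 100-unit learning window and
	 * at most four such windows of spinning overall.
	 */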
2322
ea3f0419
A
2323 /*
 2324 * We do not yet know which cpu the owner is running on.
 2325 * Initialize prev_owner_cpu with the next cpu.
2326 */
2327 prev_owner_cpu = (cpu_number() + 1) % real_ncpus;
2328 total_hold_time_samples = 0;
2329 window_hold_time_samples = 0;
2330 avg_hold_time = 0;
2331 adjust = TRUE;
2332 bias = (os_hash_kernel_pointer(lock) + cpu_number()) % real_ncpus;
2333
0a7de745
A
2334 /* Snoop the lock state */
2335 state = ordered_load_mtx(lock);
ea3f0419
A
2336 owner = LCK_MTX_STATE_TO_THREAD(state);
2337 prev_owner = owner;
2338
2339 if (has_interlock) {
2340 if (owner == NULL) {
2341 retval = SPINWAIT_INTERLOCK;
2342 goto done_spinning;
2343 } else {
2344 /*
2345 * We are holding the interlock, so
2346 * we can safely dereference owner.
2347 */
2348 if (!(owner->machine.machine_thread_flags & MACHINE_THREAD_FLAGS_ON_CPU) ||
2349 (owner->state & TH_IDLE)) {
2350 retval = SPINWAIT_DID_NOT_SPIN;
2351 goto done_spinning;
2352 }
2353 }
2354 interlock_unlock(lock);
2355 has_interlock = 0;
2356 }
0a7de745
A
2357
2358 /*
2359 * Spin while:
2360 * - mutex is locked, and
2361 * - it's locked as a spin lock, and
2362 * - owner is running on another processor, and
0a7de745
A
2363 * - we haven't spun for long enough.
2364 */
2365 do {
ea3f0419
A
2366 /*
2367 * Try to acquire the lock.
2368 */
2369 owner = LCK_MTX_STATE_TO_THREAD(state);
2370 if (owner == NULL) {
2371 waiters = state & ARM_LCK_WAITERS;
2372 if (waiters) {
2373 /*
2374 * preserve the waiter bit
 2375 * and try to acquire the interlock.
2376 * Note: we will successfully acquire
2377 * the interlock only if we can also
2378 * acquire the lock.
2379 */
2380 new_state = ARM_LCK_WAITERS | LCK_ILOCK;
2381 has_interlock = 1;
2382 retval = SPINWAIT_INTERLOCK;
2383 disable_preemption();
2384 } else {
2385 new_state = LCK_MTX_THREAD_TO_STATE(thread);
2386 retval = SPINWAIT_ACQUIRED;
0a7de745
A
2387 }
2388
ea3f0419
A
2389 /*
 2390 * The cmpxchg will succeed only if the lock
2391 * is not owned (doesn't have an owner set)
2392 * and it is not interlocked.
2393 * It will not fail if there are waiters.
2394 */
2395 if (os_atomic_cmpxchgv(&lock->lck_mtx_data,
2396 waiters, new_state, &state, acquire)) {
2397 goto done_spinning;
2398 } else {
2399 if (waiters) {
2400 has_interlock = 0;
2401 enable_preemption();
2402 }
2403 }
2404 }
0a7de745 2405
ea3f0419 2406 cur_time = mach_absolute_time();
0a7de745 2407
ea3f0419
A
2408 /*
2409 * Never spin past high_deadline.
2410 */
2411 if (cur_time >= high_deadline) {
2412 retval = SPINWAIT_DID_SPIN_HIGH_THR;
2413 break;
2414 }
0a7de745 2415
ea3f0419
A
2416 /*
 2417 * Check if the owner is on core. If not, block.
2418 */
2419 owner = LCK_MTX_STATE_TO_THREAD(state);
2420 if (owner) {
2421 i = prev_owner_cpu;
2422 owner_on_core = FALSE;
0a7de745 2423
ea3f0419
A
2424 disable_preemption();
2425 state = ordered_load_mtx(lock);
2426 owner = LCK_MTX_STATE_TO_THREAD(state);
0a7de745 2427
ea3f0419
A
2428 /*
2429 * For scalability we want to check if the owner is on core
2430 * without locking the mutex interlock.
2431 * If we do not lock the mutex interlock, the owner that we see might be
2432 * invalid, so we cannot dereference it. Therefore we cannot check
2433 * any field of the thread to tell us if it is on core.
2434 * Check if the thread that is running on the other cpus matches the owner.
2435 */
2436 if (owner) {
2437 do {
2438 cpu_data_t *cpu_data_ptr = CpuDataEntries[i].cpu_data_vaddr;
2439 if ((cpu_data_ptr != NULL) && (cpu_data_ptr->cpu_active_thread == owner)) {
2440 owner_on_core = TRUE;
2441 break;
0a7de745 2442 }
ea3f0419
A
2443 if (++i >= real_ncpus) {
2444 i = 0;
2445 }
2446 } while (i != prev_owner_cpu);
2447 enable_preemption();
2448
2449 if (owner_on_core) {
2450 prev_owner_cpu = i;
2451 } else {
2452 prev_owner = owner;
2453 state = ordered_load_mtx(lock);
2454 owner = LCK_MTX_STATE_TO_THREAD(state);
2455 if (owner == prev_owner) {
2456 /*
2457 * Owner is not on core.
2458 * Stop spinning.
2459 */
2460 if (loopcount == 0) {
2461 retval = SPINWAIT_DID_NOT_SPIN;
2462 } else {
2463 retval = SPINWAIT_DID_SPIN_OWNER_NOT_CORE;
2464 }
2465 break;
2466 }
2467 /*
2468 * Fall through if the owner changed while we were scanning.
2469 * The new owner could potentially be on core, so loop
2470 * again.
2471 */
0a7de745 2472 }
ea3f0419
A
2473 } else {
2474 enable_preemption();
0a7de745
A
2475 }
2476 }
2477
ea3f0419
A
2478 /*
2479 * Save how many times we see the owner changing.
 2480 * We can roughly estimate the mutex hold
2481 * time and the fairness with that.
2482 */
2483 if (owner != prev_owner) {
2484 prev_owner = owner;
2485 total_hold_time_samples++;
2486 window_hold_time_samples++;
0a7de745
A
2487 }
2488
ea3f0419
A
2489 /*
2490 * Learning window expired.
2491 * Try to adjust the sliding_deadline.
2492 */
2493 if (cur_time >= window_deadline) {
2494 /*
 2495 * If there was no contention during the window,
 2496 * stop spinning.
2497 */
2498 if (window_hold_time_samples < 1) {
2499 retval = SPINWAIT_DID_SPIN_NO_WINDOW_CONTENTION;
2500 break;
2501 }
2502
2503 if (adjust) {
2504 /*
2505 * For a fair lock, we'd wait for at most (NCPU-1) periods,
2506 * but the lock is unfair, so let's try to estimate by how much.
2507 */
2508 unfairness = total_hold_time_samples / real_ncpus;
2509
2510 if (unfairness == 0) {
2511 /*
 2512 * We observed the owner changing `total_hold_time_samples` times, which
 2513 * lets us estimate the average hold time of this mutex for the duration
2514 * of the spin time.
2515 * avg_hold_time = (cur_time - start_time) / total_hold_time_samples;
2516 *
2517 * In this case spin at max avg_hold_time * (real_ncpus - 1)
2518 */
2519 delta = cur_time - start_time;
2520 sliding_deadline = start_time + (delta * (real_ncpus - 1)) / total_hold_time_samples;
2521 } else {
2522 /*
2523 * In this case at least one of the other cpus was able to get the lock twice
2524 * while I was spinning.
2525 * We could spin longer but it won't necessarily help if the system is unfair.
2526 * Try to randomize the wait to reduce contention.
2527 *
2528 * We compute how much time we could potentially spin
2529 * and distribute it over the cpus.
2530 *
 2531 * bias is an integer between 0 and real_ncpus - 1.
2532 * distributed_increment = ((high_deadline - cur_time) / real_ncpus) * bias
2533 */
2534 delta = high_deadline - cur_time;
2535 sliding_deadline = cur_time + ((delta * bias) / real_ncpus);
2536 adjust = FALSE;
2537 }
2538 }
0a7de745 2539
ea3f0419
A
2540 window_deadline += low_MutexSpin;
2541 window_hold_time_samples = 0;
0a7de745
A
2542 }
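		/*
		 * Editorial note (not part of the original source): a worked example of the
		 * adjustment above. If after delta = 400 units of spinning the owner was seen
		 * to change total_hold_time_samples = 8 times on a 4-cpu system, then
		 * unfairness = 8 / 4 = 2 and the randomized branch is taken. Had only 3 changes
		 * been seen, unfairness would be 0 and sliding_deadline would become
		 * start_time + (400 * 3) / 3 = start_time + 400, i.e. roughly
		 * avg_hold_time * (real_ncpus - 1).
		 */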
2543
ea3f0419
A
2544 /*
 2545 * Stop spinning if we are past
2546 * the adjusted deadline.
2547 */
2548 if (cur_time >= sliding_deadline) {
2549 retval = SPINWAIT_DID_SPIN_SLIDING_THR;
2550 break;
2551 }
0a7de745 2552
ea3f0419
A
2553 /*
2554 * We want to arm the monitor for wfe,
 2555 * so load the lock exclusively.
2556 *
2557 * NOTE:
2558 * we rely on the fact that wfe will
2559 * eventually return even if the cache line
2560 * is not modified. This way we will keep
 2561 * looping and checking whether the deadlines have expired.
2562 */
2563 state = os_atomic_load_exclusive(&lock->lck_mtx_data, relaxed);
2564 owner = LCK_MTX_STATE_TO_THREAD(state);
2565 if (owner != NULL) {
2566 wait_for_event();
2567 state = ordered_load_mtx(lock);
2568 } else {
2569 atomic_exchange_abort();
0a7de745
A
2570 }
2571
2572 loopcount++;
2573 } while (TRUE);
2574
ea3f0419 2575done_spinning:
0a7de745
A
2576#if CONFIG_DTRACE
2577 /*
0a7de745
A
2578 * Note that we record a different probe id depending on whether
2579 * this is a direct or indirect mutex. This allows us to
2580 * penalize only lock groups that have debug/stats enabled
2581 * with dtrace processing if desired.
2582 */
2583 if (__probable(lock->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)) {
2584 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN, lock,
ea3f0419 2585 mach_absolute_time() - start_time);
0a7de745
A
2586 } else {
2587 LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_SPIN, lock,
ea3f0419 2588 mach_absolute_time() - start_time);
0a7de745
A
2589 }
2590 /* The lockstat acquire event is recorded by the caller. */
2591#endif
2592
2593 state = ordered_load_mtx(lock);
2594
2595 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_END,
2596 trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(LCK_MTX_STATE_TO_THREAD(state)), lock->lck_mtx_waiters, retval, 0);
2597#else /* __SMP__ */
2598 /* Spinwaiting is not useful on UP systems. */
2599#pragma unused(lock, thread)
2600 int retval = SPINWAIT_DID_NOT_SPIN;
2601#endif /* __SMP__ */
2602 if ((!has_interlock) && (retval != SPINWAIT_ACQUIRED)) {
2603 /* We must own either the lock or the interlock on return. */
2604 interlock_lock(lock);
2605 }
2606
2607 return retval;
2608}
2609
ea3f0419 2610
5ba3f43e
A
2611/*
2612 * Common code for mutex locking as spinlock
2613 */
2614static inline void
2615lck_mtx_lock_spin_internal(lck_mtx_t *lock, boolean_t allow_held_as_mutex)
2616{
0a7de745 2617 uintptr_t state;
5ba3f43e
A
2618
2619 interlock_lock(lock);
2620 state = ordered_load_mtx(lock);
2621 if (LCK_MTX_STATE_TO_THREAD(state)) {
0a7de745 2622 if (allow_held_as_mutex) {
5ba3f43e 2623 lck_mtx_lock_contended(lock, current_thread(), TRUE);
0a7de745 2624 } else {
5ba3f43e
A
2625 // "Always" variants can never block. If the lock is held and blocking is not allowed
2626 // then someone is mixing always and non-always calls on the same lock, which is
2627 // forbidden.
2628 panic("Attempting to block on a lock taken as spin-always %p", lock);
0a7de745 2629 }
5ba3f43e
A
2630 return;
2631 }
0a7de745
A
2632 state &= ARM_LCK_WAITERS; // Preserve waiters bit
2633 state |= (LCK_MTX_SPIN_TAG | LCK_ILOCK); // Add spin tag and maintain interlock
5ba3f43e
A
2634 ordered_store_mtx(lock, state);
2635 load_memory_barrier();
2636
0a7de745 2637#if CONFIG_DTRACE
5ba3f43e
A
2638 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN_ACQUIRE, lock, 0);
2639#endif /* CONFIG_DTRACE */
2640}
2641
2642/*
2643 * Routine: lck_mtx_lock_spin
2644 */
2645void
2646lck_mtx_lock_spin(lck_mtx_t *lock)
2647{
2648 lck_mtx_check_preemption(lock);
2649 lck_mtx_lock_spin_internal(lock, TRUE);
2650}
2651
2652/*
2653 * Routine: lck_mtx_lock_spin_always
2654 */
2655void
2656lck_mtx_lock_spin_always(lck_mtx_t *lock)
2657{
2658 lck_mtx_lock_spin_internal(lock, FALSE);
2659}
2660
2661/*
2662 * Routine: lck_mtx_try_lock
2663 */
2664boolean_t
2665lck_mtx_try_lock(lck_mtx_t *lock)
2666{
0a7de745 2667 thread_t thread = current_thread();
5ba3f43e
A
2668
2669 lck_mtx_verify(lock);
cb323159
A
2670 if (os_atomic_cmpxchg(&lock->lck_mtx_data,
2671 0, LCK_MTX_THREAD_TO_STATE(thread), acquire)) {
0a7de745 2672#if CONFIG_DTRACE
5ba3f43e
A
2673 LOCKSTAT_RECORD(LS_LCK_MTX_TRY_LOCK_ACQUIRE, lock, 0);
2674#endif /* CONFIG_DTRACE */
2675 return TRUE;
2676 }
2677 return lck_mtx_try_lock_contended(lock, thread);
2678}
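/*
 * Editorial sketch (not part of the original source): typical use of
 * lck_mtx_try_lock() to avoid blocking on a contended path. The fallback work
 * shown is hypothetical.
 */
#if 0
static boolean_t
example_try_update(lck_mtx_t *mtx)
{
	if (!lck_mtx_try_lock(mtx)) {
		/* Lock is busy; defer the work rather than blocking here. */
		return FALSE;
	}
	/* ... update state protected by mtx ... */
	lck_mtx_unlock(mtx);
	return TRUE;
}
#endif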
2679
2680static boolean_t NOINLINE
2681lck_mtx_try_lock_contended(lck_mtx_t *lock, thread_t thread)
2682{
0a7de745
A
2683 thread_t holding_thread;
2684 uintptr_t state;
2685 int waiters;
5ba3f43e 2686
0a7de745 2687#if __SMP__
5ba3f43e
A
2688 interlock_lock(lock);
2689 state = ordered_load_mtx(lock);
2690 holding_thread = LCK_MTX_STATE_TO_THREAD(state);
2691 if (holding_thread) {
2692 interlock_unlock(lock);
2693 return FALSE;
2694 }
2695#else
2696 disable_preemption_for_thread(thread);
2697 state = ordered_load_mtx(lock);
0a7de745 2698 if (state & LCK_ILOCK) {
5ba3f43e 2699 panic("Unexpected interlock set (%p)", lock);
0a7de745 2700 }
5ba3f43e
A
2701 holding_thread = LCK_MTX_STATE_TO_THREAD(state);
2702 if (holding_thread) {
2703 enable_preemption();
2704 return FALSE;
2705 }
2706 state |= LCK_ILOCK;
2707 ordered_store_mtx(lock, state);
0a7de745 2708#endif // __SMP__
cb323159 2709 waiters = lck_mtx_lock_acquire(lock, NULL);
5ba3f43e 2710 state = LCK_MTX_THREAD_TO_STATE(thread);
0a7de745 2711 if (waiters != 0) {
5ba3f43e 2712 state |= ARM_LCK_WAITERS;
0a7de745 2713 }
5ba3f43e 2714#if __SMP__
0a7de745
A
2715 state |= LCK_ILOCK; // Preserve interlock
2716 ordered_store_mtx(lock, state); // Set ownership
2717 interlock_unlock(lock); // Release interlock, enable preemption
5ba3f43e 2718#else
0a7de745 2719 ordered_store_mtx(lock, state); // Set ownership
5ba3f43e
A
2720 enable_preemption();
2721#endif
2722 load_memory_barrier();
cb323159
A
2723
2724 turnstile_cleanup();
2725
5ba3f43e
A
2726 return TRUE;
2727}
2728
2729static inline boolean_t
2730lck_mtx_try_lock_spin_internal(lck_mtx_t *lock, boolean_t allow_held_as_mutex)
2731{
0a7de745 2732 uintptr_t state;
5ba3f43e 2733
0a7de745 2734 if (!interlock_try(lock)) {
5ba3f43e 2735 return FALSE;
0a7de745 2736 }
5ba3f43e 2737 state = ordered_load_mtx(lock);
0a7de745 2738 if (LCK_MTX_STATE_TO_THREAD(state)) {
5ba3f43e 2739 // Lock is held as mutex
0a7de745 2740 if (allow_held_as_mutex) {
5ba3f43e 2741 interlock_unlock(lock);
0a7de745 2742 } else {
5ba3f43e
A
2743 // "Always" variants can never block. If the lock is held as a normal mutex
2744 // then someone is mixing always and non-always calls on the same lock, which is
2745 // forbidden.
2746 panic("Spin-mutex held as full mutex %p", lock);
0a7de745 2747 }
5ba3f43e
A
2748 return FALSE;
2749 }
0a7de745
A
2750 state &= ARM_LCK_WAITERS; // Preserve waiters bit
2751 state |= (LCK_MTX_SPIN_TAG | LCK_ILOCK); // Add spin tag and maintain interlock
5ba3f43e
A
2752 ordered_store_mtx(lock, state);
2753 load_memory_barrier();
2754
0a7de745 2755#if CONFIG_DTRACE
5ba3f43e
A
2756 LOCKSTAT_RECORD(LS_LCK_MTX_TRY_SPIN_LOCK_ACQUIRE, lock, 0);
2757#endif /* CONFIG_DTRACE */
2758 return TRUE;
2759}
2760
2761/*
2762 * Routine: lck_mtx_try_lock_spin
2763 */
2764boolean_t
2765lck_mtx_try_lock_spin(lck_mtx_t *lock)
2766{
2767 return lck_mtx_try_lock_spin_internal(lock, TRUE);
2768}
2769
2770/*
2771 * Routine: lck_mtx_try_lock_spin_always
2772 */
2773boolean_t
2774lck_mtx_try_lock_spin_always(lck_mtx_t *lock)
2775{
2776 return lck_mtx_try_lock_spin_internal(lock, FALSE);
2777}
2778
2779
2780
2781/*
2782 * Routine: lck_mtx_unlock
2783 */
2784void
2785lck_mtx_unlock(lck_mtx_t *lock)
2786{
0a7de745
A
2787 thread_t thread = current_thread();
2788 uintptr_t state;
2789 boolean_t ilk_held = FALSE;
5ba3f43e
A
2790
2791 lck_mtx_verify(lock);
2792
2793 state = ordered_load_mtx(lock);
2794 if (state & LCK_ILOCK) {
0a7de745
A
2795 if (LCK_MTX_STATE_TO_THREAD(state) == (thread_t)LCK_MTX_SPIN_TAG) {
2796 ilk_held = TRUE; // Interlock is held by (presumably) this thread
2797 }
5ba3f43e
A
2798 goto slow_case;
2799 }
2800 // Locked as a mutex
cb323159
A
2801 if (os_atomic_cmpxchg(&lock->lck_mtx_data,
2802 LCK_MTX_THREAD_TO_STATE(thread), 0, release)) {
0a7de745 2803#if CONFIG_DTRACE
5ba3f43e
A
2804 LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, lock, 0);
2805#endif /* CONFIG_DTRACE */
2806 return;
2807 }
2808slow_case:
2809 lck_mtx_unlock_contended(lock, thread, ilk_held);
2810}
2811
2812static void NOINLINE
2813lck_mtx_unlock_contended(lck_mtx_t *lock, thread_t thread, boolean_t ilk_held)
2814{
0a7de745 2815 uintptr_t state;
cb323159 2816 boolean_t cleanup = FALSE;
5ba3f43e
A
2817
2818 if (ilk_held) {
2819 state = ordered_load_mtx(lock);
2820 } else {
0a7de745 2821#if __SMP__
5ba3f43e
A
2822 interlock_lock(lock);
2823 state = ordered_load_mtx(lock);
0a7de745 2824 if (thread != LCK_MTX_STATE_TO_THREAD(state)) {
5ba3f43e 2825 panic("lck_mtx_unlock(): Attempt to release lock not owned by thread (%p)", lock);
0a7de745 2826 }
5ba3f43e
A
2827#else
2828 disable_preemption_for_thread(thread);
2829 state = ordered_load_mtx(lock);
0a7de745 2830 if (state & LCK_ILOCK) {
5ba3f43e 2831 panic("lck_mtx_unlock(): Unexpected interlock set (%p)", lock);
0a7de745
A
2832 }
2833 if (thread != LCK_MTX_STATE_TO_THREAD(state)) {
5ba3f43e 2834 panic("lck_mtx_unlock(): Attempt to release lock not owned by thread (%p)", lock);
0a7de745 2835 }
5ba3f43e
A
2836 state |= LCK_ILOCK;
2837 ordered_store_mtx(lock, state);
2838#endif
d9a64523 2839 if (state & ARM_LCK_WAITERS) {
cb323159
A
2840 if (lck_mtx_unlock_wakeup(lock, thread)) {
2841 state = ARM_LCK_WAITERS;
2842 } else {
2843 state = 0;
2844 }
2845 cleanup = TRUE;
2846 goto unlock;
d9a64523 2847 }
5ba3f43e 2848 }
d9a64523 2849 state &= ARM_LCK_WAITERS; /* Clear state, retain waiters bit */
cb323159 2850unlock:
5ba3f43e
A
2851#if __SMP__
2852 state |= LCK_ILOCK;
2853 ordered_store_mtx(lock, state);
2854 interlock_unlock(lock);
2855#else
2856 ordered_store_mtx(lock, state);
2857 enable_preemption();
2858#endif
cb323159
A
2859 if (cleanup) {
2860 /*
2861 * Do not do any turnstile operations outside of this block.
 2862 * lock/unlock is called at an early stage of boot with a single thread,
 2863 * when the turnstile is not yet initialized.
 2864 * Even without contention we can come through the slow path
2865 * if the mutex is acquired as a spin lock.
2866 */
2867 turnstile_cleanup();
2868 }
5ba3f43e 2869
0a7de745 2870#if CONFIG_DTRACE
5ba3f43e
A
2871 LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, lock, 0);
2872#endif /* CONFIG_DTRACE */
2873}
2874
2875/*
2876 * Routine: lck_mtx_assert
2877 */
2878void
2879lck_mtx_assert(lck_mtx_t *lock, unsigned int type)
2880{
0a7de745
A
2881 thread_t thread, holder;
2882 uintptr_t state;
5ba3f43e
A
2883
2884 state = ordered_load_mtx(lock);
2885 holder = LCK_MTX_STATE_TO_THREAD(state);
2886 if (holder == (thread_t)LCK_MTX_SPIN_TAG) {
0a7de745
A
2887 // Lock is held in spin mode, owner is unknown.
2888 return; // Punt
5ba3f43e
A
2889 }
2890 thread = current_thread();
2891 if (type == LCK_MTX_ASSERT_OWNED) {
0a7de745 2892 if (thread != holder) {
 2893 panic("lck_mtx_assert(): mutex (%p) not owned", lock);
0a7de745 2894 }
5ba3f43e 2895 } else if (type == LCK_MTX_ASSERT_NOTOWNED) {
0a7de745 2896 if (thread == holder) {
 2897 panic("lck_mtx_assert(): mutex (%p) owned", lock);
0a7de745
A
2898 }
2899 } else {
5ba3f43e 2900 panic("lck_mtx_assert(): invalid arg (%u)", type);
0a7de745 2901 }
5ba3f43e
A
2902}
2903
2904/*
2905 * Routine: lck_mtx_ilk_unlock
2906 */
2907boolean_t
2908lck_mtx_ilk_unlock(lck_mtx_t *lock)
2909{
2910 interlock_unlock(lock);
2911 return TRUE;
2912}
2913
2914/*
2915 * Routine: lck_mtx_convert_spin
2916 *
2917 * Convert a mutex held for spin into a held full mutex
2918 */
2919void
2920lck_mtx_convert_spin(lck_mtx_t *lock)
2921{
0a7de745
A
2922 thread_t thread = current_thread();
2923 uintptr_t state;
2924 int waiters;
5ba3f43e
A
2925
2926 state = ordered_load_mtx(lock);
0a7de745
A
2927 if (LCK_MTX_STATE_TO_THREAD(state) == thread) {
2928 return; // Already owned as mutex, return
2929 }
2930 if ((state & LCK_ILOCK) == 0 || (LCK_MTX_STATE_TO_THREAD(state) != (thread_t)LCK_MTX_SPIN_TAG)) {
5ba3f43e 2931 panic("lck_mtx_convert_spin: Not held as spinlock (%p)", lock);
0a7de745
A
2932 }
2933 state &= ~(LCK_MTX_THREAD_MASK); // Clear the spin tag
5ba3f43e 2934 ordered_store_mtx(lock, state);
cb323159 2935 waiters = lck_mtx_lock_acquire(lock, NULL); // Acquire to manage priority boosts
5ba3f43e 2936 state = LCK_MTX_THREAD_TO_STATE(thread);
0a7de745 2937 if (waiters != 0) {
5ba3f43e 2938 state |= ARM_LCK_WAITERS;
0a7de745 2939 }
5ba3f43e
A
2940#if __SMP__
2941 state |= LCK_ILOCK;
0a7de745
A
2942 ordered_store_mtx(lock, state); // Set ownership
2943 interlock_unlock(lock); // Release interlock, enable preemption
5ba3f43e 2944#else
0a7de745 2945 ordered_store_mtx(lock, state); // Set ownership
5ba3f43e
A
2946 enable_preemption();
2947#endif
cb323159 2948 turnstile_cleanup();
5ba3f43e
A
2949}
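/*
 * Editorial sketch (not part of the original source): converting a mutex taken in
 * spin mode into a full mutex once the caller decides it may block. The blocking
 * operation is hypothetical.
 */
#if 0
static void
example_spin_then_block(lck_mtx_t *mtx)
{
	lck_mtx_lock_spin(mtx);		/* interlock held, spin tag set */
	/* ... short, non-blocking inspection ... */
	lck_mtx_convert_spin(mtx);	/* now held as a full mutex; blocking is allowed */
	/* ... possibly blocking work ... */
	lck_mtx_unlock(mtx);
}
#endif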
2950
2951
2952/*
2953 * Routine: lck_mtx_destroy
2954 */
2955void
2956lck_mtx_destroy(
0a7de745
A
2957 lck_mtx_t * lck,
2958 lck_grp_t * grp)
5ba3f43e 2959{
0a7de745 2960 if (lck->lck_mtx_type != LCK_MTX_TYPE) {
5ba3f43e 2961 panic("Destroying invalid mutex %p", lck);
0a7de745
A
2962 }
2963 if (lck->lck_mtx_tag == LCK_MTX_TAG_DESTROYED) {
5ba3f43e 2964 panic("Destroying previously destroyed lock %p", lck);
0a7de745 2965 }
5ba3f43e
A
2966 lck_mtx_assert(lck, LCK_MTX_ASSERT_NOTOWNED);
2967 lck->lck_mtx_tag = LCK_MTX_TAG_DESTROYED;
2968 lck_grp_lckcnt_decr(grp, LCK_TYPE_MTX);
2969 lck_grp_deallocate(grp);
2970 return;
2971}
2972
2973/*
2974 * Routine: lck_spin_assert
2975 */
2976void
2977lck_spin_assert(lck_spin_t *lock, unsigned int type)
2978{
0a7de745
A
2979 thread_t thread, holder;
2980 uintptr_t state;
5ba3f43e 2981
0a7de745 2982 if (lock->type != LCK_SPIN_TYPE) {
5ba3f43e 2983 panic("Invalid spinlock %p", lock);
0a7de745 2984 }
5ba3f43e
A
2985
2986 state = lock->lck_spin_data;
2987 holder = (thread_t)(state & ~LCK_ILOCK);
2988 thread = current_thread();
2989 if (type == LCK_ASSERT_OWNED) {
0a7de745 2990 if (holder == 0) {
5ba3f43e 2991 panic("Lock not owned %p = %lx", lock, state);
0a7de745
A
2992 }
2993 if (holder != thread) {
5ba3f43e 2994 panic("Lock not owned by current thread %p = %lx", lock, state);
0a7de745
A
2995 }
2996 if ((state & LCK_ILOCK) == 0) {
5ba3f43e 2997 panic("Lock bit not set %p = %lx", lock, state);
0a7de745 2998 }
5ba3f43e
A
2999 } else if (type == LCK_ASSERT_NOTOWNED) {
3000 if (holder != 0) {
0a7de745 3001 if (holder == thread) {
5ba3f43e 3002 panic("Lock owned by current thread %p = %lx", lock, state);
0a7de745 3003 }
5ba3f43e 3004 }
0a7de745 3005 } else {
5ba3f43e 3006 panic("lck_spin_assert(): invalid arg (%u)", type);
0a7de745 3007 }
5ba3f43e
A
3008}
3009
3010boolean_t
3011lck_rw_lock_yield_shared(lck_rw_t *lck, boolean_t force_yield)
3012{
0a7de745 3013 lck_rw_word_t word;
5ba3f43e
A
3014
3015 lck_rw_assert(lck, LCK_RW_ASSERT_SHARED);
3016
3017 word.data = ordered_load_rw(lck);
3018 if (word.want_excl || word.want_upgrade || force_yield) {
3019 lck_rw_unlock_shared(lck);
3020 mutex_pause(2);
3021 lck_rw_lock_shared(lck);
3022 return TRUE;
3023 }
3024
3025 return FALSE;
3026}
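/*
 * Editorial sketch (not part of the original source): yielding a shared hold
 * periodically during a long scan so that pending writers can make progress.
 * The iteration and its bounds are hypothetical.
 */
#if 0
static void
example_long_scan(lck_rw_t *rw)
{
	int i;

	lck_rw_lock_shared(rw);
	for (i = 0; i < 100000; i++) {
		/* ... examine one element under the shared lock ... */
		if ((i % 1024) == 0) {
			/* Drops and re-takes the lock only if a writer is waiting. */
			(void)lck_rw_lock_yield_shared(rw, FALSE);
		}
	}
	(void)lck_rw_done(rw);
}
#endif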
3027
3028/*
3029 * Routine: kdp_lck_mtx_lock_spin_is_acquired
3030 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
3031 */
3032boolean_t
3033kdp_lck_mtx_lock_spin_is_acquired(lck_mtx_t *lck)
3034{
0a7de745 3035 uintptr_t state;
5ba3f43e
A
3036
3037 if (not_in_kdp) {
3038 panic("panic: spinlock acquired check done outside of kernel debugger");
3039 }
3040 state = ordered_load_mtx(lck);
0a7de745 3041 if (state == LCK_MTX_TAG_DESTROYED) {
5ba3f43e 3042 return FALSE;
0a7de745
A
3043 }
3044 if (LCK_MTX_STATE_TO_THREAD(state) || (state & LCK_ILOCK)) {
5ba3f43e 3045 return TRUE;
0a7de745 3046 }
5ba3f43e
A
3047 return FALSE;
3048}
3049
3050void
3051kdp_lck_mtx_find_owner(__unused struct waitq * waitq, event64_t event, thread_waitinfo_t * waitinfo)
3052{
3053 lck_mtx_t * mutex = LCK_EVENT_TO_MUTEX(event);
3054 waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(mutex);
3055 uintptr_t state = ordered_load_mtx(mutex);
3056 thread_t holder = LCK_MTX_STATE_TO_THREAD(state);
3057 if ((uintptr_t)holder == (uintptr_t)LCK_MTX_SPIN_TAG) {
3058 waitinfo->owner = STACKSHOT_WAITOWNER_MTXSPIN;
3059 } else {
3060 assertf(state != (uintptr_t)LCK_MTX_TAG_DESTROYED, "state=0x%llx", (uint64_t)state);
3061 assertf(state != (uintptr_t)LCK_MTX_TAG_INDIRECT, "state=0x%llx", (uint64_t)state);
3062 waitinfo->owner = thread_tid(holder);
3063 }
3064}
3065
3066void
3067kdp_rwlck_find_owner(__unused struct waitq * waitq, event64_t event, thread_waitinfo_t * waitinfo)
3068{
0a7de745
A
3069 lck_rw_t *rwlck = NULL;
3070 switch (waitinfo->wait_type) {
3071 case kThreadWaitKernelRWLockRead:
3072 rwlck = READ_EVENT_TO_RWLOCK(event);
3073 break;
3074 case kThreadWaitKernelRWLockWrite:
3075 case kThreadWaitKernelRWLockUpgrade:
3076 rwlck = WRITE_EVENT_TO_RWLOCK(event);
3077 break;
3078 default:
3079 panic("%s was called with an invalid blocking type", __FUNCTION__);
3080 break;
5ba3f43e
A
3081 }
3082 waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(rwlck);
3083 waitinfo->owner = thread_tid(rwlck->lck_rw_owner);
3084}