/*
 * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */

#define ATOMIC_PRIVATE 1
#define LOCK_PRIVATE 1

#include <mach_ldebug.h>
#include <debug.h>

#include <mach/kern_return.h>
#include <mach/mach_host_server.h>
#include <mach_debug/lockgroup_info.h>

#include <kern/locks.h>
#include <kern/misc_protos.h>
#include <kern/kalloc.h>
#include <kern/thread.h>
#include <kern/processor.h>
#include <kern/sched_prim.h>
#include <kern/debug.h>
#include <machine/atomic.h>
#include <machine/machine_cpu.h>
#include <string.h>


#include <sys/kdebug.h>

#if CONFIG_DTRACE
/*
 * We need only enough declarations from the BSD-side to be able to
 * test if our probe is active, and to call __dtrace_probe(). Setting
 * NEED_DTRACE_DEFS gets a local copy of those definitions pulled in.
 */
#define NEED_DTRACE_DEFS
#include <../bsd/sys/lockstat.h>
#endif

#define LCK_MTX_SLEEP_CODE		0
#define LCK_MTX_SLEEP_DEADLINE_CODE	1
#define LCK_MTX_LCK_WAIT_CODE		2
#define LCK_MTX_UNLCK_WAKEUP_CODE	3

#if MACH_LDEBUG
#define ALIGN_TEST(p,t) do{if((uintptr_t)p&(sizeof(t)-1)) __builtin_trap();}while(0)
#else
#define ALIGN_TEST(p,t) do{}while(0)
#endif

/* Silence the volatile to _Atomic cast warning */
#define ATOMIC_CAST(t,p) ((_Atomic t*)(uintptr_t)(p))

/* Enforce program order of loads and stores. */
#define ordered_load(target, type) \
		__c11_atomic_load((_Atomic type *)(target), memory_order_relaxed)
#define ordered_store(target, type, value) \
		__c11_atomic_store((_Atomic type *)(target), value, memory_order_relaxed)

#define ordered_load_hw(lock)		ordered_load(&(lock)->lock_data, uintptr_t)
#define ordered_store_hw(lock, value)	ordered_store(&(lock)->lock_data, uintptr_t, (value))

#define NOINLINE __attribute__((noinline))

static queue_head_t	lck_grp_queue;
static unsigned int	lck_grp_cnt;

decl_lck_mtx_data(static,lck_grp_lock)
static lck_mtx_ext_t lck_grp_lock_ext;

lck_grp_attr_t	LockDefaultGroupAttr;
lck_grp_t	LockCompatGroup;
lck_attr_t	LockDefaultLckAttr;

#if CONFIG_DTRACE && __SMP__
#if defined (__x86_64__)
uint64_t dtrace_spin_threshold = 500; // 500ns
#elif defined(__arm__) || defined(__arm64__)
uint64_t dtrace_spin_threshold = LOCK_PANIC_TIMEOUT / 1000000; // 500ns
#endif
#endif

/*
 * Routine:	lck_mod_init
 */

void
lck_mod_init(
	void)
{
	/*
	 * Obtain the "lcks" boot-arg options: this currently controls lock statistics
	 */
	if (!PE_parse_boot_argn("lcks", &LcksOpts, sizeof (LcksOpts)))
		LcksOpts = 0;


#if (DEVELOPMENT || DEBUG) && defined(__x86_64__)
	if (!PE_parse_boot_argn("-disable_mtx_chk", &LckDisablePreemptCheck, sizeof (LckDisablePreemptCheck)))
		LckDisablePreemptCheck = 0;
#endif /* (DEVELOPMENT || DEBUG) && defined(__x86_64__) */

	queue_init(&lck_grp_queue);

	/*
	 * Need to bootstrap the LockCompatGroup instead of calling lck_grp_init() here. This avoids
	 * grabbing the lck_grp_lock before it is initialized.
	 */

	bzero(&LockCompatGroup, sizeof(lck_grp_t));
	(void) strncpy(LockCompatGroup.lck_grp_name, "Compatibility APIs", LCK_GRP_MAX_NAME);

	if (LcksOpts & enaLkStat)
		LockCompatGroup.lck_grp_attr = LCK_GRP_ATTR_STAT;
	else
		LockCompatGroup.lck_grp_attr = LCK_ATTR_NONE;

	LockCompatGroup.lck_grp_refcnt = 1;

	enqueue_tail(&lck_grp_queue, (queue_entry_t)&LockCompatGroup);
	lck_grp_cnt = 1;

	lck_grp_attr_setdefault(&LockDefaultGroupAttr);
	lck_attr_setdefault(&LockDefaultLckAttr);

	lck_mtx_init_ext(&lck_grp_lock, &lck_grp_lock_ext, &LockCompatGroup, &LockDefaultLckAttr);
}

/*
 * Routine:	lck_grp_attr_alloc_init
 */

lck_grp_attr_t *
lck_grp_attr_alloc_init(
	void)
{
	lck_grp_attr_t	*attr;

	if ((attr = (lck_grp_attr_t *)kalloc(sizeof(lck_grp_attr_t))) != 0)
		lck_grp_attr_setdefault(attr);

	return(attr);
}


/*
 * Routine:	lck_grp_attr_setdefault
 */

void
lck_grp_attr_setdefault(
	lck_grp_attr_t	*attr)
{
	if (LcksOpts & enaLkStat)
		attr->grp_attr_val = LCK_GRP_ATTR_STAT;
	else
		attr->grp_attr_val = 0;
}


/*
 * Routine:	lck_grp_attr_setstat
 */

void
lck_grp_attr_setstat(
	lck_grp_attr_t	*attr)
{
	(void)hw_atomic_or(&attr->grp_attr_val, LCK_GRP_ATTR_STAT);
}


/*
 * Routine:	lck_grp_attr_free
 */

void
lck_grp_attr_free(
	lck_grp_attr_t	*attr)
{
	kfree(attr, sizeof(lck_grp_attr_t));
}


/*
 * Routine:	lck_grp_alloc_init
 */

lck_grp_t *
lck_grp_alloc_init(
	const char*	grp_name,
	lck_grp_attr_t	*attr)
{
	lck_grp_t	*grp;

	if ((grp = (lck_grp_t *)kalloc(sizeof(lck_grp_t))) != 0)
		lck_grp_init(grp, grp_name, attr);

	return(grp);
}

/*
 * Routine:	lck_grp_init
 */

void
lck_grp_init(lck_grp_t * grp, const char * grp_name, lck_grp_attr_t * attr)
{
	/* make sure locking infrastructure has been initialized */
	assert(lck_grp_cnt > 0);

	bzero((void *)grp, sizeof(lck_grp_t));

	(void)strlcpy(grp->lck_grp_name, grp_name, LCK_GRP_MAX_NAME);

	if (attr != LCK_GRP_ATTR_NULL)
		grp->lck_grp_attr = attr->grp_attr_val;
	else if (LcksOpts & enaLkStat)
		grp->lck_grp_attr = LCK_GRP_ATTR_STAT;
	else
		grp->lck_grp_attr = LCK_ATTR_NONE;

	grp->lck_grp_refcnt = 1;

	lck_mtx_lock(&lck_grp_lock);
	enqueue_tail(&lck_grp_queue, (queue_entry_t)grp);
	lck_grp_cnt++;
	lck_mtx_unlock(&lck_grp_lock);
}

/*
 * Routine:	lck_grp_free
 */

void
lck_grp_free(
	lck_grp_t	*grp)
{
	lck_mtx_lock(&lck_grp_lock);
	lck_grp_cnt--;
	(void)remque((queue_entry_t)grp);
	lck_mtx_unlock(&lck_grp_lock);
	lck_grp_deallocate(grp);
}


/*
 * Routine:	lck_grp_reference
 */

void
lck_grp_reference(
	lck_grp_t	*grp)
{
	(void)hw_atomic_add(&grp->lck_grp_refcnt, 1);
}


/*
 * Routine:	lck_grp_deallocate
 */

void
lck_grp_deallocate(
	lck_grp_t	*grp)
{
	if (hw_atomic_sub(&grp->lck_grp_refcnt, 1) == 0)
		kfree(grp, sizeof(lck_grp_t));
}

/*
 * Routine:	lck_grp_lckcnt_incr
 */

void
lck_grp_lckcnt_incr(
	lck_grp_t	*grp,
	lck_type_t	lck_type)
{
	unsigned int	*lckcnt;

	switch (lck_type) {
	case LCK_TYPE_SPIN:
		lckcnt = &grp->lck_grp_spincnt;
		break;
	case LCK_TYPE_MTX:
		lckcnt = &grp->lck_grp_mtxcnt;
		break;
	case LCK_TYPE_RW:
		lckcnt = &grp->lck_grp_rwcnt;
		break;
	default:
		return panic("lck_grp_lckcnt_incr(): invalid lock type: %d\n", lck_type);
	}

	(void)hw_atomic_add(lckcnt, 1);
}

/*
 * Routine:	lck_grp_lckcnt_decr
 */

void
lck_grp_lckcnt_decr(
	lck_grp_t	*grp,
	lck_type_t	lck_type)
{
	unsigned int	*lckcnt;
	int		updated;

	switch (lck_type) {
	case LCK_TYPE_SPIN:
		lckcnt = &grp->lck_grp_spincnt;
		break;
	case LCK_TYPE_MTX:
		lckcnt = &grp->lck_grp_mtxcnt;
		break;
	case LCK_TYPE_RW:
		lckcnt = &grp->lck_grp_rwcnt;
		break;
	default:
		panic("lck_grp_lckcnt_decr(): invalid lock type: %d\n", lck_type);
		return;
	}

	updated = (int)hw_atomic_sub(lckcnt, 1);
	assert(updated >= 0);
}

/*
 * Routine:	lck_attr_alloc_init
 */

lck_attr_t *
lck_attr_alloc_init(
	void)
{
	lck_attr_t	*attr;

	if ((attr = (lck_attr_t *)kalloc(sizeof(lck_attr_t))) != 0)
		lck_attr_setdefault(attr);

	return(attr);
}


/*
 * Routine:	lck_attr_setdefault
 */

void
lck_attr_setdefault(
	lck_attr_t	*attr)
{
#if __arm__ || __arm64__
	/* <rdar://problem/4404579>: Using LCK_ATTR_DEBUG here causes panic at boot time for arm */
	attr->lck_attr_val = LCK_ATTR_NONE;
#elif __i386__ || __x86_64__
#if !DEBUG
	if (LcksOpts & enaLkDeb)
		attr->lck_attr_val = LCK_ATTR_DEBUG;
	else
		attr->lck_attr_val = LCK_ATTR_NONE;
#else
	attr->lck_attr_val = LCK_ATTR_DEBUG;
#endif /* !DEBUG */
#else
#error Unknown architecture.
#endif /* __arm__ */
}


/*
 * Routine:	lck_attr_setdebug
 */
void
lck_attr_setdebug(
	lck_attr_t	*attr)
{
	(void)hw_atomic_or(&attr->lck_attr_val, LCK_ATTR_DEBUG);
}

/*
 * Routine:	lck_attr_cleardebug
 */
void
lck_attr_cleardebug(
	lck_attr_t	*attr)
{
	(void)hw_atomic_and(&attr->lck_attr_val, ~LCK_ATTR_DEBUG);
}


/*
 * Routine:	lck_attr_rw_shared_priority
 */
void
lck_attr_rw_shared_priority(
	lck_attr_t	*attr)
{
	(void)hw_atomic_or(&attr->lck_attr_val, LCK_ATTR_RW_SHARED_PRIORITY);
}


/*
 * Routine:	lck_attr_free
 */
void
lck_attr_free(
	lck_attr_t	*attr)
{
	kfree(attr, sizeof(lck_attr_t));
}

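/*
 * Illustrative sketch (comment only, not part of this file's code): the
 * typical client sequence for the group/attribute routines above, as
 * exported through kern/locks.h. The names my_grp_attr, my_grp, my_attr
 * and my_mtx are hypothetical placeholders.
 *
 *	lck_grp_attr_t *my_grp_attr = lck_grp_attr_alloc_init();
 *	lck_grp_attr_setstat(my_grp_attr);		// opt this group into statistics
 *	lck_grp_t      *my_grp  = lck_grp_alloc_init("my.subsystem", my_grp_attr);
 *	lck_attr_t     *my_attr = lck_attr_alloc_init();
 *	lck_mtx_t      *my_mtx  = lck_mtx_alloc_init(my_grp, my_attr);
 *
 *	lck_mtx_lock(my_mtx);
 *	//	... critical section ...
 *	lck_mtx_unlock(my_mtx);
 *
 *	lck_mtx_free(my_mtx, my_grp);			// drops the group's mutex count
 *	lck_attr_free(my_attr);
 *	lck_grp_attr_free(my_grp_attr);
 *	lck_grp_free(my_grp);				// releases the lck_grp_alloc_init() reference
 */
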
/*
 * Routine: hw_lock_init
 *
 *	Initialize a hardware lock.
 */
void
hw_lock_init(hw_lock_t lock)
{
	ordered_store_hw(lock, 0);
}

/*
 * Routine: hw_lock_lock_contended
 *
 *	Spin until lock is acquired or timeout expires.
 *	timeout is in mach_absolute_time ticks. Called with
 *	preemption disabled.
 */

#if __SMP__
static unsigned int NOINLINE
hw_lock_lock_contended(hw_lock_t lock, uintptr_t data, uint64_t timeout, boolean_t do_panic)
{
	uint64_t	end = 0;
	uintptr_t	holder = lock->lock_data;
	int		i;

	if (timeout == 0)
		timeout = LOCK_PANIC_TIMEOUT;
#if CONFIG_DTRACE
	uint64_t begin;
	boolean_t dtrace_enabled = lockstat_probemap[LS_LCK_SPIN_LOCK_SPIN] != 0;
	if (__improbable(dtrace_enabled))
		begin = mach_absolute_time();
#endif
	for ( ; ; ) {
		for (i = 0; i < LOCK_SNOOP_SPINS; i++) {
			cpu_pause();
#if (!__ARM_ENABLE_WFE_) || (LOCK_PRETEST)
			holder = ordered_load_hw(lock);
			if (holder != 0)
				continue;
#endif
			if (atomic_compare_exchange(&lock->lock_data, 0, data,
			    memory_order_acquire_smp, TRUE)) {
#if CONFIG_DTRACE
				if (__improbable(dtrace_enabled)) {
					uint64_t spintime = mach_absolute_time() - begin;
					if (spintime > dtrace_spin_threshold)
						LOCKSTAT_RECORD2(LS_LCK_SPIN_LOCK_SPIN, lock, spintime, dtrace_spin_threshold);
				}
#endif
				return 1;
			}
		}
		if (end == 0) {
			end = ml_get_timebase() + timeout;
		}
		else if (ml_get_timebase() >= end)
			break;
	}
	if (do_panic) {
		// Capture the actual time spent blocked, which may be higher than the timeout
		// if a misbehaving interrupt stole this thread's CPU time.
		panic("Spinlock timeout after %llu ticks, %p = %lx",
			(ml_get_timebase() - end + timeout), lock, holder);
	}
	return 0;
}
#endif	// __SMP__

/*
 * Routine: hw_lock_lock
 *
 *	Acquire lock, spinning until it becomes available,
 *	return with preemption disabled.
 */
void
hw_lock_lock(hw_lock_t lock)
{
	thread_t	thread;
	uintptr_t	state;

	thread = current_thread();
	disable_preemption_for_thread(thread);
	state = LCK_MTX_THREAD_TO_STATE(thread) | PLATFORM_LCK_ILOCK;
#if __SMP__

#if LOCK_PRETEST
	if (ordered_load_hw(lock))
		goto contended;
#endif	// LOCK_PRETEST
	if (atomic_compare_exchange(&lock->lock_data, 0, state,
	    memory_order_acquire_smp, TRUE)) {
		goto end;
	}
#if LOCK_PRETEST
contended:
#endif	// LOCK_PRETEST
	hw_lock_lock_contended(lock, state, 0, TRUE);
end:
#else	// __SMP__
	if (lock->lock_data)
		panic("Spinlock held %p", lock);
	lock->lock_data = state;
#endif	// __SMP__
#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE, lock, 0);
#endif
	return;
}

/*
 * Routine: hw_lock_to
 *
 *	Acquire lock, spinning until it becomes available or timeout.
 *	Timeout is in mach_absolute_time ticks, return with
 *	preemption disabled.
 */
unsigned int
hw_lock_to(hw_lock_t lock, uint64_t timeout)
{
	thread_t	thread;
	uintptr_t	state;
	unsigned int	success = 0;

	thread = current_thread();
	disable_preemption_for_thread(thread);
	state = LCK_MTX_THREAD_TO_STATE(thread) | PLATFORM_LCK_ILOCK;
#if __SMP__

#if LOCK_PRETEST
	if (ordered_load_hw(lock))
		goto contended;
#endif	// LOCK_PRETEST
	if (atomic_compare_exchange(&lock->lock_data, 0, state,
	    memory_order_acquire_smp, TRUE)) {
		success = 1;
		goto end;
	}
#if LOCK_PRETEST
contended:
#endif	// LOCK_PRETEST
	success = hw_lock_lock_contended(lock, state, timeout, FALSE);
end:
#else	// __SMP__
	(void)timeout;
	if (ordered_load_hw(lock) == 0) {
		ordered_store_hw(lock, state);
		success = 1;
	}
#endif	// __SMP__
#if CONFIG_DTRACE
	if (success)
		LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE, lock, 0);
#endif
	return success;
}

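/*
 * Illustrative sketch (comment only): hw_lock_to() takes its timeout in
 * mach_absolute_time ticks, so wall-clock bounds have to be converted
 * first. "my_lock" is a hypothetical hw_lock_data_t.
 *
 *	uint64_t timeout_ticks;
 *
 *	nanoseconds_to_absolutetime(100 * NSEC_PER_USEC, &timeout_ticks);
 *	if (hw_lock_to(&my_lock, timeout_ticks)) {
 *		// lock held, preemption disabled
 *		hw_lock_unlock(&my_lock);	// also re-enables preemption
 *	} else {
 *		// timed out; no panic on this path (do_panic == FALSE)
 *	}
 */
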
/*
 * Routine: hw_lock_try
 *
 *	returns with preemption disabled on success.
 */
unsigned int
hw_lock_try(hw_lock_t lock)
{
	thread_t	thread = current_thread();
	int		success = 0;
#if LOCK_TRY_DISABLE_INT
	long		intmask;

	intmask = disable_interrupts();
#else
	disable_preemption_for_thread(thread);
#endif	// LOCK_TRY_DISABLE_INT

#if __SMP__
#if LOCK_PRETEST
	if (ordered_load_hw(lock))
		goto failed;
#endif	// LOCK_PRETEST
	success = atomic_compare_exchange(&lock->lock_data, 0, LCK_MTX_THREAD_TO_STATE(thread) | PLATFORM_LCK_ILOCK,
	    memory_order_acquire_smp, FALSE);
#else
	if (lock->lock_data == 0) {
		lock->lock_data = LCK_MTX_THREAD_TO_STATE(thread) | PLATFORM_LCK_ILOCK;
		success = 1;
	}
#endif	// __SMP__

#if LOCK_TRY_DISABLE_INT
	if (success)
		disable_preemption_for_thread(thread);
#if LOCK_PRETEST
failed:
#endif	// LOCK_PRETEST
	restore_interrupts(intmask);
#else
#if LOCK_PRETEST
failed:
#endif	// LOCK_PRETEST
	if (!success)
		enable_preemption();
#endif	// LOCK_TRY_DISABLE_INT
#if CONFIG_DTRACE
	if (success)
		LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE, lock, 0);
#endif
	return success;
}

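/*
 * Illustrative sketch (comment only): hw_lock_try() never spins, so it is
 * usable where busy-waiting is not acceptable; on success the caller owns
 * the lock with preemption disabled and must hw_lock_unlock() it.
 * "my_lock" is a hypothetical hw_lock_data_t.
 *
 *	if (hw_lock_try(&my_lock)) {
 *		// critical section, preemption disabled
 *		hw_lock_unlock(&my_lock);
 *	} else {
 *		// lock busy; fall back to another strategy
 *	}
 */
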
/*
 * Routine: hw_lock_unlock
 *
 *	Unconditionally release lock, release preemption level.
 */
void
hw_lock_unlock(hw_lock_t lock)
{
	__c11_atomic_store((_Atomic uintptr_t *)&lock->lock_data, 0, memory_order_release_smp);
#if __arm__ || __arm64__
	// ARM tests are only for open-source exclusion
	set_event();
#endif	// __arm__ || __arm64__
#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_SPIN_UNLOCK_RELEASE, lock, 0);
#endif /* CONFIG_DTRACE */
	enable_preemption();
}

/*
 * Routine hw_lock_held, doesn't change preemption state.
 * N.B.  Racy, of course.
 */
unsigned int
hw_lock_held(hw_lock_t lock)
{
	return (ordered_load_hw(lock) != 0);
}

/*
 * Routine:	lck_spin_sleep
 */
wait_result_t
lck_spin_sleep(
	lck_spin_t		*lck,
	lck_sleep_action_t	lck_sleep_action,
	event_t			event,
	wait_interrupt_t	interruptible)
{
	wait_result_t	res;

	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
		panic("Invalid lock sleep action %x\n", lck_sleep_action);

	res = assert_wait(event, interruptible);
	if (res == THREAD_WAITING) {
		lck_spin_unlock(lck);
		res = thread_block(THREAD_CONTINUE_NULL);
		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK))
			lck_spin_lock(lck);
	}
	else
	if (lck_sleep_action & LCK_SLEEP_UNLOCK)
		lck_spin_unlock(lck);

	return res;
}

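/*
 * Illustrative sketch (comment only): lck_spin_sleep() atomically drops the
 * spin lock, blocks on "event", and (unless LCK_SLEEP_UNLOCK is passed)
 * re-takes the lock before returning, so the usual wait loop looks like:
 *
 *	lck_spin_lock(my_lock);
 *	while (!my_condition)
 *		(void) lck_spin_sleep(my_lock, LCK_SLEEP_DEFAULT,
 *		    (event_t)&my_condition, THREAD_UNINT);
 *	// my_condition is true and my_lock is held here
 *	lck_spin_unlock(my_lock);
 *
 * my_lock and my_condition are hypothetical names.
 */
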

/*
 * Routine:	lck_spin_sleep_deadline
 */
wait_result_t
lck_spin_sleep_deadline(
	lck_spin_t		*lck,
	lck_sleep_action_t	lck_sleep_action,
	event_t			event,
	wait_interrupt_t	interruptible,
	uint64_t		deadline)
{
	wait_result_t   res;

	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
		panic("Invalid lock sleep action %x\n", lck_sleep_action);

	res = assert_wait_deadline(event, interruptible, deadline);
	if (res == THREAD_WAITING) {
		lck_spin_unlock(lck);
		res = thread_block(THREAD_CONTINUE_NULL);
		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK))
			lck_spin_lock(lck);
	}
	else
	if (lck_sleep_action & LCK_SLEEP_UNLOCK)
		lck_spin_unlock(lck);

	return res;
}


/*
 * Routine:	lck_mtx_clear_promoted
 *
 * Handle clearing of TH_SFLAG_PROMOTED,
 * adjusting thread priority as needed.
 *
 * Called with thread lock held
 */
static void
lck_mtx_clear_promoted (
	thread_t			thread,
	__kdebug_only uintptr_t		trace_lck)
{
	thread->sched_flags &= ~TH_SFLAG_PROMOTED;

	if (thread->sched_flags & TH_SFLAG_RW_PROMOTED) {
		/* Thread still has a RW lock promotion */
	} else if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
		KERNEL_DEBUG_CONSTANT(
			MACHDBG_CODE(DBG_MACH_SCHED,MACH_DEMOTE) | DBG_FUNC_NONE,
			thread->sched_pri, DEPRESSPRI, 0, trace_lck, 0);
		set_sched_pri(thread, DEPRESSPRI);
	} else {
		if (thread->base_pri < thread->sched_pri) {
			KERNEL_DEBUG_CONSTANT(
				MACHDBG_CODE(DBG_MACH_SCHED,MACH_DEMOTE) | DBG_FUNC_NONE,
				thread->sched_pri, thread->base_pri, 0, trace_lck, 0);
		}
		thread_recompute_sched_pri(thread, FALSE);
	}
}


/*
 * Routine:	lck_mtx_sleep
 */
wait_result_t
lck_mtx_sleep(
	lck_mtx_t		*lck,
	lck_sleep_action_t	lck_sleep_action,
	event_t			event,
	wait_interrupt_t	interruptible)
{
	wait_result_t	res;
	thread_t	thread = current_thread();

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_START,
		VM_KERNEL_UNSLIDE_OR_PERM(lck), (int)lck_sleep_action, VM_KERNEL_UNSLIDE_OR_PERM(event), (int)interruptible, 0);

	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
		panic("Invalid lock sleep action %x\n", lck_sleep_action);

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		/*
		 * We overload the RW lock promotion to give us a priority ceiling
		 * during the time that this thread is asleep, so that when it
		 * is re-awakened (and not yet contending on the mutex), it is
		 * runnable at a reasonably high priority.
		 */
		thread->rwlock_count++;
	}

	res = assert_wait(event, interruptible);
	if (res == THREAD_WAITING) {
		lck_mtx_unlock(lck);
		res = thread_block(THREAD_CONTINUE_NULL);
		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
			if ((lck_sleep_action & LCK_SLEEP_SPIN))
				lck_mtx_lock_spin(lck);
			else if ((lck_sleep_action & LCK_SLEEP_SPIN_ALWAYS))
				lck_mtx_lock_spin_always(lck);
			else
				lck_mtx_lock(lck);
		}
	}
	else
	if (lck_sleep_action & LCK_SLEEP_UNLOCK)
		lck_mtx_unlock(lck);

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
			/* sched_flags checked without lock, but will be rechecked while clearing */
			lck_rw_clear_promotion(thread);
		}
	}

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0);

	return res;
}

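/*
 * Illustrative sketch (comment only): the canonical condition-wait built on
 * lck_mtx_sleep(), with the waker using thread_wakeup(). Passing
 * LCK_SLEEP_PROMOTED_PRI keeps the sleeper at a priority floor while it is
 * blocked, per the comment above. my_mtx and my_flag are hypothetical.
 *
 *	// waiter
 *	lck_mtx_lock(my_mtx);
 *	while (!my_flag)
 *		(void) lck_mtx_sleep(my_mtx, LCK_SLEEP_PROMOTED_PRI,
 *		    (event_t)&my_flag, THREAD_UNINT);
 *	lck_mtx_unlock(my_mtx);
 *
 *	// waker
 *	lck_mtx_lock(my_mtx);
 *	my_flag = TRUE;
 *	thread_wakeup((event_t)&my_flag);
 *	lck_mtx_unlock(my_mtx);
 */
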

/*
 * Routine:	lck_mtx_sleep_deadline
 */
wait_result_t
lck_mtx_sleep_deadline(
	lck_mtx_t		*lck,
	lck_sleep_action_t	lck_sleep_action,
	event_t			event,
	wait_interrupt_t	interruptible,
	uint64_t		deadline)
{
	wait_result_t	res;
	thread_t	thread = current_thread();

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_START,
		VM_KERNEL_UNSLIDE_OR_PERM(lck), (int)lck_sleep_action, VM_KERNEL_UNSLIDE_OR_PERM(event), (int)interruptible, 0);

	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
		panic("Invalid lock sleep action %x\n", lck_sleep_action);

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		/*
		 * See lck_mtx_sleep().
		 */
		thread->rwlock_count++;
	}

	res = assert_wait_deadline(event, interruptible, deadline);
	if (res == THREAD_WAITING) {
		lck_mtx_unlock(lck);
		res = thread_block(THREAD_CONTINUE_NULL);
		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
			if ((lck_sleep_action & LCK_SLEEP_SPIN))
				lck_mtx_lock_spin(lck);
			else
				lck_mtx_lock(lck);
		}
	}
	else
	if (lck_sleep_action & LCK_SLEEP_UNLOCK)
		lck_mtx_unlock(lck);

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
			/* sched_flags checked without lock, but will be rechecked while clearing */
			lck_rw_clear_promotion(thread);
		}
	}

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0);

	return res;
}

/*
 * Routine:	lck_mtx_lock_wait
 *
 * Invoked in order to wait on contention.
 *
 * Called with the interlock locked and
 * returns it unlocked.
 */
void
lck_mtx_lock_wait (
	lck_mtx_t	*lck,
	thread_t	holder)
{
	thread_t	self = current_thread();
	lck_mtx_t	*mutex;
	__kdebug_only uintptr_t	trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
	__kdebug_only uintptr_t	trace_holder = VM_KERNEL_UNSLIDE_OR_PERM(holder);
	integer_t	priority;
	spl_t		s = splsched();
#if CONFIG_DTRACE
	uint64_t	sleep_start = 0;

	if (lockstat_probemap[LS_LCK_MTX_LOCK_BLOCK] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_BLOCK]) {
		sleep_start = mach_absolute_time();
	}
#endif

	if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)
		mutex = lck;
	else
		mutex = &lck->lck_mtx_ptr->lck_mtx;

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START, trace_lck, trace_holder, 0, 0, 0);

	priority = self->sched_pri;
	if (priority < self->base_pri)
		priority = self->base_pri;
	if (priority < BASEPRI_DEFAULT)
		priority = BASEPRI_DEFAULT;

	/* Do not promote past promotion ceiling */
	priority = MIN(priority, MAXPRI_PROMOTE);

	thread_lock(holder);
	if (mutex->lck_mtx_pri == 0) {
		holder->promotions++;
		holder->sched_flags |= TH_SFLAG_PROMOTED;
	}

	if (mutex->lck_mtx_pri < priority && holder->sched_pri < priority) {
		KERNEL_DEBUG_CONSTANT(
			MACHDBG_CODE(DBG_MACH_SCHED,MACH_PROMOTE) | DBG_FUNC_NONE,
			holder->sched_pri, priority, trace_holder, trace_lck, 0);
		set_sched_pri(holder, priority);
	}
	thread_unlock(holder);
	splx(s);

	if (mutex->lck_mtx_pri < priority)
		mutex->lck_mtx_pri = priority;
	if (self->pending_promoter[self->pending_promoter_index] == NULL) {
		self->pending_promoter[self->pending_promoter_index] = mutex;
		mutex->lck_mtx_waiters++;
	}
	else
	if (self->pending_promoter[self->pending_promoter_index] != mutex) {
		self->pending_promoter[++self->pending_promoter_index] = mutex;
		mutex->lck_mtx_waiters++;
	}

	thread_set_pending_block_hint(self, kThreadWaitKernelMutex);
	assert_wait(LCK_MTX_EVENT(mutex), THREAD_UNINT);
	lck_mtx_ilk_unlock(mutex);

	thread_block(THREAD_CONTINUE_NULL);

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_END, 0, 0, 0, 0, 0);
#if CONFIG_DTRACE
	/*
	 * Record the DTrace lockstat probe for blocking, block time
	 * measured from when we were entered.
	 */
	if (sleep_start) {
		if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) {
			LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_BLOCK, lck,
			    mach_absolute_time() - sleep_start);
		} else {
			LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_BLOCK, lck,
			    mach_absolute_time() - sleep_start);
		}
	}
#endif
}

/*
 * Routine: 	lck_mtx_lock_acquire
 *
 * Invoked on acquiring the mutex when there is
 * contention.
 *
 * Returns the current number of waiters.
 *
 * Called with the interlock locked.
 */
int
lck_mtx_lock_acquire(
	lck_mtx_t	*lck)
{
	thread_t	thread = current_thread();
	lck_mtx_t	*mutex;
	integer_t	priority;
	spl_t		s;
	__kdebug_only uintptr_t	trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);

	if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)
		mutex = lck;
	else
		mutex = &lck->lck_mtx_ptr->lck_mtx;

	if (thread->pending_promoter[thread->pending_promoter_index] == mutex) {
		thread->pending_promoter[thread->pending_promoter_index] = NULL;
		if (thread->pending_promoter_index > 0)
			thread->pending_promoter_index--;
		mutex->lck_mtx_waiters--;
	}

	if (mutex->lck_mtx_waiters)
		priority = mutex->lck_mtx_pri;
	else {
		mutex->lck_mtx_pri = 0;
		priority = 0;
	}

	if (priority || thread->was_promoted_on_wakeup) {
		s = splsched();
		thread_lock(thread);

		if (priority) {
			thread->promotions++;
			thread->sched_flags |= TH_SFLAG_PROMOTED;
			if (thread->sched_pri < priority) {
				KERNEL_DEBUG_CONSTANT(
					MACHDBG_CODE(DBG_MACH_SCHED,MACH_PROMOTE) | DBG_FUNC_NONE,
					thread->sched_pri, priority, 0, trace_lck, 0);
				/* Do not promote past promotion ceiling */
				assert(priority <= MAXPRI_PROMOTE);
				set_sched_pri(thread, priority);
			}
		}
		if (thread->was_promoted_on_wakeup) {
			thread->was_promoted_on_wakeup = 0;
			if (thread->promotions == 0)
				lck_mtx_clear_promoted(thread, trace_lck);
		}

		thread_unlock(thread);
		splx(s);
	}

#if CONFIG_DTRACE
	if (lockstat_probemap[LS_LCK_MTX_LOCK_ACQUIRE] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_ACQUIRE]) {
		if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) {
			LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, lck, 0);
		} else {
			LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_ACQUIRE, lck, 0);
		}
	}
#endif
	return (mutex->lck_mtx_waiters);
}

/*
 * Routine: 	lck_mtx_unlock_wakeup
 *
 * Invoked on unlock when there is contention.
 *
 * Called with the interlock locked.
 */
void
lck_mtx_unlock_wakeup (
	lck_mtx_t	*lck,
	thread_t	holder)
{
	thread_t	thread = current_thread();
	lck_mtx_t	*mutex;
	__kdebug_only uintptr_t	trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);

	if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)
		mutex = lck;
	else
		mutex = &lck->lck_mtx_ptr->lck_mtx;

	if (thread != holder)
		panic("lck_mtx_unlock_wakeup: mutex %p holder %p\n", mutex, holder);

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_START, trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(holder), 0, 0, 0);

	assert(mutex->lck_mtx_waiters > 0);
	if (mutex->lck_mtx_waiters > 1)
		thread_wakeup_one_with_pri(LCK_MTX_EVENT(lck), lck->lck_mtx_pri);
	else
		thread_wakeup_one(LCK_MTX_EVENT(lck));

	if (thread->promotions > 0) {
		spl_t	s = splsched();

		thread_lock(thread);
		if (--thread->promotions == 0 && (thread->sched_flags & TH_SFLAG_PROMOTED))
			lck_mtx_clear_promoted(thread, trace_lck);
		thread_unlock(thread);
		splx(s);
	}

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_END, 0, 0, 0, 0, 0);
}

void
lck_mtx_unlockspin_wakeup (
	lck_mtx_t	*lck)
{
	assert(lck->lck_mtx_waiters > 0);
	thread_wakeup_one(LCK_MTX_EVENT(lck));

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_NONE, VM_KERNEL_UNSLIDE_OR_PERM(lck), 0, 0, 1, 0);
#if CONFIG_DTRACE
	/*
	 * When there are waiters, we skip the hot-patch spot in the
	 * fastpath, so we record it here.
	 */
	LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, lck, 0);
#endif
}


/*
 * Routine:	mutex_pause
 *
 * Called by former callers of simple_lock_pause().
 */
#define MAX_COLLISION_COUNTS	32
#define MAX_COLLISION 	8

unsigned int max_collision_count[MAX_COLLISION_COUNTS];

uint32_t collision_backoffs[MAX_COLLISION] = {
	10, 50, 100, 200, 400, 600, 800, 1000
};


void
mutex_pause(uint32_t collisions)
{
	wait_result_t wait_result;
	uint32_t	back_off;

	if (collisions >= MAX_COLLISION_COUNTS)
		collisions = MAX_COLLISION_COUNTS - 1;
	max_collision_count[collisions]++;

	if (collisions >= MAX_COLLISION)
		collisions = MAX_COLLISION - 1;
	back_off = collision_backoffs[collisions];

	wait_result = assert_wait_timeout((event_t)mutex_pause, THREAD_UNINT, back_off, NSEC_PER_USEC);
	assert(wait_result == THREAD_WAITING);

	wait_result = thread_block(THREAD_CONTINUE_NULL);
	assert(wait_result == THREAD_TIMED_OUT);
}

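/*
 * Illustrative sketch (comment only): mutex_pause() is a bounded sleep used
 * as back-off between retries; the collision count indexes
 * collision_backoffs[] (clamped to MAX_COLLISION - 1), so later retries wait
 * longer (10us up to 1000us). A hypothetical try-lock retry loop:
 *
 *	uint32_t collisions = 0;
 *
 *	while (!lck_mtx_try_lock(my_mtx)) {
 *		mutex_pause(collisions++);	// sleep 10us, 50us, 100us, ...
 *	}
 *	// my_mtx held here
 */
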

unsigned int mutex_yield_wait = 0;
unsigned int mutex_yield_no_wait = 0;

void
lck_mtx_yield(
	lck_mtx_t	*lck)
{
	int	waiters;

#if DEBUG
	lck_mtx_assert(lck, LCK_MTX_ASSERT_OWNED);
#endif /* DEBUG */

	if (lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT)
		waiters = lck->lck_mtx_ptr->lck_mtx.lck_mtx_waiters;
	else
		waiters = lck->lck_mtx_waiters;

	if ( !waiters) {
		mutex_yield_no_wait++;
	} else {
		mutex_yield_wait++;
		lck_mtx_unlock(lck);
		mutex_pause(0);
		lck_mtx_lock(lck);
	}
}


/*
 * Routine:	lck_rw_sleep
 */
wait_result_t
lck_rw_sleep(
	lck_rw_t		*lck,
	lck_sleep_action_t	lck_sleep_action,
	event_t			event,
	wait_interrupt_t	interruptible)
{
	wait_result_t	res;
	lck_rw_type_t	lck_rw_type;
	thread_t	thread = current_thread();

	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
		panic("Invalid lock sleep action %x\n", lck_sleep_action);

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		/*
		 * Although we are dropping the RW lock, the intent in most cases
		 * is that this thread remains as an observer, since it may hold
		 * some secondary resource, but must yield to avoid deadlock. In
		 * this situation, make sure that the thread is boosted to the
		 * RW lock ceiling while blocked, so that it can re-acquire the
		 * RW lock at that priority.
		 */
		thread->rwlock_count++;
	}

	res = assert_wait(event, interruptible);
	if (res == THREAD_WAITING) {
		lck_rw_type = lck_rw_done(lck);
		res = thread_block(THREAD_CONTINUE_NULL);
		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
			if (!(lck_sleep_action & (LCK_SLEEP_SHARED|LCK_SLEEP_EXCLUSIVE)))
				lck_rw_lock(lck, lck_rw_type);
			else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE)
				lck_rw_lock_exclusive(lck);
			else
				lck_rw_lock_shared(lck);
		}
	}
	else
	if (lck_sleep_action & LCK_SLEEP_UNLOCK)
		(void)lck_rw_done(lck);

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
			/* sched_flags checked without lock, but will be rechecked while clearing */

			/* Only if the caller wanted the lck_rw_t returned unlocked should we drop to 0 */
			assert(lck_sleep_action & LCK_SLEEP_UNLOCK);

			lck_rw_clear_promotion(thread);
		}
	}

	return res;
}

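/*
 * Illustrative sketch (comment only): by default lck_rw_sleep() re-takes the
 * lock in whatever mode lck_rw_done() reported; LCK_SLEEP_EXCLUSIVE or
 * LCK_SLEEP_SHARED force the re-acquire mode, and LCK_SLEEP_UNLOCK leaves it
 * dropped. my_rw_lock and my_cond are hypothetical.
 *
 *	lck_rw_lock_shared(my_rw_lock);
 *	while (!my_cond)
 *		(void) lck_rw_sleep(my_rw_lock, LCK_SLEEP_EXCLUSIVE,
 *		    (event_t)&my_cond, THREAD_UNINT);
 *	// returns holding my_rw_lock exclusive
 *	lck_rw_unlock_exclusive(my_rw_lock);
 */
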

/*
 * Routine:	lck_rw_sleep_deadline
 */
wait_result_t
lck_rw_sleep_deadline(
	lck_rw_t		*lck,
	lck_sleep_action_t	lck_sleep_action,
	event_t			event,
	wait_interrupt_t	interruptible,
	uint64_t		deadline)
{
	wait_result_t	res;
	lck_rw_type_t	lck_rw_type;
	thread_t	thread = current_thread();

	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
		panic("Invalid lock sleep action %x\n", lck_sleep_action);

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		thread->rwlock_count++;
	}

	res = assert_wait_deadline(event, interruptible, deadline);
	if (res == THREAD_WAITING) {
		lck_rw_type = lck_rw_done(lck);
		res = thread_block(THREAD_CONTINUE_NULL);
		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
			if (!(lck_sleep_action & (LCK_SLEEP_SHARED|LCK_SLEEP_EXCLUSIVE)))
				lck_rw_lock(lck, lck_rw_type);
			else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE)
				lck_rw_lock_exclusive(lck);
			else
				lck_rw_lock_shared(lck);
		}
	}
	else
	if (lck_sleep_action & LCK_SLEEP_UNLOCK)
		(void)lck_rw_done(lck);

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
			/* sched_flags checked without lock, but will be rechecked while clearing */

			/* Only if the caller wanted the lck_rw_t returned unlocked should we drop to 0 */
			assert(lck_sleep_action & LCK_SLEEP_UNLOCK);

			lck_rw_clear_promotion(thread);
		}
	}

	return res;
}

/*
 * Reader-writer lock promotion
 *
 * We support a limited form of reader-writer
 * lock promotion whose effects are:
 *
 *   * Qualifying threads have decay disabled
 *   * Scheduler priority is reset to a floor of
 *     their statically assigned priority
 *     or BASEPRI_BACKGROUND
 *
 * The rationale is that lck_rw_ts do not have
 * a single owner, so we cannot apply a directed
 * priority boost from all waiting threads
 * to all holding threads without maintaining
 * lists of all shared owners and all waiting
 * threads for every lock.
 *
 * Instead (and to preserve the uncontended fast-
 * path), acquiring (or attempting to acquire)
 * a RW lock in shared or exclusive lock increments
 * a per-thread counter. Only if that thread stops
 * making forward progress (for instance blocking
 * on a mutex, or being preempted) do we consult
 * the counter and apply the priority floor.
 * When the thread becomes runnable again (or in
 * the case of preemption it never stopped being
 * runnable), it has the priority boost and should
 * be in a good position to run on the CPU and
 * release all RW locks (at which point the priority
 * boost is cleared).
 *
 * Care must be taken to ensure that priority
 * boosts are not retained indefinitely, since unlike
 * mutex priority boosts (where the boost is tied
 * to the mutex lifecycle), the boost is tied
 * to the thread and independent of any particular
 * lck_rw_t. Assertions are in place on return
 * to userspace so that the boost is not held
 * indefinitely.
 *
 * The routines that increment/decrement the
 * per-thread counter should err on the side of
 * incrementing any time a preemption is possible
 * and the lock would be visible to the rest of the
 * system as held (so it should be incremented before
 * interlocks are dropped/preemption is enabled, or
 * before a CAS is executed to acquire the lock).
 *
 */

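/*
 * Illustrative sketch (comment only) of the counting protocol described
 * above, in the shape used by lck_rw_sleep()/lck_mtx_sleep() in this file:
 *
 *	thread->rwlock_count++;			// before the lock is visibly held
 *	//	... acquire / hold / drop the RW lock ...
 *	if ((thread->rwlock_count-- == 1) &&
 *	    (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
 *		// last RW lock went away while a promotion was active
 *		lck_rw_clear_promotion(thread);
 *	}
 *
 * The promotion itself is applied lazily by lck_rw_set_promotion_locked()
 * below, and only if the thread goes off core while rwlock_count is positive.
 */
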
/*
 * lck_rw_clear_promotion: Undo priority promotions when the last RW
 * lock is released by a thread (if a promotion was active)
 */
void lck_rw_clear_promotion(thread_t thread)
{
	assert(thread->rwlock_count == 0);

	/* Cancel any promotions if the thread had actually blocked while holding a RW lock */
	spl_t s = splsched();

	thread_lock(thread);

	if (thread->sched_flags & TH_SFLAG_RW_PROMOTED) {
		thread->sched_flags &= ~TH_SFLAG_RW_PROMOTED;

		if (thread->sched_flags & TH_SFLAG_PROMOTED) {
			/* Thread still has a mutex promotion */
		} else if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
			KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_RW_DEMOTE) | DBG_FUNC_NONE,
					      (uintptr_t)thread_tid(thread), thread->sched_pri, DEPRESSPRI, 0, 0);

			set_sched_pri(thread, DEPRESSPRI);
		} else {
			KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_RW_DEMOTE) | DBG_FUNC_NONE,
					      (uintptr_t)thread_tid(thread), thread->sched_pri, thread->base_pri, 0, 0);

			thread_recompute_sched_pri(thread, FALSE);
		}
	}

	thread_unlock(thread);
	splx(s);
}

/*
 * Callout from context switch if the thread goes
 * off core with a positive rwlock_count
 *
 * Called at splsched with the thread locked
 */
void
lck_rw_set_promotion_locked(thread_t thread)
{
	if (LcksOpts & disLkRWPrio)
		return;

	integer_t priority;

	priority = thread->sched_pri;

	if (priority < thread->base_pri)
		priority = thread->base_pri;
	if (priority < BASEPRI_BACKGROUND)
		priority = BASEPRI_BACKGROUND;

	if ((thread->sched_pri < priority) ||
	    !(thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
		KERNEL_DEBUG_CONSTANT(
			MACHDBG_CODE(DBG_MACH_SCHED, MACH_RW_PROMOTE) | DBG_FUNC_NONE,
			(uintptr_t)thread_tid(thread), thread->sched_pri,
			thread->base_pri, priority, 0);

		thread->sched_flags |= TH_SFLAG_RW_PROMOTED;

		if (thread->sched_pri < priority)
			set_sched_pri(thread, priority);
	}
}

kern_return_t
host_lockgroup_info(
	host_t					host,
	lockgroup_info_array_t	*lockgroup_infop,
	mach_msg_type_number_t	*lockgroup_infoCntp)
{
	lockgroup_info_t	*lockgroup_info_base;
	lockgroup_info_t	*lockgroup_info;
	vm_offset_t			lockgroup_info_addr;
	vm_size_t			lockgroup_info_size;
	vm_size_t			lockgroup_info_vmsize;
	lck_grp_t			*lck_grp;
	unsigned int		i;
	vm_map_copy_t		copy;
	kern_return_t		kr;

	if (host == HOST_NULL)
		return KERN_INVALID_HOST;

	lck_mtx_lock(&lck_grp_lock);

	lockgroup_info_size = lck_grp_cnt * sizeof(*lockgroup_info);
	lockgroup_info_vmsize = round_page(lockgroup_info_size);
	kr = kmem_alloc_pageable(ipc_kernel_map,
						 &lockgroup_info_addr, lockgroup_info_vmsize, VM_KERN_MEMORY_IPC);
	if (kr != KERN_SUCCESS) {
		lck_mtx_unlock(&lck_grp_lock);
		return(kr);
	}

	lockgroup_info_base = (lockgroup_info_t *) lockgroup_info_addr;
	lck_grp = (lck_grp_t *)queue_first(&lck_grp_queue);
	lockgroup_info = lockgroup_info_base;

	for (i = 0; i < lck_grp_cnt; i++) {

		lockgroup_info->lock_spin_cnt = lck_grp->lck_grp_spincnt;
		lockgroup_info->lock_spin_util_cnt = lck_grp->lck_grp_stat.lck_grp_spin_stat.lck_grp_spin_util_cnt;
		lockgroup_info->lock_spin_held_cnt = lck_grp->lck_grp_stat.lck_grp_spin_stat.lck_grp_spin_held_cnt;
		lockgroup_info->lock_spin_miss_cnt = lck_grp->lck_grp_stat.lck_grp_spin_stat.lck_grp_spin_miss_cnt;
		lockgroup_info->lock_spin_held_max = lck_grp->lck_grp_stat.lck_grp_spin_stat.lck_grp_spin_held_max;
		lockgroup_info->lock_spin_held_cum = lck_grp->lck_grp_stat.lck_grp_spin_stat.lck_grp_spin_held_cum;

		lockgroup_info->lock_mtx_cnt = lck_grp->lck_grp_mtxcnt;
		lockgroup_info->lock_mtx_util_cnt = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_util_cnt;
		lockgroup_info->lock_mtx_held_cnt = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_held_cnt;
		lockgroup_info->lock_mtx_miss_cnt = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_miss_cnt;
		lockgroup_info->lock_mtx_wait_cnt = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_wait_cnt;
		lockgroup_info->lock_mtx_held_max = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_held_max;
		lockgroup_info->lock_mtx_held_cum = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_held_cum;
		lockgroup_info->lock_mtx_wait_max = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_wait_max;
		lockgroup_info->lock_mtx_wait_cum = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_wait_cum;

		lockgroup_info->lock_rw_cnt = lck_grp->lck_grp_rwcnt;
		lockgroup_info->lock_rw_util_cnt = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_util_cnt;
		lockgroup_info->lock_rw_held_cnt = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_held_cnt;
		lockgroup_info->lock_rw_miss_cnt = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_miss_cnt;
		lockgroup_info->lock_rw_wait_cnt = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_wait_cnt;
		lockgroup_info->lock_rw_held_max = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_held_max;
		lockgroup_info->lock_rw_held_cum = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_held_cum;
		lockgroup_info->lock_rw_wait_max = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_wait_max;
		lockgroup_info->lock_rw_wait_cum = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_wait_cum;

		(void) strncpy(lockgroup_info->lockgroup_name,lck_grp->lck_grp_name, LOCKGROUP_MAX_NAME);

		lck_grp = (lck_grp_t *)(queue_next((queue_entry_t)(lck_grp)));
		lockgroup_info++;
	}

	*lockgroup_infoCntp = lck_grp_cnt;
	lck_mtx_unlock(&lck_grp_lock);

	if (lockgroup_info_size != lockgroup_info_vmsize)
		bzero((char *)lockgroup_info, lockgroup_info_vmsize - lockgroup_info_size);

	kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)lockgroup_info_addr,
			   (vm_map_size_t)lockgroup_info_size, TRUE, &copy);
	assert(kr == KERN_SUCCESS);

	*lockgroup_infop = (lockgroup_info_t *) copy;

	return(KERN_SUCCESS);
}

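/*
 * Illustrative sketch (comment only): the expected caller of this MIG
 * routine is a user-space tool going through the generated stub (assumed
 * here to be the one exposed via <mach/mach_host.h>), which receives the
 * out-of-line array mapped into its address space and should deallocate it:
 *
 *	lockgroup_info_array_t	info;
 *	mach_msg_type_number_t	count;
 *
 *	if (host_lockgroup_info(mach_host_self(), &info, &count) == KERN_SUCCESS) {
 *		for (unsigned int j = 0; j < count; j++)
 *			printf("%s\n", info[j].lockgroup_name);
 *		vm_deallocate(mach_task_self(), (vm_address_t)info,
 *		    count * sizeof(*info));
 *	}
 */
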
/*
 * Atomic primitives, prototyped in kern/simple_lock.h
 * Noret versions are more efficient on some architectures
 */

uint32_t
hw_atomic_add(volatile uint32_t *dest, uint32_t delt)
{
	ALIGN_TEST(dest,uint32_t);
	return __c11_atomic_fetch_add(ATOMIC_CAST(uint32_t,dest), delt, memory_order_relaxed) + delt;
}

uint32_t
hw_atomic_sub(volatile uint32_t *dest, uint32_t delt)
{
	ALIGN_TEST(dest,uint32_t);
	return __c11_atomic_fetch_sub(ATOMIC_CAST(uint32_t,dest), delt, memory_order_relaxed) - delt;
}

uint32_t
hw_atomic_or(volatile uint32_t *dest, uint32_t mask)
{
	ALIGN_TEST(dest,uint32_t);
	return __c11_atomic_fetch_or(ATOMIC_CAST(uint32_t,dest), mask, memory_order_relaxed) | mask;
}

void
hw_atomic_or_noret(volatile uint32_t *dest, uint32_t mask)
{
	ALIGN_TEST(dest,uint32_t);
	__c11_atomic_fetch_or(ATOMIC_CAST(uint32_t,dest), mask, memory_order_relaxed);
}

uint32_t
hw_atomic_and(volatile uint32_t *dest, uint32_t mask)
{
	ALIGN_TEST(dest,uint32_t);
	return __c11_atomic_fetch_and(ATOMIC_CAST(uint32_t,dest), mask, memory_order_relaxed) & mask;
}

void
hw_atomic_and_noret(volatile uint32_t *dest, uint32_t mask)
{
	ALIGN_TEST(dest,uint32_t);
	__c11_atomic_fetch_and(ATOMIC_CAST(uint32_t,dest), mask, memory_order_relaxed);
}

uint32_t
hw_compare_and_store(uint32_t oldval, uint32_t newval, volatile uint32_t *dest)
{
	ALIGN_TEST(dest,uint32_t);
	return __c11_atomic_compare_exchange_strong(ATOMIC_CAST(uint32_t,dest), &oldval, newval,
			memory_order_acq_rel_smp, memory_order_relaxed);
}

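/*
 * Illustrative sketch (comment only): these helpers return the *new* value
 * (the fetch-op result with the operand re-applied), and
 * hw_compare_and_store() returns non-zero only if "dest" still held
 * "oldval". A hypothetical saturating increment built on them:
 *
 *	uint32_t old, refs;
 *
 *	do {
 *		old = my_counter;			// my_counter: volatile uint32_t
 *		if (old == UINT32_MAX)
 *			break;				// saturated, leave as-is
 *	} while (!hw_compare_and_store(old, old + 1, &my_counter));
 *
 *	refs = hw_atomic_add(&my_refcnt, 1);		// refs == value after the add
 */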