osfmk/kern/locks.c (apple/xnu, xnu-3789.60.24)
91447636 1/*
39037602 2 * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
91447636 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
91447636 5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56
57#define ATOMIC_PRIVATE 1
58#define LOCK_PRIVATE 1
59
60#include <mach_ldebug.h>
61#include <debug.h>
62
63#include <mach/kern_return.h>
64#include <mach/mach_host_server.h>
65#include <mach_debug/lockgroup_info.h>
66
67#include <kern/locks.h>
68#include <kern/misc_protos.h>
69#include <kern/kalloc.h>
70#include <kern/thread.h>
71#include <kern/processor.h>
72#include <kern/sched_prim.h>
73#include <kern/debug.h>
74#include <machine/atomic.h>
75#include <machine/machine_cpu.h>
76#include <string.h>
77
78
79#include <sys/kdebug.h>
80
81#if CONFIG_DTRACE
82/*
83 * We need only enough declarations from the BSD-side to be able to
84 * test if our probe is active, and to call __dtrace_probe(). Setting
85 * NEED_DTRACE_DEFS gets a local copy of those definitions pulled in.
86 */
87#define NEED_DTRACE_DEFS
88#include <../bsd/sys/lockstat.h>
89#endif
90
91#define LCK_MTX_SLEEP_CODE 0
92#define LCK_MTX_SLEEP_DEADLINE_CODE 1
93#define LCK_MTX_LCK_WAIT_CODE 2
94#define LCK_MTX_UNLCK_WAKEUP_CODE 3
95
96#if MACH_LDEBUG
97#define ALIGN_TEST(p,t) do{if((uintptr_t)p&(sizeof(t)-1)) __builtin_trap();}while(0)
98#else
99#define ALIGN_TEST(p,t) do{}while(0)
100#endif
101
102/* Silence the volatile to _Atomic cast warning */
103#define ATOMIC_CAST(t,p) ((_Atomic t*)(uintptr_t)(p))
104
105/* Enforce program order of loads and stores. */
106#define ordered_load(target, type) \
107 __c11_atomic_load((_Atomic type *)(target), memory_order_relaxed)
108#define ordered_store(target, type, value) \
109 __c11_atomic_store((_Atomic type *)(target), value, memory_order_relaxed)
110
111#define ordered_load_hw(lock) ordered_load(&(lock)->lock_data, uintptr_t)
112#define ordered_store_hw(lock, value) ordered_store(&(lock)->lock_data, uintptr_t, (value))
113
114#define NOINLINE __attribute__((noinline))
115
116
117static queue_head_t lck_grp_queue;
118static unsigned int lck_grp_cnt;
119
120decl_lck_mtx_data(static,lck_grp_lock)
121static lck_mtx_ext_t lck_grp_lock_ext;
122
123lck_grp_attr_t LockDefaultGroupAttr;
124lck_grp_t LockCompatGroup;
125lck_attr_t LockDefaultLckAttr;
126
127/*
128 * Routine: lck_mod_init
129 */
130
131void
132lck_mod_init(
133 void)
134{
135 /*
 136 * Obtain "lcks" options: this currently controls lock statistics
137 */
138 if (!PE_parse_boot_argn("lcks", &LcksOpts, sizeof (LcksOpts)))
139 LcksOpts = 0;
140
91447636 141 queue_init(&lck_grp_queue);
142
143 /*
144 * Need to bootstrap the LockCompatGroup instead of calling lck_grp_init() here. This avoids
145 * grabbing the lck_grp_lock before it is initialized.
146 */
147
148 bzero(&LockCompatGroup, sizeof(lck_grp_t));
149 (void) strncpy(LockCompatGroup.lck_grp_name, "Compatibility APIs", LCK_GRP_MAX_NAME);
150
151 if (LcksOpts & enaLkStat)
152 LockCompatGroup.lck_grp_attr = LCK_GRP_ATTR_STAT;
153 else
154 LockCompatGroup.lck_grp_attr = LCK_ATTR_NONE;
155
156 LockCompatGroup.lck_grp_refcnt = 1;
157
158 enqueue_tail(&lck_grp_queue, (queue_entry_t)&LockCompatGroup);
159 lck_grp_cnt = 1;
160
161 lck_grp_attr_setdefault(&LockDefaultGroupAttr);
91447636 162 lck_attr_setdefault(&LockDefaultLckAttr);
163
164 lck_mtx_init_ext(&lck_grp_lock, &lck_grp_lock_ext, &LockCompatGroup, &LockDefaultLckAttr);
165
166}
167
168/*
169 * Routine: lck_grp_attr_alloc_init
170 */
171
172lck_grp_attr_t *
173lck_grp_attr_alloc_init(
174 void)
175{
176 lck_grp_attr_t *attr;
177
178 if ((attr = (lck_grp_attr_t *)kalloc(sizeof(lck_grp_attr_t))) != 0)
179 lck_grp_attr_setdefault(attr);
180
181 return(attr);
182}
183
184
185/*
186 * Routine: lck_grp_attr_setdefault
187 */
188
189void
190lck_grp_attr_setdefault(
191 lck_grp_attr_t *attr)
192{
193 if (LcksOpts & enaLkStat)
194 attr->grp_attr_val = LCK_GRP_ATTR_STAT;
195 else
196 attr->grp_attr_val = 0;
197}
198
199
200/*
201 * Routine: lck_grp_attr_setstat
202 */
203
204void
205lck_grp_attr_setstat(
206 lck_grp_attr_t *attr)
207{
2d21ac55 208 (void)hw_atomic_or(&attr->grp_attr_val, LCK_GRP_ATTR_STAT);
209}
210
211
212/*
213 * Routine: lck_grp_attr_free
214 */
215
216void
217lck_grp_attr_free(
218 lck_grp_attr_t *attr)
219{
220 kfree(attr, sizeof(lck_grp_attr_t));
221}
222
223
224/*
3e170ce0 225 * Routine: lck_grp_alloc_init
226 */
227
228lck_grp_t *
229lck_grp_alloc_init(
230 const char* grp_name,
231 lck_grp_attr_t *attr)
232{
233 lck_grp_t *grp;
234
235 if ((grp = (lck_grp_t *)kalloc(sizeof(lck_grp_t))) != 0)
236 lck_grp_init(grp, grp_name, attr);
237
238 return(grp);
239}
240
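/*
 * Illustrative sketch (not part of the original file): the usual way a
 * subsystem creates a lock group with statistics enabled, using the
 * routines defined above.  The identifiers "my_subsys_lck_grp" and
 * "my_subsys_locks_init" are hypothetical.
 */
#if 0	/* example only */
static lck_grp_t *my_subsys_lck_grp;

static void
my_subsys_locks_init(void)
{
	lck_grp_attr_t	*grp_attr = lck_grp_attr_alloc_init();

	/* opt this group into lock statistics (LCK_GRP_ATTR_STAT) */
	lck_grp_attr_setstat(grp_attr);

	my_subsys_lck_grp = lck_grp_alloc_init("my_subsys", grp_attr);

	/* the attribute is only consumed at group creation time */
	lck_grp_attr_free(grp_attr);
}
#endif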
91447636 241/*
3e170ce0 242 * Routine: lck_grp_init
243 */
244
245void
3e170ce0 246lck_grp_init(lck_grp_t * grp, const char * grp_name, lck_grp_attr_t * attr)
91447636 247{
248 /* make sure locking infrastructure has been initialized */
249 assert(lck_grp_cnt > 0);
250
251 bzero((void *)grp, sizeof(lck_grp_t));
252
3e170ce0 253 (void)strlcpy(grp->lck_grp_name, grp_name, LCK_GRP_MAX_NAME);
254
255 if (attr != LCK_GRP_ATTR_NULL)
256 grp->lck_grp_attr = attr->grp_attr_val;
257 else if (LcksOpts & enaLkStat)
258 grp->lck_grp_attr = LCK_GRP_ATTR_STAT;
259 else
260 grp->lck_grp_attr = LCK_ATTR_NONE;
261
262 grp->lck_grp_refcnt = 1;
263
b0d623f7 264 lck_mtx_lock(&lck_grp_lock);
265 enqueue_tail(&lck_grp_queue, (queue_entry_t)grp);
266 lck_grp_cnt++;
b0d623f7 267 lck_mtx_unlock(&lck_grp_lock);
268}
269
270/*
271 * Routine: lck_grp_free
272 */
273
274void
275lck_grp_free(
276 lck_grp_t *grp)
277{
b0d623f7 278 lck_mtx_lock(&lck_grp_lock);
279 lck_grp_cnt--;
280 (void)remque((queue_entry_t)grp);
b0d623f7 281 lck_mtx_unlock(&lck_grp_lock);
282 lck_grp_deallocate(grp);
283}
284
285
286/*
287 * Routine: lck_grp_reference
288 */
289
290void
291lck_grp_reference(
292 lck_grp_t *grp)
293{
2d21ac55 294 (void)hw_atomic_add(&grp->lck_grp_refcnt, 1);
295}
296
297
298/*
299 * Routine: lck_grp_deallocate
300 */
301
302void
303lck_grp_deallocate(
304 lck_grp_t *grp)
305{
2d21ac55 306 if (hw_atomic_sub(&grp->lck_grp_refcnt, 1) == 0)
307 kfree(grp, sizeof(lck_grp_t));
308}
309
310/*
311 * Routine: lck_grp_lckcnt_incr
312 */
313
314void
315lck_grp_lckcnt_incr(
316 lck_grp_t *grp,
317 lck_type_t lck_type)
318{
319 unsigned int *lckcnt;
320
321 switch (lck_type) {
322 case LCK_TYPE_SPIN:
323 lckcnt = &grp->lck_grp_spincnt;
324 break;
325 case LCK_TYPE_MTX:
326 lckcnt = &grp->lck_grp_mtxcnt;
327 break;
328 case LCK_TYPE_RW:
329 lckcnt = &grp->lck_grp_rwcnt;
330 break;
331 default:
332 return panic("lck_grp_lckcnt_incr(): invalid lock type: %d\n", lck_type);
333 }
334
2d21ac55 335 (void)hw_atomic_add(lckcnt, 1);
336}
337
338/*
339 * Routine: lck_grp_lckcnt_decr
340 */
341
342void
343lck_grp_lckcnt_decr(
344 lck_grp_t *grp,
345 lck_type_t lck_type)
346{
347 unsigned int *lckcnt;
39037602 348 int updated;
349
350 switch (lck_type) {
351 case LCK_TYPE_SPIN:
352 lckcnt = &grp->lck_grp_spincnt;
353 break;
354 case LCK_TYPE_MTX:
355 lckcnt = &grp->lck_grp_mtxcnt;
356 break;
357 case LCK_TYPE_RW:
358 lckcnt = &grp->lck_grp_rwcnt;
359 break;
360 default:
361 panic("lck_grp_lckcnt_decr(): invalid lock type: %d\n", lck_type);
362 return;
363 }
364
365 updated = (int)hw_atomic_sub(lckcnt, 1);
366 assert(updated >= 0);
367}
368
369/*
370 * Routine: lck_attr_alloc_init
371 */
372
373lck_attr_t *
374lck_attr_alloc_init(
375 void)
376{
377 lck_attr_t *attr;
378
379 if ((attr = (lck_attr_t *)kalloc(sizeof(lck_attr_t))) != 0)
380 lck_attr_setdefault(attr);
381
382 return(attr);
383}
384
385
386/*
387 * Routine: lck_attr_setdefault
388 */
389
390void
391lck_attr_setdefault(
392 lck_attr_t *attr)
393{
316670eb 394#if __i386__ || __x86_64__
91447636 395#if !DEBUG
396 if (LcksOpts & enaLkDeb)
397 attr->lck_attr_val = LCK_ATTR_DEBUG;
398 else
399 attr->lck_attr_val = LCK_ATTR_NONE;
91447636 400#else
401 attr->lck_attr_val = LCK_ATTR_DEBUG;
402#endif /* !DEBUG */
403#else
404#error Unknown architecture.
 405#endif /* __i386__ || __x86_64__ */
406}
407
408
409/*
410 * Routine: lck_attr_setdebug
411 */
412void
413lck_attr_setdebug(
414 lck_attr_t *attr)
415{
416 (void)hw_atomic_or(&attr->lck_attr_val, LCK_ATTR_DEBUG);
417}
418
419/*
420 * Routine: lck_attr_setdebug
421 */
422void
423lck_attr_cleardebug(
424 lck_attr_t *attr)
425{
426 (void)hw_atomic_and(&attr->lck_attr_val, ~LCK_ATTR_DEBUG);
427}
428
429
430/*
431 * Routine: lck_attr_rw_shared_priority
432 */
433void
434lck_attr_rw_shared_priority(
435 lck_attr_t *attr)
436{
2d21ac55 437 (void)hw_atomic_or(&attr->lck_attr_val, LCK_ATTR_RW_SHARED_PRIORITY);
438}
439
440
441/*
442 * Routine: lck_attr_free
443 */
444void
445lck_attr_free(
446 lck_attr_t *attr)
447{
448 kfree(attr, sizeof(lck_attr_t));
449}
450
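/*
 * Illustrative sketch (not part of the original file): pairing a lock
 * attribute with a mutex allocation.  lck_mtx_alloc_init() is declared in
 * kern/locks.h rather than defined here; the other identifiers are
 * hypothetical.
 */
#if 0	/* example only */
static lck_mtx_t *
my_subsys_mutex_create(lck_grp_t *grp)
{
	lck_attr_t	*attr = lck_attr_alloc_init();
	lck_mtx_t	*mtx;

	/* request the debug variant of the lock regardless of boot-args */
	lck_attr_setdebug(attr);

	mtx = lck_mtx_alloc_init(grp, attr);

	lck_attr_free(attr);		/* only needed during initialization */
	return mtx;
}
#endif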
451/*
452 * Routine: hw_lock_init
453 *
454 * Initialize a hardware lock.
455 */
456void
457hw_lock_init(hw_lock_t lock)
458{
459 ordered_store_hw(lock, 0);
460}
461
462/*
463 * Routine: hw_lock_lock_contended
464 *
465 * Spin until lock is acquired or timeout expires.
466 * timeout is in mach_absolute_time ticks.
467 * MACH_RT: called with preemption disabled.
468 */
469
470#if __SMP__
471static unsigned int NOINLINE
472hw_lock_lock_contended(hw_lock_t lock, uintptr_t data, uint64_t timeout, boolean_t do_panic)
473{
474 uint64_t end = 0;
475 uintptr_t holder = lock->lock_data;
476 int i;
477
478 if (timeout == 0)
479 timeout = LOCK_PANIC_TIMEOUT;
480
481 for ( ; ; ) {
482 for (i = 0; i < LOCK_SNOOP_SPINS; i++) {
483 boolean_t wait = FALSE;
484
485 cpu_pause();
486#if (!__ARM_ENABLE_WFE_) || (LOCK_PRETEST)
487 holder = ordered_load_hw(lock);
488 if (holder != 0)
489 continue;
490#endif
491#if __ARM_ENABLE_WFE_
492 wait = TRUE; // Wait for event
493#endif
494 if (atomic_compare_exchange(&lock->lock_data, 0, data,
495 memory_order_acquire_smp, wait))
496 return 1;
497 }
498 if (end == 0)
499 end = ml_get_timebase() + timeout;
500 else if (ml_get_timebase() >= end)
501 break;
502 }
503 if (do_panic) {
504 // Capture the actual time spent blocked, which may be higher than the timeout
505 // if a misbehaving interrupt stole this thread's CPU time.
506 panic("Spinlock timeout after %llu ticks, %p = %lx",
507 (ml_get_timebase() - end + timeout), lock, holder);
508 }
509 return 0;
510}
511#endif // __SMP__
512
513/*
514 * Routine: hw_lock_lock
515 *
516 * Acquire lock, spinning until it becomes available.
 517 * MACH_RT: also returns with preemption disabled.
518 */
519void
520hw_lock_lock(hw_lock_t lock)
521{
522 thread_t thread;
523 uintptr_t state;
524
525 thread = current_thread();
526 disable_preemption_for_thread(thread);
527 state = LCK_MTX_THREAD_TO_STATE(thread) | PLATFORM_LCK_ILOCK;
528#if __SMP__
529#if LOCK_PRETEST
530 if (ordered_load_hw(lock))
531 goto contended;
532#endif // LOCK_PRETEST
533 if (atomic_compare_exchange(&lock->lock_data, 0, state,
534 memory_order_acquire_smp, TRUE))
535 return;
536#if LOCK_PRETEST
537contended:
538#endif // LOCK_PRETEST
539 hw_lock_lock_contended(lock, state, 0, TRUE);
540#else // __SMP__
541 if (lock->lock_data)
542 panic("Spinlock held %p", lock);
543 lock->lock_data = state;
544#endif // __SMP__
545 return;
546}
547
548/*
549 * Routine: hw_lock_to
550 *
551 * Acquire lock, spinning until it becomes available or timeout.
552 * timeout is in mach_absolute_time ticks.
 553 * MACH_RT: also returns with preemption disabled.
554 */
555unsigned int
556hw_lock_to(hw_lock_t lock, uint64_t timeout)
557{
558 thread_t thread;
559 uintptr_t state;
560
561 thread = current_thread();
562 disable_preemption_for_thread(thread);
563 state = LCK_MTX_THREAD_TO_STATE(thread) | PLATFORM_LCK_ILOCK;
564#if __SMP__
565#if LOCK_PRETEST
566 if (ordered_load_hw(lock))
567 goto contended;
568#endif // LOCK_PRETEST
569 if (atomic_compare_exchange(&lock->lock_data, 0, state,
570 memory_order_acquire_smp, TRUE))
571 return 1;
572#if LOCK_PRETEST
573contended:
574#endif // LOCK_PRETEST
575 return hw_lock_lock_contended(lock, state, timeout, FALSE);
576#else // __SMP__
577 (void)timeout;
578 if (ordered_load_hw(lock) == 0) {
579 ordered_store_hw(lock, state);
580 return 1;
581 }
582 return 0;
583#endif // __SMP__
584}
585
586/*
587 * Routine: hw_lock_try
588 * MACH_RT: returns with preemption disabled on success.
589 */
590unsigned int
591hw_lock_try(hw_lock_t lock)
592{
593 thread_t thread = current_thread();
594 int success = 0;
595#if LOCK_TRY_DISABLE_INT
596 long intmask;
597
598 intmask = disable_interrupts();
599#else
600 disable_preemption_for_thread(thread);
601#endif // LOCK_TRY_DISABLE_INT
602
603#if __SMP__
604#if LOCK_PRETEST
605 if (ordered_load_hw(lock))
606 goto failed;
607#endif // LOCK_PRETEST
608 success = atomic_compare_exchange(&lock->lock_data, 0, LCK_MTX_THREAD_TO_STATE(thread) | PLATFORM_LCK_ILOCK,
609 memory_order_acquire_smp, FALSE);
610#else
611 if (lock->lock_data == 0) {
612 lock->lock_data = LCK_MTX_THREAD_TO_STATE(thread) | PLATFORM_LCK_ILOCK;
613 success = 1;
614 }
615#endif // __SMP__
616
617#if LOCK_TRY_DISABLE_INT
618 if (success)
619 disable_preemption_for_thread(thread);
620#if LOCK_PRETEST
621failed:
622#endif // LOCK_PRETEST
623 restore_interrupts(intmask);
624#else
625#if LOCK_PRETEST
626failed:
627#endif // LOCK_PRETEST
628 if (!success)
629 enable_preemption();
630#endif // LOCK_TRY_DISABLE_INT
631 return success;
632}
633
634/*
635 * Routine: hw_lock_unlock
636 *
637 * Unconditionally release lock.
 638 * MACH_RT: releases the preemption level.
639 */
640void
641hw_lock_unlock(hw_lock_t lock)
642{
643 __c11_atomic_store((_Atomic uintptr_t *)&lock->lock_data, 0, memory_order_release_smp);
644 enable_preemption();
645}
646
647/*
 648 * Routine: hw_lock_held
649 * MACH_RT: doesn't change preemption state.
650 * N.B. Racy, of course.
651 */
652unsigned int
653hw_lock_held(hw_lock_t lock)
654{
655 return (ordered_load_hw(lock) != 0);
656}
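/*
 * Illustrative sketch (not part of the original file): basic use of the
 * hw_lock_* primitives above.  hw_lock_lock()/hw_lock_to()/hw_lock_try()
 * return with preemption disabled on success and hw_lock_unlock()
 * re-enables it.  The identifiers and the 100us timeout are hypothetical.
 */
#if 0	/* example only */
static hw_lock_data_t	my_hw_lock;

static void
my_hw_lock_example(void)
{
	uint64_t	timeout_ticks;

	hw_lock_init(&my_hw_lock);

	/* hw_lock_to() takes its timeout in mach_absolute_time ticks */
	nanoseconds_to_absolutetime(100 * NSEC_PER_USEC, &timeout_ticks);

	if (hw_lock_to(&my_hw_lock, timeout_ticks)) {
		/* critical section, preemption disabled */
		hw_lock_unlock(&my_hw_lock);
	}

	if (hw_lock_try(&my_hw_lock)) {		/* non-blocking attempt */
		hw_lock_unlock(&my_hw_lock);
	}
}
#endif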
657
658/*
659 * Routine: lck_spin_sleep
660 */
661wait_result_t
662lck_spin_sleep(
663 lck_spin_t *lck,
664 lck_sleep_action_t lck_sleep_action,
665 event_t event,
666 wait_interrupt_t interruptible)
667{
668 wait_result_t res;
669
670 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
671 panic("Invalid lock sleep action %x\n", lck_sleep_action);
672
673 res = assert_wait(event, interruptible);
674 if (res == THREAD_WAITING) {
675 lck_spin_unlock(lck);
676 res = thread_block(THREAD_CONTINUE_NULL);
677 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK))
678 lck_spin_lock(lck);
679 }
680 else
681 if (lck_sleep_action & LCK_SLEEP_UNLOCK)
682 lck_spin_unlock(lck);
683
684 return res;
685}
686
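/*
 * Illustrative sketch (not part of the original file): the usual
 * sleep/wakeup protocol built on lck_spin_sleep().  The flag, lock and
 * function names are hypothetical.
 */
#if 0	/* example only */
static void
my_wait_for_flag(lck_spin_t *slock, volatile boolean_t *flag)
{
	lck_spin_lock(slock);
	while (*flag == FALSE) {
		/* drops slock while blocked, re-acquires it before returning */
		(void) lck_spin_sleep(slock, LCK_SLEEP_DEFAULT,
		    (event_t)flag, THREAD_UNINT);
	}
	lck_spin_unlock(slock);
}

static void
my_set_flag(lck_spin_t *slock, volatile boolean_t *flag)
{
	lck_spin_lock(slock);
	*flag = TRUE;
	lck_spin_unlock(slock);
	thread_wakeup((event_t)flag);
}
#endif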
687
688/*
689 * Routine: lck_spin_sleep_deadline
690 */
691wait_result_t
692lck_spin_sleep_deadline(
693 lck_spin_t *lck,
694 lck_sleep_action_t lck_sleep_action,
695 event_t event,
696 wait_interrupt_t interruptible,
697 uint64_t deadline)
698{
699 wait_result_t res;
700
701 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
702 panic("Invalid lock sleep action %x\n", lck_sleep_action);
703
704 res = assert_wait_deadline(event, interruptible, deadline);
705 if (res == THREAD_WAITING) {
706 lck_spin_unlock(lck);
707 res = thread_block(THREAD_CONTINUE_NULL);
708 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK))
709 lck_spin_lock(lck);
710 }
711 else
712 if (lck_sleep_action & LCK_SLEEP_UNLOCK)
713 lck_spin_unlock(lck);
714
715 return res;
716}
717
718
719/*
720 * Routine: lck_mtx_clear_promoted
721 *
722 * Handle clearing of TH_SFLAG_PROMOTED,
723 * adjusting thread priority as needed.
724 *
725 * Called with thread lock held
726 */
727static void
728lck_mtx_clear_promoted (
729 thread_t thread,
730 __kdebug_only uintptr_t trace_lck)
731{
732 thread->sched_flags &= ~TH_SFLAG_PROMOTED;
733
734 if (thread->sched_flags & TH_SFLAG_RW_PROMOTED) {
735 /* Thread still has a RW lock promotion */
736 } else if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
737 KERNEL_DEBUG_CONSTANT(
738 MACHDBG_CODE(DBG_MACH_SCHED,MACH_DEMOTE) | DBG_FUNC_NONE,
739 thread->sched_pri, DEPRESSPRI, 0, trace_lck, 0);
740 set_sched_pri(thread, DEPRESSPRI);
741 } else {
742 if (thread->base_pri < thread->sched_pri) {
743 KERNEL_DEBUG_CONSTANT(
744 MACHDBG_CODE(DBG_MACH_SCHED,MACH_DEMOTE) | DBG_FUNC_NONE,
745 thread->sched_pri, thread->base_pri, 0, trace_lck, 0);
746 }
747 thread_recompute_sched_pri(thread, FALSE);
748 }
749}
750
751
752/*
753 * Routine: lck_mtx_sleep
754 */
755wait_result_t
756lck_mtx_sleep(
757 lck_mtx_t *lck,
758 lck_sleep_action_t lck_sleep_action,
759 event_t event,
760 wait_interrupt_t interruptible)
761{
762 wait_result_t res;
fe8ab488 763 thread_t thread = current_thread();
764
765 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_START,
3e170ce0 766 VM_KERNEL_UNSLIDE_OR_PERM(lck), (int)lck_sleep_action, VM_KERNEL_UNSLIDE_OR_PERM(event), (int)interruptible, 0);
767
768 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
769 panic("Invalid lock sleep action %x\n", lck_sleep_action);
770
771 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
772 /*
773 * We overload the RW lock promotion to give us a priority ceiling
774 * during the time that this thread is asleep, so that when it
775 * is re-awakened (and not yet contending on the mutex), it is
776 * runnable at a reasonably high priority.
777 */
778 thread->rwlock_count++;
779 }
780
781 res = assert_wait(event, interruptible);
782 if (res == THREAD_WAITING) {
783 lck_mtx_unlock(lck);
784 res = thread_block(THREAD_CONTINUE_NULL);
785 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
786 if ((lck_sleep_action & LCK_SLEEP_SPIN))
787 lck_mtx_lock_spin(lck);
788 else
789 lck_mtx_lock(lck);
790 }
791 }
792 else
793 if (lck_sleep_action & LCK_SLEEP_UNLOCK)
794 lck_mtx_unlock(lck);
795
796 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
797 if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
798 /* sched_flags checked without lock, but will be rechecked while clearing */
799 lck_rw_clear_promotion(thread);
800 }
801 }
802
803 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0);
804
805 return res;
806}
807
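/*
 * Illustrative sketch (not part of the original file): how the
 * lck_sleep_action_t flags change lck_mtx_sleep()'s behaviour.  The
 * identifiers are hypothetical.
 */
#if 0	/* example only */
static void
my_mtx_sleep_examples(lck_mtx_t *mtx, event_t event)
{
	/* default: drop mtx while blocked, re-acquire it before returning */
	lck_mtx_lock(mtx);
	(void) lck_mtx_sleep(mtx, LCK_SLEEP_DEFAULT, event, THREAD_UNINT);
	lck_mtx_unlock(mtx);

	/* LCK_SLEEP_UNLOCK: return with mtx already released */
	lck_mtx_lock(mtx);
	(void) lck_mtx_sleep(mtx, LCK_SLEEP_UNLOCK, event, THREAD_UNINT);

	/*
	 * LCK_SLEEP_SPIN asks for the re-acquire to use lck_mtx_lock_spin(),
	 * and LCK_SLEEP_PROMOTED_PRI keeps a priority floor while asleep,
	 * as described in the comment inside lck_mtx_sleep() above.
	 */
}
#endif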
808
809/*
810 * Routine: lck_mtx_sleep_deadline
811 */
812wait_result_t
813lck_mtx_sleep_deadline(
814 lck_mtx_t *lck,
815 lck_sleep_action_t lck_sleep_action,
816 event_t event,
817 wait_interrupt_t interruptible,
818 uint64_t deadline)
819{
820 wait_result_t res;
fe8ab488 821 thread_t thread = current_thread();
822
823 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_START,
3e170ce0 824 VM_KERNEL_UNSLIDE_OR_PERM(lck), (int)lck_sleep_action, VM_KERNEL_UNSLIDE_OR_PERM(event), (int)interruptible, 0);
825
826 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
827 panic("Invalid lock sleep action %x\n", lck_sleep_action);
828
829 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
830 /*
831 * See lck_mtx_sleep().
832 */
833 thread->rwlock_count++;
834 }
835
836 res = assert_wait_deadline(event, interruptible, deadline);
837 if (res == THREAD_WAITING) {
838 lck_mtx_unlock(lck);
839 res = thread_block(THREAD_CONTINUE_NULL);
840 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
841 if ((lck_sleep_action & LCK_SLEEP_SPIN))
842 lck_mtx_lock_spin(lck);
843 else
844 lck_mtx_lock(lck);
845 }
846 }
847 else
848 if (lck_sleep_action & LCK_SLEEP_UNLOCK)
849 lck_mtx_unlock(lck);
850
851 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
852 if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
853 /* sched_flags checked without lock, but will be rechecked while clearing */
854 lck_rw_clear_promotion(thread);
855 }
856 }
857
858 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0);
859
860 return res;
861}
862
863/*
864 * Routine: lck_mtx_lock_wait
865 *
866 * Invoked in order to wait on contention.
867 *
868 * Called with the interlock locked and
869 * returns it unlocked.
870 */
871void
872lck_mtx_lock_wait (
873 lck_mtx_t *lck,
874 thread_t holder)
875{
876 thread_t self = current_thread();
877 lck_mtx_t *mutex;
878 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
879 __kdebug_only uintptr_t trace_holder = VM_KERNEL_UNSLIDE_OR_PERM(holder);
880 integer_t priority;
881 spl_t s = splsched();
882#if CONFIG_DTRACE
883 uint64_t sleep_start = 0;
884
885 if (lockstat_probemap[LS_LCK_MTX_LOCK_BLOCK] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_BLOCK]) {
886 sleep_start = mach_absolute_time();
887 }
888#endif
889
890 if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)
891 mutex = lck;
892 else
893 mutex = &lck->lck_mtx_ptr->lck_mtx;
894
3e170ce0 895 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START, trace_lck, trace_holder, 0, 0, 0);
896
897 priority = self->sched_pri;
898 if (priority < self->base_pri)
899 priority = self->base_pri;
900 if (priority < BASEPRI_DEFAULT)
901 priority = BASEPRI_DEFAULT;
902
903 /* Do not promote past promotion ceiling */
904 priority = MIN(priority, MAXPRI_PROMOTE);
905
91447636 906 thread_lock(holder);
39037602 907 if (mutex->lck_mtx_pri == 0) {
91447636 908 holder->promotions++;
909 holder->sched_flags |= TH_SFLAG_PROMOTED;
910 }
911
3e170ce0 912 if (mutex->lck_mtx_pri < priority && holder->sched_pri < priority) {
913 KERNEL_DEBUG_CONSTANT(
914 MACHDBG_CODE(DBG_MACH_SCHED,MACH_PROMOTE) | DBG_FUNC_NONE,
3e170ce0 915 holder->sched_pri, priority, trace_holder, trace_lck, 0);
4a3eedf9 916 set_sched_pri(holder, priority);
917 }
918 thread_unlock(holder);
919 splx(s);
920
921 if (mutex->lck_mtx_pri < priority)
922 mutex->lck_mtx_pri = priority;
923 if (self->pending_promoter[self->pending_promoter_index] == NULL) {
924 self->pending_promoter[self->pending_promoter_index] = mutex;
925 mutex->lck_mtx_waiters++;
926 }
927 else
928 if (self->pending_promoter[self->pending_promoter_index] != mutex) {
929 self->pending_promoter[++self->pending_promoter_index] = mutex;
930 mutex->lck_mtx_waiters++;
931 }
932
813fb2f6 933 thread_set_pending_block_hint(self, kThreadWaitKernelMutex);
3e170ce0 934 assert_wait(LCK_MTX_EVENT(mutex), THREAD_UNINT);
935 lck_mtx_ilk_unlock(mutex);
936
937 thread_block(THREAD_CONTINUE_NULL);
938
939 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_END, 0, 0, 0, 0, 0);
940#if CONFIG_DTRACE
941 /*
942 * Record the Dtrace lockstat probe for blocking, block time
943 * measured from when we were entered.
944 */
945 if (sleep_start) {
946 if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) {
947 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_BLOCK, lck,
948 mach_absolute_time() - sleep_start);
949 } else {
950 LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_BLOCK, lck,
951 mach_absolute_time() - sleep_start);
952 }
953 }
954#endif
955}
956
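/*
 * Illustrative sketch (not part of the original file): the promotion
 * priority computed by lck_mtx_lock_wait() above, restated as a helper.
 * The function name is hypothetical; the logic simply mirrors the code.
 */
#if 0	/* example only */
static integer_t
my_mutex_promotion_pri(thread_t waiter)
{
	integer_t	priority = waiter->sched_pri;

	if (priority < waiter->base_pri)
		priority = waiter->base_pri;	/* ignore any decay/depression */
	if (priority < BASEPRI_DEFAULT)
		priority = BASEPRI_DEFAULT;	/* promote at least to the default band */

	/* never promote past the promotion ceiling */
	return MIN(priority, MAXPRI_PROMOTE);
}
#endif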
957/*
958 * Routine: lck_mtx_lock_acquire
959 *
960 * Invoked on acquiring the mutex when there is
961 * contention.
962 *
963 * Returns the current number of waiters.
964 *
965 * Called with the interlock locked.
966 */
967int
968lck_mtx_lock_acquire(
969 lck_mtx_t *lck)
970{
971 thread_t thread = current_thread();
972 lck_mtx_t *mutex;
973 integer_t priority;
974 spl_t s;
975 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
976
977 if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)
978 mutex = lck;
979 else
980 mutex = &lck->lck_mtx_ptr->lck_mtx;
981
982 if (thread->pending_promoter[thread->pending_promoter_index] == mutex) {
983 thread->pending_promoter[thread->pending_promoter_index] = NULL;
984 if (thread->pending_promoter_index > 0)
985 thread->pending_promoter_index--;
986 mutex->lck_mtx_waiters--;
987 }
988
989 if (mutex->lck_mtx_waiters)
990 priority = mutex->lck_mtx_pri;
991 else {
992 mutex->lck_mtx_pri = 0;
993 priority = 0;
994 }
91447636 995
996 if (priority || thread->was_promoted_on_wakeup) {
997 s = splsched();
91447636 998 thread_lock(thread);
999
1000 if (priority) {
1001 thread->promotions++;
1002 thread->sched_flags |= TH_SFLAG_PROMOTED;
1003 if (thread->sched_pri < priority) {
1004 KERNEL_DEBUG_CONSTANT(
1005 MACHDBG_CODE(DBG_MACH_SCHED,MACH_PROMOTE) | DBG_FUNC_NONE,
1006 thread->sched_pri, priority, 0, trace_lck, 0);
1007 /* Do not promote past promotion ceiling */
1008 assert(priority <= MAXPRI_PROMOTE);
1009 set_sched_pri(thread, priority);
1010 }
91447636 1011 }
1012 if (thread->was_promoted_on_wakeup) {
1013 thread->was_promoted_on_wakeup = 0;
1014 if (thread->promotions == 0)
1015 lck_mtx_clear_promoted(thread, trace_lck);
1016 }
1017
1018 thread_unlock(thread);
1019 splx(s);
1020 }
91447636 1021
1022#if CONFIG_DTRACE
1023 if (lockstat_probemap[LS_LCK_MTX_LOCK_ACQUIRE] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_ACQUIRE]) {
1024 if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) {
1025 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, lck, 0);
1026 } else {
1027 LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_ACQUIRE, lck, 0);
1028 }
1029 }
1030#endif
1031 return (mutex->lck_mtx_waiters);
1032}
1033
1034/*
1035 * Routine: lck_mtx_unlock_wakeup
1036 *
1037 * Invoked on unlock when there is contention.
1038 *
1039 * Called with the interlock locked.
1040 */
1041void
1042lck_mtx_unlock_wakeup (
1043 lck_mtx_t *lck,
1044 thread_t holder)
1045{
1046 thread_t thread = current_thread();
1047 lck_mtx_t *mutex;
3e170ce0 1048 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
1049
1050 if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)
1051 mutex = lck;
1052 else
1053 mutex = &lck->lck_mtx_ptr->lck_mtx;
1054
1055 if (thread != holder)
1056 panic("lck_mtx_unlock_wakeup: mutex %p holder %p\n", mutex, holder);
91447636 1057
3e170ce0 1058 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_START, trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(holder), 0, 0, 0);
91447636 1059
6d2010ae 1060 assert(mutex->lck_mtx_waiters > 0);
1061 if (mutex->lck_mtx_waiters > 1)
1062 thread_wakeup_one_with_pri(LCK_MTX_EVENT(lck), lck->lck_mtx_pri);
1063 else
1064 thread_wakeup_one(LCK_MTX_EVENT(lck));
1065
1066 if (thread->promotions > 0) {
1067 spl_t s = splsched();
1068
1069 thread_lock(thread);
1070 if (--thread->promotions == 0 && (thread->sched_flags & TH_SFLAG_PROMOTED))
1071 lck_mtx_clear_promoted(thread, trace_lck);
1072 thread_unlock(thread);
1073 splx(s);
1074 }
1075
1076 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_END, 0, 0, 0, 0, 0);
1077}
1078
1079void
1080lck_mtx_unlockspin_wakeup (
1081 lck_mtx_t *lck)
1082{
1083 assert(lck->lck_mtx_waiters > 0);
3e170ce0 1084 thread_wakeup_one(LCK_MTX_EVENT(lck));
2d21ac55 1085
3e170ce0 1086 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_NONE, VM_KERNEL_UNSLIDE_OR_PERM(lck), 0, 0, 1, 0);
1087#if CONFIG_DTRACE
1088 /*
1089 * When there are waiters, we skip the hot-patch spot in the
1090 * fastpath, so we record it here.
1091 */
1092 LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, lck, 0);
1093#endif
1094}
1095
1096
1097/*
1098 * Routine: mutex_pause
1099 *
1100 * Called by former callers of simple_lock_pause().
1101 */
1102#define MAX_COLLISION_COUNTS 32
1103#define MAX_COLLISION 8
1104
1105unsigned int max_collision_count[MAX_COLLISION_COUNTS];
1106
1107uint32_t collision_backoffs[MAX_COLLISION] = {
1108 10, 50, 100, 200, 400, 600, 800, 1000
1109};
1110
1111
1112void
2d21ac55 1113mutex_pause(uint32_t collisions)
1114{
1115 wait_result_t wait_result;
2d21ac55 1116 uint32_t back_off;
91447636 1117
1118 if (collisions >= MAX_COLLISION_COUNTS)
1119 collisions = MAX_COLLISION_COUNTS - 1;
1120 max_collision_count[collisions]++;
1121
1122 if (collisions >= MAX_COLLISION)
1123 collisions = MAX_COLLISION - 1;
1124 back_off = collision_backoffs[collisions];
1125
1126 wait_result = assert_wait_timeout((event_t)mutex_pause, THREAD_UNINT, back_off, NSEC_PER_USEC);
1127 assert(wait_result == THREAD_WAITING);
1128
1129 wait_result = thread_block(THREAD_CONTINUE_NULL);
1130 assert(wait_result == THREAD_TIMED_OUT);
1131}
1132
1133
1134unsigned int mutex_yield_wait = 0;
1135unsigned int mutex_yield_no_wait = 0;
1136
1137void
1138lck_mtx_yield(
1139 lck_mtx_t *lck)
2d21ac55 1140{
1141 int waiters;
1142
2d21ac55 1143#if DEBUG
b0d623f7 1144 lck_mtx_assert(lck, LCK_MTX_ASSERT_OWNED);
2d21ac55 1145#endif /* DEBUG */
b0d623f7 1146
2d21ac55 1147 if (lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT)
1148 waiters = lck->lck_mtx_ptr->lck_mtx.lck_mtx_waiters;
1149 else
1150 waiters = lck->lck_mtx_waiters;
2d21ac55 1151
b0d623f7 1152 if ( !waiters) {
1153 mutex_yield_no_wait++;
1154 } else {
1155 mutex_yield_wait++;
b0d623f7 1156 lck_mtx_unlock(lck);
2d21ac55 1157 mutex_pause(0);
b0d623f7 1158 lck_mtx_lock(lck);
1159 }
1160}
1161
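/*
 * Illustrative sketch (not part of the original file): using
 * lck_mtx_yield() from a long-running loop so that waiters get a chance
 * to take the mutex.  The identifiers are hypothetical.
 */
#if 0	/* example only */
static void
my_long_scan(lck_mtx_t *mtx, int count)
{
	int	i;

	lck_mtx_lock(mtx);
	for (i = 0; i < count; i++) {
		/* ... do one unit of work under the mutex ... */

		/* drops and re-takes mtx only if another thread is waiting */
		lck_mtx_yield(mtx);
	}
	lck_mtx_unlock(mtx);
}
#endif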
1162
1163/*
1164 * Routine: lck_rw_sleep
1165 */
1166wait_result_t
1167lck_rw_sleep(
1168 lck_rw_t *lck,
1169 lck_sleep_action_t lck_sleep_action,
1170 event_t event,
1171 wait_interrupt_t interruptible)
1172{
1173 wait_result_t res;
1174 lck_rw_type_t lck_rw_type;
1175 thread_t thread = current_thread();
1176
1177 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
1178 panic("Invalid lock sleep action %x\n", lck_sleep_action);
1179
1180 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1181 /*
1182 * Although we are dropping the RW lock, the intent in most cases
1183 * is that this thread remains as an observer, since it may hold
1184 * some secondary resource, but must yield to avoid deadlock. In
1185 * this situation, make sure that the thread is boosted to the
1186 * RW lock ceiling while blocked, so that it can re-acquire the
1187 * RW lock at that priority.
1188 */
1189 thread->rwlock_count++;
1190 }
1191
1192 res = assert_wait(event, interruptible);
1193 if (res == THREAD_WAITING) {
1194 lck_rw_type = lck_rw_done(lck);
1195 res = thread_block(THREAD_CONTINUE_NULL);
1196 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
1197 if (!(lck_sleep_action & (LCK_SLEEP_SHARED|LCK_SLEEP_EXCLUSIVE)))
1198 lck_rw_lock(lck, lck_rw_type);
1199 else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE)
1200 lck_rw_lock_exclusive(lck);
1201 else
1202 lck_rw_lock_shared(lck);
1203 }
1204 }
1205 else
1206 if (lck_sleep_action & LCK_SLEEP_UNLOCK)
1207 (void)lck_rw_done(lck);
1208
1209 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1210 if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
1211 /* sched_flags checked without lock, but will be rechecked while clearing */
1212
1213 /* Only if the caller wanted the lck_rw_t returned unlocked should we drop to 0 */
1214 assert(lck_sleep_action & LCK_SLEEP_UNLOCK);
1215
1216 lck_rw_clear_promotion(thread);
1217 }
1218 }
1219
1220 return res;
1221}
1222
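/*
 * Illustrative sketch (not part of the original file): blocking while
 * holding a rw lock via lck_rw_sleep(), requesting the priority floor
 * through LCK_SLEEP_PROMOTED_PRI as discussed in the comment above.  The
 * identifiers are hypothetical.
 */
#if 0	/* example only */
static void
my_rw_wait(lck_rw_t *rwlock, event_t event)
{
	lck_rw_lock_shared(rwlock);

	/*
	 * Drops the rw lock while blocked and re-acquires it in the same
	 * mode before returning; the thread keeps an RW priority floor
	 * while it is asleep.
	 */
	(void) lck_rw_sleep(rwlock, LCK_SLEEP_DEFAULT | LCK_SLEEP_PROMOTED_PRI,
	    event, THREAD_UNINT);

	lck_rw_done(rwlock);
}
#endif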
1223
1224/*
1225 * Routine: lck_rw_sleep_deadline
1226 */
1227wait_result_t
1228lck_rw_sleep_deadline(
1229 lck_rw_t *lck,
1230 lck_sleep_action_t lck_sleep_action,
1231 event_t event,
1232 wait_interrupt_t interruptible,
1233 uint64_t deadline)
1234{
1235 wait_result_t res;
1236 lck_rw_type_t lck_rw_type;
fe8ab488 1237 thread_t thread = current_thread();
1238
1239 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
1240 panic("Invalid lock sleep action %x\n", lck_sleep_action);
1241
1242 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1243 thread->rwlock_count++;
1244 }
1245
1246 res = assert_wait_deadline(event, interruptible, deadline);
1247 if (res == THREAD_WAITING) {
1248 lck_rw_type = lck_rw_done(lck);
1249 res = thread_block(THREAD_CONTINUE_NULL);
1250 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
1251 if (!(lck_sleep_action & (LCK_SLEEP_SHARED|LCK_SLEEP_EXCLUSIVE)))
1252 lck_rw_lock(lck, lck_rw_type);
1253 else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE)
1254 lck_rw_lock_exclusive(lck);
1255 else
1256 lck_rw_lock_shared(lck);
1257 }
1258 }
1259 else
1260 if (lck_sleep_action & LCK_SLEEP_UNLOCK)
1261 (void)lck_rw_done(lck);
1262
1263 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1264 if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
1265 /* sched_flags checked without lock, but will be rechecked while clearing */
1266
1267 /* Only if the caller wanted the lck_rw_t returned unlocked should we drop to 0 */
1268 assert(lck_sleep_action & LCK_SLEEP_UNLOCK);
1269
1270 lck_rw_clear_promotion(thread);
1271 }
1272 }
1273
1274 return res;
1275}
1276
1277/*
1278 * Reader-writer lock promotion
1279 *
1280 * We support a limited form of reader-writer
1281 * lock promotion whose effects are:
1282 *
1283 * * Qualifying threads have decay disabled
1284 * * Scheduler priority is reset to a floor of
 1285 * their statically assigned priority
1286 * or BASEPRI_BACKGROUND
1287 *
1288 * The rationale is that lck_rw_ts do not have
1289 * a single owner, so we cannot apply a directed
1290 * priority boost from all waiting threads
1291 * to all holding threads without maintaining
1292 * lists of all shared owners and all waiting
1293 * threads for every lock.
1294 *
1295 * Instead (and to preserve the uncontended fast-
1296 * path), acquiring (or attempting to acquire)
 1297 * a RW lock in shared or exclusive mode increments
1298 * a per-thread counter. Only if that thread stops
1299 * making forward progress (for instance blocking
1300 * on a mutex, or being preempted) do we consult
1301 * the counter and apply the priority floor.
1302 * When the thread becomes runnable again (or in
1303 * the case of preemption it never stopped being
1304 * runnable), it has the priority boost and should
1305 * be in a good position to run on the CPU and
1306 * release all RW locks (at which point the priority
1307 * boost is cleared).
1308 *
1309 * Care must be taken to ensure that priority
1310 * boosts are not retained indefinitely, since unlike
1311 * mutex priority boosts (where the boost is tied
1312 * to the mutex lifecycle), the boost is tied
1313 * to the thread and independent of any particular
1314 * lck_rw_t. Assertions are in place on return
1315 * to userspace so that the boost is not held
1316 * indefinitely.
1317 *
1318 * The routines that increment/decrement the
1319 * per-thread counter should err on the side of
1320 * incrementing any time a preemption is possible
1321 * and the lock would be visible to the rest of the
1322 * system as held (so it should be incremented before
1323 * interlocks are dropped/preemption is enabled, or
1324 * before a CAS is executed to acquire the lock).
1325 *
1326 */
1327
1328/*
1329 * lck_rw_clear_promotion: Undo priority promotions when the last RW
1330 * lock is released by a thread (if a promotion was active)
1331 */
1332void lck_rw_clear_promotion(thread_t thread)
1333{
1334 assert(thread->rwlock_count == 0);
1335
1336 /* Cancel any promotions if the thread had actually blocked while holding a RW lock */
1337 spl_t s = splsched();
1338
1339 thread_lock(thread);
1340
1341 if (thread->sched_flags & TH_SFLAG_RW_PROMOTED) {
1342 thread->sched_flags &= ~TH_SFLAG_RW_PROMOTED;
1343
1344 if (thread->sched_flags & TH_SFLAG_PROMOTED) {
1345 /* Thread still has a mutex promotion */
1346 } else if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
1347 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_RW_DEMOTE) | DBG_FUNC_NONE,
1348 (uintptr_t)thread_tid(thread), thread->sched_pri, DEPRESSPRI, 0, 0);
1349
1350 set_sched_pri(thread, DEPRESSPRI);
1351 } else {
1352 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_RW_DEMOTE) | DBG_FUNC_NONE,
1353 (uintptr_t)thread_tid(thread), thread->sched_pri, thread->base_pri, 0, 0);
1354
3e170ce0 1355 thread_recompute_sched_pri(thread, FALSE);
1356 }
1357 }
1358
1359 thread_unlock(thread);
1360 splx(s);
1361}
1362
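/*
 * Illustrative sketch (not part of the original file): the per-thread
 * rwlock_count protocol described in the comment block above, written
 * out the way the lck_*_sleep() routines in this file use it.  This is a
 * restatement of existing code, not a new interface.
 */
#if 0	/* example only */
static void
my_rwlock_count_protocol(thread_t thread)
{
	/* lock taken (or about to become visible as taken): count the hold */
	thread->rwlock_count++;

	/* ... the thread may block; the scheduler applies the priority floor ... */

	/* last hold released: drop the count and clear any promotion */
	if ((thread->rwlock_count-- == 1 /* field now 0 */) &&
	    (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
		/* sched_flags is rechecked under the thread lock inside */
		lck_rw_clear_promotion(thread);
	}
}
#endif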
1363/*
1364 * Callout from context switch if the thread goes
1365 * off core with a positive rwlock_count
1366 *
1367 * Called at splsched with the thread locked
1368 */
1369void
1370lck_rw_set_promotion_locked(thread_t thread)
1371{
1372 if (LcksOpts & disLkRWPrio)
1373 return;
1374
1375 integer_t priority;
1376
1377 priority = thread->sched_pri;
1378
1379 if (priority < thread->base_pri)
1380 priority = thread->base_pri;
1381 if (priority < BASEPRI_BACKGROUND)
1382 priority = BASEPRI_BACKGROUND;
1383
1384 if ((thread->sched_pri < priority) ||
1385 !(thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
1386 KERNEL_DEBUG_CONSTANT(
1387 MACHDBG_CODE(DBG_MACH_SCHED, MACH_RW_PROMOTE) | DBG_FUNC_NONE,
1388 (uintptr_t)thread_tid(thread), thread->sched_pri,
1389 thread->base_pri, priority, 0);
1390
1391 thread->sched_flags |= TH_SFLAG_RW_PROMOTED;
1392
1393 if (thread->sched_pri < priority)
1394 set_sched_pri(thread, priority);
1395 }
1396}
1397
1398kern_return_t
1399host_lockgroup_info(
1400 host_t host,
1401 lockgroup_info_array_t *lockgroup_infop,
1402 mach_msg_type_number_t *lockgroup_infoCntp)
1403{
1404 lockgroup_info_t *lockgroup_info_base;
1405 lockgroup_info_t *lockgroup_info;
1406 vm_offset_t lockgroup_info_addr;
1407 vm_size_t lockgroup_info_size;
2dced7af 1408 vm_size_t lockgroup_info_vmsize;
1409 lck_grp_t *lck_grp;
1410 unsigned int i;
1411 vm_map_copy_t copy;
1412 kern_return_t kr;
1413
1414 if (host == HOST_NULL)
1415 return KERN_INVALID_HOST;
1416
b0d623f7 1417 lck_mtx_lock(&lck_grp_lock);
91447636 1418
1419 lockgroup_info_size = lck_grp_cnt * sizeof(*lockgroup_info);
1420 lockgroup_info_vmsize = round_page(lockgroup_info_size);
91447636 1421 kr = kmem_alloc_pageable(ipc_kernel_map,
2dced7af 1422 &lockgroup_info_addr, lockgroup_info_vmsize, VM_KERN_MEMORY_IPC);
91447636 1423 if (kr != KERN_SUCCESS) {
b0d623f7 1424 lck_mtx_unlock(&lck_grp_lock);
1425 return(kr);
1426 }
1427
1428 lockgroup_info_base = (lockgroup_info_t *) lockgroup_info_addr;
1429 lck_grp = (lck_grp_t *)queue_first(&lck_grp_queue);
1430 lockgroup_info = lockgroup_info_base;
1431
1432 for (i = 0; i < lck_grp_cnt; i++) {
1433
1434 lockgroup_info->lock_spin_cnt = lck_grp->lck_grp_spincnt;
1435 lockgroup_info->lock_spin_util_cnt = lck_grp->lck_grp_stat.lck_grp_spin_stat.lck_grp_spin_util_cnt;
1436 lockgroup_info->lock_spin_held_cnt = lck_grp->lck_grp_stat.lck_grp_spin_stat.lck_grp_spin_held_cnt;
1437 lockgroup_info->lock_spin_miss_cnt = lck_grp->lck_grp_stat.lck_grp_spin_stat.lck_grp_spin_miss_cnt;
1438 lockgroup_info->lock_spin_held_max = lck_grp->lck_grp_stat.lck_grp_spin_stat.lck_grp_spin_held_max;
1439 lockgroup_info->lock_spin_held_cum = lck_grp->lck_grp_stat.lck_grp_spin_stat.lck_grp_spin_held_cum;
1440
1441 lockgroup_info->lock_mtx_cnt = lck_grp->lck_grp_mtxcnt;
1442 lockgroup_info->lock_mtx_util_cnt = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_util_cnt;
1443 lockgroup_info->lock_mtx_held_cnt = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_held_cnt;
1444 lockgroup_info->lock_mtx_miss_cnt = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_miss_cnt;
1445 lockgroup_info->lock_mtx_wait_cnt = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_wait_cnt;
1446 lockgroup_info->lock_mtx_held_max = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_held_max;
1447 lockgroup_info->lock_mtx_held_cum = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_held_cum;
1448 lockgroup_info->lock_mtx_wait_max = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_wait_max;
1449 lockgroup_info->lock_mtx_wait_cum = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_wait_cum;
1450
1451 lockgroup_info->lock_rw_cnt = lck_grp->lck_grp_rwcnt;
1452 lockgroup_info->lock_rw_util_cnt = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_util_cnt;
1453 lockgroup_info->lock_rw_held_cnt = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_held_cnt;
1454 lockgroup_info->lock_rw_miss_cnt = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_miss_cnt;
1455 lockgroup_info->lock_rw_wait_cnt = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_wait_cnt;
1456 lockgroup_info->lock_rw_held_max = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_held_max;
1457 lockgroup_info->lock_rw_held_cum = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_held_cum;
1458 lockgroup_info->lock_rw_wait_max = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_wait_max;
1459 lockgroup_info->lock_rw_wait_cum = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_wait_cum;
1460
1461 (void) strncpy(lockgroup_info->lockgroup_name,lck_grp->lck_grp_name, LOCKGROUP_MAX_NAME);
1462
1463 lck_grp = (lck_grp_t *)(queue_next((queue_entry_t)(lck_grp)));
1464 lockgroup_info++;
1465 }
1466
1467 *lockgroup_infoCntp = lck_grp_cnt;
b0d623f7 1468 lck_mtx_unlock(&lck_grp_lock);
91447636 1469
1470 if (lockgroup_info_size != lockgroup_info_vmsize)
1471 bzero((char *)lockgroup_info, lockgroup_info_vmsize - lockgroup_info_size);
1472
1473 kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)lockgroup_info_addr,
1474 (vm_map_size_t)lockgroup_info_size, TRUE, &copy);
1475 assert(kr == KERN_SUCCESS);
1476
1477 *lockgroup_infop = (lockgroup_info_t *) copy;
1478
1479 return(KERN_SUCCESS);
1480}
1481
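/*
 * Illustrative sketch (not part of the original file): how a user-space
 * client might call the host_lockgroup_info() MIG routine implemented
 * above to dump per-group lock counts.  Error handling and the printf
 * format are simplified; this is user-space, not kernel, code.
 */
#if 0	/* example only, user-space */
#include <mach/mach.h>
#include <mach_debug/lockgroup_info.h>
#include <stdio.h>

static void
dump_lockgroups(void)
{
	lockgroup_info_array_t	info;
	mach_msg_type_number_t	count;
	unsigned int		i;

	if (host_lockgroup_info(mach_host_self(), &info, &count) != KERN_SUCCESS)
		return;

	for (i = 0; i < count; i++)
		printf("%s: spin %llu mtx %llu rw %llu\n",
		    info[i].lockgroup_name,
		    (unsigned long long)info[i].lock_spin_cnt,
		    (unsigned long long)info[i].lock_mtx_cnt,
		    (unsigned long long)info[i].lock_rw_cnt);

	vm_deallocate(mach_task_self(), (vm_address_t)info,
	    count * sizeof(info[0]));
}
#endif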
1482/*
1483 * Atomic primitives, prototyped in kern/simple_lock.h
1484 * Noret versions are more efficient on some architectures
1485 */
1486
1487uint32_t
1488hw_atomic_add(volatile uint32_t *dest, uint32_t delt)
1489{
1490 ALIGN_TEST(dest,uint32_t);
1491 return __c11_atomic_fetch_add(ATOMIC_CAST(uint32_t,dest), delt, memory_order_relaxed) + delt;
1492}
1493
1494uint32_t
1495hw_atomic_sub(volatile uint32_t *dest, uint32_t delt)
1496{
1497 ALIGN_TEST(dest,uint32_t);
1498 return __c11_atomic_fetch_sub(ATOMIC_CAST(uint32_t,dest), delt, memory_order_relaxed) - delt;
1499}
1500
1501uint32_t
1502hw_atomic_or(volatile uint32_t *dest, uint32_t mask)
1503{
1504 ALIGN_TEST(dest,uint32_t);
1505 return __c11_atomic_fetch_or(ATOMIC_CAST(uint32_t,dest), mask, memory_order_relaxed) | mask;
1506}
1507
1508void
1509hw_atomic_or_noret(volatile uint32_t *dest, uint32_t mask)
1510{
1511 ALIGN_TEST(dest,uint32_t);
1512 __c11_atomic_fetch_or(ATOMIC_CAST(uint32_t,dest), mask, memory_order_relaxed);
1513}
1514
1515uint32_t
1516hw_atomic_and(volatile uint32_t *dest, uint32_t mask)
1517{
1518 ALIGN_TEST(dest,uint32_t);
1519 return __c11_atomic_fetch_and(ATOMIC_CAST(uint32_t,dest), mask, memory_order_relaxed) & mask;
1520}
1521
1522void
1523hw_atomic_and_noret(volatile uint32_t *dest, uint32_t mask)
1524{
1525 ALIGN_TEST(dest,uint32_t);
1526 __c11_atomic_fetch_and(ATOMIC_CAST(uint32_t,dest), mask, memory_order_relaxed);
1527}
1528
1529uint32_t
1530hw_compare_and_store(uint32_t oldval, uint32_t newval, volatile uint32_t *dest)
1531{
1532 ALIGN_TEST(dest,uint32_t);
1533 return __c11_atomic_compare_exchange_strong(ATOMIC_CAST(uint32_t,dest), &oldval, newval,
1534 memory_order_acq_rel_smp, memory_order_relaxed);
1535}
1536
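/*
 * Illustrative sketch (not part of the original file): typical use of the
 * hw_atomic_* helpers defined above, including a compare-and-swap retry
 * loop built on hw_compare_and_store().  The variable names are
 * hypothetical.
 */
#if 0	/* example only */
static volatile uint32_t	my_flags;
static volatile uint32_t	my_counter;

static void
my_atomic_examples(void)
{
	uint32_t	old, new;

	(void) hw_atomic_add(&my_counter, 1);		/* returns the new value */
	hw_atomic_or_noret(&my_flags, 0x1);		/* set a bit, result unused */
	hw_atomic_and_noret(&my_flags, ~0x2u);		/* clear a bit */

	/* saturating increment via a CAS retry loop */
	do {
		old = my_counter;
		new = (old == UINT32_MAX) ? old : old + 1;
	} while (!hw_compare_and_store(old, new, &my_counter));
}
#endif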