apple/xnu.git: osfmk/kern/locks.c (blob 5141535e3eeb9138e404b396c336ce57faa8c541)
1 /*
2 * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56
57 #define ATOMIC_PRIVATE 1
58 #define LOCK_PRIVATE 1
59
60 #include <mach_ldebug.h>
61 #include <debug.h>
62
63 #include <mach/kern_return.h>
64 #include <mach/mach_host_server.h>
65 #include <mach_debug/lockgroup_info.h>
66
67 #include <kern/locks.h>
68 #include <kern/misc_protos.h>
69 #include <kern/kalloc.h>
70 #include <kern/thread.h>
71 #include <kern/processor.h>
72 #include <kern/sched_prim.h>
73 #include <kern/debug.h>
74 #include <machine/atomic.h>
75 #include <machine/machine_cpu.h>
76 #include <string.h>
77
78
79 #include <sys/kdebug.h>
80
81 #if CONFIG_DTRACE
82 /*
83 * We need only enough declarations from the BSD-side to be able to
84 * test if our probe is active, and to call __dtrace_probe(). Setting
85 * NEED_DTRACE_DEFS gets a local copy of those definitions pulled in.
86 */
87 #define NEED_DTRACE_DEFS
88 #include <../bsd/sys/lockstat.h>
89 #endif
90
91 #define LCK_MTX_SLEEP_CODE 0
92 #define LCK_MTX_SLEEP_DEADLINE_CODE 1
93 #define LCK_MTX_LCK_WAIT_CODE 2
94 #define LCK_MTX_UNLCK_WAKEUP_CODE 3
95
96 #if MACH_LDEBUG
97 #define ALIGN_TEST(p,t) do{if((uintptr_t)p&(sizeof(t)-1)) __builtin_trap();}while(0)
98 #else
99 #define ALIGN_TEST(p,t) do{}while(0)
100 #endif
101
102 /* Silence the volatile to _Atomic cast warning */
103 #define ATOMIC_CAST(t,p) ((_Atomic t*)(uintptr_t)(p))
104
105 /* Enforce program order of loads and stores. */
106 #define ordered_load(target, type) \
107 __c11_atomic_load((_Atomic type *)(target), memory_order_relaxed)
108 #define ordered_store(target, type, value) \
109 __c11_atomic_store((_Atomic type *)(target), value, memory_order_relaxed)
110
111 #define ordered_load_hw(lock) ordered_load(&(lock)->lock_data, uintptr_t)
112 #define ordered_store_hw(lock, value) ordered_store(&(lock)->lock_data, uintptr_t, (value))
113
114 #define NOINLINE __attribute__((noinline))
115
116
117 static queue_head_t lck_grp_queue;
118 static unsigned int lck_grp_cnt;
119
120 decl_lck_mtx_data(static,lck_grp_lock)
121 static lck_mtx_ext_t lck_grp_lock_ext;
122
123 lck_grp_attr_t LockDefaultGroupAttr;
124 lck_grp_t LockCompatGroup;
125 lck_attr_t LockDefaultLckAttr;
126
127 /*
128 * Routine: lck_mod_init
129 */
130
131 void
132 lck_mod_init(
133 void)
134 {
135 /*
136 	 * Obtain "lcks" options: this currently controls lock statistics
137 */
138 if (!PE_parse_boot_argn("lcks", &LcksOpts, sizeof (LcksOpts)))
139 LcksOpts = 0;
140
141 queue_init(&lck_grp_queue);
142
143 /*
144 * Need to bootstrap the LockCompatGroup instead of calling lck_grp_init() here. This avoids
145 * grabbing the lck_grp_lock before it is initialized.
146 */
147
148 bzero(&LockCompatGroup, sizeof(lck_grp_t));
149 (void) strncpy(LockCompatGroup.lck_grp_name, "Compatibility APIs", LCK_GRP_MAX_NAME);
150
151 if (LcksOpts & enaLkStat)
152 LockCompatGroup.lck_grp_attr = LCK_GRP_ATTR_STAT;
153 else
154 LockCompatGroup.lck_grp_attr = LCK_ATTR_NONE;
155
156 LockCompatGroup.lck_grp_refcnt = 1;
157
158 enqueue_tail(&lck_grp_queue, (queue_entry_t)&LockCompatGroup);
159 lck_grp_cnt = 1;
160
161 lck_grp_attr_setdefault(&LockDefaultGroupAttr);
162 lck_attr_setdefault(&LockDefaultLckAttr);
163
164 lck_mtx_init_ext(&lck_grp_lock, &lck_grp_lock_ext, &LockCompatGroup, &LockDefaultLckAttr);
165
166 }
167
168 /*
169 * Routine: lck_grp_attr_alloc_init
170 */
171
172 lck_grp_attr_t *
173 lck_grp_attr_alloc_init(
174 void)
175 {
176 lck_grp_attr_t *attr;
177
178 if ((attr = (lck_grp_attr_t *)kalloc(sizeof(lck_grp_attr_t))) != 0)
179 lck_grp_attr_setdefault(attr);
180
181 return(attr);
182 }
183
184
185 /*
186 * Routine: lck_grp_attr_setdefault
187 */
188
189 void
190 lck_grp_attr_setdefault(
191 lck_grp_attr_t *attr)
192 {
193 if (LcksOpts & enaLkStat)
194 attr->grp_attr_val = LCK_GRP_ATTR_STAT;
195 else
196 attr->grp_attr_val = 0;
197 }
198
199
200 /*
201 * Routine: lck_grp_attr_setstat
202 */
203
204 void
205 lck_grp_attr_setstat(
206 lck_grp_attr_t *attr)
207 {
208 (void)hw_atomic_or(&attr->grp_attr_val, LCK_GRP_ATTR_STAT);
209 }
210
211
212 /*
213 * Routine: lck_grp_attr_free
214 */
215
216 void
217 lck_grp_attr_free(
218 lck_grp_attr_t *attr)
219 {
220 kfree(attr, sizeof(lck_grp_attr_t));
221 }
222
223
224 /*
225 * Routine: lck_grp_alloc_init
226 */
227
228 lck_grp_t *
229 lck_grp_alloc_init(
230 const char* grp_name,
231 lck_grp_attr_t *attr)
232 {
233 lck_grp_t *grp;
234
235 if ((grp = (lck_grp_t *)kalloc(sizeof(lck_grp_t))) != 0)
236 lck_grp_init(grp, grp_name, attr);
237
238 return(grp);
239 }
240
241 /*
242 * Routine: lck_grp_init
243 */
244
245 void
246 lck_grp_init(lck_grp_t * grp, const char * grp_name, lck_grp_attr_t * attr)
247 {
248 /* make sure locking infrastructure has been initialized */
249 assert(lck_grp_cnt > 0);
250
251 bzero((void *)grp, sizeof(lck_grp_t));
252
253 (void)strlcpy(grp->lck_grp_name, grp_name, LCK_GRP_MAX_NAME);
254
255 if (attr != LCK_GRP_ATTR_NULL)
256 grp->lck_grp_attr = attr->grp_attr_val;
257 else if (LcksOpts & enaLkStat)
258 grp->lck_grp_attr = LCK_GRP_ATTR_STAT;
259 else
260 grp->lck_grp_attr = LCK_ATTR_NONE;
261
262 grp->lck_grp_refcnt = 1;
263
264 lck_mtx_lock(&lck_grp_lock);
265 enqueue_tail(&lck_grp_queue, (queue_entry_t)grp);
266 lck_grp_cnt++;
267 lck_mtx_unlock(&lck_grp_lock);
268 }
269
270 /*
271 * Routine: lck_grp_free
272 */
273
274 void
275 lck_grp_free(
276 lck_grp_t *grp)
277 {
278 lck_mtx_lock(&lck_grp_lock);
279 lck_grp_cnt--;
280 (void)remque((queue_entry_t)grp);
281 lck_mtx_unlock(&lck_grp_lock);
282 lck_grp_deallocate(grp);
283 }
284
285
286 /*
287 * Routine: lck_grp_reference
288 */
289
290 void
291 lck_grp_reference(
292 lck_grp_t *grp)
293 {
294 (void)hw_atomic_add(&grp->lck_grp_refcnt, 1);
295 }
296
297
298 /*
299 * Routine: lck_grp_deallocate
300 */
301
302 void
303 lck_grp_deallocate(
304 lck_grp_t *grp)
305 {
306 if (hw_atomic_sub(&grp->lck_grp_refcnt, 1) == 0)
307 kfree(grp, sizeof(lck_grp_t));
308 }
309
310 /*
311 * Routine: lck_grp_lckcnt_incr
312 */
313
314 void
315 lck_grp_lckcnt_incr(
316 lck_grp_t *grp,
317 lck_type_t lck_type)
318 {
319 unsigned int *lckcnt;
320
321 switch (lck_type) {
322 case LCK_TYPE_SPIN:
323 lckcnt = &grp->lck_grp_spincnt;
324 break;
325 case LCK_TYPE_MTX:
326 lckcnt = &grp->lck_grp_mtxcnt;
327 break;
328 case LCK_TYPE_RW:
329 lckcnt = &grp->lck_grp_rwcnt;
330 break;
331 default:
332 return panic("lck_grp_lckcnt_incr(): invalid lock type: %d\n", lck_type);
333 }
334
335 (void)hw_atomic_add(lckcnt, 1);
336 }
337
338 /*
339 * Routine: lck_grp_lckcnt_decr
340 */
341
342 void
343 lck_grp_lckcnt_decr(
344 lck_grp_t *grp,
345 lck_type_t lck_type)
346 {
347 unsigned int *lckcnt;
348 int updated;
349
350 switch (lck_type) {
351 case LCK_TYPE_SPIN:
352 lckcnt = &grp->lck_grp_spincnt;
353 break;
354 case LCK_TYPE_MTX:
355 lckcnt = &grp->lck_grp_mtxcnt;
356 break;
357 case LCK_TYPE_RW:
358 lckcnt = &grp->lck_grp_rwcnt;
359 break;
360 default:
361 panic("lck_grp_lckcnt_decr(): invalid lock type: %d\n", lck_type);
362 return;
363 }
364
365 updated = (int)hw_atomic_sub(lckcnt, 1);
366 assert(updated >= 0);
367 }
368
369 /*
370 * Routine: lck_attr_alloc_init
371 */
372
373 lck_attr_t *
374 lck_attr_alloc_init(
375 void)
376 {
377 lck_attr_t *attr;
378
379 if ((attr = (lck_attr_t *)kalloc(sizeof(lck_attr_t))) != 0)
380 lck_attr_setdefault(attr);
381
382 return(attr);
383 }
384
385
386 /*
387 * Routine: lck_attr_setdefault
388 */
389
390 void
391 lck_attr_setdefault(
392 lck_attr_t *attr)
393 {
394 #if __i386__ || __x86_64__
395 #if !DEBUG
396 if (LcksOpts & enaLkDeb)
397 attr->lck_attr_val = LCK_ATTR_DEBUG;
398 else
399 attr->lck_attr_val = LCK_ATTR_NONE;
400 #else
401 attr->lck_attr_val = LCK_ATTR_DEBUG;
402 #endif /* !DEBUG */
403 #else
404 #error Unknown architecture.
405 #endif /* __i386__ || __x86_64__ */
406 }
407
408
409 /*
410 * Routine: lck_attr_setdebug
411 */
412 void
413 lck_attr_setdebug(
414 lck_attr_t *attr)
415 {
416 (void)hw_atomic_or(&attr->lck_attr_val, LCK_ATTR_DEBUG);
417 }
418
419 /*
420  * Routine:	lck_attr_cleardebug
421 */
422 void
423 lck_attr_cleardebug(
424 lck_attr_t *attr)
425 {
426 (void)hw_atomic_and(&attr->lck_attr_val, ~LCK_ATTR_DEBUG);
427 }
428
429
430 /*
431 * Routine: lck_attr_rw_shared_priority
432 */
433 void
434 lck_attr_rw_shared_priority(
435 lck_attr_t *attr)
436 {
437 (void)hw_atomic_or(&attr->lck_attr_val, LCK_ATTR_RW_SHARED_PRIORITY);
438 }
439
440
441 /*
442 * Routine: lck_attr_free
443 */
444 void
445 lck_attr_free(
446 lck_attr_t *attr)
447 {
448 kfree(attr, sizeof(lck_attr_t));
449 }
450
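/*
 * A minimal usage sketch for the group/attribute KPIs above, assuming a
 * hypothetical client; the group name "com.example.subsystem" and the
 * example_* identifiers are placeholders, and the block is compiled out.
 */
#if 0	/* usage sketch only, not compiled */
static lck_grp_attr_t	*example_grp_attr;
static lck_grp_t	*example_grp;
static lck_attr_t	*example_attr;
static lck_mtx_t	*example_mtx;

static void
example_locks_init(void)
{
	example_grp_attr = lck_grp_attr_alloc_init();
	lck_grp_attr_setstat(example_grp_attr);		/* opt this group into statistics */

	example_grp = lck_grp_alloc_init("com.example.subsystem", example_grp_attr);

	example_attr = lck_attr_alloc_init();		/* defaults follow LcksOpts/DEBUG */
	example_mtx = lck_mtx_alloc_init(example_grp, example_attr);
}
#endif
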
451 /*
452 * Routine: hw_lock_init
453 *
454 * Initialize a hardware lock.
455 */
456 void
457 hw_lock_init(hw_lock_t lock)
458 {
459 ordered_store_hw(lock, 0);
460 }
461
462 /*
463 * Routine: hw_lock_lock_contended
464 *
465 * Spin until lock is acquired or timeout expires.
466 * timeout is in mach_absolute_time ticks.
467 * MACH_RT: called with preemption disabled.
468 */
469
470 #if __SMP__
471 static unsigned int NOINLINE
472 hw_lock_lock_contended(hw_lock_t lock, uintptr_t data, uint64_t timeout, boolean_t do_panic)
473 {
474 uint64_t end = 0;
475 uintptr_t holder = lock->lock_data;
476 int i;
477
478 if (timeout == 0)
479 timeout = LOCK_PANIC_TIMEOUT;
480
481 for ( ; ; ) {
482 for (i = 0; i < LOCK_SNOOP_SPINS; i++) {
483 boolean_t wait = FALSE;
484
485 cpu_pause();
486 #if (!__ARM_ENABLE_WFE_) || (LOCK_PRETEST)
487 holder = ordered_load_hw(lock);
488 if (holder != 0)
489 continue;
490 #endif
491 #if __ARM_ENABLE_WFE_
492 wait = TRUE; // Wait for event
493 #endif
494 if (atomic_compare_exchange(&lock->lock_data, 0, data,
495 memory_order_acquire_smp, wait))
496 return 1;
497 }
498 if (end == 0)
499 end = ml_get_timebase() + timeout;
500 else if (ml_get_timebase() >= end)
501 break;
502 }
503 if (do_panic) {
504 // Capture the actual time spent blocked, which may be higher than the timeout
505 // if a misbehaving interrupt stole this thread's CPU time.
506 panic("Spinlock timeout after %llu ticks, %p = %lx",
507 (ml_get_timebase() - end + timeout), lock, holder);
508 }
509 return 0;
510 }
511 #endif // __SMP__
512
513 /*
514 * Routine: hw_lock_lock
515 *
516 * Acquire lock, spinning until it becomes available.
517  *	MACH_RT: also returns with preemption disabled.
518 */
519 void
520 hw_lock_lock(hw_lock_t lock)
521 {
522 thread_t thread;
523 uintptr_t state;
524
525 thread = current_thread();
526 disable_preemption_for_thread(thread);
527 state = LCK_MTX_THREAD_TO_STATE(thread) | PLATFORM_LCK_ILOCK;
528 #if __SMP__
529 #if LOCK_PRETEST
530 if (ordered_load_hw(lock))
531 goto contended;
532 #endif // LOCK_PRETEST
533 if (atomic_compare_exchange(&lock->lock_data, 0, state,
534 memory_order_acquire_smp, TRUE))
535 return;
536 #if LOCK_PRETEST
537 contended:
538 #endif // LOCK_PRETEST
539 hw_lock_lock_contended(lock, state, 0, TRUE);
540 #else // __SMP__
541 if (lock->lock_data)
542 panic("Spinlock held %p", lock);
543 lock->lock_data = state;
544 #endif // __SMP__
545 return;
546 }
547
548 /*
549 * Routine: hw_lock_to
550 *
551 * Acquire lock, spinning until it becomes available or timeout.
552 * timeout is in mach_absolute_time ticks.
553  *	MACH_RT: also returns with preemption disabled.
554 */
555 unsigned int
556 hw_lock_to(hw_lock_t lock, uint64_t timeout)
557 {
558 thread_t thread;
559 uintptr_t state;
560
561 thread = current_thread();
562 disable_preemption_for_thread(thread);
563 state = LCK_MTX_THREAD_TO_STATE(thread) | PLATFORM_LCK_ILOCK;
564 #if __SMP__
565 #if LOCK_PRETEST
566 if (ordered_load_hw(lock))
567 goto contended;
568 #endif // LOCK_PRETEST
569 if (atomic_compare_exchange(&lock->lock_data, 0, state,
570 memory_order_acquire_smp, TRUE))
571 return 1;
572 #if LOCK_PRETEST
573 contended:
574 #endif // LOCK_PRETEST
575 return hw_lock_lock_contended(lock, state, timeout, FALSE);
576 #else // __SMP__
577 (void)timeout;
578 if (ordered_load_hw(lock) == 0) {
579 ordered_store_hw(lock, state);
580 return 1;
581 }
582 return 0;
583 #endif // __SMP__
584 }
585
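/*
 * A small sketch of a bounded acquisition, assuming a hypothetical caller:
 * hw_lock_to() takes its timeout in mach_absolute_time ticks, so wall-clock
 * bounds are first converted with nanoseconds_to_absolutetime().
 */
#if 0	/* usage sketch only, not compiled */
static unsigned int
example_hw_lock_with_timeout(hw_lock_t lock)
{
	uint64_t ticks;

	nanoseconds_to_absolutetime(NSEC_PER_SEC / 100, &ticks);	/* ~10ms bound */
	return hw_lock_to(lock, ticks);		/* 1 on success, 0 on timeout */
}
#endif
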
586 /*
587 * Routine: hw_lock_try
588 * MACH_RT: returns with preemption disabled on success.
589 */
590 unsigned int
591 hw_lock_try(hw_lock_t lock)
592 {
593 thread_t thread = current_thread();
594 int success = 0;
595 #if LOCK_TRY_DISABLE_INT
596 long intmask;
597
598 intmask = disable_interrupts();
599 #else
600 disable_preemption_for_thread(thread);
601 #endif // LOCK_TRY_DISABLE_INT
602
603 #if __SMP__
604 #if LOCK_PRETEST
605 if (ordered_load_hw(lock))
606 goto failed;
607 #endif // LOCK_PRETEST
608 success = atomic_compare_exchange(&lock->lock_data, 0, LCK_MTX_THREAD_TO_STATE(thread) | PLATFORM_LCK_ILOCK,
609 memory_order_acquire_smp, FALSE);
610 #else
611 if (lock->lock_data == 0) {
612 lock->lock_data = LCK_MTX_THREAD_TO_STATE(thread) | PLATFORM_LCK_ILOCK;
613 success = 1;
614 }
615 #endif // __SMP__
616
617 #if LOCK_TRY_DISABLE_INT
618 if (success)
619 disable_preemption_for_thread(thread);
620 #if LOCK_PRETEST
621 failed:
622 #endif // LOCK_PRETEST
623 restore_interrupts(intmask);
624 #else
625 #if LOCK_PRETEST
626 failed:
627 #endif // LOCK_PRETEST
628 if (!success)
629 enable_preemption();
630 #endif // LOCK_TRY_DISABLE_INT
631 return success;
632 }
633
634 /*
635 * Routine: hw_lock_unlock
636 *
637 * Unconditionally release lock.
638 * MACH_RT: release preemption level.
639 */
640 void
641 hw_lock_unlock(hw_lock_t lock)
642 {
643 __c11_atomic_store((_Atomic uintptr_t *)&lock->lock_data, 0, memory_order_release_smp);
644 enable_preemption();
645 }
646
647 /*
648  *	Routine: hw_lock_held
649 * MACH_RT: doesn't change preemption state.
650 * N.B. Racy, of course.
651 */
652 unsigned int
653 hw_lock_held(hw_lock_t lock)
654 {
655 return (ordered_load_hw(lock) != 0);
656 }
657
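/*
 * A usage sketch for the hw_lock primitives above, assuming a hypothetical
 * example_hw_lck: hw_lock_lock() returns with preemption disabled and
 * hw_lock_unlock() re-enables it, so the critical section must be short and
 * must not block.
 */
#if 0	/* usage sketch only, not compiled */
static hw_lock_data_t	example_hw_lck;

static void
example_hw_lock_usage(void)
{
	hw_lock_init(&example_hw_lck);

	hw_lock_lock(&example_hw_lck);		/* spins; preemption disabled on return */
	/* ... short, non-blocking critical section ... */
	hw_lock_unlock(&example_hw_lck);	/* releases lock, re-enables preemption */

	if (hw_lock_try(&example_hw_lck)) {	/* non-spinning attempt */
		hw_lock_unlock(&example_hw_lck);
	}
}
#endif
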
658 /*
659 * Routine: lck_spin_sleep
660 */
661 wait_result_t
662 lck_spin_sleep(
663 lck_spin_t *lck,
664 lck_sleep_action_t lck_sleep_action,
665 event_t event,
666 wait_interrupt_t interruptible)
667 {
668 wait_result_t res;
669
670 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
671 panic("Invalid lock sleep action %x\n", lck_sleep_action);
672
673 res = assert_wait(event, interruptible);
674 if (res == THREAD_WAITING) {
675 lck_spin_unlock(lck);
676 res = thread_block(THREAD_CONTINUE_NULL);
677 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK))
678 lck_spin_lock(lck);
679 }
680 else
681 if (lck_sleep_action & LCK_SLEEP_UNLOCK)
682 lck_spin_unlock(lck);
683
684 return res;
685 }
686
687
688 /*
689 * Routine: lck_spin_sleep_deadline
690 */
691 wait_result_t
692 lck_spin_sleep_deadline(
693 lck_spin_t *lck,
694 lck_sleep_action_t lck_sleep_action,
695 event_t event,
696 wait_interrupt_t interruptible,
697 uint64_t deadline)
698 {
699 wait_result_t res;
700
701 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
702 panic("Invalid lock sleep action %x\n", lck_sleep_action);
703
704 res = assert_wait_deadline(event, interruptible, deadline);
705 if (res == THREAD_WAITING) {
706 lck_spin_unlock(lck);
707 res = thread_block(THREAD_CONTINUE_NULL);
708 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK))
709 lck_spin_lock(lck);
710 }
711 else
712 if (lck_sleep_action & LCK_SLEEP_UNLOCK)
713 lck_spin_unlock(lck);
714
715 return res;
716 }
717
718
719 /*
720 * Routine: lck_mtx_clear_promoted
721 *
722 * Handle clearing of TH_SFLAG_PROMOTED,
723 * adjusting thread priority as needed.
724 *
725 * Called with thread lock held
726 */
727 static void
728 lck_mtx_clear_promoted (
729 thread_t thread,
730 __kdebug_only uintptr_t trace_lck)
731 {
732 thread->sched_flags &= ~TH_SFLAG_PROMOTED;
733
734 if (thread->sched_flags & TH_SFLAG_RW_PROMOTED) {
735 /* Thread still has a RW lock promotion */
736 } else if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
737 KERNEL_DEBUG_CONSTANT(
738 MACHDBG_CODE(DBG_MACH_SCHED,MACH_DEMOTE) | DBG_FUNC_NONE,
739 thread->sched_pri, DEPRESSPRI, 0, trace_lck, 0);
740 set_sched_pri(thread, DEPRESSPRI);
741 } else {
742 if (thread->base_pri < thread->sched_pri) {
743 KERNEL_DEBUG_CONSTANT(
744 MACHDBG_CODE(DBG_MACH_SCHED,MACH_DEMOTE) | DBG_FUNC_NONE,
745 thread->sched_pri, thread->base_pri, 0, trace_lck, 0);
746 }
747 thread_recompute_sched_pri(thread, FALSE);
748 }
749 }
750
751
752 /*
753 * Routine: lck_mtx_sleep
754 */
755 wait_result_t
756 lck_mtx_sleep(
757 lck_mtx_t *lck,
758 lck_sleep_action_t lck_sleep_action,
759 event_t event,
760 wait_interrupt_t interruptible)
761 {
762 wait_result_t res;
763 thread_t thread = current_thread();
764
765 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_START,
766 VM_KERNEL_UNSLIDE_OR_PERM(lck), (int)lck_sleep_action, VM_KERNEL_UNSLIDE_OR_PERM(event), (int)interruptible, 0);
767
768 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
769 panic("Invalid lock sleep action %x\n", lck_sleep_action);
770
771 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
772 /*
773 * We overload the RW lock promotion to give us a priority ceiling
774 * during the time that this thread is asleep, so that when it
775 * is re-awakened (and not yet contending on the mutex), it is
776 * runnable at a reasonably high priority.
777 */
778 thread->rwlock_count++;
779 }
780
781 res = assert_wait(event, interruptible);
782 if (res == THREAD_WAITING) {
783 lck_mtx_unlock(lck);
784 res = thread_block(THREAD_CONTINUE_NULL);
785 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
786 if ((lck_sleep_action & LCK_SLEEP_SPIN))
787 lck_mtx_lock_spin(lck);
788 else
789 lck_mtx_lock(lck);
790 }
791 }
792 else
793 if (lck_sleep_action & LCK_SLEEP_UNLOCK)
794 lck_mtx_unlock(lck);
795
796 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
797 if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
798 /* sched_flags checked without lock, but will be rechecked while clearing */
799 lck_rw_clear_promotion(thread);
800 }
801 }
802
803 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0);
804
805 return res;
806 }
807
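/*
 * A sketch of the usual lck_mtx_sleep() pattern, assuming hypothetical
 * example_mtx/example_ready state: with LCK_SLEEP_DEFAULT the mutex is
 * dropped while asleep and re-acquired before lck_mtx_sleep() returns.
 */
#if 0	/* usage sketch only, not compiled */
static lck_mtx_t	example_mtx;
static boolean_t	example_ready;

static void
example_wait_for_ready(void)
{
	lck_mtx_lock(&example_mtx);
	while (!example_ready) {
		(void) lck_mtx_sleep(&example_mtx, LCK_SLEEP_DEFAULT,
			(event_t)&example_ready, THREAD_UNINT);
	}
	lck_mtx_unlock(&example_mtx);
}

static void
example_mark_ready(void)
{
	lck_mtx_lock(&example_mtx);
	example_ready = TRUE;
	thread_wakeup((event_t)&example_ready);
	lck_mtx_unlock(&example_mtx);
}
#endif
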
808
809 /*
810 * Routine: lck_mtx_sleep_deadline
811 */
812 wait_result_t
813 lck_mtx_sleep_deadline(
814 lck_mtx_t *lck,
815 lck_sleep_action_t lck_sleep_action,
816 event_t event,
817 wait_interrupt_t interruptible,
818 uint64_t deadline)
819 {
820 wait_result_t res;
821 thread_t thread = current_thread();
822
823 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_START,
824 VM_KERNEL_UNSLIDE_OR_PERM(lck), (int)lck_sleep_action, VM_KERNEL_UNSLIDE_OR_PERM(event), (int)interruptible, 0);
825
826 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
827 panic("Invalid lock sleep action %x\n", lck_sleep_action);
828
829 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
830 /*
831 * See lck_mtx_sleep().
832 */
833 thread->rwlock_count++;
834 }
835
836 res = assert_wait_deadline(event, interruptible, deadline);
837 if (res == THREAD_WAITING) {
838 lck_mtx_unlock(lck);
839 res = thread_block(THREAD_CONTINUE_NULL);
840 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
841 if ((lck_sleep_action & LCK_SLEEP_SPIN))
842 lck_mtx_lock_spin(lck);
843 else
844 lck_mtx_lock(lck);
845 }
846 }
847 else
848 if (lck_sleep_action & LCK_SLEEP_UNLOCK)
849 lck_mtx_unlock(lck);
850
851 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
852 if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
853 /* sched_flags checked without lock, but will be rechecked while clearing */
854 lck_rw_clear_promotion(thread);
855 }
856 }
857
858 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0);
859
860 return res;
861 }
862
863 /*
864 * Routine: lck_mtx_lock_wait
865 *
866 * Invoked in order to wait on contention.
867 *
868 * Called with the interlock locked and
869 * returns it unlocked.
870 */
871 void
872 lck_mtx_lock_wait (
873 lck_mtx_t *lck,
874 thread_t holder)
875 {
876 thread_t self = current_thread();
877 lck_mtx_t *mutex;
878 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
879 __kdebug_only uintptr_t trace_holder = VM_KERNEL_UNSLIDE_OR_PERM(holder);
880 integer_t priority;
881 spl_t s = splsched();
882 #if CONFIG_DTRACE
883 uint64_t sleep_start = 0;
884
885 if (lockstat_probemap[LS_LCK_MTX_LOCK_BLOCK] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_BLOCK]) {
886 sleep_start = mach_absolute_time();
887 }
888 #endif
889
890 if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)
891 mutex = lck;
892 else
893 mutex = &lck->lck_mtx_ptr->lck_mtx;
894
895 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START, trace_lck, trace_holder, 0, 0, 0);
896
897 priority = self->sched_pri;
898 if (priority < self->base_pri)
899 priority = self->base_pri;
900 if (priority < BASEPRI_DEFAULT)
901 priority = BASEPRI_DEFAULT;
902
903 /* Do not promote past promotion ceiling */
904 priority = MIN(priority, MAXPRI_PROMOTE);
905
906 thread_lock(holder);
907 if (mutex->lck_mtx_pri == 0) {
908 holder->promotions++;
909 holder->sched_flags |= TH_SFLAG_PROMOTED;
910 }
911
912 if (mutex->lck_mtx_pri < priority && holder->sched_pri < priority) {
913 KERNEL_DEBUG_CONSTANT(
914 MACHDBG_CODE(DBG_MACH_SCHED,MACH_PROMOTE) | DBG_FUNC_NONE,
915 holder->sched_pri, priority, trace_holder, trace_lck, 0);
916 set_sched_pri(holder, priority);
917 }
918 thread_unlock(holder);
919 splx(s);
920
921 if (mutex->lck_mtx_pri < priority)
922 mutex->lck_mtx_pri = priority;
923 if (self->pending_promoter[self->pending_promoter_index] == NULL) {
924 self->pending_promoter[self->pending_promoter_index] = mutex;
925 mutex->lck_mtx_waiters++;
926 }
927 else
928 if (self->pending_promoter[self->pending_promoter_index] != mutex) {
929 self->pending_promoter[++self->pending_promoter_index] = mutex;
930 mutex->lck_mtx_waiters++;
931 }
932
933 assert_wait(LCK_MTX_EVENT(mutex), THREAD_UNINT);
934 lck_mtx_ilk_unlock(mutex);
935
936 thread_block(THREAD_CONTINUE_NULL);
937
938 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_END, 0, 0, 0, 0, 0);
939 #if CONFIG_DTRACE
940 /*
941 * Record the Dtrace lockstat probe for blocking, block time
942 * measured from when we were entered.
943 */
944 if (sleep_start) {
945 if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) {
946 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_BLOCK, lck,
947 mach_absolute_time() - sleep_start);
948 } else {
949 LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_BLOCK, lck,
950 mach_absolute_time() - sleep_start);
951 }
952 }
953 #endif
954 }
955
956 /*
957 * Routine: lck_mtx_lock_acquire
958 *
959 * Invoked on acquiring the mutex when there is
960 * contention.
961 *
962 * Returns the current number of waiters.
963 *
964 * Called with the interlock locked.
965 */
966 int
967 lck_mtx_lock_acquire(
968 lck_mtx_t *lck)
969 {
970 thread_t thread = current_thread();
971 lck_mtx_t *mutex;
972 integer_t priority;
973 spl_t s;
974 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
975
976 if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)
977 mutex = lck;
978 else
979 mutex = &lck->lck_mtx_ptr->lck_mtx;
980
981 if (thread->pending_promoter[thread->pending_promoter_index] == mutex) {
982 thread->pending_promoter[thread->pending_promoter_index] = NULL;
983 if (thread->pending_promoter_index > 0)
984 thread->pending_promoter_index--;
985 mutex->lck_mtx_waiters--;
986 }
987
988 if (mutex->lck_mtx_waiters)
989 priority = mutex->lck_mtx_pri;
990 else {
991 mutex->lck_mtx_pri = 0;
992 priority = 0;
993 }
994
995 if (priority || thread->was_promoted_on_wakeup) {
996 s = splsched();
997 thread_lock(thread);
998
999 if (priority) {
1000 thread->promotions++;
1001 thread->sched_flags |= TH_SFLAG_PROMOTED;
1002 if (thread->sched_pri < priority) {
1003 KERNEL_DEBUG_CONSTANT(
1004 MACHDBG_CODE(DBG_MACH_SCHED,MACH_PROMOTE) | DBG_FUNC_NONE,
1005 thread->sched_pri, priority, 0, trace_lck, 0);
1006 /* Do not promote past promotion ceiling */
1007 assert(priority <= MAXPRI_PROMOTE);
1008 set_sched_pri(thread, priority);
1009 }
1010 }
1011 if (thread->was_promoted_on_wakeup) {
1012 thread->was_promoted_on_wakeup = 0;
1013 if (thread->promotions == 0)
1014 lck_mtx_clear_promoted(thread, trace_lck);
1015 }
1016
1017 thread_unlock(thread);
1018 splx(s);
1019 }
1020
1021 #if CONFIG_DTRACE
1022 if (lockstat_probemap[LS_LCK_MTX_LOCK_ACQUIRE] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_ACQUIRE]) {
1023 if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) {
1024 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, lck, 0);
1025 } else {
1026 LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_ACQUIRE, lck, 0);
1027 }
1028 }
1029 #endif
1030 return (mutex->lck_mtx_waiters);
1031 }
1032
1033 /*
1034 * Routine: lck_mtx_unlock_wakeup
1035 *
1036 * Invoked on unlock when there is contention.
1037 *
1038 * Called with the interlock locked.
1039 */
1040 void
1041 lck_mtx_unlock_wakeup (
1042 lck_mtx_t *lck,
1043 thread_t holder)
1044 {
1045 thread_t thread = current_thread();
1046 lck_mtx_t *mutex;
1047 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
1048
1049 if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)
1050 mutex = lck;
1051 else
1052 mutex = &lck->lck_mtx_ptr->lck_mtx;
1053
1054 if (thread != holder)
1055 panic("lck_mtx_unlock_wakeup: mutex %p holder %p\n", mutex, holder);
1056
1057 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_START, trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(holder), 0, 0, 0);
1058
1059 assert(mutex->lck_mtx_waiters > 0);
1060 if (mutex->lck_mtx_waiters > 1)
1061 thread_wakeup_one_with_pri(LCK_MTX_EVENT(lck), lck->lck_mtx_pri);
1062 else
1063 thread_wakeup_one(LCK_MTX_EVENT(lck));
1064
1065 if (thread->promotions > 0) {
1066 spl_t s = splsched();
1067
1068 thread_lock(thread);
1069 if (--thread->promotions == 0 && (thread->sched_flags & TH_SFLAG_PROMOTED))
1070 lck_mtx_clear_promoted(thread, trace_lck);
1071 thread_unlock(thread);
1072 splx(s);
1073 }
1074
1075 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_END, 0, 0, 0, 0, 0);
1076 }
1077
1078 void
1079 lck_mtx_unlockspin_wakeup (
1080 lck_mtx_t *lck)
1081 {
1082 assert(lck->lck_mtx_waiters > 0);
1083 thread_wakeup_one(LCK_MTX_EVENT(lck));
1084
1085 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_NONE, VM_KERNEL_UNSLIDE_OR_PERM(lck), 0, 0, 1, 0);
1086 #if CONFIG_DTRACE
1087 /*
1088 * When there are waiters, we skip the hot-patch spot in the
1089 * fastpath, so we record it here.
1090 */
1091 LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, lck, 0);
1092 #endif
1093 }
1094
1095
1096 /*
1097 * Routine: mutex_pause
1098 *
1099 * Called by former callers of simple_lock_pause().
1100 */
1101 #define MAX_COLLISION_COUNTS 32
1102 #define MAX_COLLISION 8
1103
1104 unsigned int max_collision_count[MAX_COLLISION_COUNTS];
1105
1106 uint32_t collision_backoffs[MAX_COLLISION] = {
1107 10, 50, 100, 200, 400, 600, 800, 1000
1108 };
1109
1110
1111 void
1112 mutex_pause(uint32_t collisions)
1113 {
1114 wait_result_t wait_result;
1115 uint32_t back_off;
1116
1117 if (collisions >= MAX_COLLISION_COUNTS)
1118 collisions = MAX_COLLISION_COUNTS - 1;
1119 max_collision_count[collisions]++;
1120
1121 if (collisions >= MAX_COLLISION)
1122 collisions = MAX_COLLISION - 1;
1123 back_off = collision_backoffs[collisions];
1124
1125 wait_result = assert_wait_timeout((event_t)mutex_pause, THREAD_UNINT, back_off, NSEC_PER_USEC);
1126 assert(wait_result == THREAD_WAITING);
1127
1128 wait_result = thread_block(THREAD_CONTINUE_NULL);
1129 assert(wait_result == THREAD_TIMED_OUT);
1130 }
1131
1132
1133 unsigned int mutex_yield_wait = 0;
1134 unsigned int mutex_yield_no_wait = 0;
1135
1136 void
1137 lck_mtx_yield(
1138 lck_mtx_t *lck)
1139 {
1140 int waiters;
1141
1142 #if DEBUG
1143 lck_mtx_assert(lck, LCK_MTX_ASSERT_OWNED);
1144 #endif /* DEBUG */
1145
1146 if (lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT)
1147 waiters = lck->lck_mtx_ptr->lck_mtx.lck_mtx_waiters;
1148 else
1149 waiters = lck->lck_mtx_waiters;
1150
1151 if ( !waiters) {
1152 mutex_yield_no_wait++;
1153 } else {
1154 mutex_yield_wait++;
1155 lck_mtx_unlock(lck);
1156 mutex_pause(0);
1157 lck_mtx_lock(lck);
1158 }
1159 }
1160
1161
1162 /*
1163 * Routine: lck_rw_sleep
1164 */
1165 wait_result_t
1166 lck_rw_sleep(
1167 lck_rw_t *lck,
1168 lck_sleep_action_t lck_sleep_action,
1169 event_t event,
1170 wait_interrupt_t interruptible)
1171 {
1172 wait_result_t res;
1173 lck_rw_type_t lck_rw_type;
1174 thread_t thread = current_thread();
1175
1176 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
1177 panic("Invalid lock sleep action %x\n", lck_sleep_action);
1178
1179 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1180 /*
1181 * Although we are dropping the RW lock, the intent in most cases
1182 * is that this thread remains as an observer, since it may hold
1183 * some secondary resource, but must yield to avoid deadlock. In
1184 * this situation, make sure that the thread is boosted to the
1185 * RW lock ceiling while blocked, so that it can re-acquire the
1186 * RW lock at that priority.
1187 */
1188 thread->rwlock_count++;
1189 }
1190
1191 res = assert_wait(event, interruptible);
1192 if (res == THREAD_WAITING) {
1193 lck_rw_type = lck_rw_done(lck);
1194 res = thread_block(THREAD_CONTINUE_NULL);
1195 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
1196 if (!(lck_sleep_action & (LCK_SLEEP_SHARED|LCK_SLEEP_EXCLUSIVE)))
1197 lck_rw_lock(lck, lck_rw_type);
1198 else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE)
1199 lck_rw_lock_exclusive(lck);
1200 else
1201 lck_rw_lock_shared(lck);
1202 }
1203 }
1204 else
1205 if (lck_sleep_action & LCK_SLEEP_UNLOCK)
1206 (void)lck_rw_done(lck);
1207
1208 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1209 if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
1210 /* sched_flags checked without lock, but will be rechecked while clearing */
1211
1212 /* Only if the caller wanted the lck_rw_t returned unlocked should we drop to 0 */
1213 assert(lck_sleep_action & LCK_SLEEP_UNLOCK);
1214
1215 lck_rw_clear_promotion(thread);
1216 }
1217 }
1218
1219 return res;
1220 }
1221
1222
1223 /*
1224 * Routine: lck_rw_sleep_deadline
1225 */
1226 wait_result_t
1227 lck_rw_sleep_deadline(
1228 lck_rw_t *lck,
1229 lck_sleep_action_t lck_sleep_action,
1230 event_t event,
1231 wait_interrupt_t interruptible,
1232 uint64_t deadline)
1233 {
1234 wait_result_t res;
1235 lck_rw_type_t lck_rw_type;
1236 thread_t thread = current_thread();
1237
1238 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
1239 panic("Invalid lock sleep action %x\n", lck_sleep_action);
1240
1241 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1242 thread->rwlock_count++;
1243 }
1244
1245 res = assert_wait_deadline(event, interruptible, deadline);
1246 if (res == THREAD_WAITING) {
1247 lck_rw_type = lck_rw_done(lck);
1248 res = thread_block(THREAD_CONTINUE_NULL);
1249 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
1250 if (!(lck_sleep_action & (LCK_SLEEP_SHARED|LCK_SLEEP_EXCLUSIVE)))
1251 lck_rw_lock(lck, lck_rw_type);
1252 else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE)
1253 lck_rw_lock_exclusive(lck);
1254 else
1255 lck_rw_lock_shared(lck);
1256 }
1257 }
1258 else
1259 if (lck_sleep_action & LCK_SLEEP_UNLOCK)
1260 (void)lck_rw_done(lck);
1261
1262 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1263 if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
1264 /* sched_flags checked without lock, but will be rechecked while clearing */
1265
1266 /* Only if the caller wanted the lck_rw_t returned unlocked should we drop to 0 */
1267 assert(lck_sleep_action & LCK_SLEEP_UNLOCK);
1268
1269 lck_rw_clear_promotion(thread);
1270 }
1271 }
1272
1273 return res;
1274 }
1275
1276 /*
1277 * Reader-writer lock promotion
1278 *
1279 * We support a limited form of reader-writer
1280 * lock promotion whose effects are:
1281 *
1282 * * Qualifying threads have decay disabled
1283 * * Scheduler priority is reset to a floor of
1285  *   their statically assigned priority
1285 * or BASEPRI_BACKGROUND
1286 *
1287 * The rationale is that lck_rw_ts do not have
1288 * a single owner, so we cannot apply a directed
1289 * priority boost from all waiting threads
1290 * to all holding threads without maintaining
1291 * lists of all shared owners and all waiting
1292 * threads for every lock.
1293 *
1294 * Instead (and to preserve the uncontended fast-
1295 * path), acquiring (or attempting to acquire)
1297  * a RW lock in shared or exclusive mode increments
1297 * a per-thread counter. Only if that thread stops
1298 * making forward progress (for instance blocking
1299 * on a mutex, or being preempted) do we consult
1300 * the counter and apply the priority floor.
1301 * When the thread becomes runnable again (or in
1302 * the case of preemption it never stopped being
1303 * runnable), it has the priority boost and should
1304 * be in a good position to run on the CPU and
1305 * release all RW locks (at which point the priority
1306 * boost is cleared).
1307 *
1308 * Care must be taken to ensure that priority
1309 * boosts are not retained indefinitely, since unlike
1310 * mutex priority boosts (where the boost is tied
1311 * to the mutex lifecycle), the boost is tied
1312 * to the thread and independent of any particular
1313 * lck_rw_t. Assertions are in place on return
1314 * to userspace so that the boost is not held
1315 * indefinitely.
1316 *
1317 * The routines that increment/decrement the
1318 * per-thread counter should err on the side of
1319 * incrementing any time a preemption is possible
1320 * and the lock would be visible to the rest of the
1321 * system as held (so it should be incremented before
1322 * interlocks are dropped/preemption is enabled, or
1323 * before a CAS is executed to acquire the lock).
1324 *
1325 */
1326
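/*
 * A sketch of the counter protocol just described, assuming a hypothetical
 * read-side path; this bookkeeping lives inside the lock implementation and
 * the sleep routines above, not in callers. The count is raised before the
 * lock becomes visible as held, and the promotion is cleared only when the
 * last RW lock is dropped.
 */
#if 0	/* illustration only, not compiled */
static void
example_rw_acquire_release(lck_rw_t *lck)
{
	thread_t thread = current_thread();

	thread->rwlock_count++;		/* before the lock is visible as held */
	/* ... CAS / interlock path that actually takes the lock ... */

	/* ... lock held; blocking while it is held may apply the floor ... */

	/* ... release path drops the lock, then: ... */
	if ((thread->rwlock_count-- == 1 /* now 0 */) &&
	    (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
		lck_rw_clear_promotion(thread);
	}
	(void)lck;
}
#endif
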
1327 /*
1328 * lck_rw_clear_promotion: Undo priority promotions when the last RW
1329 * lock is released by a thread (if a promotion was active)
1330 */
1331 void lck_rw_clear_promotion(thread_t thread)
1332 {
1333 assert(thread->rwlock_count == 0);
1334
1335 /* Cancel any promotions if the thread had actually blocked while holding a RW lock */
1336 spl_t s = splsched();
1337
1338 thread_lock(thread);
1339
1340 if (thread->sched_flags & TH_SFLAG_RW_PROMOTED) {
1341 thread->sched_flags &= ~TH_SFLAG_RW_PROMOTED;
1342
1343 if (thread->sched_flags & TH_SFLAG_PROMOTED) {
1344 /* Thread still has a mutex promotion */
1345 } else if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
1346 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_RW_DEMOTE) | DBG_FUNC_NONE,
1347 (uintptr_t)thread_tid(thread), thread->sched_pri, DEPRESSPRI, 0, 0);
1348
1349 set_sched_pri(thread, DEPRESSPRI);
1350 } else {
1351 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_RW_DEMOTE) | DBG_FUNC_NONE,
1352 (uintptr_t)thread_tid(thread), thread->sched_pri, thread->base_pri, 0, 0);
1353
1354 thread_recompute_sched_pri(thread, FALSE);
1355 }
1356 }
1357
1358 thread_unlock(thread);
1359 splx(s);
1360 }
1361
1362 /*
1363 * Callout from context switch if the thread goes
1364 * off core with a positive rwlock_count
1365 *
1366 * Called at splsched with the thread locked
1367 */
1368 void
1369 lck_rw_set_promotion_locked(thread_t thread)
1370 {
1371 if (LcksOpts & disLkRWPrio)
1372 return;
1373
1374 integer_t priority;
1375
1376 priority = thread->sched_pri;
1377
1378 if (priority < thread->base_pri)
1379 priority = thread->base_pri;
1380 if (priority < BASEPRI_BACKGROUND)
1381 priority = BASEPRI_BACKGROUND;
1382
1383 if ((thread->sched_pri < priority) ||
1384 !(thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
1385 KERNEL_DEBUG_CONSTANT(
1386 MACHDBG_CODE(DBG_MACH_SCHED, MACH_RW_PROMOTE) | DBG_FUNC_NONE,
1387 (uintptr_t)thread_tid(thread), thread->sched_pri,
1388 thread->base_pri, priority, 0);
1389
1390 thread->sched_flags |= TH_SFLAG_RW_PROMOTED;
1391
1392 if (thread->sched_pri < priority)
1393 set_sched_pri(thread, priority);
1394 }
1395 }
1396
1397 kern_return_t
1398 host_lockgroup_info(
1399 host_t host,
1400 lockgroup_info_array_t *lockgroup_infop,
1401 mach_msg_type_number_t *lockgroup_infoCntp)
1402 {
1403 lockgroup_info_t *lockgroup_info_base;
1404 lockgroup_info_t *lockgroup_info;
1405 vm_offset_t lockgroup_info_addr;
1406 vm_size_t lockgroup_info_size;
1407 vm_size_t lockgroup_info_vmsize;
1408 lck_grp_t *lck_grp;
1409 unsigned int i;
1410 vm_map_copy_t copy;
1411 kern_return_t kr;
1412
1413 if (host == HOST_NULL)
1414 return KERN_INVALID_HOST;
1415
1416 lck_mtx_lock(&lck_grp_lock);
1417
1418 lockgroup_info_size = lck_grp_cnt * sizeof(*lockgroup_info);
1419 lockgroup_info_vmsize = round_page(lockgroup_info_size);
1420 kr = kmem_alloc_pageable(ipc_kernel_map,
1421 &lockgroup_info_addr, lockgroup_info_vmsize, VM_KERN_MEMORY_IPC);
1422 if (kr != KERN_SUCCESS) {
1423 lck_mtx_unlock(&lck_grp_lock);
1424 return(kr);
1425 }
1426
1427 lockgroup_info_base = (lockgroup_info_t *) lockgroup_info_addr;
1428 lck_grp = (lck_grp_t *)queue_first(&lck_grp_queue);
1429 lockgroup_info = lockgroup_info_base;
1430
1431 for (i = 0; i < lck_grp_cnt; i++) {
1432
1433 lockgroup_info->lock_spin_cnt = lck_grp->lck_grp_spincnt;
1434 lockgroup_info->lock_spin_util_cnt = lck_grp->lck_grp_stat.lck_grp_spin_stat.lck_grp_spin_util_cnt;
1435 lockgroup_info->lock_spin_held_cnt = lck_grp->lck_grp_stat.lck_grp_spin_stat.lck_grp_spin_held_cnt;
1436 lockgroup_info->lock_spin_miss_cnt = lck_grp->lck_grp_stat.lck_grp_spin_stat.lck_grp_spin_miss_cnt;
1437 lockgroup_info->lock_spin_held_max = lck_grp->lck_grp_stat.lck_grp_spin_stat.lck_grp_spin_held_max;
1438 lockgroup_info->lock_spin_held_cum = lck_grp->lck_grp_stat.lck_grp_spin_stat.lck_grp_spin_held_cum;
1439
1440 lockgroup_info->lock_mtx_cnt = lck_grp->lck_grp_mtxcnt;
1441 lockgroup_info->lock_mtx_util_cnt = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_util_cnt;
1442 lockgroup_info->lock_mtx_held_cnt = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_held_cnt;
1443 lockgroup_info->lock_mtx_miss_cnt = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_miss_cnt;
1444 lockgroup_info->lock_mtx_wait_cnt = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_wait_cnt;
1445 lockgroup_info->lock_mtx_held_max = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_held_max;
1446 lockgroup_info->lock_mtx_held_cum = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_held_cum;
1447 lockgroup_info->lock_mtx_wait_max = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_wait_max;
1448 lockgroup_info->lock_mtx_wait_cum = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_wait_cum;
1449
1450 lockgroup_info->lock_rw_cnt = lck_grp->lck_grp_rwcnt;
1451 lockgroup_info->lock_rw_util_cnt = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_util_cnt;
1452 lockgroup_info->lock_rw_held_cnt = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_held_cnt;
1453 lockgroup_info->lock_rw_miss_cnt = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_miss_cnt;
1454 lockgroup_info->lock_rw_wait_cnt = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_wait_cnt;
1455 lockgroup_info->lock_rw_held_max = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_held_max;
1456 lockgroup_info->lock_rw_held_cum = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_held_cum;
1457 lockgroup_info->lock_rw_wait_max = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_wait_max;
1458 lockgroup_info->lock_rw_wait_cum = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_wait_cum;
1459
1460 (void) strncpy(lockgroup_info->lockgroup_name,lck_grp->lck_grp_name, LOCKGROUP_MAX_NAME);
1461
1462 lck_grp = (lck_grp_t *)(queue_next((queue_entry_t)(lck_grp)));
1463 lockgroup_info++;
1464 }
1465
1466 *lockgroup_infoCntp = lck_grp_cnt;
1467 lck_mtx_unlock(&lck_grp_lock);
1468
1469 if (lockgroup_info_size != lockgroup_info_vmsize)
1470 bzero((char *)lockgroup_info, lockgroup_info_vmsize - lockgroup_info_size);
1471
1472 kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)lockgroup_info_addr,
1473 (vm_map_size_t)lockgroup_info_size, TRUE, &copy);
1474 assert(kr == KERN_SUCCESS);
1475
1476 *lockgroup_infop = (lockgroup_info_t *) copy;
1477
1478 return(KERN_SUCCESS);
1479 }
1480
1481 /*
1482 * Atomic primitives, prototyped in kern/simple_lock.h
1483 * Noret versions are more efficient on some architectures
1484 */
1485
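/*
 * A brief sketch, assuming a hypothetical example_refcnt: these primitives
 * return the new value, which is how lck_grp_reference() and
 * lck_grp_deallocate() above implement a simple reference count.
 */
#if 0	/* usage sketch only, not compiled */
static uint32_t example_refcnt = 1;

static void
example_retain(void)
{
	(void) hw_atomic_add(&example_refcnt, 1);
}

static void
example_release(void)
{
	if (hw_atomic_sub(&example_refcnt, 1) == 0) {
		/* last reference dropped; free the backing object here */
	}
}
#endif
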
1486 uint32_t
1487 hw_atomic_add(volatile uint32_t *dest, uint32_t delt)
1488 {
1489 ALIGN_TEST(dest,uint32_t);
1490 return __c11_atomic_fetch_add(ATOMIC_CAST(uint32_t,dest), delt, memory_order_relaxed) + delt;
1491 }
1492
1493 uint32_t
1494 hw_atomic_sub(volatile uint32_t *dest, uint32_t delt)
1495 {
1496 ALIGN_TEST(dest,uint32_t);
1497 return __c11_atomic_fetch_sub(ATOMIC_CAST(uint32_t,dest), delt, memory_order_relaxed) - delt;
1498 }
1499
1500 uint32_t
1501 hw_atomic_or(volatile uint32_t *dest, uint32_t mask)
1502 {
1503 ALIGN_TEST(dest,uint32_t);
1504 return __c11_atomic_fetch_or(ATOMIC_CAST(uint32_t,dest), mask, memory_order_relaxed) | mask;
1505 }
1506
1507 void
1508 hw_atomic_or_noret(volatile uint32_t *dest, uint32_t mask)
1509 {
1510 ALIGN_TEST(dest,uint32_t);
1511 __c11_atomic_fetch_or(ATOMIC_CAST(uint32_t,dest), mask, memory_order_relaxed);
1512 }
1513
1514 uint32_t
1515 hw_atomic_and(volatile uint32_t *dest, uint32_t mask)
1516 {
1517 ALIGN_TEST(dest,uint32_t);
1518 return __c11_atomic_fetch_and(ATOMIC_CAST(uint32_t,dest), mask, memory_order_relaxed) & mask;
1519 }
1520
1521 void
1522 hw_atomic_and_noret(volatile uint32_t *dest, uint32_t mask)
1523 {
1524 ALIGN_TEST(dest,uint32_t);
1525 __c11_atomic_fetch_and(ATOMIC_CAST(uint32_t,dest), mask, memory_order_relaxed);
1526 }
1527
1528 uint32_t
1529 hw_compare_and_store(uint32_t oldval, uint32_t newval, volatile uint32_t *dest)
1530 {
1531 ALIGN_TEST(dest,uint32_t);
1532 return __c11_atomic_compare_exchange_strong(ATOMIC_CAST(uint32_t,dest), &oldval, newval,
1533 memory_order_acq_rel_smp, memory_order_relaxed);
1534 }
1535