/* Source: osfmk/kern/locks.c (apple/xnu), blob 25641b8beb409ef593dec5205996e71db2ae7f05 */
1 /*
2 * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56
57 #define ATOMIC_PRIVATE 1
58 #define LOCK_PRIVATE 1
59
60 #include <mach_ldebug.h>
61 #include <debug.h>
62
63 #include <mach/kern_return.h>
64 #include <mach/mach_host_server.h>
65 #include <mach_debug/lockgroup_info.h>
66
67 #include <kern/locks.h>
68 #include <kern/misc_protos.h>
69 #include <kern/kalloc.h>
70 #include <kern/thread.h>
71 #include <kern/processor.h>
72 #include <kern/sched_prim.h>
73 #include <kern/debug.h>
74 #include <machine/atomic.h>
75 #include <machine/machine_cpu.h>
76 #include <string.h>
77
78
79 #include <sys/kdebug.h>
80
81 #if CONFIG_DTRACE
82 /*
83 * We need only enough declarations from the BSD-side to be able to
84 * test if our probe is active, and to call __dtrace_probe(). Setting
85 * NEED_DTRACE_DEFS gets a local copy of those definitions pulled in.
86 */
87 #define NEED_DTRACE_DEFS
88 #include <../bsd/sys/lockstat.h>
89 #endif
90
91 #define LCK_MTX_SLEEP_CODE 0
92 #define LCK_MTX_SLEEP_DEADLINE_CODE 1
93 #define LCK_MTX_LCK_WAIT_CODE 2
94 #define LCK_MTX_UNLCK_WAKEUP_CODE 3
95
96 #if MACH_LDEBUG
97 #define ALIGN_TEST(p,t) do{if((uintptr_t)p&(sizeof(t)-1)) __builtin_trap();}while(0)
98 #else
99 #define ALIGN_TEST(p,t) do{}while(0)
100 #endif
101
102 /* Silence the volatile to _Atomic cast warning */
103 #define ATOMIC_CAST(t,p) ((_Atomic t*)(uintptr_t)(p))
104
105 /* Enforce program order of loads and stores. */
106 #define ordered_load(target, type) \
107 __c11_atomic_load((_Atomic type *)(target), memory_order_relaxed)
108 #define ordered_store(target, type, value) \
109 __c11_atomic_store((_Atomic type *)(target), value, memory_order_relaxed)
110
111 #define ordered_load_hw(lock) ordered_load(&(lock)->lock_data, uintptr_t)
112 #define ordered_store_hw(lock, value) ordered_store(&(lock)->lock_data, uintptr_t, (value))
113
114 #define NOINLINE __attribute__((noinline))
115
116
/* Global registry of all lock groups, protected by lck_grp_lock below. */
static queue_head_t	 lck_grp_queue;
static unsigned int	 lck_grp_cnt;	/* number of groups on lck_grp_queue */

/* Mutex guarding lck_grp_queue/lck_grp_cnt; bootstrapped via lck_mtx_init_ext
 * in lck_mod_init() so it can exist before the group machinery is up. */
decl_lck_mtx_data(static,lck_grp_lock)
static lck_mtx_ext_t lck_grp_lock_ext;

/* Default attributes and the bootstrap "Compatibility APIs" group. */
lck_grp_attr_t	LockDefaultGroupAttr;
lck_grp_t		LockCompatGroup;
lck_attr_t		LockDefaultLckAttr;

#if CONFIG_DTRACE && __SMP__
#if defined (__x86_64__)
uint64_t dtrace_spin_threshold = 500; // 500ns
#elif defined(__arm__) || defined(__arm64__)
/* NOTE(review): derived from LOCK_PANIC_TIMEOUT, not a literal 500ns —
 * the "500ns" note assumes a particular timebase/timeout; confirm. */
uint64_t dtrace_spin_threshold = LOCK_PANIC_TIMEOUT / 1000000; // 500ns
#endif
#endif
134
/*
 *	Routine:	lck_mod_init
 *
 *	One-time initialization of the lock-group subsystem. Must run before
 *	any lck_grp_init()/lck_grp_alloc_init() caller, since those take
 *	lck_grp_lock, which is created here.
 */

void
lck_mod_init(
	void)
{
	/*
	 * Obtain "lcks" options:this currently controls lock statistics
	 */
	if (!PE_parse_boot_argn("lcks", &LcksOpts, sizeof (LcksOpts)))
		LcksOpts = 0;


#if (DEVELOPMENT || DEBUG) && defined(__x86_64__)
	/* "-disable_mtx_chk" turns off mutex preemption checking on x86_64
	 * development/debug kernels. */
	if (!PE_parse_boot_argn("-disable_mtx_chk", &LckDisablePreemptCheck, sizeof (LckDisablePreemptCheck)))
		LckDisablePreemptCheck = 0;
#endif /* (DEVELOPMENT || DEBUG) && defined(__x86_64__) */

	queue_init(&lck_grp_queue);

	/*
	 * Need to bootstrap the LockCompatGroup instead of calling lck_grp_init() here. This avoids
	 * grabbing the lck_grp_lock before it is initialized.
	 */

	bzero(&LockCompatGroup, sizeof(lck_grp_t));
	(void) strncpy(LockCompatGroup.lck_grp_name, "Compatibility APIs", LCK_GRP_MAX_NAME);

	if (LcksOpts & enaLkStat)
		LockCompatGroup.lck_grp_attr = LCK_GRP_ATTR_STAT;
	else
		LockCompatGroup.lck_grp_attr = LCK_ATTR_NONE;

	LockCompatGroup.lck_grp_refcnt = 1;

	/* Enqueue directly — lck_grp_lock is not usable yet. */
	enqueue_tail(&lck_grp_queue, (queue_entry_t)&LockCompatGroup);
	lck_grp_cnt = 1;

	lck_grp_attr_setdefault(&LockDefaultGroupAttr);
	lck_attr_setdefault(&LockDefaultLckAttr);

	/* Now that the bootstrap group exists, the registry mutex itself can
	 * be initialized as a member of that group. */
	lck_mtx_init_ext(&lck_grp_lock, &lck_grp_lock_ext, &LockCompatGroup, &LockDefaultLckAttr);
}
180
181 /*
182 * Routine: lck_grp_attr_alloc_init
183 */
184
185 lck_grp_attr_t *
186 lck_grp_attr_alloc_init(
187 void)
188 {
189 lck_grp_attr_t *attr;
190
191 if ((attr = (lck_grp_attr_t *)kalloc(sizeof(lck_grp_attr_t))) != 0)
192 lck_grp_attr_setdefault(attr);
193
194 return(attr);
195 }
196
197
198 /*
199 * Routine: lck_grp_attr_setdefault
200 */
201
202 void
203 lck_grp_attr_setdefault(
204 lck_grp_attr_t *attr)
205 {
206 if (LcksOpts & enaLkStat)
207 attr->grp_attr_val = LCK_GRP_ATTR_STAT;
208 else
209 attr->grp_attr_val = 0;
210 }
211
212
/*
 *	Routine:	lck_grp_attr_setstat
 *
 *	Enable statistics gathering for groups created with this attribute.
 *	Uses an atomic OR so concurrent attribute updates are safe.
 */

void
lck_grp_attr_setstat(
	lck_grp_attr_t	*attr)
{
	(void)hw_atomic_or(&attr->grp_attr_val, LCK_GRP_ATTR_STAT);
}
223
224
/*
 *	Routine:	lck_grp_attr_free
 *
 *	Release an attribute previously obtained from lck_grp_attr_alloc_init().
 */

void
lck_grp_attr_free(
	lck_grp_attr_t	*attr)
{
	kfree(attr, sizeof(lck_grp_attr_t));
}
235
236
237 /*
238 * Routine: lck_grp_alloc_init
239 */
240
241 lck_grp_t *
242 lck_grp_alloc_init(
243 const char* grp_name,
244 lck_grp_attr_t *attr)
245 {
246 lck_grp_t *grp;
247
248 if ((grp = (lck_grp_t *)kalloc(sizeof(lck_grp_t))) != 0)
249 lck_grp_init(grp, grp_name, attr);
250
251 return(grp);
252 }
253
254 /*
255 * Routine: lck_grp_init
256 */
257
258 void
259 lck_grp_init(lck_grp_t * grp, const char * grp_name, lck_grp_attr_t * attr)
260 {
261 /* make sure locking infrastructure has been initialized */
262 assert(lck_grp_cnt > 0);
263
264 bzero((void *)grp, sizeof(lck_grp_t));
265
266 (void)strlcpy(grp->lck_grp_name, grp_name, LCK_GRP_MAX_NAME);
267
268 if (attr != LCK_GRP_ATTR_NULL)
269 grp->lck_grp_attr = attr->grp_attr_val;
270 else if (LcksOpts & enaLkStat)
271 grp->lck_grp_attr = LCK_GRP_ATTR_STAT;
272 else
273 grp->lck_grp_attr = LCK_ATTR_NONE;
274
275 grp->lck_grp_refcnt = 1;
276
277 lck_mtx_lock(&lck_grp_lock);
278 enqueue_tail(&lck_grp_queue, (queue_entry_t)grp);
279 lck_grp_cnt++;
280 lck_mtx_unlock(&lck_grp_lock);
281 }
282
/*
 *	Routine:	lck_grp_free
 *
 *	Unlink a group from the global registry and drop the registry's
 *	reference; the group memory is freed once the last reference
 *	(via lck_grp_deallocate) goes away.
 */

void
lck_grp_free(
	lck_grp_t	*grp)
{
	lck_mtx_lock(&lck_grp_lock);
	lck_grp_cnt--;
	(void)remque((queue_entry_t)grp);
	lck_mtx_unlock(&lck_grp_lock);
	lck_grp_deallocate(grp);
}
297
298
/*
 *	Routine:	lck_grp_reference
 *
 *	Take an additional reference on a lock group (atomic increment).
 *	Pair with lck_grp_deallocate().
 */

void
lck_grp_reference(
	lck_grp_t	*grp)
{
	(void)hw_atomic_add(&grp->lck_grp_refcnt, 1);
}
309
310
/*
 *	Routine:	lck_grp_deallocate
 *
 *	Drop one reference on a group; frees it when the count hits zero.
 *	NOTE(review): assumes the group was heap-allocated — calling this
 *	on a statically allocated group (e.g. LockCompatGroup) would kfree
 *	static memory; callers must guarantee heap origin.
 */

void
lck_grp_deallocate(
	lck_grp_t	*grp)
{
	if (hw_atomic_sub(&grp->lck_grp_refcnt, 1) == 0)
		kfree(grp, sizeof(lck_grp_t));
}
322
323 /*
324 * Routine: lck_grp_lckcnt_incr
325 */
326
327 void
328 lck_grp_lckcnt_incr(
329 lck_grp_t *grp,
330 lck_type_t lck_type)
331 {
332 unsigned int *lckcnt;
333
334 switch (lck_type) {
335 case LCK_TYPE_SPIN:
336 lckcnt = &grp->lck_grp_spincnt;
337 break;
338 case LCK_TYPE_MTX:
339 lckcnt = &grp->lck_grp_mtxcnt;
340 break;
341 case LCK_TYPE_RW:
342 lckcnt = &grp->lck_grp_rwcnt;
343 break;
344 default:
345 return panic("lck_grp_lckcnt_incr(): invalid lock type: %d\n", lck_type);
346 }
347
348 (void)hw_atomic_add(lckcnt, 1);
349 }
350
351 /*
352 * Routine: lck_grp_lckcnt_decr
353 */
354
355 void
356 lck_grp_lckcnt_decr(
357 lck_grp_t *grp,
358 lck_type_t lck_type)
359 {
360 unsigned int *lckcnt;
361 int updated;
362
363 switch (lck_type) {
364 case LCK_TYPE_SPIN:
365 lckcnt = &grp->lck_grp_spincnt;
366 break;
367 case LCK_TYPE_MTX:
368 lckcnt = &grp->lck_grp_mtxcnt;
369 break;
370 case LCK_TYPE_RW:
371 lckcnt = &grp->lck_grp_rwcnt;
372 break;
373 default:
374 panic("lck_grp_lckcnt_decr(): invalid lock type: %d\n", lck_type);
375 return;
376 }
377
378 updated = (int)hw_atomic_sub(lckcnt, 1);
379 assert(updated >= 0);
380 }
381
382 /*
383 * Routine: lck_attr_alloc_init
384 */
385
386 lck_attr_t *
387 lck_attr_alloc_init(
388 void)
389 {
390 lck_attr_t *attr;
391
392 if ((attr = (lck_attr_t *)kalloc(sizeof(lck_attr_t))) != 0)
393 lck_attr_setdefault(attr);
394
395 return(attr);
396 }
397
398
/*
 *	Routine:	lck_attr_setdefault
 *
 *	Set a lock attribute to the per-architecture default:
 *	  arm/arm64: always LCK_ATTR_NONE (debug attr panics at boot, see rdar below);
 *	  x86 DEBUG builds: always LCK_ATTR_DEBUG;
 *	  x86 release builds: LCK_ATTR_DEBUG only if the "lcks" boot-arg set enaLkDeb.
 */

void
lck_attr_setdefault(
	lck_attr_t	*attr)
{
#if __arm__ || __arm64__
	/* <rdar://problem/4404579>: Using LCK_ATTR_DEBUG here causes panic at boot time for arm */
	attr->lck_attr_val =  LCK_ATTR_NONE;
#elif __i386__ || __x86_64__
#if     !DEBUG
 	if (LcksOpts & enaLkDeb)
 		attr->lck_attr_val =  LCK_ATTR_DEBUG;
 	else
 		attr->lck_attr_val =  LCK_ATTR_NONE;
#else
 	attr->lck_attr_val =  LCK_ATTR_DEBUG;
#endif	/* !DEBUG */
#else
#error Unknown architecture.
#endif	/* __arm__ */
}
423
424
/*
 *	Routine:	lck_attr_setdebug
 *
 *	Atomically set the debug flag in a lock attribute.
 */
void
lck_attr_setdebug(
	lck_attr_t	*attr)
{
	(void)hw_atomic_or(&attr->lck_attr_val, LCK_ATTR_DEBUG);
}
434
/*
 *	Routine:	lck_attr_cleardebug
 *	(header previously said "setdebug" — copy-paste error)
 *
 *	Atomically clear the debug flag in a lock attribute.
 */
void
lck_attr_cleardebug(
	lck_attr_t	*attr)
{
	(void)hw_atomic_and(&attr->lck_attr_val, ~LCK_ATTR_DEBUG);
}
444
445
/*
 *	Routine:	lck_attr_rw_shared_priority
 *
 *	Atomically mark rw locks created with this attribute as
 *	shared-priority (readers not starved by pending writers).
 */
void
lck_attr_rw_shared_priority(
	lck_attr_t	*attr)
{
	(void)hw_atomic_or(&attr->lck_attr_val, LCK_ATTR_RW_SHARED_PRIORITY);
}
455
456
/*
 *	Routine:	lck_attr_free
 *
 *	Release an attribute previously obtained from lck_attr_alloc_init().
 */
void
lck_attr_free(
	lck_attr_t	*attr)
{
	kfree(attr, sizeof(lck_attr_t));
}
466
/*
 * Routine:	hw_lock_init
 *
 *	Initialize a hardware lock.  Stores 0 (unlocked) with relaxed
 *	ordering; publication to other CPUs is the caller's concern.
 */
void
hw_lock_init(hw_lock_t lock)
{
	ordered_store_hw(lock, 0);
}
477
/*
 *	Routine: hw_lock_lock_contended
 *
 *	Spin until lock is acquired or timeout expires.
 *	timeout is in mach_absolute_time ticks. Called with
 *	preemption disabled.
 *
 *	Returns 1 on acquisition, 0 on timeout (only reachable when
 *	do_panic is FALSE; otherwise a timeout panics).
 */

#if	__SMP__
static unsigned int NOINLINE
hw_lock_lock_contended(hw_lock_t lock, uintptr_t data, uint64_t timeout, boolean_t do_panic)
{
	uint64_t	end = 0;
	/* Snapshot of the current owner for the panic message; may be stale
	 * by the time we actually panic. */
	uintptr_t	holder = lock->lock_data;
	int		i;

	if (timeout == 0)
		timeout = LOCK_PANIC_TIMEOUT;
#if CONFIG_DTRACE
	uint64_t begin;
	boolean_t dtrace_enabled = lockstat_probemap[LS_LCK_SPIN_LOCK_SPIN] != 0;
	if (__improbable(dtrace_enabled))
		begin = mach_absolute_time();
#endif
	for ( ; ; ) {
		/* Inner snoop loop: spin LOCK_SNOOP_SPINS times before paying
		 * for a timebase read in the timeout check below. */
		for (i = 0; i < LOCK_SNOOP_SPINS; i++) {
			cpu_pause();
#if (!__ARM_ENABLE_WFE_) || (LOCK_PRETEST)
			/* Read-only pretest: avoid the CAS while held. */
			holder = ordered_load_hw(lock);
			if (holder != 0)
				continue;
#endif
			if (atomic_compare_exchange(&lock->lock_data, 0, data,
			    memory_order_acquire_smp, TRUE)) {
#if CONFIG_DTRACE
				if (__improbable(dtrace_enabled)) {
					uint64_t spintime = mach_absolute_time() - begin;
					if (spintime > dtrace_spin_threshold)
						LOCKSTAT_RECORD2(LS_LCK_SPIN_LOCK_SPIN, lock, spintime, dtrace_spin_threshold);
				}
#endif
				return 1;
			}
		}
		/* Lazily arm the deadline on the first pass so an uncontended
		 * fast acquisition never reads the timebase. */
		if (end == 0) {
			end = ml_get_timebase() + timeout;
		}
		else if (ml_get_timebase() >= end)
			break;
	}
	if (do_panic) {
		// Capture the actual time spent blocked, which may be higher than the timeout
		// if a misbehaving interrupt stole this thread's CPU time.
		panic("Spinlock timeout after %llu ticks, %p = %lx",
			(ml_get_timebase() - end + timeout), lock, holder);
	}
	return 0;
}
#endif	// __SMP__
537
/*
 *	Routine: hw_lock_lock
 *
 *	Acquire lock, spinning until it becomes available,
 *	return with preemption disabled.
 *
 *	The stored value encodes the owning thread plus PLATFORM_LCK_ILOCK.
 *	On SMP, contention falls through to hw_lock_lock_contended() with
 *	do_panic=TRUE (spin forever up to LOCK_PANIC_TIMEOUT, then panic).
 *	On UP, finding the lock held is an immediate panic: with preemption
 *	disabled no one could ever release it.
 */
void
hw_lock_lock(hw_lock_t lock)
{
	thread_t	thread;
	uintptr_t	state;

	thread = current_thread();
	disable_preemption_for_thread(thread);
	state = LCK_MTX_THREAD_TO_STATE(thread) | PLATFORM_LCK_ILOCK;
#if	__SMP__

#if	LOCK_PRETEST
	if (ordered_load_hw(lock))
		goto contended;
#endif	// LOCK_PRETEST
	if (atomic_compare_exchange(&lock->lock_data, 0, state,
					memory_order_acquire_smp, TRUE)) {
		goto end;
	}
#if	LOCK_PRETEST
contended:
#endif	// LOCK_PRETEST
	hw_lock_lock_contended(lock, state, 0, TRUE);
end:
#else	// __SMP__
	if (lock->lock_data)
		panic("Spinlock held %p", lock);
	lock->lock_data = state;
#endif	// __SMP__
#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE, lock, 0);
#endif
	return;
}
578
/*
 *	Routine: hw_lock_to
 *
 *	Acquire lock, spinning until it becomes available or timeout.
 *	Timeout is in mach_absolute_time ticks, return with
 *	preemption disabled.
 *
 *	Returns 1 if the lock was acquired, 0 on timeout. Unlike
 *	hw_lock_lock(), a timeout here does not panic (do_panic=FALSE).
 *	NB: preemption stays disabled even on failure — callers re-enable.
 */
unsigned int
hw_lock_to(hw_lock_t lock, uint64_t timeout)
{
	thread_t	thread;
	uintptr_t	state;
	unsigned int success = 0;

	thread = current_thread();
	disable_preemption_for_thread(thread);
	state = LCK_MTX_THREAD_TO_STATE(thread) | PLATFORM_LCK_ILOCK;
#if	__SMP__

#if	LOCK_PRETEST
	if (ordered_load_hw(lock))
		goto contended;
#endif	// LOCK_PRETEST
	if (atomic_compare_exchange(&lock->lock_data, 0, state,
					memory_order_acquire_smp, TRUE)) {
		success = 1;
		goto end;
	}
#if	LOCK_PRETEST
contended:
#endif	// LOCK_PRETEST
	success = hw_lock_lock_contended(lock, state, timeout, FALSE);
end:
#else	// __SMP__
	/* UP: a held lock can never be released while we spin with
	 * preemption off, so just try once. */
	(void)timeout;
	if (ordered_load_hw(lock) == 0) {
		ordered_store_hw(lock, state);
		success = 1;
	}
#endif	// __SMP__
#if CONFIG_DTRACE
	if (success)
		LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE, lock, 0);
#endif
	return success;
}
625
/*
 *	Routine: hw_lock_try
 *
 *	Single non-blocking acquisition attempt.
 *	returns with preemption disabled on success; on failure preemption
 *	is restored to its entry state.
 *
 *	With LOCK_TRY_DISABLE_INT the attempt runs with interrupts masked
 *	instead, and preemption is only disabled after a successful CAS.
 */
unsigned int
hw_lock_try(hw_lock_t lock)
{
	thread_t	thread = current_thread();
	int		success = 0;
#if	LOCK_TRY_DISABLE_INT
	long		intmask;

	intmask = disable_interrupts();
#else
	disable_preemption_for_thread(thread);
#endif	// LOCK_TRY_DISABLE_INT

#if	__SMP__
#if	LOCK_PRETEST
	if (ordered_load_hw(lock))
		goto failed;
#endif	// LOCK_PRETEST
	success = atomic_compare_exchange(&lock->lock_data, 0, LCK_MTX_THREAD_TO_STATE(thread) | PLATFORM_LCK_ILOCK,
					memory_order_acquire_smp, FALSE);
#else
	if (lock->lock_data == 0) {
		lock->lock_data = LCK_MTX_THREAD_TO_STATE(thread) | PLATFORM_LCK_ILOCK;
		success = 1;
	}
#endif	// __SMP__

#if	LOCK_TRY_DISABLE_INT
	if (success)
		disable_preemption_for_thread(thread);
#if	LOCK_PRETEST
failed:
#endif	// LOCK_PRETEST
	restore_interrupts(intmask);
#else
#if	LOCK_PRETEST
failed:
#endif	// LOCK_PRETEST
	if (!success)
		enable_preemption();
#endif	// LOCK_TRY_DISABLE_INT
#if CONFIG_DTRACE
	if (success)
		LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE, lock, 0);
#endif
	return success;
}
678
/*
 *	Routine: hw_lock_unlock
 *
 *	Unconditionally release lock, release preemption level.
 *	The release-ordered store publishes all writes made under the lock.
 */
void
hw_lock_unlock(hw_lock_t lock)
{
	__c11_atomic_store((_Atomic uintptr_t *)&lock->lock_data, 0, memory_order_release_smp);
#if __arm__ || __arm64__
	// ARM tests are only for open-source exclusion
	/* Wake any WFE-parked spinners. */
	set_event();
#endif	// __arm__ || __arm64__
#if	CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_SPIN_UNLOCK_RELEASE, lock, 0);
#endif /* CONFIG_DTRACE */
	enable_preemption();
}
697
/*
 * Routine hw_lock_held, doesn't change preemption state.
 * N.B. Racy, of course. Returns nonzero iff the lock word was nonzero
 * at the moment of the (relaxed) load — suitable for asserts only.
 */
unsigned int
hw_lock_held(hw_lock_t lock)
{
	return (ordered_load_hw(lock) != 0);
}
707
708 /*
709 * Routine: lck_spin_sleep
710 */
711 wait_result_t
712 lck_spin_sleep(
713 lck_spin_t *lck,
714 lck_sleep_action_t lck_sleep_action,
715 event_t event,
716 wait_interrupt_t interruptible)
717 {
718 wait_result_t res;
719
720 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
721 panic("Invalid lock sleep action %x\n", lck_sleep_action);
722
723 res = assert_wait(event, interruptible);
724 if (res == THREAD_WAITING) {
725 lck_spin_unlock(lck);
726 res = thread_block(THREAD_CONTINUE_NULL);
727 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK))
728 lck_spin_lock(lck);
729 }
730 else
731 if (lck_sleep_action & LCK_SLEEP_UNLOCK)
732 lck_spin_unlock(lck);
733
734 return res;
735 }
736
737
738 /*
739 * Routine: lck_spin_sleep_deadline
740 */
741 wait_result_t
742 lck_spin_sleep_deadline(
743 lck_spin_t *lck,
744 lck_sleep_action_t lck_sleep_action,
745 event_t event,
746 wait_interrupt_t interruptible,
747 uint64_t deadline)
748 {
749 wait_result_t res;
750
751 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
752 panic("Invalid lock sleep action %x\n", lck_sleep_action);
753
754 res = assert_wait_deadline(event, interruptible, deadline);
755 if (res == THREAD_WAITING) {
756 lck_spin_unlock(lck);
757 res = thread_block(THREAD_CONTINUE_NULL);
758 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK))
759 lck_spin_lock(lck);
760 }
761 else
762 if (lck_sleep_action & LCK_SLEEP_UNLOCK)
763 lck_spin_unlock(lck);
764
765 return res;
766 }
767
768
/*
 * Routine: 	lck_mtx_clear_promoted
 *
 * Handle clearing of TH_SFLAG_PROMOTED,
 * adjusting thread priority as needed.
 *
 * Called with thread lock held
 */
static void
lck_mtx_clear_promoted (
	thread_t 			thread,
	__kdebug_only uintptr_t		trace_lck)
{
	thread->sched_flags &= ~TH_SFLAG_PROMOTED;

	if (thread->sched_flags & TH_SFLAG_RW_PROMOTED) {
		/* Thread still has a RW lock promotion */
	} else if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
		/* Depressed thread: fall back to DEPRESSPRI now that the
		 * mutex promotion no longer overrides the depression. */
		KERNEL_DEBUG_CONSTANT(
			MACHDBG_CODE(DBG_MACH_SCHED,MACH_DEMOTE) | DBG_FUNC_NONE,
				  thread->sched_pri, DEPRESSPRI, 0, trace_lck, 0);
		set_sched_pri(thread, DEPRESSPRI);
	} else {
		/* No other promotion/depression in play: recompute from base.
		 * The tracepoint fires only when this is an actual demotion. */
		if (thread->base_pri < thread->sched_pri) {
			KERNEL_DEBUG_CONSTANT(
				MACHDBG_CODE(DBG_MACH_SCHED,MACH_DEMOTE) | DBG_FUNC_NONE,
					  thread->sched_pri, thread->base_pri, 0, trace_lck, 0);
		}
		thread_recompute_sched_pri(thread, FALSE);
	}
}
800
801
/*
 * Routine:	lck_mtx_sleep
 *
 *	Atomically drop a held mutex and wait on event. On wakeup the mutex
 *	is re-taken (spin / spin-always variants selectable via flags) unless
 *	LCK_SLEEP_UNLOCK was passed. LCK_SLEEP_PROMOTED_PRI borrows the RW
 *	promotion machinery to keep the sleeper's priority boosted.
 */
wait_result_t
lck_mtx_sleep(
        lck_mtx_t		*lck,
	lck_sleep_action_t	lck_sleep_action,
	event_t			event,
	wait_interrupt_t	interruptible)
{
	wait_result_t	res;
	thread_t	thread = current_thread();
 
	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_START,
		     VM_KERNEL_UNSLIDE_OR_PERM(lck), (int)lck_sleep_action, VM_KERNEL_UNSLIDE_OR_PERM(event), (int)interruptible, 0);

	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
		panic("Invalid lock sleep action %x\n", lck_sleep_action);

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		/*
		 * We overload the RW lock promotion to give us a priority ceiling
		 * during the time that this thread is asleep, so that when it
		 * is re-awakened (and not yet contending on the mutex), it is
		 * runnable at a reasonably high priority.
		 */
		thread->rwlock_count++;
	}

	res = assert_wait(event, interruptible);
	if (res == THREAD_WAITING) {
		lck_mtx_unlock(lck);
		res = thread_block(THREAD_CONTINUE_NULL);
		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
			if ((lck_sleep_action & LCK_SLEEP_SPIN))
				lck_mtx_lock_spin(lck);
			else if ((lck_sleep_action & LCK_SLEEP_SPIN_ALWAYS))
				lck_mtx_lock_spin_always(lck);
			else
				lck_mtx_lock(lck);
		}
	}
	else
	if (lck_sleep_action & LCK_SLEEP_UNLOCK)
		lck_mtx_unlock(lck);

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		/* Drop the borrowed RW boost taken above. */
		if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
			/* sched_flags checked without lock, but will be rechecked while clearing */
			lck_rw_clear_promotion(thread);
		}
	}

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0);

	return res;
}
859
860
/*
 * Routine:	lck_mtx_sleep_deadline
 *
 *	Deadline variant of lck_mtx_sleep(); see that routine for the flag
 *	semantics.
 *	NOTE(review): unlike lck_mtx_sleep(), the relock path here does not
 *	honor LCK_SLEEP_SPIN_ALWAYS (it falls through to lck_mtx_lock) —
 *	confirm whether this asymmetry is intentional.
 */
wait_result_t
lck_mtx_sleep_deadline(
        lck_mtx_t		*lck,
	lck_sleep_action_t	lck_sleep_action,
	event_t			event,
	wait_interrupt_t	interruptible,
	uint64_t		deadline)
{
	wait_result_t   res;
	thread_t	thread = current_thread();

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_START,
		     VM_KERNEL_UNSLIDE_OR_PERM(lck), (int)lck_sleep_action, VM_KERNEL_UNSLIDE_OR_PERM(event), (int)interruptible, 0);

	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
		panic("Invalid lock sleep action %x\n", lck_sleep_action);

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		/*
		 * See lck_mtx_sleep().
		 */
		thread->rwlock_count++;
	}

	res = assert_wait_deadline(event, interruptible, deadline);
	if (res == THREAD_WAITING) {
		lck_mtx_unlock(lck);
		res = thread_block(THREAD_CONTINUE_NULL);
		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
			if ((lck_sleep_action & LCK_SLEEP_SPIN))
				lck_mtx_lock_spin(lck);
			else
				lck_mtx_lock(lck);
		}
	}
	else
	if (lck_sleep_action & LCK_SLEEP_UNLOCK)
		lck_mtx_unlock(lck);

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		/* Drop the borrowed RW boost taken above. */
		if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
			/* sched_flags checked without lock, but will be rechecked while clearing */
			lck_rw_clear_promotion(thread);
		}
	}

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0);

	return res;
}
914
/*
 * Routine: 	lck_mtx_lock_wait
 *
 * Invoked in order to wait on contention.
 *
 * Called with the interlock locked and
 * returns it unlocked.
 *
 * Implements mutex priority inheritance: the waiter pushes the holder
 * up to min(waiter's effective pri, MAXPRI_PROMOTE), records itself as
 * a waiter on the mutex, then blocks uninterruptibly.
 */
void
lck_mtx_lock_wait (
	lck_mtx_t			*lck,
	thread_t			holder)
{
	thread_t		self = current_thread();
	lck_mtx_t		*mutex;
	__kdebug_only uintptr_t	trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
	__kdebug_only uintptr_t	trace_holder = VM_KERNEL_UNSLIDE_OR_PERM(holder);
	integer_t		priority;
	spl_t			s = splsched();
#if	CONFIG_DTRACE
	uint64_t		sleep_start = 0;

	if (lockstat_probemap[LS_LCK_MTX_LOCK_BLOCK] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_BLOCK]) {
		sleep_start = mach_absolute_time();
	}
#endif

	/* Resolve an indirect (extended) mutex to its real lock body. */
	if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)
		mutex = lck;
	else
		mutex = &lck->lck_mtx_ptr->lck_mtx;

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START, trace_lck, trace_holder, 0, 0, 0);

	/* Effective priority to push: at least our base pri, and at least
	 * BASEPRI_DEFAULT so low-priority waiters still make progress. */
	priority = self->sched_pri;
	if (priority < self->base_pri)
		priority = self->base_pri;
	if (priority < BASEPRI_DEFAULT)
		priority = BASEPRI_DEFAULT;

	/* Do not promote past promotion ceiling */
	priority = MIN(priority, MAXPRI_PROMOTE);

	thread_lock(holder);
	if (mutex->lck_mtx_pri == 0) {
		/* First waiter on this mutex: charge the holder one promotion. */
		holder->promotions++;
		holder->sched_flags |= TH_SFLAG_PROMOTED;
	}

	if (mutex->lck_mtx_pri < priority && holder->sched_pri < priority) {
		KERNEL_DEBUG_CONSTANT(
			MACHDBG_CODE(DBG_MACH_SCHED,MACH_PROMOTE) | DBG_FUNC_NONE,
					holder->sched_pri, priority, trace_holder, trace_lck, 0);
		set_sched_pri(holder, priority);
	}
	thread_unlock(holder);
	splx(s);

	/* lck_mtx_pri and the waiter count are protected by the interlock,
	 * which the caller still holds here. */
	if (mutex->lck_mtx_pri < priority)
		mutex->lck_mtx_pri = priority;
	if (self->pending_promoter[self->pending_promoter_index] == NULL) {
		self->pending_promoter[self->pending_promoter_index] = mutex;
		mutex->lck_mtx_waiters++;
	}
	else
	if (self->pending_promoter[self->pending_promoter_index] != mutex) {
		self->pending_promoter[++self->pending_promoter_index] = mutex;
		mutex->lck_mtx_waiters++;
	}

	thread_set_pending_block_hint(self, kThreadWaitKernelMutex);
	assert_wait(LCK_MTX_EVENT(mutex), THREAD_UNINT);
	lck_mtx_ilk_unlock(mutex);

	thread_block(THREAD_CONTINUE_NULL);

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_END, 0, 0, 0, 0, 0);
#if	CONFIG_DTRACE
	/*
	 * Record the DTrace lockstat probe for blocking, block time
	 * measured from when we were entered.
	 */
	if (sleep_start) {
		if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) {
			LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_BLOCK, lck,
			    mach_absolute_time() - sleep_start);
		} else {
			LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_BLOCK, lck,
			    mach_absolute_time() - sleep_start);
		}
	}
#endif
}
1008
/*
 * Routine: 	lck_mtx_lock_acquire
 *
 * Invoked on acquiring the mutex when there is
 * contention.
 *
 * Returns the current number of waiters.
 *
 * Called with the interlock locked.
 *
 * Completes the priority-inheritance hand-off: removes this thread's
 * pending-promoter entry, and if waiters remain, promotes the new
 * holder to the mutex's recorded ceiling priority.
 */
int
lck_mtx_lock_acquire(
	lck_mtx_t		*lck)
{
	thread_t		thread = current_thread();
	lck_mtx_t		*mutex;
	integer_t		priority;
	spl_t			s;
	__kdebug_only uintptr_t	trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);

	/* Resolve an indirect (extended) mutex to its real lock body. */
	if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)
		mutex = lck;
	else
		mutex = &lck->lck_mtx_ptr->lck_mtx;

	/* Retire the pending-promoter slot this thread reserved in
	 * lck_mtx_lock_wait(); it is no longer a waiter. */
	if (thread->pending_promoter[thread->pending_promoter_index] == mutex) {
		thread->pending_promoter[thread->pending_promoter_index] = NULL;
		if (thread->pending_promoter_index > 0)
			thread->pending_promoter_index--;
		mutex->lck_mtx_waiters--;
	}

	if (mutex->lck_mtx_waiters)
		priority = mutex->lck_mtx_pri;
	else {
		/* Last waiter gone: the mutex no longer carries a ceiling. */
		mutex->lck_mtx_pri = 0;
		priority = 0;
	}

	if (priority || thread->was_promoted_on_wakeup) {
		s = splsched();
		thread_lock(thread);

		if (priority) {
			/* Remaining waiters: the new holder inherits the ceiling. */
			thread->promotions++;
			thread->sched_flags |= TH_SFLAG_PROMOTED;
			if (thread->sched_pri < priority) {
				KERNEL_DEBUG_CONSTANT(
					MACHDBG_CODE(DBG_MACH_SCHED,MACH_PROMOTE) | DBG_FUNC_NONE,
						thread->sched_pri, priority, 0, trace_lck, 0);
				/* Do not promote past promotion ceiling */
				assert(priority <= MAXPRI_PROMOTE);
				set_sched_pri(thread, priority);
			}
		}
		if (thread->was_promoted_on_wakeup) {
			/* Undo the wakeup-time boost if no promotion remains. */
			thread->was_promoted_on_wakeup = 0;
			if (thread->promotions == 0)
				lck_mtx_clear_promoted(thread, trace_lck);
		}

		thread_unlock(thread);
		splx(s);
	}

#if CONFIG_DTRACE
	if (lockstat_probemap[LS_LCK_MTX_LOCK_ACQUIRE] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_ACQUIRE]) {
		if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) {
			LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, lck, 0);
		} else {
			LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_ACQUIRE, lck, 0);
		}
	}
#endif	
	return (mutex->lck_mtx_waiters);
}
1085
/*
 * Routine: 	lck_mtx_unlock_wakeup
 *
 * Invoked on unlock when there is contention.
 *
 * Called with the interlock locked.
 *
 * Wakes one waiter (at the mutex's ceiling priority when more remain)
 * and drops this thread's promotion charge for the mutex.
 */
void
lck_mtx_unlock_wakeup (
	lck_mtx_t			*lck,
	thread_t			holder)
{
	thread_t		thread = current_thread();
	lck_mtx_t		*mutex;
	__kdebug_only uintptr_t	trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);

	/* Resolve an indirect (extended) mutex to its real lock body. */
	if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)
		mutex = lck;
	else
		mutex = &lck->lck_mtx_ptr->lck_mtx;

	/* Only the holder may unlock. */
	if (thread != holder)
		panic("lck_mtx_unlock_wakeup: mutex %p holder %p\n", mutex, holder);

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_START, trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(holder), 0, 0, 0);

	assert(mutex->lck_mtx_waiters > 0);
	if (mutex->lck_mtx_waiters > 1)
		/* More waiters will remain: hand the ceiling priority along. */
		thread_wakeup_one_with_pri(LCK_MTX_EVENT(lck), lck->lck_mtx_pri);
	else
		thread_wakeup_one(LCK_MTX_EVENT(lck));

	if (thread->promotions > 0) {
		spl_t		s = splsched();

		thread_lock(thread);
		if (--thread->promotions == 0 && (thread->sched_flags & TH_SFLAG_PROMOTED))
			lck_mtx_clear_promoted(thread, trace_lck);
		thread_unlock(thread);
		splx(s);
	}

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_END, 0, 0, 0, 0, 0);
}
1130
/*
 * Routine:	lck_mtx_unlockspin_wakeup
 *
 *	Wake one waiter when a spin-held mutex is released; no promotion
 *	bookkeeping (spin acquisitions never promoted the holder).
 */
void
lck_mtx_unlockspin_wakeup (
	lck_mtx_t			*lck)
{
	assert(lck->lck_mtx_waiters > 0);
	thread_wakeup_one(LCK_MTX_EVENT(lck));

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_NONE, VM_KERNEL_UNSLIDE_OR_PERM(lck), 0, 0, 1, 0);
#if CONFIG_DTRACE
	/*
	 * When there are waiters, we skip the hot-patch spot in the
	 * fastpath, so we record it here.
	 */
	LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, lck, 0);
#endif
}
1147
1148
/*
 * Routine: mutex_pause
 *
 * Called by former callers of simple_lock_pause().
 */
#define	MAX_COLLISION_COUNTS	32
#define	MAX_COLLISION 	8

/* Histogram of collision counts, indexed by (clamped) collision number. */
unsigned int max_collision_count[MAX_COLLISION_COUNTS];

/* Back-off delay in microseconds for each collision level. */
uint32_t collision_backoffs[MAX_COLLISION] = {
        10, 50, 100, 200, 400, 600, 800, 1000
};
1162
1163
1164 void
1165 mutex_pause(uint32_t collisions)
1166 {
1167 wait_result_t wait_result;
1168 uint32_t back_off;
1169
1170 if (collisions >= MAX_COLLISION_COUNTS)
1171 collisions = MAX_COLLISION_COUNTS - 1;
1172 max_collision_count[collisions]++;
1173
1174 if (collisions >= MAX_COLLISION)
1175 collisions = MAX_COLLISION - 1;
1176 back_off = collision_backoffs[collisions];
1177
1178 wait_result = assert_wait_timeout((event_t)mutex_pause, THREAD_UNINT, back_off, NSEC_PER_USEC);
1179 assert(wait_result == THREAD_WAITING);
1180
1181 wait_result = thread_block(THREAD_CONTINUE_NULL);
1182 assert(wait_result == THREAD_TIMED_OUT);
1183 }
1184
1185
/* Global counters: how often lck_mtx_yield() found waiters (and yielded)
 * vs. found none (and kept the lock). Updated without synchronization. */
unsigned int mutex_yield_wait = 0;
unsigned int mutex_yield_no_wait = 0;
1188
1189 void
1190 lck_mtx_yield(
1191 lck_mtx_t *lck)
1192 {
1193 int waiters;
1194
1195 #if DEBUG
1196 lck_mtx_assert(lck, LCK_MTX_ASSERT_OWNED);
1197 #endif /* DEBUG */
1198
1199 if (lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT)
1200 waiters = lck->lck_mtx_ptr->lck_mtx.lck_mtx_waiters;
1201 else
1202 waiters = lck->lck_mtx_waiters;
1203
1204 if ( !waiters) {
1205 mutex_yield_no_wait++;
1206 } else {
1207 mutex_yield_wait++;
1208 lck_mtx_unlock(lck);
1209 mutex_pause(0);
1210 lck_mtx_lock(lck);
1211 }
1212 }
1213
1214
/*
 * Routine:	lck_rw_sleep
 *
 * Release the RW lock, wait for `event', and on wakeup re-acquire
 * the lock as directed by lck_sleep_action (unless LCK_SLEEP_UNLOCK
 * is set, in which case the lock is left released).  Returns the
 * wait result from thread_block().
 */
wait_result_t
lck_rw_sleep(
        lck_rw_t		*lck,
	lck_sleep_action_t	lck_sleep_action,
	event_t			event,
	wait_interrupt_t	interruptible)
{
	wait_result_t	res;
	lck_rw_type_t	lck_rw_type;
	thread_t		thread = current_thread();

	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
		panic("Invalid lock sleep action %x\n", lck_sleep_action);

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		/*
		 * Although we are dropping the RW lock, the intent in most cases
		 * is that this thread remains as an observer, since it may hold
		 * some secondary resource, but must yield to avoid deadlock. In
		 * this situation, make sure that the thread is boosted to the
		 * RW lock ceiling while blocked, so that it can re-acquire the
		 * RW lock at that priority.
		 */
		thread->rwlock_count++;
	}

	/* Queue on the wait event BEFORE dropping the lock, to avoid a lost wakeup. */
	res = assert_wait(event, interruptible);
	if (res == THREAD_WAITING) {
		lck_rw_type = lck_rw_done(lck);	/* remember the mode held on entry */
		res = thread_block(THREAD_CONTINUE_NULL);
		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
			/* Re-take in the requested mode; default is the mode held on entry. */
			if (!(lck_sleep_action & (LCK_SLEEP_SHARED|LCK_SLEEP_EXCLUSIVE)))
				lck_rw_lock(lck, lck_rw_type);
			else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE)
				lck_rw_lock_exclusive(lck);
			else
				lck_rw_lock_shared(lck);
		}
	}
	else
	if (lck_sleep_action & LCK_SLEEP_UNLOCK)
		(void)lck_rw_done(lck);	/* wait was refused: still honor the unlock request */

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
			/* sched_flags checked without lock, but will be rechecked while clearing */

			/* Only if the caller wanted the lck_rw_t returned unlocked should we drop to 0 */
			assert(lck_sleep_action & LCK_SLEEP_UNLOCK);

			lck_rw_clear_promotion(thread);
		}
	}

	return res;
}
1274
1275
/*
 * Routine:	lck_rw_sleep_deadline
 *
 * Deadline variant of lck_rw_sleep(): identical semantics, but the
 * wait is bounded by `deadline' (absolute time) via
 * assert_wait_deadline().  Returns the wait result from
 * thread_block(), e.g. THREAD_TIMED_OUT when the deadline fired.
 */
wait_result_t
lck_rw_sleep_deadline(
	lck_rw_t		*lck,
	lck_sleep_action_t	lck_sleep_action,
	event_t			event,
	wait_interrupt_t	interruptible,
	uint64_t		deadline)
{
	wait_result_t   res;
	lck_rw_type_t	lck_rw_type;
	thread_t		thread = current_thread();

	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
		panic("Invalid lock sleep action %x\n", lck_sleep_action);

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		/* See lck_rw_sleep(): keep the RW-boost while blocked. */
		thread->rwlock_count++;
	}

	/* Queue on the wait event BEFORE dropping the lock, to avoid a lost wakeup. */
	res = assert_wait_deadline(event, interruptible, deadline);
	if (res == THREAD_WAITING) {
		lck_rw_type = lck_rw_done(lck);	/* remember the mode held on entry */
		res = thread_block(THREAD_CONTINUE_NULL);
		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
			/* Re-take in the requested mode; default is the mode held on entry. */
			if (!(lck_sleep_action & (LCK_SLEEP_SHARED|LCK_SLEEP_EXCLUSIVE)))
				lck_rw_lock(lck, lck_rw_type);
			else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE)
				lck_rw_lock_exclusive(lck);
			else
				lck_rw_lock_shared(lck);
		}
	}
	else
	if (lck_sleep_action & LCK_SLEEP_UNLOCK)
		(void)lck_rw_done(lck);	/* wait was refused: still honor the unlock request */

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
			/* sched_flags checked without lock, but will be rechecked while clearing */

			/* Only if the caller wanted the lck_rw_t returned unlocked should we drop to 0 */
			assert(lck_sleep_action & LCK_SLEEP_UNLOCK);

			lck_rw_clear_promotion(thread);
		}
	}

	return res;
}
1328
1329 /*
1330 * Reader-writer lock promotion
1331 *
1332 * We support a limited form of reader-writer
1333 * lock promotion whose effects are:
1334 *
1335 * * Qualifying threads have decay disabled
 * * Scheduler priority is reset to a floor of
 *   their statically assigned priority
 *   or BASEPRI_BACKGROUND
1339 *
1340 * The rationale is that lck_rw_ts do not have
1341 * a single owner, so we cannot apply a directed
1342 * priority boost from all waiting threads
1343 * to all holding threads without maintaining
1344 * lists of all shared owners and all waiting
1345 * threads for every lock.
1346 *
1347 * Instead (and to preserve the uncontended fast-
1348 * path), acquiring (or attempting to acquire)
 * a RW lock in shared or exclusive mode increments
1350 * a per-thread counter. Only if that thread stops
1351 * making forward progress (for instance blocking
1352 * on a mutex, or being preempted) do we consult
1353 * the counter and apply the priority floor.
1354 * When the thread becomes runnable again (or in
1355 * the case of preemption it never stopped being
1356 * runnable), it has the priority boost and should
1357 * be in a good position to run on the CPU and
1358 * release all RW locks (at which point the priority
1359 * boost is cleared).
1360 *
1361 * Care must be taken to ensure that priority
1362 * boosts are not retained indefinitely, since unlike
1363 * mutex priority boosts (where the boost is tied
1364 * to the mutex lifecycle), the boost is tied
1365 * to the thread and independent of any particular
1366 * lck_rw_t. Assertions are in place on return
1367 * to userspace so that the boost is not held
1368 * indefinitely.
1369 *
1370 * The routines that increment/decrement the
1371 * per-thread counter should err on the side of
1372 * incrementing any time a preemption is possible
1373 * and the lock would be visible to the rest of the
1374 * system as held (so it should be incremented before
1375 * interlocks are dropped/preemption is enabled, or
1376 * before a CAS is executed to acquire the lock).
1377 *
1378 */
1379
/*
 * lck_rw_clear_promotion: Undo priority promotions when the last RW
 * lock is released by a thread (if a promotion was active)
 */
void lck_rw_clear_promotion(thread_t thread)
{
	assert(thread->rwlock_count == 0);	/* caller must have released all RW locks */

	/* Cancel any promotions if the thread had actually blocked while holding a RW lock */
	spl_t s = splsched();

	thread_lock(thread);

	if (thread->sched_flags & TH_SFLAG_RW_PROMOTED) {
		thread->sched_flags &= ~TH_SFLAG_RW_PROMOTED;

		if (thread->sched_flags & TH_SFLAG_PROMOTED) {
			/* Thread still has a mutex promotion */
		} else if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
			/* Thread is priority-depressed: fall back to DEPRESSPRI. */
			KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_RW_DEMOTE) | DBG_FUNC_NONE,
					      (uintptr_t)thread_tid(thread), thread->sched_pri, DEPRESSPRI, 0, 0);

			set_sched_pri(thread, DEPRESSPRI);
		} else {
			/* No other boost active: recompute the normal scheduled priority. */
			KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_RW_DEMOTE) | DBG_FUNC_NONE,
					      (uintptr_t)thread_tid(thread), thread->sched_pri, thread->base_pri, 0, 0);

			thread_recompute_sched_pri(thread, FALSE);
		}
	}

	thread_unlock(thread);
	splx(s);
}
1414
1415 /*
1416 * Callout from context switch if the thread goes
1417 * off core with a positive rwlock_count
1418 *
1419 * Called at splsched with the thread locked
1420 */
1421 void
1422 lck_rw_set_promotion_locked(thread_t thread)
1423 {
1424 if (LcksOpts & disLkRWPrio)
1425 return;
1426
1427 integer_t priority;
1428
1429 priority = thread->sched_pri;
1430
1431 if (priority < thread->base_pri)
1432 priority = thread->base_pri;
1433 if (priority < BASEPRI_BACKGROUND)
1434 priority = BASEPRI_BACKGROUND;
1435
1436 if ((thread->sched_pri < priority) ||
1437 !(thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
1438 KERNEL_DEBUG_CONSTANT(
1439 MACHDBG_CODE(DBG_MACH_SCHED, MACH_RW_PROMOTE) | DBG_FUNC_NONE,
1440 (uintptr_t)thread_tid(thread), thread->sched_pri,
1441 thread->base_pri, priority, 0);
1442
1443 thread->sched_flags |= TH_SFLAG_RW_PROMOTED;
1444
1445 if (thread->sched_pri < priority)
1446 set_sched_pri(thread, priority);
1447 }
1448 }
1449
/*
 * Routine:	host_lockgroup_info
 *
 * Export per-lock-group statistics (spin, mutex, and RW counters)
 * for every registered lck_grp_t.  The records are built in a
 * freshly allocated pageable buffer and returned out-of-line as a
 * vm_map_copy_t in *lockgroup_infop, with the record count in
 * *lockgroup_infoCntp.  Ownership of the copy transfers to the
 * caller (via MIG).
 */
kern_return_t
host_lockgroup_info(
	host_t					host,
	lockgroup_info_array_t	*lockgroup_infop,
	mach_msg_type_number_t	*lockgroup_infoCntp)
{
	lockgroup_info_t	*lockgroup_info_base;
	lockgroup_info_t	*lockgroup_info;
	vm_offset_t			lockgroup_info_addr;
	vm_size_t			lockgroup_info_size;
	vm_size_t			lockgroup_info_vmsize;
	lck_grp_t			*lck_grp;
	unsigned int		i;
	vm_map_copy_t		copy;
	kern_return_t		kr;

	if (host == HOST_NULL)
		return KERN_INVALID_HOST;

	/* Hold the group-list lock so lck_grp_cnt and the queue stay stable. */
	lck_mtx_lock(&lck_grp_lock);

	lockgroup_info_size = lck_grp_cnt * sizeof(*lockgroup_info);
	lockgroup_info_vmsize = round_page(lockgroup_info_size);
	kr = kmem_alloc_pageable(ipc_kernel_map,
						&lockgroup_info_addr, lockgroup_info_vmsize, VM_KERN_MEMORY_IPC);
	if (kr != KERN_SUCCESS) {
		lck_mtx_unlock(&lck_grp_lock);
		return(kr);
	}

	lockgroup_info_base = (lockgroup_info_t *) lockgroup_info_addr;
	lck_grp = (lck_grp_t *)queue_first(&lck_grp_queue);
	lockgroup_info = lockgroup_info_base;

	/* One record per registered lock group, in queue order. */
	for (i = 0; i < lck_grp_cnt; i++) {

		lockgroup_info->lock_spin_cnt = lck_grp->lck_grp_spincnt;
		lockgroup_info->lock_spin_util_cnt = lck_grp->lck_grp_stat.lck_grp_spin_stat.lck_grp_spin_util_cnt;
		lockgroup_info->lock_spin_held_cnt = lck_grp->lck_grp_stat.lck_grp_spin_stat.lck_grp_spin_held_cnt;
		lockgroup_info->lock_spin_miss_cnt = lck_grp->lck_grp_stat.lck_grp_spin_stat.lck_grp_spin_miss_cnt;
		lockgroup_info->lock_spin_held_max = lck_grp->lck_grp_stat.lck_grp_spin_stat.lck_grp_spin_held_max;
		lockgroup_info->lock_spin_held_cum = lck_grp->lck_grp_stat.lck_grp_spin_stat.lck_grp_spin_held_cum;

		lockgroup_info->lock_mtx_cnt = lck_grp->lck_grp_mtxcnt;
		lockgroup_info->lock_mtx_util_cnt = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_util_cnt;
		lockgroup_info->lock_mtx_held_cnt = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_held_cnt;
		lockgroup_info->lock_mtx_miss_cnt = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_miss_cnt;
		lockgroup_info->lock_mtx_wait_cnt = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_wait_cnt;
		lockgroup_info->lock_mtx_held_max = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_held_max;
		lockgroup_info->lock_mtx_held_cum = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_held_cum;
		lockgroup_info->lock_mtx_wait_max = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_wait_max;
		lockgroup_info->lock_mtx_wait_cum = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_wait_cum;

		lockgroup_info->lock_rw_cnt = lck_grp->lck_grp_rwcnt;
		lockgroup_info->lock_rw_util_cnt = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_util_cnt;
		lockgroup_info->lock_rw_held_cnt = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_held_cnt;
		lockgroup_info->lock_rw_miss_cnt = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_miss_cnt;
		lockgroup_info->lock_rw_wait_cnt = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_wait_cnt;
		lockgroup_info->lock_rw_held_max = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_held_max;
		lockgroup_info->lock_rw_held_cum = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_held_cum;
		lockgroup_info->lock_rw_wait_max = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_wait_max;
		lockgroup_info->lock_rw_wait_cum = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_wait_cum;

		/*
		 * NOTE(review): strncpy truncates to LOCKGROUP_MAX_NAME and may
		 * leave the name unterminated if it is exactly that long —
		 * presumably consumers treat it as a fixed-width field; verify.
		 */
		(void) strncpy(lockgroup_info->lockgroup_name,lck_grp->lck_grp_name, LOCKGROUP_MAX_NAME);

		lck_grp = (lck_grp_t *)(queue_next((queue_entry_t)(lck_grp)));
		lockgroup_info++;
	}

	*lockgroup_infoCntp = lck_grp_cnt;
	lck_mtx_unlock(&lck_grp_lock);

	/*
	 * Zero the unused tail of the last page (lockgroup_info now points
	 * one past the final record) so no stray data is exported.
	 */
	if (lockgroup_info_size != lockgroup_info_vmsize)
		bzero((char *)lockgroup_info, lockgroup_info_vmsize - lockgroup_info_size);

	/* Wrap the buffer for out-of-line transfer to the caller. */
	kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)lockgroup_info_addr,
			   (vm_map_size_t)lockgroup_info_size, TRUE, &copy);
	assert(kr == KERN_SUCCESS);

	*lockgroup_infop = (lockgroup_info_t *) copy;

	return(KERN_SUCCESS);
}
1533
1534 /*
1535 * Atomic primitives, prototyped in kern/simple_lock.h
1536 * Noret versions are more efficient on some architectures
1537 */
1538
1539 uint32_t
1540 hw_atomic_add(volatile uint32_t *dest, uint32_t delt)
1541 {
1542 ALIGN_TEST(dest,uint32_t);
1543 return __c11_atomic_fetch_add(ATOMIC_CAST(uint32_t,dest), delt, memory_order_relaxed) + delt;
1544 }
1545
1546 uint32_t
1547 hw_atomic_sub(volatile uint32_t *dest, uint32_t delt)
1548 {
1549 ALIGN_TEST(dest,uint32_t);
1550 return __c11_atomic_fetch_sub(ATOMIC_CAST(uint32_t,dest), delt, memory_order_relaxed) - delt;
1551 }
1552
1553 uint32_t
1554 hw_atomic_or(volatile uint32_t *dest, uint32_t mask)
1555 {
1556 ALIGN_TEST(dest,uint32_t);
1557 return __c11_atomic_fetch_or(ATOMIC_CAST(uint32_t,dest), mask, memory_order_relaxed) | mask;
1558 }
1559
/*
 * Atomically OR mask into *dest (relaxed ordering).  No return value;
 * per the note above, the noret form is more efficient on some
 * architectures.
 */
void
hw_atomic_or_noret(volatile uint32_t *dest, uint32_t mask)
{
	ALIGN_TEST(dest,uint32_t);
	__c11_atomic_fetch_or(ATOMIC_CAST(uint32_t,dest), mask, memory_order_relaxed);
}
1566
1567 uint32_t
1568 hw_atomic_and(volatile uint32_t *dest, uint32_t mask)
1569 {
1570 ALIGN_TEST(dest,uint32_t);
1571 return __c11_atomic_fetch_and(ATOMIC_CAST(uint32_t,dest), mask, memory_order_relaxed) & mask;
1572 }
1573
/*
 * Atomically AND mask into *dest (relaxed ordering).  No return value;
 * per the note above, the noret form is more efficient on some
 * architectures.
 */
void
hw_atomic_and_noret(volatile uint32_t *dest, uint32_t mask)
{
	ALIGN_TEST(dest,uint32_t);
	__c11_atomic_fetch_and(ATOMIC_CAST(uint32_t,dest), mask, memory_order_relaxed);
}
1580
1581 uint32_t
1582 hw_compare_and_store(uint32_t oldval, uint32_t newval, volatile uint32_t *dest)
1583 {
1584 ALIGN_TEST(dest,uint32_t);
1585 return __c11_atomic_compare_exchange_strong(ATOMIC_CAST(uint32_t,dest), &oldval, newval,
1586 memory_order_acq_rel_smp, memory_order_relaxed);
1587 }
1588