/*
 * Copyright (c) 2015 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/ioctl.h>
#include <sys/file_internal.h>
#include <sys/proc_internal.h>
#include <sys/kernel.h>
#include <sys/guarded.h>
#include <sys/malloc.h>
#include <sys/sysproto.h>
#include <sys/pthread_shims.h>

#include <mach/mach_types.h>

#include <kern/cpu_data.h>
#include <kern/mach_param.h>
#include <kern/kern_types.h>
#include <kern/assert.h>
#include <kern/kalloc.h>
#include <kern/thread.h>
#include <kern/clock.h>
#include <kern/ledger.h>
#include <kern/policy_internal.h>
#include <kern/task.h>
#include <kern/telemetry.h>
#include <kern/waitq.h>
#include <kern/sched_prim.h>
#include <kern/zalloc.h>
#include <kern/debug.h>

#include <pexpert/pexpert.h>

#define XNU_TEST_BITMAP
#include <kern/bits.h>

#include <sys/ulock.h>
/*
 * How ulock promotion works:
 *
 * There’s a requested policy field on every thread called ‘promotions’, which
 * expresses which ulock promotions are happening to this thread.
 * The promotion priority saturates until the promotion count goes to 0.
 *
 * We also track effective promotion qos, which is the qos before clamping.
 * This value is used for promoting a thread that another thread is waiting on,
 * so that the lock owner reinflates to the right priority after unclamping.
 *
 * This also works for non-QoS threads, which can donate base priority to QoS
 * and non-QoS threads alike.
 *
 * ulock wait applies a promotion to the owner communicated through
 * UL_UNFAIR_LOCK as waiters block, and that promotion is saturated as long as
 * there is still an owner. In ulock wake, if the waker is still the owner,
 * then it clears its ownership and drops the boost. It does NOT transfer
 * ownership/priority boost to the new thread. Instead, it selects the
 * waiting thread with the highest base priority to be woken next, and
 * relies on that thread to carry the torch for the other waiting threads.
 */
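/*
 * Illustrative sketch (not part of this file): roughly how a userspace
 * unfair-lock implementation would drive these syscalls. The __ulock_wait /
 * __ulock_wake signatures below are inferred from the ulock_wait_args and
 * ulock_wake_args structures handled further down; treat them as an
 * assumption, not a public API.
 *
 *   extern int __ulock_wait(uint32_t operation, void *addr,
 *                           uint64_t value, uint32_t timeout_us);
 *   extern int __ulock_wake(uint32_t operation, void *addr,
 *                           uint64_t wake_value);
 *
 *   void example_lock(_Atomic uint32_t *lock, uint32_t self_owner_value)
 *   {
 *       uint32_t expected = 0;
 *       while (!atomic_compare_exchange_strong(lock, &expected, self_owner_value)) {
 *           // Pass the observed owner value so the kernel can find and
 *           // promote the owner thread while we block (UL_UNFAIR_LOCK).
 *           __ulock_wait(UL_UNFAIR_LOCK | ULF_NO_ERRNO, lock, expected, 0);
 *           expected = 0;
 *       }
 *   }
 *
 *   void example_unlock(_Atomic uint32_t *lock)
 *   {
 *       // Move the lock to 'unowned' in userspace first, then wake one
 *       // waiter; the wake path below drops this thread's promotion if it
 *       // is still the recorded owner.
 *       atomic_store(lock, 0);
 *       __ulock_wake(UL_UNFAIR_LOCK | ULF_NO_ERRNO, lock, 0);
 *   }
 */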
static lck_grp_t *ull_lck_grp;
static lck_mtx_t ull_table_lock;

#define ull_global_lock()       lck_mtx_lock(&ull_table_lock)
#define ull_global_unlock()     lck_mtx_unlock(&ull_table_lock)

#define ull_lock(ull)           lck_mtx_lock(&ull->ull_lock)
#define ull_unlock(ull)         lck_mtx_unlock(&ull->ull_lock)
#define ull_assert_owned(ull)   LCK_MTX_ASSERT(&ull->ull_lock, LCK_MTX_ASSERT_OWNED)

#define ULOCK_TO_EVENT(ull)     ((event_t)ull)
#define EVENT_TO_ULOCK(event)   ((ull_t *)event)
typedef struct __attribute__((packed)) {
    user_addr_t ulk_addr;
    pid_t       ulk_pid;
} ulk_t;

static bool
ull_key_match(ulk_t *a, ulk_t *b)
{
    return ((a->ulk_pid == b->ulk_pid) &&
            (a->ulk_addr == b->ulk_addr));
}
typedef struct ull {
    /*
     * ull_owner is the most recent known value for the owner of this ulock
     * i.e. it may be out of date WRT the real value in userspace.
     */
    thread_t        ull_owner;      /* holds +1 thread reference */
    ulk_t           ull_key;
    ulk_t           ull_saved_key;
    lck_mtx_t       ull_lock;
    int32_t         ull_nwaiters;
    int32_t         ull_max_nwaiters;
    int32_t         ull_refcount;
    struct promote_token ull_promote_token;
    queue_chain_t   ull_hash_link;
    uint8_t         ull_opcode;
} ull_t;

static const bool ull_debug = false;
extern void ulock_initialize(void);

#define ULL_MUST_EXIST  0x0001
static ull_t *ull_get(ulk_t *, uint32_t);
static void ull_put(ull_t *);

static thread_t ull_promote_owner_locked(ull_t *ull, thread_t thread);

#if DEVELOPMENT || DEBUG
static int ull_simulate_copyin_fault = 0;
147 kprintf("ull\t%p\n", ull
);
148 kprintf("ull_key.ulk_pid\t%d\n", ull
->ull_key
.ulk_pid
);
149 kprintf("ull_key.ulk_addr\t%p\n", (void *)(ull
->ull_key
.ulk_addr
));
150 kprintf("ull_saved_key.ulk_pid\t%d\n", ull
->ull_saved_key
.ulk_pid
);
151 kprintf("ull_saved_key.ulk_addr\t%p\n", (void *)(ull
->ull_saved_key
.ulk_addr
));
152 kprintf("ull_nwaiters\t%d\n", ull
->ull_nwaiters
);
153 kprintf("ull_max_nwaiters\t%d\n", ull
->ull_max_nwaiters
);
154 kprintf("ull_refcount\t%d\n", ull
->ull_refcount
);
155 kprintf("ull_opcode\t%d\n\n", ull
->ull_opcode
);
156 kprintf("ull_owner\t0x%llx\n\n", thread_tid(ull
->ull_owner
));
157 kprintf("ull_promote_token\t%d, %d\n\n", ull
->ull_promote_token
.pt_basepri
, ull
->ull_promote_token
.pt_qos
);
static int ull_hash_buckets;
static queue_head_t *ull_bucket;
static uint32_t ull_nzalloc = 0;
static zone_t ull_zone;
static __inline__ uint32_t
ull_hash_index(char *key, size_t length)
{
    uint32_t hash = jenkins_hash(key, length);

    hash &= (ull_hash_buckets - 1);

    return hash;
}

/* Ensure that the key structure is packed,
 * so that no undefined memory is passed to
 * jenkins_hash().
 */
static_assert(sizeof(ulk_t) == sizeof(user_addr_t) + sizeof(pid_t));

#define ULL_INDEX(keyp) ull_hash_index((char *)keyp, sizeof *keyp)
void
ulock_initialize(void)
{
    ull_lck_grp = lck_grp_alloc_init("ulocks", NULL);
    lck_mtx_init(&ull_table_lock, ull_lck_grp, NULL);

    assert(thread_max > 16);
    /* Size ull_hash_buckets based on thread_max.
     * Round up to nearest power of 2, then divide by 4
     */
    ull_hash_buckets = (1 << (bit_ceiling(thread_max) - 2));

    kprintf("%s>thread_max=%d, ull_hash_buckets=%d\n", __FUNCTION__, thread_max, ull_hash_buckets);
    assert(ull_hash_buckets >= thread_max/4);

    ull_bucket = (queue_head_t *)kalloc(sizeof(queue_head_t) * ull_hash_buckets);
    assert(ull_bucket != NULL);

    for (int i = 0; i < ull_hash_buckets; i++) {
        queue_init(&ull_bucket[i]);
    }

    ull_zone = zinit(sizeof(ull_t),
                     thread_max * sizeof(ull_t),
                     0, "ulocks");

    zone_change(ull_zone, Z_NOENCRYPT, TRUE);
}
#if DEVELOPMENT || DEBUG
/* Count the number of hash entries for a given pid.
 * if pid==0, dump the whole table.
 */
static int
ull_hash_dump(pid_t pid)
{
    int count = 0;

    ull_global_lock();
    kprintf("%s>total number of ull_t allocated %d\n", __FUNCTION__, ull_nzalloc);
    kprintf("%s>BEGIN\n", __FUNCTION__);
    for (int i = 0; i < ull_hash_buckets; i++) {
        if (!queue_empty(&ull_bucket[i])) {
            kprintf("%s>index %d:\n", __FUNCTION__, i);
            ull_t *elem;
            qe_foreach_element(elem, &ull_bucket[i], ull_hash_link) {
                if ((pid == 0) || (pid == elem->ull_key.ulk_pid)) {
                    ull_dump(elem);
                    count++;
                }
            }
        }
    }
    kprintf("%s>END\n", __FUNCTION__);
    ull_global_unlock();
    return count;
}
#endif
static ull_t *
ull_alloc(ulk_t *key)
{
    ull_t *ull = (ull_t *)zalloc(ull_zone);

    ull->ull_refcount = 1;
    ull->ull_key = *key;
    ull->ull_saved_key = *key;
    ull->ull_nwaiters = 0;
    ull->ull_max_nwaiters = 0;

    ull->ull_owner = THREAD_NULL;
    ull->ull_promote_token = PROMOTE_TOKEN_INIT;

    lck_mtx_init(&ull->ull_lock, ull_lck_grp, NULL);

    ull_nzalloc++;
    return ull;
}

static void
ull_free(ull_t *ull)
{
    assert(ull->ull_owner == THREAD_NULL);

    LCK_MTX_ASSERT(&ull->ull_lock, LCK_ASSERT_NOTOWNED);

    lck_mtx_destroy(&ull->ull_lock, ull_lck_grp);

    zfree(ull_zone, ull);
}
/* Finds an existing ulock structure (ull_t), or creates a new one.
 * If MUST_EXIST flag is set, returns NULL instead of creating a new one.
 * The ulock structure is returned with ull_lock locked.
 *
 * TODO: Per-bucket lock to reduce contention on global lock
 */
ull_get(ulk_t *key, uint32_t flags)
{
    uint i = ULL_INDEX(key);

    qe_foreach_element(elem, &ull_bucket[i], ull_hash_link) {
        if (ull_key_match(&elem->ull_key, key)) {

    if (flags & ULL_MUST_EXIST) {
        /* Must already exist (called from wake) */

    /* NRG maybe drop the ull_global_lock before the kalloc,
     * then take the lock and check again for a key match
     * and either use the new ull_t or free it.
     */

    ull = ull_alloc(key);

    enqueue(&ull_bucket[i], &ull->ull_hash_link);

    return ull; /* still locked */
/*
 * Must be called with ull_lock held
 */
static void
ull_put(ull_t *ull)
{
    ull_assert_owned(ull);
    int refcount = --ull->ull_refcount;
    assert(refcount == 0 ? (ull->ull_key.ulk_pid == 0 && ull->ull_key.ulk_addr == 0) : 1);

    remqueue(&ull->ull_hash_link);

#if DEVELOPMENT || DEBUG
    kprintf("%s>", __FUNCTION__);
ulock_wait(struct proc *p, struct ulock_wait_args *args, int32_t *retval)
{
    uint opcode = args->operation & UL_OPCODE_MASK;
    uint flags = args->operation & UL_FLAGS_MASK;

    thread_t self = current_thread();
    int id = thread_tid(self);

    /* involved threads - each variable holds +1 ref if not null */
    thread_t owner_thread = THREAD_NULL;
    thread_t old_owner = THREAD_NULL;
    thread_t old_lingering_owner = THREAD_NULL;
    sched_call_t workq_callback = NULL;

    kprintf("[%d]%s>ENTER opcode %d addr %llx value %llx timeout %d flags %x\n", id, __FUNCTION__, opcode, (unsigned long long)(args->addr), args->value, args->timeout, flags);

    if ((flags & ULF_WAIT_MASK) != flags) {

    boolean_t set_owner = FALSE;

    case UL_COMPARE_AND_WAIT:

        kprintf("[%d]%s>EINVAL opcode %d addr 0x%llx flags 0x%x\n",
                id, __FUNCTION__, opcode,
                (unsigned long long)(args->addr), flags);

    /* 32-bit lock type for UL_COMPARE_AND_WAIT and UL_UNFAIR_LOCK */

    if ((args->addr == 0) || (args->addr % _Alignof(_Atomic(typeof(value))))) {

    key.ulk_pid = p->p_pid;
    key.ulk_addr = args->addr;

    if (flags & ULF_WAIT_WORKQ_DATA_CONTENTION) {
        workq_callback = workqueue_get_sched_callback();
        workq_callback = thread_disable_sched_call(self, workq_callback);
    }

    ull_t *ull = ull_get(&key, 0);

    if (ull->ull_nwaiters > ull->ull_max_nwaiters) {
        ull->ull_max_nwaiters = ull->ull_nwaiters;
    }

    if (ull->ull_opcode == 0) {
        ull->ull_opcode = opcode;
    } else if (ull->ull_opcode != opcode) {
    /*
     * We don't want this copyin to get wedged behind VM operations,
     * but we have to read the userspace value under the ull lock for correctness.
     *
     * Until <rdar://problem/24999882> exists,
     * fake it by disabling preemption across copyin, which forces any
     * vm_fault we encounter to fail.
     */
    uint64_t val64; /* copyin_word always zero-extends to 64-bits */

    disable_preemption();
    int copy_ret = copyin_word(args->addr, &val64, sizeof(value));

    value = (uint32_t)val64;

#if DEVELOPMENT || DEBUG
    /* Occasionally simulate copyin finding the user address paged out */
    if (((ull_simulate_copyin_fault == p->p_pid) || (ull_simulate_copyin_fault == 1)) && (copy_ret == 0)) {
        static _Atomic int fault_inject = 0;
        if (__c11_atomic_fetch_add(&fault_inject, 1, __ATOMIC_RELAXED) % 73 == 0) {

    /* copyin() will return an error if the access to the user addr would have faulted,
     * so just return and let the user level code fault it in.
     */
    if (value != args->value) {
        /* Lock value has changed from expected so bail out */

        kprintf("[%d]%s>Lock value %d has changed from expected %d so bail out\n",
                id, __FUNCTION__, value, (uint32_t)(args->value));

    mach_port_name_t owner_name = ulock_owner_value_to_port_name(args->value);
    owner_thread = port_name_to_thread_for_ulock(owner_name);

    /* HACK: don't bail on MACH_PORT_DEAD, to avoid blowing up the no-tsd pthread lock */
    if (owner_name != MACH_PORT_DEAD && owner_thread == THREAD_NULL) {
        /*
         * Translation failed - even though the lock value is up to date,
         * whatever was stored in the lock wasn't actually a thread port.
         */

    /* owner_thread has a +1 reference */

    /*
     * At this point, I know:
     * a) owner_thread is definitely the current owner, because I just read the value
     * b) owner_thread is either:
     *    i) holding the user lock or
     *    ii) has just unlocked the user lock after I looked
     *        and is heading toward the kernel to call ull_wake.
     *        If so, it's going to have to wait for the ull mutex.
     *
     * Therefore, I can promote its priority to match mine, and I can rely on it to
     * come by later to issue the wakeup and lose its promotion.
     */

    old_owner = ull_promote_owner_locked(ull, owner_thread);
    uint32_t timeout = args->timeout;
    thread_set_pending_block_hint(self, kThreadWaitUserLock);

        wr = assert_wait_timeout(ULOCK_TO_EVENT(ull), THREAD_ABORTSAFE, timeout, NSEC_PER_USEC);

        wr = assert_wait(ULOCK_TO_EVENT(ull), THREAD_ABORTSAFE);

    kprintf("[%d]%s>after assert_wait() returned %d\n", id, __FUNCTION__, wr);

    if (set_owner && owner_thread != THREAD_NULL && wr == THREAD_WAITING) {
        wr = thread_handoff(owner_thread);
        /* owner_thread ref is consumed */
        owner_thread = THREAD_NULL;

        /* NRG At some point this should be a continuation based block, so that we can avoid saving the full kernel context. */
        wr = thread_block(NULL);

    kprintf("[%d]%s>thread_block() returned %d\n", id, __FUNCTION__, wr);
    case THREAD_AWAKENED:

    case THREAD_TIMED_OUT:

    case THREAD_INTERRUPTED:

    *retval = --ull->ull_nwaiters;
    if (ull->ull_nwaiters == 0) {
        /*
         * If the wait was canceled early, we might need to
         * clear out the lingering owner reference before
         * freeing the ull.
         */
        if (ull->ull_owner != THREAD_NULL) {
            old_lingering_owner = ull_promote_owner_locked(ull, THREAD_NULL);
        }

        assert(ull->ull_owner == THREAD_NULL);

        ull->ull_key.ulk_pid = 0;
        ull->ull_key.ulk_addr = 0;

        assert(ull->ull_refcount > 0);
    if (owner_thread != THREAD_NULL) {
        thread_deallocate(owner_thread);
    }

    if (old_owner != THREAD_NULL) {
        thread_deallocate(old_owner);
    }

    if (old_lingering_owner != THREAD_NULL) {
        thread_deallocate(old_lingering_owner);
    }

    assert(*retval >= 0);

    if (workq_callback) {
        thread_reenable_sched_call(self, workq_callback);
    }

    if ((flags & ULF_NO_ERRNO) && (ret != 0)) {
ulock_wake(struct proc *p, struct ulock_wake_args *args, __unused int32_t *retval)
{
    uint opcode = args->operation & UL_OPCODE_MASK;
    uint flags = args->operation & UL_FLAGS_MASK;

    int id = thread_tid(current_thread());

    /* involved threads - each variable holds +1 ref if not null */
    thread_t wake_thread = THREAD_NULL;
    thread_t old_owner = THREAD_NULL;

    kprintf("[%d]%s>ENTER opcode %d addr %llx flags %x\n",
            id, __FUNCTION__, opcode, (unsigned long long)(args->addr), flags);

    if ((flags & ULF_WAKE_MASK) != flags) {
#if DEVELOPMENT || DEBUG
    if (opcode == UL_DEBUG_HASH_DUMP_PID) {
        *retval = ull_hash_dump(p->p_pid);

    } else if (opcode == UL_DEBUG_HASH_DUMP_ALL) {
        *retval = ull_hash_dump(0);

    } else if (opcode == UL_DEBUG_SIMULATE_COPYIN_FAULT) {
        ull_simulate_copyin_fault = (int)(args->wake_value);

    }
#endif

    if (args->addr == 0) {

    if (flags & ULF_WAKE_THREAD) {
        if (flags & ULF_WAKE_ALL) {

        mach_port_name_t wake_thread_name = (mach_port_name_t)(args->wake_value);
        wake_thread = port_name_to_thread_for_ulock(wake_thread_name);
        if (wake_thread == THREAD_NULL) {
    key.ulk_pid = p->p_pid;
    key.ulk_addr = args->addr;

    ull_t *ull = ull_get(&key, ULL_MUST_EXIST);

        if (wake_thread != THREAD_NULL) {
            thread_deallocate(wake_thread);
        }

    boolean_t clear_owner = FALSE; /* need to reset owner */

    case UL_COMPARE_AND_WAIT:

        kprintf("[%d]%s>EINVAL opcode %d addr 0x%llx flags 0x%x\n",
                id, __FUNCTION__, opcode, (unsigned long long)(args->addr), flags);

    if (opcode != ull->ull_opcode) {

        kprintf("[%d]%s>EDOM - opcode mismatch - opcode %d addr 0x%llx flags 0x%x\n",
                id, __FUNCTION__, opcode, (unsigned long long)(args->addr), flags);
        assert(ull->ull_owner == THREAD_NULL);

    if (flags & ULF_WAKE_ALL) {
        thread_wakeup(ULOCK_TO_EVENT(ull));
    } else if (flags & ULF_WAKE_THREAD) {
        kern_return_t kr = thread_wakeup_thread(ULOCK_TO_EVENT(ull), wake_thread);
        if (kr != KERN_SUCCESS) {
            assert(kr == KERN_NOT_WAITING);
        }
    } else {
        /*
         * TODO: WAITQ_SELECT_MAX_PRI forces a linear scan of the (hashed) global waitq.
         * Move to a ulock-private, priority sorted waitq (i.e. SYNC_POLICY_FIXED_PRIORITY) to avoid that.
         *
         * TODO: 'owner is not current_thread (or null)' likely means we can avoid this wakeup
         * <rdar://problem/25487001>
         */
        thread_wakeup_one_with_pri(ULOCK_TO_EVENT(ull), WAITQ_SELECT_MAX_PRI);
    }

    /*
     * Reaching this point means I previously moved the lock to 'unowned' state in userspace.
     * Therefore I need to relinquish my promotion.
     *
     * However, someone else could have locked it after I unlocked, and then had a third thread
     * block on the lock, causing a promotion of some other owner.
     *
     * I don't want to stomp over that, so only remove the promotion if I'm the current owner.
     */

    if (ull->ull_owner == current_thread()) {
        old_owner = ull_promote_owner_locked(ull, THREAD_NULL);
    }

    if (wake_thread != THREAD_NULL) {
        thread_deallocate(wake_thread);
    }

    if (old_owner != THREAD_NULL) {
        thread_deallocate(old_owner);
    }

    if ((flags & ULF_NO_ERRNO) && (ret != 0)) {
/*
 * Change ull_owner to be new_owner, and update it with the properties
 * of the current thread.
 *
 * Records the highest current promotion value in ull_promote_token, and applies that
 * to new_owner.
 *
 * Returns +1 ref to the old ull_owner if it is going away.
 */
static thread_t
ull_promote_owner_locked(ull_t *ull,
                         thread_t new_owner)
{
    if (new_owner != THREAD_NULL && ull->ull_owner == new_owner) {
        thread_user_promotion_update(new_owner, current_thread(), &ull->ull_promote_token);
        return THREAD_NULL;
    }

    thread_t old_owner = ull->ull_owner;
    ull->ull_owner = THREAD_NULL;

    if (new_owner != THREAD_NULL) {
        /* The ull_owner field now owns a +1 ref on thread */
        thread_reference(new_owner);
        ull->ull_owner = new_owner;

        thread_user_promotion_add(new_owner, current_thread(), &ull->ull_promote_token);
    } else {
        /* No new owner - clear the saturated promotion value */
        ull->ull_promote_token = PROMOTE_TOKEN_INIT;
    }

    if (old_owner != THREAD_NULL) {
        thread_user_promotion_drop(old_owner);
    }

    /* Return the +1 ref from the ull_owner field */
    return old_owner;
}
void
kdp_ulock_find_owner(__unused struct waitq *waitq, event64_t event, thread_waitinfo_t *waitinfo)
{
    ull_t *ull = EVENT_TO_ULOCK(event);
    assert(kdp_is_in_zone(ull, "ulocks"));

    if (ull->ull_opcode == UL_UNFAIR_LOCK) { // owner is only set if it's an os_unfair_lock
        waitinfo->owner = thread_tid(ull->ull_owner);
        waitinfo->context = ull->ull_key.ulk_addr;
    } else if (ull->ull_opcode == UL_COMPARE_AND_WAIT) { // otherwise, this is a spinlock
        waitinfo->context = ull->ull_key.ulk_addr;
    } else {
        panic("%s: Invalid ulock opcode %d addr %p", __FUNCTION__, ull->ull_opcode, (void*)ull);
    }
}