/*
 * Copyright (c) 2015 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/ioctl.h>
#include <sys/file_internal.h>
#include <sys/proc_internal.h>
#include <sys/kernel.h>
#include <sys/guarded.h>

#include <sys/malloc.h>
#include <sys/sysproto.h>
#include <sys/pthread_shims.h>

#include <mach/mach_types.h>

#include <kern/cpu_data.h>
#include <kern/mach_param.h>
#include <kern/kern_types.h>
#include <kern/assert.h>
#include <kern/kalloc.h>
#include <kern/thread.h>
#include <kern/clock.h>
#include <kern/ledger.h>
#include <kern/policy_internal.h>
#include <kern/task.h>
#include <kern/telemetry.h>
#include <kern/waitq.h>
#include <kern/sched_prim.h>
#include <kern/zalloc.h>

#include <pexpert/pexpert.h>

#define XNU_TEST_BITMAP
#include <kern/bits.h>

#include <sys/ulock.h>
/*
 * How ulock promotion works:
 *
 * There's a requested policy field on every thread called 'promotions', which
 * expresses which ulock promotions are happening to this thread.
 * The promotion priority saturates until the promotion count goes to 0.
 *
 * We also track effective promotion qos, which is the qos before clamping.
 * This value is used for promoting a thread that another thread is waiting on,
 * so that the lock owner reinflates to the right priority after unclamping.
 *
 * This also works for non-QoS threads, which can donate base priority to QoS
 * and non-QoS threads alike.
 *
 * ulock wait applies a promotion to the owner communicated through
 * UL_UNFAIR_LOCK as waiters block, and that promotion is saturated as long as
 * there is still an owner. In ulock wake, if the waker is still the owner,
 * then it clears its ownership and drops the boost. It does NOT transfer
 * ownership/priority boost to the new thread. Instead, it selects the
 * waiting thread with the highest base priority to be woken next, and
 * relies on that thread to carry the torch for the other waiting threads.
 */
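/*
 * Illustrative sketch only (not part of this file): roughly how userspace is
 * expected to drive this interface for an unfair lock. The __ulock_wait /
 * __ulock_wake wrappers are the private syscall stubs in libsystem_kernel;
 * the exact lock-word encoding below is an assumption for illustration, not
 * the libplatform implementation.
 *
 *	static _Atomic uint32_t lock_word;	// 0 == unlocked, else the
 *						// owner's mach thread port name
 *	void
 *	lock(uint32_t self_port_name)
 *	{
 *		uint32_t expected = 0;
 *		while (!atomic_compare_exchange_strong(&lock_word, &expected, self_port_name)) {
 *			// Tell the kernel the owner value we observed; it re-reads
 *			// the word under the ull lock, promotes that owner, and blocks us.
 *			__ulock_wait(UL_UNFAIR_LOCK | ULF_NO_ERRNO, &lock_word, expected, 0);
 *			expected = 0;
 *		}
 *	}
 *
 *	void
 *	unlock(void)
 *	{
 *		atomic_store(&lock_word, 0);
 *		// The waker drops its own promotion and wakes the highest
 *		// base-priority waiter, which re-contends for the word.
 *		__ulock_wake(UL_UNFAIR_LOCK | ULF_NO_ERRNO, &lock_word, 0);
 *	}
 */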
static lck_grp_t *ull_lck_grp;
static lck_mtx_t ull_table_lock;

#define ull_global_lock()	lck_mtx_lock(&ull_table_lock)
#define ull_global_unlock()	lck_mtx_unlock(&ull_table_lock)

#define ull_lock(ull)		lck_mtx_lock(&ull->ull_lock)
#define ull_unlock(ull)		lck_mtx_unlock(&ull->ull_lock)
#define ull_assert_owned(ull)	LCK_MTX_ASSERT(&ull->ull_lock, LCK_MTX_ASSERT_OWNED)
typedef struct __attribute__((packed)) {
	user_addr_t	ulk_addr;
	pid_t		ulk_pid;
} ulk_t;

static bool
ull_key_match(ulk_t *a, ulk_t *b)
{
	return ((a->ulk_pid == b->ulk_pid) &&
	        (a->ulk_addr == b->ulk_addr));
}

typedef struct ull {
	/*
	 * ull_owner is the most recent known value for the owner of this ulock
	 * i.e. it may be out of date WRT the real value in userspace.
	 */
	thread_t	ull_owner; /* holds +1 thread reference */
	ulk_t		ull_key;
	ulk_t		ull_saved_key;
	lck_mtx_t	ull_lock;
	int32_t		ull_nwaiters;
	int32_t		ull_max_nwaiters;
	int32_t		ull_refcount;
	struct promote_token	ull_promote_token;
	queue_chain_t	ull_hash_link;
	uint8_t		ull_opcode;
} ull_t;

static const bool ull_debug = false;

extern void ulock_initialize(void);

#define ULL_MUST_EXIST	0x0001
static ull_t *ull_get(ulk_t *, uint32_t);
static void ull_put(ull_t *);

static thread_t ull_promote_owner_locked(ull_t* ull, thread_t thread);
#if DEVELOPMENT || DEBUG
static int ull_simulate_copyin_fault = 0;
static int ull_panic_on_corruption = 0;

static void
ull_dump(ull_t *ull)
{
	kprintf("ull\t%p\n", ull);
	kprintf("ull_key.ulk_pid\t%d\n", ull->ull_key.ulk_pid);
	kprintf("ull_key.ulk_addr\t%p\n", (void *)(ull->ull_key.ulk_addr));
	kprintf("ull_saved_key.ulk_pid\t%d\n", ull->ull_saved_key.ulk_pid);
	kprintf("ull_saved_key.ulk_addr\t%p\n", (void *)(ull->ull_saved_key.ulk_addr));
	kprintf("ull_nwaiters\t%d\n", ull->ull_nwaiters);
	kprintf("ull_max_nwaiters\t%d\n", ull->ull_max_nwaiters);
	kprintf("ull_refcount\t%d\n", ull->ull_refcount);
	kprintf("ull_opcode\t%d\n\n", ull->ull_opcode);
	kprintf("ull_owner\t0x%llx\n\n", thread_tid(ull->ull_owner));
	kprintf("ull_promote_token\t%d, %d\n\n", ull->ull_promote_token.pt_basepri, ull->ull_promote_token.pt_qos);
}
#endif

static int ull_hash_buckets;
static queue_head_t *ull_bucket;
static uint32_t ull_nzalloc = 0;
static zone_t ull_zone;

static __inline__ uint32_t
ull_hash_index(char *key, size_t length)
{
	uint32_t hash = jenkins_hash(key, length);

	hash &= (ull_hash_buckets - 1);

	return hash;
}

/* Ensure that the key structure is packed,
 * so that no undefined memory is passed to
 * ull_hash_index()
 */
static_assert(sizeof(ulk_t) == sizeof(user_addr_t) + sizeof(pid_t));

#define ULL_INDEX(keyp)	ull_hash_index((char *)keyp, sizeof *keyp)
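/*
 * Worked example of why the packing matters (field sizes here are an
 * assumption for illustration): an unpacked { user_addr_t (8 bytes);
 * pid_t (4 bytes) } would be padded to 16 bytes, so ULL_INDEX() would feed
 * 4 bytes of uninitialized padding into jenkins_hash(), and two
 * stack-constructed keys for the same (pid, addr) pair could land in
 * different buckets:
 *
 *	ulk_t key;
 *	key.ulk_pid  = p->p_pid;
 *	key.ulk_addr = args->addr;
 *	uint i = ULL_INDEX(&key);	// well-defined only because ulk_t has no padding
 */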
void
ulock_initialize(void)
{
	ull_lck_grp = lck_grp_alloc_init("ulocks", NULL);
	lck_mtx_init(&ull_table_lock, ull_lck_grp, NULL);

	assert(thread_max > 16);
	/* Size ull_hash_buckets based on thread_max.
	 * Round up to nearest power of 2, then divide by 4
	 */
	ull_hash_buckets = (1 << (bit_ceiling(thread_max) - 2));

	kprintf("%s>thread_max=%d, ull_hash_buckets=%d\n", __FUNCTION__, thread_max, ull_hash_buckets);
	assert(ull_hash_buckets >= thread_max/4);

	ull_bucket = (queue_head_t *)kalloc(sizeof(queue_head_t) * ull_hash_buckets);
	assert(ull_bucket != NULL);

	for (int i = 0; i < ull_hash_buckets; i++) {
		queue_init(&ull_bucket[i]);
	}

	ull_zone = zinit(sizeof(ull_t),
	                 thread_max * sizeof(ull_t),
	                 0, "ulocks");

	zone_change(ull_zone, Z_NOENCRYPT, TRUE);

#if DEVELOPMENT || DEBUG
	if (!PE_parse_boot_argn("ulock_panic_on_corruption",
	        &ull_panic_on_corruption, sizeof(ull_panic_on_corruption))) {
		ull_panic_on_corruption = 0;
	}
#endif
}

#if DEVELOPMENT || DEBUG
/* Count the number of hash entries for a given pid.
 * if pid==0, dump the whole table.
 */
static int
ull_hash_dump(pid_t pid)
{
	int count = 0;
	ull_global_lock();
	if (pid == 0) {
		kprintf("%s>total number of ull_t allocated %d\n", __FUNCTION__, ull_nzalloc);
		kprintf("%s>BEGIN\n", __FUNCTION__);
	}
	for (int i = 0; i < ull_hash_buckets; i++) {
		if (!queue_empty(&ull_bucket[i])) {
			ull_t *elem;
			if (pid == 0) {
				kprintf("%s>index %d:\n", __FUNCTION__, i);
			}
			qe_foreach_element(elem, &ull_bucket[i], ull_hash_link) {
				if ((pid == 0) || (pid == elem->ull_key.ulk_pid)) {
					ull_dump(elem);
					count++;
				}
			}
		}
	}
	if (pid == 0) {
		kprintf("%s>END\n", __FUNCTION__);
		ull_nzalloc = 0;
	}
	ull_global_unlock();
	return count;
}
#endif

static ull_t *
ull_alloc(ulk_t *key)
{
	ull_t *ull = (ull_t *)zalloc(ull_zone);
	assert(ull != NULL);

	ull->ull_refcount = 1;
	ull->ull_key = *key;
	ull->ull_saved_key = *key;
	ull->ull_nwaiters = 0;
	ull->ull_max_nwaiters = 0;
	ull->ull_opcode = 0;

	ull->ull_owner = THREAD_NULL;
	ull->ull_promote_token = PROMOTE_TOKEN_INIT;

	lck_mtx_init(&ull->ull_lock, ull_lck_grp, NULL);

	ull_nzalloc++;
	return ull;
}

static void
ull_free(ull_t *ull)
{
	assert(ull->ull_owner == THREAD_NULL);

	lck_mtx_assert(&ull->ull_lock, LCK_ASSERT_NOTOWNED);

	lck_mtx_destroy(&ull->ull_lock, ull_lck_grp);

	zfree(ull_zone, ull);
}

/* Finds an existing ulock structure (ull_t), or creates a new one.
 * If MUST_EXIST flag is set, returns NULL instead of creating a new one.
 * The ulock structure is returned with ull_lock locked
 *
 * TODO: Per-bucket lock to reduce contention on global lock
 */
static ull_t *
ull_get(ulk_t *key, uint32_t flags)
{
	ull_t *ull = NULL;
	uint i = ULL_INDEX(key);
	ull_t *elem;
	ull_global_lock();
	qe_foreach_element(elem, &ull_bucket[i], ull_hash_link) {
		ull_lock(elem);
		if (ull_key_match(&elem->ull_key, key)) {
			ull = elem;
			break;
		} else {
			ull_unlock(elem);
		}
	}
	if (ull == NULL) {
		if (flags & ULL_MUST_EXIST) {
			/* Must already exist (called from wake) */
			ull_global_unlock();
			return NULL;
		}

		/* NRG maybe drop the ull_global_lock before the kalloc,
		 * then take the lock and check again for a key match
		 * and either use the new ull_t or free it.
		 */

		ull = ull_alloc(key);

		if (ull == NULL) {
			ull_global_unlock();
			return NULL;
		}

		ull_lock(ull);

		enqueue(&ull_bucket[i], &ull->ull_hash_link);
	}

	ull->ull_refcount++;

	ull_global_unlock();

	return ull; /* still locked */
}

/*
 * Must be called with ull_lock held
 */
static void
ull_put(ull_t *ull)
{
	ull_assert_owned(ull);
	int refcount = --ull->ull_refcount;
	assert(refcount == 0 ? (ull->ull_key.ulk_pid == 0 && ull->ull_key.ulk_addr == 0) : 1);
	ull_unlock(ull);

	if (refcount > 0) {
		return;
	}

	ull_global_lock();
	remqueue(&ull->ull_hash_link);
	ull_global_unlock();

#if DEVELOPMENT || DEBUG
	if (ull_debug) {
		kprintf("%s>", __FUNCTION__);
		ull_dump(ull);
	}
#endif
	ull_free(ull);
}

int
ulock_wait(struct proc *p, struct ulock_wait_args *args, int32_t *retval)
{
	uint opcode = args->operation & UL_OPCODE_MASK;
	uint flags = args->operation & UL_FLAGS_MASK;
	int ret = 0;
	thread_t self = current_thread();
	int id = thread_tid(self);
	ulk_t key;

	/* involved threads - each variable holds +1 ref if not null */
	thread_t owner_thread = THREAD_NULL;
	thread_t old_owner = THREAD_NULL;
	thread_t old_lingering_owner = THREAD_NULL;
	sched_call_t workq_callback = NULL;

	if (ull_debug) {
		kprintf("[%d]%s>ENTER opcode %d addr %llx value %llx timeout %d flags %x\n", id, __FUNCTION__, opcode, (unsigned long long)(args->addr), args->value, args->timeout, flags);
	}

	if ((flags & ULF_WAIT_MASK) != flags) {
		ret = EINVAL;
		goto munge_retval;
	}

	boolean_t set_owner = FALSE;

	switch (opcode) {
	case UL_UNFAIR_LOCK:
		set_owner = TRUE;
		break;
	case UL_COMPARE_AND_WAIT:
		break;
	default:
		if (ull_debug) {
			kprintf("[%d]%s>EINVAL opcode %d addr 0x%llx flags 0x%x\n",
			        id, __FUNCTION__, opcode,
			        (unsigned long long)(args->addr), flags);
		}
		ret = EINVAL;
		goto munge_retval;
	}

	/* 32-bit lock type for UL_COMPARE_AND_WAIT and UL_UNFAIR_LOCK */
	uint32_t value = 0;

	if ((args->addr == 0) || (args->addr % _Alignof(_Atomic(typeof(value))))) {
		ret = EINVAL;
		goto munge_retval;
	}

	key.ulk_pid = p->p_pid;
	key.ulk_addr = args->addr;

	if (flags & ULF_WAIT_WORKQ_DATA_CONTENTION) {
		workq_callback = workqueue_get_sched_callback();
		workq_callback = thread_disable_sched_call(self, workq_callback);
	}

	ull_t *ull = ull_get(&key, 0);
	if (ull == NULL) {
		ret = ENOMEM;
		goto munge_retval;
	}
	/* ull is locked */

	ull->ull_nwaiters++;

	if (ull->ull_nwaiters > ull->ull_max_nwaiters) {
		ull->ull_max_nwaiters = ull->ull_nwaiters;
	}

	if (ull->ull_opcode == 0) {
		ull->ull_opcode = opcode;
	} else if (ull->ull_opcode != opcode) {
		ull_unlock(ull);
		ret = EDOM;
		goto out;
	}

	/*
	 * We don't want this copyin to get wedged behind VM operations,
	 * but we have to read the userspace value under the ull lock for correctness.
	 *
	 * Until <rdar://problem/24999882> exists,
	 * fake it by disabling preemption across copyin, which forces any
	 * vm_fault we encounter to fail.
	 */
	uint64_t val64; /* copyin_word always zero-extends to 64-bits */

	disable_preemption();
	int copy_ret = copyin_word(args->addr, &val64, sizeof(value));
	enable_preemption();

	value = (uint32_t)val64;

#if DEVELOPMENT || DEBUG
	/* Occasionally simulate copyin finding the user address paged out */
	if (((ull_simulate_copyin_fault == p->p_pid) || (ull_simulate_copyin_fault == 1)) && (copy_ret == 0)) {
		static _Atomic int fault_inject = 0;
		if (__c11_atomic_fetch_add(&fault_inject, 1, __ATOMIC_RELAXED) % 73 == 0) {
			copy_ret = EFAULT;
		}
	}
#endif
	if (copy_ret != 0) {
		ull_unlock(ull);

		/* copyin() will return an error if the access to the user addr would have faulted,
		 * so just return and let the user level code fault it in.
		 */
		ret = copy_ret;
		goto out;
	}

	if (value != args->value) {
		/* Lock value has changed from expected so bail out */
		ull_unlock(ull);
		if (ull_debug) {
			kprintf("[%d]%s>Lock value %d has changed from expected %d so bail out\n",
			        id, __FUNCTION__, value, (uint32_t)(args->value));
		}
		goto out;
	}

	if (set_owner) {
		mach_port_name_t owner_name = ulock_owner_value_to_port_name(args->value);
		owner_thread = port_name_to_thread_for_ulock(owner_name);

		/* HACK: don't bail on MACH_PORT_DEAD, to avoid blowing up the no-tsd pthread lock */
		if (owner_name != MACH_PORT_DEAD && owner_thread == THREAD_NULL) {
#if DEBUG || DEVELOPMENT
			if (ull_panic_on_corruption) {
				if (flags & ULF_NO_ERRNO) {
					// ULF_NO_ERRNO is used by libplatform ulocks, but not libdispatch ones.
					// Don't panic on libdispatch ulock corruptions; the userspace likely
					// mismanaged a dispatch queue.
					panic("ulock_wait: ulock is corrupted; value=0x%x, ull=%p",
					      (uint32_t)(args->value), ull);
				}
			}
#endif
			/*
			 * Translation failed - even though the lock value is up to date,
			 * whatever was stored in the lock wasn't actually a thread port.
			 */
			ull_unlock(ull);
			ret = EOWNERDEAD;
			goto out;
		}
		/* owner_thread has a +1 reference */

		/*
		 * At this point, I know:
		 * a) owner_thread is definitely the current owner, because I just read the value
		 * b) owner_thread is either:
		 *    i) holding the user lock or
		 *    ii) has just unlocked the user lock after I looked
		 *        and is heading toward the kernel to call ull_wake.
		 *        If so, it's going to have to wait for the ull mutex.
		 *
		 * Therefore, I can promote its priority to match mine, and I can rely on it to
		 * come by later to issue the wakeup and lose its promotion.
		 */

		old_owner = ull_promote_owner_locked(ull, owner_thread);
	}

	wait_result_t wr;
	uint32_t timeout = args->timeout;
	if (timeout) {
		wr = assert_wait_timeout((event_t)ull, THREAD_ABORTSAFE, timeout, NSEC_PER_USEC);
	} else {
		wr = assert_wait((event_t)ull, THREAD_ABORTSAFE);
	}

	ull_unlock(ull);

	if (ull_debug) {
		kprintf("[%d]%s>after assert_wait() returned %d\n", id, __FUNCTION__, wr);
	}

	if (set_owner && owner_thread != THREAD_NULL && wr == THREAD_WAITING) {
		wr = thread_handoff(owner_thread);
		/* owner_thread ref is consumed */
		owner_thread = THREAD_NULL;
	} else {
		/* NRG At some point this should be a continuation based block, so that we can avoid saving the full kernel context. */
		wr = thread_block(NULL);
	}
	if (ull_debug) {
		kprintf("[%d]%s>thread_block() returned %d\n", id, __FUNCTION__, wr);
	}
	switch (wr) {
	case THREAD_AWAKENED:
		break;
	case THREAD_TIMED_OUT:
		ret = ETIMEDOUT;
		break;
	case THREAD_INTERRUPTED:
	default:
		ret = EINTR;
		break;
	}

out:
	ull_lock(ull);
	*retval = --ull->ull_nwaiters;
	if (ull->ull_nwaiters == 0) {
		/*
		 * If the wait was canceled early, we might need to
		 * clear out the lingering owner reference before
		 * freeing the ull.
		 */
		if (ull->ull_owner != THREAD_NULL) {
			old_lingering_owner = ull_promote_owner_locked(ull, THREAD_NULL);
		}

		assert(ull->ull_owner == THREAD_NULL);

		ull->ull_key.ulk_pid = 0;
		ull->ull_key.ulk_addr = 0;
		ull->ull_refcount--;
		assert(ull->ull_refcount > 0);
	}
	ull_put(ull);

	if (owner_thread != THREAD_NULL) {
		thread_deallocate(owner_thread);
	}

	if (old_owner != THREAD_NULL) {
		thread_deallocate(old_owner);
	}

	if (old_lingering_owner != THREAD_NULL) {
		thread_deallocate(old_lingering_owner);
	}

	assert(*retval >= 0);

munge_retval:
	if (workq_callback) {
		thread_reenable_sched_call(self, workq_callback);
	}

	if ((flags & ULF_NO_ERRNO) && (ret != 0)) {
		*retval = -ret;
		ret = 0;
	}
	return ret;
}

int
ulock_wake(struct proc *p, struct ulock_wake_args *args, __unused int32_t *retval)
{
	uint opcode = args->operation & UL_OPCODE_MASK;
	uint flags = args->operation & UL_FLAGS_MASK;
	int ret = 0;
	int id = thread_tid(current_thread());
	ulk_t key;

	/* involved threads - each variable holds +1 ref if not null */
	thread_t wake_thread = THREAD_NULL;
	thread_t old_owner = THREAD_NULL;

	if (ull_debug) {
		kprintf("[%d]%s>ENTER opcode %d addr %llx flags %x\n",
		        id, __FUNCTION__, opcode, (unsigned long long)(args->addr), flags);
	}

	if ((flags & ULF_WAKE_MASK) != flags) {
		ret = EINVAL;
		goto munge_retval;
	}

#if DEVELOPMENT || DEBUG
	if (opcode == UL_DEBUG_HASH_DUMP_PID) {
		*retval = ull_hash_dump(p->p_pid);
		return ret;
	} else if (opcode == UL_DEBUG_HASH_DUMP_ALL) {
		*retval = ull_hash_dump(0);
		return ret;
	} else if (opcode == UL_DEBUG_SIMULATE_COPYIN_FAULT) {
		ull_simulate_copyin_fault = (int)(args->wake_value);
		return ret;
	}
#endif

	if (args->addr == 0) {
		ret = EINVAL;
		goto munge_retval;
	}

	if (flags & ULF_WAKE_THREAD) {
		if (flags & ULF_WAKE_ALL) {
			ret = EINVAL;
			goto munge_retval;
		}
		mach_port_name_t wake_thread_name = (mach_port_name_t)(args->wake_value);
		wake_thread = port_name_to_thread_for_ulock(wake_thread_name);
		if (wake_thread == THREAD_NULL) {
			ret = ESRCH;
			goto munge_retval;
		}
	}

	key.ulk_pid = p->p_pid;
	key.ulk_addr = args->addr;

	ull_t *ull = ull_get(&key, ULL_MUST_EXIST);
	if (ull == NULL) {
		if (wake_thread != THREAD_NULL) {
			thread_deallocate(wake_thread);
		}
		ret = ENOENT;
		goto munge_retval;
	}
	/* ull is locked */

	boolean_t clear_owner = FALSE; /* need to reset owner */

	switch (opcode) {
	case UL_UNFAIR_LOCK:
		clear_owner = TRUE;
		break;
	case UL_COMPARE_AND_WAIT:
		break;
	default:
		if (ull_debug) {
			kprintf("[%d]%s>EINVAL opcode %d addr 0x%llx flags 0x%x\n",
			        id, __FUNCTION__, opcode, (unsigned long long)(args->addr), flags);
		}
		ret = EINVAL;
		goto out_locked;
	}

	if (opcode != ull->ull_opcode) {
		if (ull_debug) {
			kprintf("[%d]%s>EDOM - opcode mismatch - opcode %d addr 0x%llx flags 0x%x\n",
			        id, __FUNCTION__, opcode, (unsigned long long)(args->addr), flags);
		}
		ret = EDOM;
		goto out_locked;
	}

	if (!clear_owner) {
		assert(ull->ull_owner == THREAD_NULL);
	}

	if (flags & ULF_WAKE_ALL) {
		thread_wakeup((event_t)ull);
	} else if (flags & ULF_WAKE_THREAD) {
		kern_return_t kr = thread_wakeup_thread((event_t)ull, wake_thread);
		if (kr != KERN_SUCCESS) {
			assert(kr == KERN_NOT_WAITING);
			ret = EALREADY;
		}
	} else {
		/*
		 * TODO: WAITQ_SELECT_MAX_PRI forces a linear scan of the (hashed) global waitq.
		 * Move to a ulock-private, priority sorted waitq to avoid that.
		 *
		 * TODO: 'owner is not current_thread (or null)' likely means we can avoid this wakeup
		 * <rdar://problem/25487001>
		 */
		thread_wakeup_one_with_pri((event_t)ull, WAITQ_SELECT_MAX_PRI);
	}

	/*
	 * Reaching this point means I previously moved the lock to 'unowned' state in userspace.
	 * Therefore I need to relinquish my promotion.
	 *
	 * However, someone else could have locked it after I unlocked, and then had a third thread
	 * block on the lock, causing a promotion of some other owner.
	 *
	 * I don't want to stomp over that, so only remove the promotion if I'm the current owner.
	 */

	if (ull->ull_owner == current_thread()) {
		old_owner = ull_promote_owner_locked(ull, THREAD_NULL);
	}

out_locked:
	ull_put(ull);

	if (wake_thread != THREAD_NULL) {
		thread_deallocate(wake_thread);
	}

	if (old_owner != THREAD_NULL) {
		thread_deallocate(old_owner);
	}

munge_retval:
	if ((flags & ULF_NO_ERRNO) && (ret != 0)) {
		*retval = -ret;
		ret = 0;
	}
	return ret;
}

/*
 * Change ull_owner to be new_owner, and update it with the properties
 * of the current thread.
 *
 * Records the highest current promotion value in ull_promote_token, and applies that
 * to any new owner.
 *
 * Returns +1 ref to the old ull_owner if it is going away.
 */
static thread_t
ull_promote_owner_locked(ull_t* ull, thread_t new_owner)
{
	if (new_owner != THREAD_NULL && ull->ull_owner == new_owner) {
		thread_user_promotion_update(new_owner, current_thread(), &ull->ull_promote_token);
		return THREAD_NULL;
	}

	thread_t old_owner = ull->ull_owner;
	ull->ull_owner = THREAD_NULL;

	if (new_owner != THREAD_NULL) {
		/* The ull_owner field now owns a +1 ref on thread */
		thread_reference(new_owner);
		ull->ull_owner = new_owner;

		thread_user_promotion_add(new_owner, current_thread(), &ull->ull_promote_token);
	} else {
		/* No new owner - clear the saturated promotion value */
		ull->ull_promote_token = PROMOTE_TOKEN_INIT;
	}

	if (old_owner != THREAD_NULL) {
		thread_user_promotion_drop(old_owner);
	}

	/* Return the +1 ref from the ull_owner field */
	return old_owner;
}