X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/3903760236c30e3b5ace7a4eefac3a269d68957c..c6bf4f310a33a9262d455ea4d3f0630b1255e3fe:/bsd/kern/sys_ulock.c

diff --git a/bsd/kern/sys_ulock.c b/bsd/kern/sys_ulock.c
index d245fcbcc..dce4c3aec 100644
--- a/bsd/kern/sys_ulock.c
+++ b/bsd/kern/sys_ulock.c
@@ -2,7 +2,7 @@
  * Copyright (c) 2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
+ *
  * This file contains Original Code and/or Modifications of Original Code
  * as defined in and that are subject to the Apple Public Source License
  * Version 2.0 (the 'License'). You may not use this file except in
@@ -11,10 +11,10 @@
  * unlawful or unlicensed copies of an Apple operating system, or to
  * circumvent, violate, or enable the circumvention or violation of, any
  * terms of an Apple operating system software license agreement.
- * 
+ *
  * Please obtain a copy of the License at
  * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
+ *
  * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
@@ -22,10 +22,12 @@
  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  * Please see the License for the specific language governing rights and
  * limitations under the License.
- * 
+ *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 
+#include 
+
 #include 
 #include 
 #include 
@@ -53,13 +55,16 @@
 #include 
 #include 
 #include 
+#include 
 #include 
+#include 
 #include 
 
 #define XNU_TEST_BITMAP
 #include 
 
+#include 
 #include 
 
 /*
@@ -86,25 +91,56 @@
  */
 static lck_grp_t *ull_lck_grp;
-static lck_mtx_t ull_table_lock;
-#define ull_global_lock()       lck_mtx_lock(&ull_table_lock)
-#define ull_global_unlock()     lck_mtx_unlock(&ull_table_lock)
-
-#define ull_lock(ull)           lck_mtx_lock(&ull->ull_lock)
-#define ull_unlock(ull)         lck_mtx_unlock(&ull->ull_lock)
-#define ull_assert_owned(ull)   LCK_MTX_ASSERT(&ull->ull_lock, LCK_MTX_ASSERT_OWNED)
-
-typedef struct __attribute__((packed)) {
-	user_addr_t     ulk_addr;
-	pid_t           ulk_pid;
+typedef lck_spin_t ull_lock_t;
+#define ull_lock_init(ull)      lck_spin_init(&ull->ull_lock, ull_lck_grp, NULL)
+#define ull_lock_destroy(ull)   lck_spin_destroy(&ull->ull_lock, ull_lck_grp)
+#define ull_lock(ull)           lck_spin_lock_grp(&ull->ull_lock, ull_lck_grp)
+#define ull_unlock(ull)         lck_spin_unlock(&ull->ull_lock)
+#define ull_assert_owned(ull)   LCK_SPIN_ASSERT(&ull->ull_lock, LCK_ASSERT_OWNED)
+#define ull_assert_notwned(ull) LCK_SPIN_ASSERT(&ull->ull_lock, LCK_ASSERT_NOTOWNED)
+
+#define ULOCK_TO_EVENT(ull)   ((event_t)ull)
+#define EVENT_TO_ULOCK(event) ((ull_t *)event)
+
+typedef enum {
+	ULK_INVALID = 0,
+	ULK_UADDR,
+	ULK_XPROC,
+} ulk_type;
+
+typedef struct {
+	union {
+		struct __attribute__((packed)) {
+			user_addr_t     ulk_addr;
+			pid_t           ulk_pid;
+		};
+		struct __attribute__((packed)) {
+			uint64_t        ulk_object;
+			uint64_t        ulk_offset;
+		};
+	};
+	ulk_type        ulk_key_type;
 } ulk_t;
 
+#define ULK_UADDR_LEN   (sizeof(user_addr_t) + sizeof(pid_t))
+#define ULK_XPROC_LEN   (sizeof(uint64_t) + sizeof(uint64_t))
+
 inline static bool
 ull_key_match(ulk_t *a, ulk_t *b)
 {
-	return ((a->ulk_pid == b->ulk_pid) &&
-	        (a->ulk_addr == b->ulk_addr));
+	if (a->ulk_key_type != b->ulk_key_type) {
+		return false;
+	}
+
+	if (a->ulk_key_type == ULK_UADDR) {
+		return (a->ulk_pid == b->ulk_pid) &&
+		       (a->ulk_addr == b->ulk_addr);
+	}
+
+	assert(a->ulk_key_type == ULK_XPROC);
+	return (a->ulk_object == b->ulk_object) &&
+	       (a->ulk_offset == b->ulk_offset);
 }
 
 typedef struct ull {
@@ -113,76 +149,85 @@ typedef struct ull {
 	 * i.e. it may be out of date WRT the real value in userspace.
 	 */
 	thread_t        ull_owner; /* holds +1 thread reference */
-	ulk_t           ull_key;
-	ulk_t           ull_saved_key;
-	lck_mtx_t       ull_lock;
-	int32_t         ull_nwaiters;
-	int32_t         ull_max_nwaiters;
-	int32_t         ull_refcount;
-	struct promote_token ull_promote_token;
-	queue_chain_t   ull_hash_link;
-	uint8_t         ull_opcode;
+	ulk_t           ull_key;
+	ull_lock_t      ull_lock;
+	uint            ull_bucket_index;
+	int32_t         ull_nwaiters;
+	int32_t         ull_refcount;
+	uint8_t         ull_opcode;
+	struct turnstile *ull_turnstile;
+	queue_chain_t   ull_hash_link;
 } ull_t;
 
-static const bool ull_debug = false;
-
 extern void ulock_initialize(void);
 
-#define ULL_MUST_EXIST 0x0001
-static ull_t *ull_get(ulk_t *, uint32_t);
+#define ULL_MUST_EXIST 0x0001
 static void ull_put(ull_t *);
-static thread_t ull_promote_owner_locked(ull_t* ull, thread_t thread);
+
+static uint32_t ulock_adaptive_spin_usecs = 20;
+
+SYSCTL_INT(_kern, OID_AUTO, ulock_adaptive_spin_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
+    &ulock_adaptive_spin_usecs, 0, "ulock adaptive spin duration");
 
 #if DEVELOPMENT || DEBUG
 static int ull_simulate_copyin_fault = 0;
-static int ull_panic_on_corruption = 0;
 
 static void
 ull_dump(ull_t *ull)
 {
 	kprintf("ull\t%p\n", ull);
-	kprintf("ull_key.ulk_pid\t%d\n", ull->ull_key.ulk_pid);
-	kprintf("ull_key.ulk_addr\t%p\n", (void *)(ull->ull_key.ulk_addr));
-	kprintf("ull_saved_key.ulk_pid\t%d\n", ull->ull_saved_key.ulk_pid);
-	kprintf("ull_saved_key.ulk_addr\t%p\n", (void *)(ull->ull_saved_key.ulk_addr));
+	switch (ull->ull_key.ulk_key_type) {
+	case ULK_UADDR:
+		kprintf("ull_key.ulk_key_type\tULK_UADDR\n");
+		kprintf("ull_key.ulk_pid\t%d\n", ull->ull_key.ulk_pid);
+		kprintf("ull_key.ulk_addr\t%p\n", (void *)(ull->ull_key.ulk_addr));
+		break;
+	case ULK_XPROC:
+		kprintf("ull_key.ulk_key_type\tULK_XPROC\n");
+		kprintf("ull_key.ulk_object\t%p\n", (void *)(ull->ull_key.ulk_object));
+		kprintf("ull_key.ulk_offset\t%p\n", (void *)(ull->ull_key.ulk_offset));
+		break;
+	default:
+		kprintf("ull_key.ulk_key_type\tUNKNOWN %d\n", ull->ull_key.ulk_key_type);
+		break;
+	}
 	kprintf("ull_nwaiters\t%d\n", ull->ull_nwaiters);
-	kprintf("ull_max_nwaiters\t%d\n", ull->ull_max_nwaiters);
 	kprintf("ull_refcount\t%d\n", ull->ull_refcount);
 	kprintf("ull_opcode\t%d\n\n", ull->ull_opcode);
 	kprintf("ull_owner\t0x%llx\n\n", thread_tid(ull->ull_owner));
-	kprintf("ull_promote_token\t%d, %d\n\n", ull->ull_promote_token.pt_basepri, ull->ull_promote_token.pt_qos);
+	kprintf("ull_turnstile\t%p\n\n", ull->ull_turnstile);
 }
 #endif
 
+typedef struct ull_bucket {
+	queue_head_t ulb_head;
+	lck_spin_t   ulb_lock;
+} ull_bucket_t;
+
 static int ull_hash_buckets;
-static queue_head_t *ull_bucket;
+static ull_bucket_t *ull_bucket;
 static uint32_t ull_nzalloc = 0;
 static zone_t ull_zone;
 
+#define ull_bucket_lock(i)   lck_spin_lock_grp(&ull_bucket[i].ulb_lock, ull_lck_grp)
+#define ull_bucket_unlock(i) lck_spin_unlock(&ull_bucket[i].ulb_lock)
+
 static __inline__ uint32_t
-ull_hash_index(char *key, size_t length)
+ull_hash_index(const void *key, size_t length)
 {
-	uint32_t hash = jenkins_hash(key, length);
+	uint32_t hash = os_hash_jenkins(key, length);
 	hash &= (ull_hash_buckets - 1);
 	return hash;
 }
 
-/* Ensure that the key structure is packed,
- * so that no undefined memory is passed to
- * ull_hash_index()
- */
-static_assert(sizeof(ulk_t) == sizeof(user_addr_t) + sizeof(pid_t));
-
-#define ULL_INDEX(keyp)	ull_hash_index((char *)keyp, sizeof *keyp)
+#define ULL_INDEX(keyp)	ull_hash_index(keyp, keyp->ulk_key_type == ULK_UADDR ? ULK_UADDR_LEN : ULK_XPROC_LEN)
 
 void
 ulock_initialize(void)
 {
 	ull_lck_grp = lck_grp_alloc_init("ulocks", NULL);
-	lck_mtx_init(&ull_table_lock, ull_lck_grp, NULL);
 
 	assert(thread_max > 16);
 	/* Size ull_hash_buckets based on thread_max.
@@ -191,27 +236,22 @@ ulock_initialize(void)
 	ull_hash_buckets = (1 << (bit_ceiling(thread_max) - 2));
 
 	kprintf("%s>thread_max=%d, ull_hash_buckets=%d\n", __FUNCTION__, thread_max, ull_hash_buckets);
-	assert(ull_hash_buckets >= thread_max/4);
+	assert(ull_hash_buckets >= thread_max / 4);
 
-	ull_bucket = (queue_head_t *)kalloc(sizeof(queue_head_t) * ull_hash_buckets);
+	ull_bucket = (ull_bucket_t *)kalloc(sizeof(ull_bucket_t) * ull_hash_buckets);
 	assert(ull_bucket != NULL);
 	for (int i = 0; i < ull_hash_buckets; i++) {
-		queue_init(&ull_bucket[i]);
+		queue_init(&ull_bucket[i].ulb_head);
+		lck_spin_init(&ull_bucket[i].ulb_lock, ull_lck_grp, NULL);
 	}
 
 	ull_zone = zinit(sizeof(ull_t),
-			 thread_max * sizeof(ull_t),
-			 0, "ulocks");
+	    thread_max * sizeof(ull_t),
+	    0, "ulocks");
 
 	zone_change(ull_zone, Z_NOENCRYPT, TRUE);
-
-#if DEVELOPMENT || DEBUG
-	if (!PE_parse_boot_argn("ulock_panic_on_corruption",
-			&ull_panic_on_corruption, sizeof(ull_panic_on_corruption))) {
-		ull_panic_on_corruption = 0;
-	}
-#endif
+	zone_change(ull_zone, Z_CACHING_ENABLED, TRUE);
 }
 
 #if DEVELOPMENT || DEBUG
@@ -222,30 +262,30 @@ static int
 ull_hash_dump(pid_t pid)
 {
 	int count = 0;
-	ull_global_lock();
 	if (pid == 0) {
 		kprintf("%s>total number of ull_t allocated %d\n", __FUNCTION__, ull_nzalloc);
 		kprintf("%s>BEGIN\n", __FUNCTION__);
 	}
 	for (int i = 0; i < ull_hash_buckets; i++) {
-		if (!queue_empty(&ull_bucket[i])) {
+		ull_bucket_lock(i);
+		if (!queue_empty(&ull_bucket[i].ulb_head)) {
 			ull_t *elem;
 			if (pid == 0) {
 				kprintf("%s>index %d:\n", __FUNCTION__, i);
 			}
-			qe_foreach_element(elem, &ull_bucket[i], ull_hash_link) {
-				if ((pid == 0) || (pid == elem->ull_key.ulk_pid)) {
+			qe_foreach_element(elem, &ull_bucket[i].ulb_head, ull_hash_link) {
+				if ((pid == 0) || ((elem->ull_key.ulk_key_type == ULK_UADDR) && (pid == elem->ull_key.ulk_pid))) {
 					ull_dump(elem);
 					count++;
 				}
 			}
 		}
+		ull_bucket_unlock(i);
 	}
 	if (pid == 0) {
 		kprintf("%s>END\n", __FUNCTION__);
 		ull_nzalloc = 0;
 	}
-	ull_global_unlock();
 	return count;
 }
 #endif
@@ -258,15 +298,14 @@ ull_alloc(ulk_t *key)
 	ull->ull_refcount = 1;
 	ull->ull_key = *key;
-	ull->ull_saved_key = *key;
+	ull->ull_bucket_index = ULL_INDEX(key);
 	ull->ull_nwaiters = 0;
-	ull->ull_max_nwaiters = 0;
 	ull->ull_opcode = 0;
 
 	ull->ull_owner = THREAD_NULL;
-	ull->ull_promote_token = PROMOTE_TOKEN_INIT;
+	ull->ull_turnstile = TURNSTILE_NULL;
 
-	lck_mtx_init(&ull->ull_lock, ull_lck_grp, NULL);
+	ull_lock_init(ull);
 
 	ull_nzalloc++;
 	return ull;
@@ -276,10 +315,11 @@ static void
 ull_free(ull_t *ull)
 {
 	assert(ull->ull_owner == THREAD_NULL);
+	assert(ull->ull_turnstile == TURNSTILE_NULL);
 
-	lck_mtx_assert(&ull->ull_lock, LCK_ASSERT_NOTOWNED);
+	ull_assert_notwned(ull);
 
-	lck_mtx_destroy(&ull->ull_lock, ull_lck_grp);
+	ull_lock_destroy(ull);
 
 	zfree(ull_zone, ull);
 }
@@ -287,17 +327,17 @@ ull_free(ull_t *ull)
 
 /* Finds an existing ulock structure (ull_t), or creates a new one.
  * If MUST_EXIST flag is set, returns NULL instead of creating a new one.
  * The ulock structure is returned with ull_lock locked
- *
- * TODO: Per-bucket lock to reduce contention on global lock
  */
 static ull_t *
-ull_get(ulk_t *key, uint32_t flags)
+ull_get(ulk_t *key, uint32_t flags, ull_t **unused_ull)
 {
 	ull_t *ull = NULL;
 	uint i = ULL_INDEX(key);
+	ull_t *new_ull = (flags & ULL_MUST_EXIST) ? NULL : ull_alloc(key);
 	ull_t *elem;
-	ull_global_lock();
-	qe_foreach_element(elem, &ull_bucket[i], ull_hash_link) {
+
+	ull_bucket_lock(i);
+	qe_foreach_element(elem, &ull_bucket[i].ulb_head, ull_hash_link) {
 		ull_lock(elem);
 		if (ull_key_match(&elem->ull_key, key)) {
 			ull = elem;
@@ -309,30 +349,31 @@ ull_get(ulk_t *key, uint32_t flags)
 	if (ull == NULL) {
 		if (flags & ULL_MUST_EXIST) {
 			/* Must already exist (called from wake) */
-			ull_global_unlock();
+			ull_bucket_unlock(i);
+			assert(new_ull == NULL);
+			assert(unused_ull == NULL);
 			return NULL;
 		}
 
-		/* NRG maybe drop the ull_global_lock before the kalloc,
-		 * then take the lock and check again for a key match
-		 * and either use the new ull_t or free it.
-		 */
-
-		ull = ull_alloc(key);
-
-		if (ull == NULL) {
-			ull_global_unlock();
+		if (new_ull == NULL) {
+			/* Alloc above failed */
+			ull_bucket_unlock(i);
 			return NULL;
 		}
 
+		ull = new_ull;
 		ull_lock(ull);
-
-		enqueue(&ull_bucket[i], &ull->ull_hash_link);
+		enqueue(&ull_bucket[i].ulb_head, &ull->ull_hash_link);
+	} else if (!(flags & ULL_MUST_EXIST)) {
+		assert(new_ull);
+		assert(unused_ull);
+		assert(*unused_ull == NULL);
+		*unused_ull = new_ull;
 	}
 
 	ull->ull_refcount++;
 
-	ull_global_unlock();
+	ull_bucket_unlock(i);
 
 	return ull; /* still locked */
 }
@@ -345,86 +386,204 @@ ull_put(ull_t *ull)
 {
 	ull_assert_owned(ull);
 	int refcount = --ull->ull_refcount;
-	assert(refcount == 0 ? (ull->ull_key.ulk_pid == 0 && ull->ull_key.ulk_addr == 0) : 1);
+	assert(refcount == 0 ? (ull->ull_key.ulk_key_type == ULK_INVALID) : 1);
 	ull_unlock(ull);
 
 	if (refcount > 0) {
 		return;
 	}
 
-	ull_global_lock();
+	ull_bucket_lock(ull->ull_bucket_index);
 	remqueue(&ull->ull_hash_link);
-	ull_global_unlock();
+	ull_bucket_unlock(ull->ull_bucket_index);
 
-#if DEVELOPMENT || DEBUG
-	if (ull_debug) {
-		kprintf("%s>", __FUNCTION__);
-		ull_dump(ull);
-	}
-#endif
 	ull_free(ull);
 }
 
+extern kern_return_t vm_map_page_info(vm_map_t map, vm_map_offset_t offset, vm_page_info_flavor_t flavor, vm_page_info_t info, mach_msg_type_number_t *count);
+extern vm_map_t current_map(void);
+extern boolean_t machine_thread_on_core(thread_t thread);
+
+static int
+uaddr_findobj(user_addr_t uaddr, uint64_t *objectp, uint64_t *offsetp)
+{
+	kern_return_t ret;
+	vm_page_info_basic_data_t info;
+	mach_msg_type_number_t count = VM_PAGE_INFO_BASIC_COUNT;
+	ret = vm_map_page_info(current_map(), uaddr, VM_PAGE_INFO_BASIC, (vm_page_info_t)&info, &count);
+	if (ret != KERN_SUCCESS) {
+		return EINVAL;
+	}
+
+	if (objectp != NULL) {
+		*objectp = (uint64_t)info.object_id;
+	}
+	if (offsetp != NULL) {
+		*offsetp = (uint64_t)info.offset;
+	}
+
+	return 0;
+}
+
+static void ulock_wait_continue(void *, wait_result_t);
+static void ulock_wait_cleanup(ull_t *, thread_t, thread_t, int32_t *);
+
+inline static int
+wait_result_to_return_code(wait_result_t wr)
+{
+	int ret = 0;
+
+	switch (wr) {
+	case THREAD_AWAKENED:
+		break;
+	case THREAD_TIMED_OUT:
+		ret = ETIMEDOUT;
+		break;
+	case THREAD_INTERRUPTED:
+	case THREAD_RESTART:
+	default:
+		ret = EINTR;
+		break;
+	}
+
+	return ret;
+}
+
+static int
+ulock_resolve_owner(uint32_t value, thread_t *owner)
+{
+	mach_port_name_t owner_name = ulock_owner_value_to_port_name(value);
+
+	*owner = port_name_to_thread(owner_name,
+	    PORT_TO_THREAD_IN_CURRENT_TASK |
+	    PORT_TO_THREAD_NOT_CURRENT_THREAD);
+	if (*owner == THREAD_NULL) {
+		/*
+		 * Translation failed - even though the lock value is up to date,
+		 * whatever was stored in the lock wasn't actually a thread port.
+		 */
+		return owner_name == MACH_PORT_DEAD ? ESRCH : EOWNERDEAD;
+	}
+	return 0;
+}
+
 int
 ulock_wait(struct proc *p, struct ulock_wait_args *args, int32_t *retval)
 {
 	uint opcode = args->operation & UL_OPCODE_MASK;
 	uint flags = args->operation & UL_FLAGS_MASK;
+
+	if (flags & ULF_WAIT_CANCEL_POINT) {
+		__pthread_testcancel(1);
+	}
+
 	int ret = 0;
 	thread_t self = current_thread();
-	int id = thread_tid(self);
 	ulk_t key;
 
 	/* involved threads - each variable holds +1 ref if not null */
 	thread_t owner_thread = THREAD_NULL;
 	thread_t old_owner = THREAD_NULL;
-	thread_t old_lingering_owner = THREAD_NULL;
-	sched_call_t workq_callback = NULL;
-
-	if (ull_debug) {
-		kprintf("[%d]%s>ENTER opcode %d addr %llx value %llx timeout %d flags %x\n", id, __FUNCTION__, opcode, (unsigned long long)(args->addr), args->value, args->timeout, flags);
-	}
+
+	ull_t *unused_ull = NULL;
 
 	if ((flags & ULF_WAIT_MASK) != flags) {
 		ret = EINVAL;
 		goto munge_retval;
 	}
 
-	boolean_t set_owner = FALSE;
+	bool set_owner = false;
+	bool xproc = false;
+	size_t lock_size = sizeof(uint32_t);
+	int copy_ret;
 
 	switch (opcode) {
 	case UL_UNFAIR_LOCK:
-		set_owner = TRUE;
+		set_owner = true;
 		break;
 	case UL_COMPARE_AND_WAIT:
 		break;
+	case UL_COMPARE_AND_WAIT64:
+		lock_size = sizeof(uint64_t);
+		break;
+	case UL_COMPARE_AND_WAIT_SHARED:
+		xproc = true;
+		break;
+	case UL_COMPARE_AND_WAIT64_SHARED:
+		xproc = true;
+		lock_size = sizeof(uint64_t);
+		break;
 	default:
-		if (ull_debug) {
-			kprintf("[%d]%s>EINVAL opcode %d addr 0x%llx flags 0x%x\n",
-				id, __FUNCTION__, opcode,
-				(unsigned long long)(args->addr), flags);
-		}
 		ret = EINVAL;
 		goto munge_retval;
 	}
 
-	/* 32-bit lock type for UL_COMPARE_AND_WAIT and UL_UNFAIR_LOCK */
-	uint32_t value = 0;
+	uint64_t value = 0;
 
-	if ((args->addr == 0) || (args->addr % _Alignof(_Atomic(typeof(value))))) {
+	if ((args->addr == 0) || (args->addr & (lock_size - 1))) {
 		ret = EINVAL;
 		goto munge_retval;
 	}
 
-	key.ulk_pid = p->p_pid;
-	key.ulk_addr = args->addr;
+	if (xproc) {
+		uint64_t object = 0;
+		uint64_t offset = 0;
 
-	if (flags & ULF_WAIT_WORKQ_DATA_CONTENTION) {
-		workq_callback = workqueue_get_sched_callback();
-		workq_callback = thread_disable_sched_call(self, workq_callback);
+		ret = uaddr_findobj(args->addr, &object, &offset);
+		if (ret) {
+			ret = EINVAL;
+			goto munge_retval;
+		}
+		key.ulk_key_type = ULK_XPROC;
+		key.ulk_object = object;
+		key.ulk_offset = offset;
+	} else {
+		key.ulk_key_type = ULK_UADDR;
+		key.ulk_pid = p->p_pid;
+		key.ulk_addr = args->addr;
+	}
+
+	if ((flags & ULF_WAIT_ADAPTIVE_SPIN) && set_owner) {
+		/*
+		 * Attempt the copyin outside of the lock once,
+		 *
+		 * If it doesn't match (which is common), return right away.
+		 *
+		 * If it matches, resolve the current owner, and if it is on core,
+		 * spin a bit waiting for the value to change. If the owner isn't on
+		 * core, or if the value stays stable, then go on with the regular
+		 * blocking code.
+		 */
+		uint64_t end = 0;
+		uint32_t u32;
+
+		ret = copyin_atomic32(args->addr, &u32);
+		if (ret || u32 != args->value) {
+			goto munge_retval;
+		}
+		for (;;) {
+			if (owner_thread == NULL && ulock_resolve_owner(u32, &owner_thread) != 0) {
+				break;
+			}
+
+			/* owner_thread may have a +1 starting here */
+
+			if (!machine_thread_on_core(owner_thread)) {
+				break;
+			}
+			if (end == 0) {
+				clock_interval_to_deadline(ulock_adaptive_spin_usecs,
+				    NSEC_PER_USEC, &end);
+			} else if (mach_absolute_time() > end) {
+				break;
+			}
+			if (copyin_atomic32_wait_if_equals(args->addr, u32) != 0) {
+				goto munge_retval;
+			}
+		}
 	}
 
-	ull_t *ull = ull_get(&key, 0);
+	ull_t *ull = ull_get(&key, 0, &unused_ull);
 	if (ull == NULL) {
 		ret = ENOMEM;
 		goto munge_retval;
@@ -433,16 +592,11 @@ ulock_wait(struct proc *p, struct ulock_wait_args *args, int32_t *retval)
 
 	ull->ull_nwaiters++;
 
-	if (ull->ull_nwaiters > ull->ull_max_nwaiters) {
-		ull->ull_max_nwaiters = ull->ull_nwaiters;
-	}
-
 	if (ull->ull_opcode == 0) {
 		ull->ull_opcode = opcode;
 	} else if (ull->ull_opcode != opcode) {
-		ull_unlock(ull);
 		ret = EDOM;
-		goto out;
+		goto out_locked;
 	}
 
 	/*
@@ -450,70 +604,54 @@
 	 * but we have to read the userspace value under the ull lock for correctness.
 	 *
 	 * Until  exists,
-	 * fake it by disabling preemption across copyin, which forces any
+	 * holding the ull spinlock across copyin forces any
 	 * vm_fault we encounter to fail.
 	 */
-	uint64_t val64; /* copyin_word always zero-extends to 64-bits */
-	disable_preemption();
-	int copy_ret = copyin_word(args->addr, &val64, sizeof(value));
-	enable_preemption();
+	/* copyin_atomicXX always checks alignment */
 
-	value = (uint32_t)val64;
+	if (lock_size == 4) {
+		uint32_t u32;
+		copy_ret = copyin_atomic32(args->addr, &u32);
+		value = u32;
+	} else {
+		copy_ret = copyin_atomic64(args->addr, &value);
+	}
 
 #if DEVELOPMENT || DEBUG
 	/* Occasionally simulate copyin finding the user address paged out */
 	if (((ull_simulate_copyin_fault == p->p_pid) || (ull_simulate_copyin_fault == 1)) && (copy_ret == 0)) {
 		static _Atomic int fault_inject = 0;
-		if (__c11_atomic_fetch_add(&fault_inject, 1, __ATOMIC_RELAXED) % 73 == 0) {
+		if (os_atomic_inc_orig(&fault_inject, relaxed) % 73 == 0) {
 			copy_ret = EFAULT;
 		}
 	}
 #endif
 	if (copy_ret != 0) {
-		ull_unlock(ull);
-
 		/* copyin() will return an error if the access to the user addr would have faulted,
 		 * so just return and let the user level code fault it in.
 		 */
 		ret = copy_ret;
-		goto out;
+		goto out_locked;
 	}
 
 	if (value != args->value) {
 		/* Lock value has changed from expected so bail out */
-		ull_unlock(ull);
-		if (ull_debug) {
-			kprintf("[%d]%s>Lock value %d has changed from expected %d so bail out\n",
-				id, __FUNCTION__, value, (uint32_t)(args->value));
-		}
-		goto out;
+		goto out_locked;
 	}
 
 	if (set_owner) {
-		mach_port_name_t owner_name = ulock_owner_value_to_port_name(args->value);
-		owner_thread = port_name_to_thread_for_ulock(owner_name);
-
-		/* HACK: don't bail on MACH_PORT_DEAD, to avoid blowing up the no-tsd pthread lock */
-		if (owner_name != MACH_PORT_DEAD && owner_thread == THREAD_NULL) {
-#if DEBUG || DEVELOPMENT
-			if (ull_panic_on_corruption) {
-				if (flags & ULF_NO_ERRNO) {
-					// ULF_NO_ERRNO is used by libplatform ulocks, but not libdispatch ones.
-					// Don't panic on libdispatch ulock corruptions; the userspace likely
-					// mismanaged a dispatch queue.
- panic("ulock_wait: ulock is corrupted; value=0x%x, ull=%p", - (uint32_t)(args->value), ull); - } + if (owner_thread == THREAD_NULL) { + ret = ulock_resolve_owner(args->value, &owner_thread); + if (ret == EOWNERDEAD) { + /* + * Translation failed - even though the lock value is up to date, + * whatever was stored in the lock wasn't actually a thread port. + */ + goto out_locked; } -#endif - /* - * Translation failed - even though the lock value is up to date, - * whatever was stored in the lock wasn't actually a thread port. - */ - ull_unlock(ull); - ret = EOWNERDEAD; - goto out; + /* HACK: don't bail on MACH_PORT_DEAD, to avoid blowing up the no-tsd pthread lock */ + ret = 0; } /* owner_thread has a +1 reference */ @@ -526,53 +664,109 @@ ulock_wait(struct proc *p, struct ulock_wait_args *args, int32_t *retval) * and is heading toward the kernel to call ull_wake. * If so, it's going to have to wait for the ull mutex. * - * Therefore, I can promote its priority to match mine, and I can rely on it to - * come by later to issue the wakeup and lose its promotion. + * Therefore, I can ask the turnstile to promote its priority, and I can rely + * on it to come by later to issue the wakeup and lose its promotion. */ - old_owner = ull_promote_owner_locked(ull, owner_thread); + /* Return the +1 ref from the ull_owner field */ + old_owner = ull->ull_owner; + ull->ull_owner = THREAD_NULL; + + if (owner_thread != THREAD_NULL) { + /* The ull_owner field now owns a +1 ref on owner_thread */ + thread_reference(owner_thread); + ull->ull_owner = owner_thread; + } } wait_result_t wr; uint32_t timeout = args->timeout; + uint64_t deadline = TIMEOUT_WAIT_FOREVER; + wait_interrupt_t interruptible = THREAD_ABORTSAFE; + struct turnstile *ts; + + ts = turnstile_prepare((uintptr_t)ull, &ull->ull_turnstile, + TURNSTILE_NULL, TURNSTILE_ULOCK); + thread_set_pending_block_hint(self, kThreadWaitUserLock); + + if (flags & ULF_WAIT_WORKQ_DATA_CONTENTION) { + interruptible |= THREAD_WAIT_NOREPORT; + } + if (timeout) { - wr = assert_wait_timeout((event_t)ull, THREAD_ABORTSAFE, timeout, NSEC_PER_USEC); - } else { - wr = assert_wait((event_t)ull, THREAD_ABORTSAFE); + clock_interval_to_deadline(timeout, NSEC_PER_USEC, &deadline); } + turnstile_update_inheritor(ts, owner_thread, + (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD)); + + wr = waitq_assert_wait64(&ts->ts_waitq, CAST_EVENT64_T(ULOCK_TO_EVENT(ull)), + interruptible, deadline); + ull_unlock(ull); - if (ull_debug) { - kprintf("[%d]%s>after assert_wait() returned %d\n", id, __FUNCTION__, wr); + if (unused_ull) { + ull_free(unused_ull); + unused_ull = NULL; } - if (set_owner && owner_thread != THREAD_NULL && wr == THREAD_WAITING) { - wr = thread_handoff(owner_thread); - /* owner_thread ref is consumed */ - owner_thread = THREAD_NULL; - } else { - /* NRG At some point this should be a continuation based block, so that we can avoid saving the full kernel context. 
*/ - wr = thread_block(NULL); + turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD); + + if (wr == THREAD_WAITING) { + uthread_t uthread = (uthread_t)get_bsdthread_info(self); + uthread->uu_save.uus_ulock_wait_data.retval = retval; + uthread->uu_save.uus_ulock_wait_data.flags = flags; + uthread->uu_save.uus_ulock_wait_data.owner_thread = owner_thread; + uthread->uu_save.uus_ulock_wait_data.old_owner = old_owner; + if (set_owner && owner_thread != THREAD_NULL) { + thread_handoff_parameter(owner_thread, ulock_wait_continue, ull); + } else { + assert(owner_thread == THREAD_NULL); + thread_block_parameter(ulock_wait_continue, ull); + } + /* NOT REACHED */ } - if (ull_debug) { - kprintf("[%d]%s>thread_block() returned %d\n", id, __FUNCTION__, wr); + + ret = wait_result_to_return_code(wr); + + ull_lock(ull); + turnstile_complete((uintptr_t)ull, &ull->ull_turnstile, NULL, TURNSTILE_ULOCK); + +out_locked: + ulock_wait_cleanup(ull, owner_thread, old_owner, retval); + owner_thread = NULL; + + if (unused_ull) { + ull_free(unused_ull); + unused_ull = NULL; } - switch (wr) { - case THREAD_AWAKENED: - break; - case THREAD_TIMED_OUT: - ret = ETIMEDOUT; - break; - case THREAD_INTERRUPTED: - case THREAD_RESTART: - default: - ret = EINTR; - break; + + assert(*retval >= 0); + +munge_retval: + if (owner_thread) { + thread_deallocate(owner_thread); + } + if (ret == ESTALE) { + ret = 0; + } + if ((flags & ULF_NO_ERRNO) && (ret != 0)) { + *retval = -ret; + ret = 0; } + return ret; +} + +/* + * Must be called with ull_lock held + */ +static void +ulock_wait_cleanup(ull_t *ull, thread_t owner_thread, thread_t old_owner, int32_t *retval) +{ + ull_assert_owned(ull); + + thread_t old_lingering_owner = THREAD_NULL; -out: - ull_lock(ull); *retval = --ull->ull_nwaiters; if (ull->ull_nwaiters == 0) { /* @@ -580,19 +774,18 @@ out: * clear out the lingering owner reference before * freeing the ull. 
 		 */
-		if (ull->ull_owner != THREAD_NULL) {
-			old_lingering_owner = ull_promote_owner_locked(ull, THREAD_NULL);
-		}
+		old_lingering_owner = ull->ull_owner;
+		ull->ull_owner = THREAD_NULL;
 
-		assert(ull->ull_owner == THREAD_NULL);
-
-		ull->ull_key.ulk_pid = 0;
-		ull->ull_key.ulk_addr = 0;
+		memset(&ull->ull_key, 0, sizeof ull->ull_key);
 		ull->ull_refcount--;
 		assert(ull->ull_refcount > 0);
 	}
 
 	ull_put(ull);
 
+	/* Need to be called after dropping the interlock */
+	turnstile_cleanup();
+
 	if (owner_thread != THREAD_NULL) {
 		thread_deallocate(owner_thread);
 	}
@@ -606,17 +799,35 @@ out:
 	}
 
 	assert(*retval >= 0);
+}
 
-munge_retval:
-	if (workq_callback) {
-		thread_reenable_sched_call(self, workq_callback);
-	}
+__attribute__((noreturn))
+static void
+ulock_wait_continue(void * parameter, wait_result_t wr)
+{
+	thread_t self = current_thread();
+	uthread_t uthread = (uthread_t)get_bsdthread_info(self);
+	int ret = 0;
+
+	ull_t *ull = (ull_t *)parameter;
+	int32_t *retval = uthread->uu_save.uus_ulock_wait_data.retval;
+	uint flags = uthread->uu_save.uus_ulock_wait_data.flags;
+	thread_t owner_thread = uthread->uu_save.uus_ulock_wait_data.owner_thread;
+	thread_t old_owner = uthread->uu_save.uus_ulock_wait_data.old_owner;
+
+	ret = wait_result_to_return_code(wr);
+
+	ull_lock(ull);
+	turnstile_complete((uintptr_t)ull, &ull->ull_turnstile, NULL, TURNSTILE_ULOCK);
+
+	ulock_wait_cleanup(ull, owner_thread, old_owner, retval);
 
 	if ((flags & ULF_NO_ERRNO) && (ret != 0)) {
 		*retval = -ret;
 		ret = 0;
 	}
-	return ret;
+
+	unix_syscall_return(ret);
 }
 
 int
@@ -625,22 +836,10 @@ ulock_wake(struct proc *p, struct ulock_wake_args *args, __unused int32_t *retva
 	uint opcode = args->operation & UL_OPCODE_MASK;
 	uint flags = args->operation & UL_FLAGS_MASK;
 	int ret = 0;
-	int id = thread_tid(current_thread());
 	ulk_t key;
 
 	/* involved threads - each variable holds +1 ref if not null */
 	thread_t wake_thread = THREAD_NULL;
-	thread_t old_owner = THREAD_NULL;
-
-	if (ull_debug) {
-		kprintf("[%d]%s>ENTER opcode %d addr %llx flags %x\n",
-			id, __FUNCTION__, opcode, (unsigned long long)(args->addr), flags);
-	}
-
-	if ((flags & ULF_WAKE_MASK) != flags) {
-		ret = EINVAL;
-		goto munge_retval;
-	}
 
 #if DEVELOPMENT || DEBUG
 	if (opcode == UL_DEBUG_HASH_DUMP_PID) {
@@ -655,112 +854,159 @@ ulock_wake(struct proc *p, struct ulock_wake_args *args, __unused int32_t *retva
 	}
 #endif
 
+	bool set_owner = false;
+	bool xproc = false;
+
+	switch (opcode) {
+	case UL_UNFAIR_LOCK:
+		set_owner = true;
+		break;
+	case UL_COMPARE_AND_WAIT:
+	case UL_COMPARE_AND_WAIT64:
+		break;
+	case UL_COMPARE_AND_WAIT_SHARED:
+	case UL_COMPARE_AND_WAIT64_SHARED:
+		xproc = true;
+		break;
+	default:
+		ret = EINVAL;
+		goto munge_retval;
+	}
+
+	if ((flags & ULF_WAKE_MASK) != flags) {
+		ret = EINVAL;
+		goto munge_retval;
+	}
+
+	if ((flags & ULF_WAKE_THREAD) && ((flags & ULF_WAKE_ALL) || set_owner)) {
+		ret = EINVAL;
+		goto munge_retval;
+	}
+
 	if (args->addr == 0) {
 		ret = EINVAL;
 		goto munge_retval;
 	}
 
-	if (flags & ULF_WAKE_THREAD) {
-		if (flags & ULF_WAKE_ALL) {
+	if (xproc) {
+		uint64_t object = 0;
+		uint64_t offset = 0;
+
+		ret = uaddr_findobj(args->addr, &object, &offset);
+		if (ret) {
 			ret = EINVAL;
 			goto munge_retval;
 		}
+		key.ulk_key_type = ULK_XPROC;
+		key.ulk_object = object;
+		key.ulk_offset = offset;
+	} else {
+		key.ulk_key_type = ULK_UADDR;
+		key.ulk_pid = p->p_pid;
+		key.ulk_addr = args->addr;
+	}
+
+	if (flags & ULF_WAKE_THREAD) {
 		mach_port_name_t wake_thread_name = (mach_port_name_t)(args->wake_value);
-		wake_thread = port_name_to_thread_for_ulock(wake_thread_name);
+		wake_thread = port_name_to_thread(wake_thread_name,
+		    PORT_TO_THREAD_IN_CURRENT_TASK |
+		    PORT_TO_THREAD_NOT_CURRENT_THREAD);
 		if (wake_thread == THREAD_NULL) {
 			ret = ESRCH;
 			goto munge_retval;
 		}
 	}
 
-	key.ulk_pid = p->p_pid;
-	key.ulk_addr = args->addr;
+	ull_t *ull = ull_get(&key, ULL_MUST_EXIST, NULL);
+	thread_t new_owner = THREAD_NULL;
+	struct turnstile *ts = TURNSTILE_NULL;
+	thread_t cleanup_thread = THREAD_NULL;
 
-	ull_t *ull = ull_get(&key, ULL_MUST_EXIST);
 	if (ull == NULL) {
-		if (wake_thread != THREAD_NULL) {
-			thread_deallocate(wake_thread);
-		}
 		ret = ENOENT;
 		goto munge_retval;
 	}
 	/* ull is locked */
 
-	boolean_t clear_owner = FALSE; /* need to reset owner */
-
-	switch (opcode) {
-	case UL_UNFAIR_LOCK:
-		clear_owner = TRUE;
-		break;
-	case UL_COMPARE_AND_WAIT:
-		break;
-	default:
-		if (ull_debug) {
-			kprintf("[%d]%s>EINVAL opcode %d addr 0x%llx flags 0x%x\n",
-				id, __FUNCTION__, opcode, (unsigned long long)(args->addr), flags);
-		}
-		ret = EINVAL;
-		goto out_locked;
-	}
-
 	if (opcode != ull->ull_opcode) {
-		if (ull_debug) {
-			kprintf("[%d]%s>EDOM - opcode mismatch - opcode %d addr 0x%llx flags 0x%x\n",
-				id, __FUNCTION__, opcode, (unsigned long long)(args->addr), flags);
-		}
 		ret = EDOM;
-		goto out_locked;
+		goto out_ull_put;
 	}
 
-	if (!clear_owner) {
+	if (set_owner) {
+		if (ull->ull_owner != current_thread()) {
+			/*
+			 * If the current thread isn't the known owner,
+			 * then this wake call was late to the party,
+			 * and the kernel already knows who owns the lock.
+			 *
+			 * This current owner already knows the lock is contended
+			 * and will redrive wakes, just bail out.
+			 */
+			goto out_ull_put;
+		}
+	} else {
 		assert(ull->ull_owner == THREAD_NULL);
 	}
 
-	if (flags & ULF_WAKE_ALL) {
-		thread_wakeup((event_t)ull);
-	} else if (flags & ULF_WAKE_THREAD) {
-		kern_return_t kr = thread_wakeup_thread((event_t)ull, wake_thread);
+	ts = turnstile_prepare((uintptr_t)ull, &ull->ull_turnstile,
+	    TURNSTILE_NULL, TURNSTILE_ULOCK);
+	assert(ts != TURNSTILE_NULL);
+
+	if (flags & ULF_WAKE_THREAD) {
+		kern_return_t kr = waitq_wakeup64_thread(&ts->ts_waitq,
+		    CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
+		    wake_thread, THREAD_AWAKENED);
 		if (kr != KERN_SUCCESS) {
 			assert(kr == KERN_NOT_WAITING);
 			ret = EALREADY;
 		}
-	} else {
+	} else if (flags & ULF_WAKE_ALL) {
+		if (set_owner) {
+			turnstile_update_inheritor(ts, THREAD_NULL,
+			    TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD);
+		}
+		waitq_wakeup64_all(&ts->ts_waitq, CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
+		    THREAD_AWAKENED, 0);
+	} else if (set_owner) {
 		/*
-		 * TODO: WAITQ_SELECT_MAX_PRI forces a linear scan of the (hashed) global waitq.
-		 * Move to a ulock-private, priority sorted waitq to avoid that.
-		 *
-		 * TODO: 'owner is not current_thread (or null)' likely means we can avoid this wakeup
-		 *
+		 * The turnstile waitq is priority ordered,
+		 * and will wake up the highest priority waiter
+		 * and set it as the inheritor for us.
		 */
-		thread_wakeup_one_with_pri((event_t)ull, WAITQ_SELECT_MAX_PRI);
+		new_owner = waitq_wakeup64_identify(&ts->ts_waitq,
+		    CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
+		    THREAD_AWAKENED, WAITQ_PROMOTE_ON_WAKE);
+	} else {
+		waitq_wakeup64_one(&ts->ts_waitq, CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
+		    THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
 	}
 
-	/*
-	 * Reaching this point means I previously moved the lock to 'unowned' state in userspace.
-	 * Therefore I need to relinquish my promotion.
-	 *
-	 * However, someone else could have locked it after I unlocked, and then had a third thread
-	 * block on the lock, causing a promotion of some other owner.
-	 *
-	 * I don't want to stomp over that, so only remove the promotion if I'm the current owner.
-	 */
-
-	if (ull->ull_owner == current_thread()) {
-		old_owner = ull_promote_owner_locked(ull, THREAD_NULL);
+	if (set_owner) {
+		turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
+		cleanup_thread = ull->ull_owner;
+		ull->ull_owner = new_owner;
 	}
 
-out_locked:
+	turnstile_complete((uintptr_t)ull, &ull->ull_turnstile, NULL, TURNSTILE_ULOCK);
+
+out_ull_put:
 	ull_put(ull);
 
-	if (wake_thread != THREAD_NULL) {
-		thread_deallocate(wake_thread);
+	if (ts != TURNSTILE_NULL) {
+		/* Need to be called after dropping the interlock */
+		turnstile_cleanup();
 	}
 
-	if (old_owner != THREAD_NULL) {
-		thread_deallocate(old_owner);
+	if (cleanup_thread != THREAD_NULL) {
+		thread_deallocate(cleanup_thread);
 	}
 
 munge_retval:
+	if (wake_thread != THREAD_NULL) {
+		thread_deallocate(wake_thread);
+	}
+
 	if ((flags & ULF_NO_ERRNO) && (ret != 0)) {
 		*retval = -ret;
 		ret = 0;
@@ -768,43 +1014,28 @@ munge_retval:
 	return ret;
 }
 
-/*
- * Change ull_owner to be new_owner, and update it with the properties
- * of the current thread.
- *
- * Records the highest current promotion value in ull_promote_token, and applies that
- * to any new owner.
- *
- * Returns +1 ref to the old ull_owner if it is going away.
- */
-static thread_t
-ull_promote_owner_locked(ull_t* ull,
-                         thread_t new_owner)
+void
+kdp_ulock_find_owner(__unused struct waitq * waitq, event64_t event, thread_waitinfo_t * waitinfo)
 {
-	if (new_owner != THREAD_NULL && ull->ull_owner == new_owner) {
-		thread_user_promotion_update(new_owner, current_thread(), &ull->ull_promote_token);
-		return THREAD_NULL;
-	}
-
-	thread_t old_owner = ull->ull_owner;
-	ull->ull_owner = THREAD_NULL;
-
-	if (new_owner != THREAD_NULL) {
-		/* The ull_owner field now owns a +1 ref on thread */
-		thread_reference(new_owner);
-		ull->ull_owner = new_owner;
-
-		thread_user_promotion_add(new_owner, current_thread(), &ull->ull_promote_token);
-	} else {
-		/* No new owner - clear the saturated promotion value */
-		ull->ull_promote_token = PROMOTE_TOKEN_INIT;
-	}
+	ull_t *ull = EVENT_TO_ULOCK(event);
+	assert(kdp_is_in_zone(ull, "ulocks"));
 
-	if (old_owner != THREAD_NULL) {
-		thread_user_promotion_drop(old_owner);
+	switch (ull->ull_opcode) {
+	case UL_UNFAIR_LOCK:
+	case UL_UNFAIR_LOCK64_SHARED:
+		waitinfo->owner = thread_tid(ull->ull_owner);
+		waitinfo->context = ull->ull_key.ulk_addr;
+		break;
+	case UL_COMPARE_AND_WAIT:
+	case UL_COMPARE_AND_WAIT64:
+	case UL_COMPARE_AND_WAIT_SHARED:
+	case UL_COMPARE_AND_WAIT64_SHARED:
+		waitinfo->owner = 0;
+		waitinfo->context = ull->ull_key.ulk_addr;
+		break;
+	default:
+		panic("%s: Invalid ulock opcode %d addr %p", __FUNCTION__, ull->ull_opcode, (void*)ull);
+		break;
 	}
-
-	/* Return the +1 ref from the ull_owner field */
-	return old_owner;
+	return;
 }
-
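
For orientation, here is a minimal userspace-style sketch of how the cross-process compare-and-wait path added above (UL_COMPARE_AND_WAIT_SHARED, keyed by VM object/offset via uaddr_findobj) can be driven. This is a sketch only: the __ulock_wait/__ulock_wake wrappers and the opcode/flag values are assumed from XNU's private sys/ulock.h and libsystem_kernel interfaces, and are not defined by this change.

/*
 * Sketch only: exercises the UL_COMPARE_AND_WAIT_SHARED path added above.
 * The syscall wrappers and constants below are assumed from the private
 * sys/ulock.h / libsystem_kernel interface, not defined by this diff.
 */
#include <errno.h>
#include <stdatomic.h>
#include <stdint.h>

extern int __ulock_wait(uint32_t operation, void *addr, uint64_t value, uint32_t timeout_us);
extern int __ulock_wake(uint32_t operation, void *addr, uint64_t wake_value);

#define UL_COMPARE_AND_WAIT_SHARED 3           /* assumed opcode value */
#define ULF_NO_ERRNO               0x01000000  /* assumed flag value */

/* Block until *addr (a 32-bit word in a mapping shared between processes)
 * no longer holds 'expected'. */
static int
shared_wait(_Atomic uint32_t *addr, uint32_t expected)
{
	while (atomic_load_explicit(addr, memory_order_acquire) == expected) {
		int rc = __ulock_wait(UL_COMPARE_AND_WAIT_SHARED | ULF_NO_ERRNO,
		    addr, expected, 0 /* no timeout */);
		if (rc < 0 && rc != -EINTR && rc != -EFAULT) {
			return rc;  /* with ULF_NO_ERRNO, errors come back negated */
		}
	}
	return 0;
}

/* Publish a new value, then wake one waiter in any process sharing the page. */
static void
shared_post(_Atomic uint32_t *addr, uint32_t newval)
{
	atomic_store_explicit(addr, newval, memory_order_release);
	(void)__ulock_wake(UL_COMPARE_AND_WAIT_SHARED | ULF_NO_ERRNO, addr, 0);
}

Because the key for the shared opcodes is derived from the backing VM object and offset rather than (pid, address), the two processes above rendezvous on the same ull_t even though they map the page at different virtual addresses.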