X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/d9a64523371fa019c4575bb400cbbc3a50ac9903..c6bf4f310a33a9262d455ea4d3f0630b1255e3fe:/bsd/kern/sys_ulock.c

diff --git a/bsd/kern/sys_ulock.c b/bsd/kern/sys_ulock.c
index 0d8664c7e..dce4c3aec 100644
--- a/bsd/kern/sys_ulock.c
+++ b/bsd/kern/sys_ulock.c
@@ -2,7 +2,7 @@
  * Copyright (c) 2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
+ *
  * This file contains Original Code and/or Modifications of Original Code
  * as defined in and that are subject to the Apple Public Source License
  * Version 2.0 (the 'License'). You may not use this file except in
@@ -11,10 +11,10 @@
  * unlawful or unlicensed copies of an Apple operating system, or to
  * circumvent, violate, or enable the circumvention or violation of, any
  * terms of an Apple operating system software license agreement.
- * 
+ *
  * Please obtain a copy of the License at
  * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
+ *
  * The Original Code and all software distributed under the License are
  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
@@ -22,10 +22,12 @@
  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  * Please see the License for the specific language governing rights and
  * limitations under the License.
- * 
+ *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 
+#include
+
 #include
 #include
 #include
@@ -62,6 +64,7 @@
 #define XNU_TEST_BITMAP
 #include <kern/bits.h>
 
+#include <os/hash.h>
 #include <sys/ulock.h>
 
 /*
@@ -92,7 +95,7 @@ static lck_grp_t *ull_lck_grp;
 typedef lck_spin_t ull_lock_t;
 #define ull_lock_init(ull)      lck_spin_init(&ull->ull_lock, ull_lck_grp, NULL)
 #define ull_lock_destroy(ull)   lck_spin_destroy(&ull->ull_lock, ull_lck_grp)
-#define ull_lock(ull)           lck_spin_lock(&ull->ull_lock)
+#define ull_lock(ull)           lck_spin_lock_grp(&ull->ull_lock, ull_lck_grp)
 #define ull_unlock(ull)         lck_spin_unlock(&ull->ull_lock)
 #define ull_assert_owned(ull)   LCK_SPIN_ASSERT(&ull->ull_lock, LCK_ASSERT_OWNED)
 #define ull_assert_notwned(ull) LCK_SPIN_ASSERT(&ull->ull_lock, LCK_ASSERT_NOTOWNED)
@@ -100,16 +103,44 @@ typedef lck_spin_t ull_lock_t;
 #define ULOCK_TO_EVENT(ull)   ((event_t)ull)
 #define EVENT_TO_ULOCK(event) ((ull_t *)event)
 
-typedef struct __attribute__((packed)) {
-    user_addr_t ulk_addr;
-    pid_t       ulk_pid;
+typedef enum {
+    ULK_INVALID = 0,
+    ULK_UADDR,
+    ULK_XPROC,
+} ulk_type;
+
+typedef struct {
+    union {
+        struct __attribute__((packed)) {
+            user_addr_t ulk_addr;
+            pid_t       ulk_pid;
+        };
+        struct __attribute__((packed)) {
+            uint64_t    ulk_object;
+            uint64_t    ulk_offset;
+        };
+    };
+    ulk_type ulk_key_type;
 } ulk_t;
 
+#define ULK_UADDR_LEN   (sizeof(user_addr_t) + sizeof(pid_t))
+#define ULK_XPROC_LEN   (sizeof(uint64_t) + sizeof(uint64_t))
+
 inline static bool
 ull_key_match(ulk_t *a, ulk_t *b)
 {
-    return ((a->ulk_pid == b->ulk_pid) &&
-        (a->ulk_addr == b->ulk_addr));
+    if (a->ulk_key_type != b->ulk_key_type) {
+        return false;
+    }
+
+    if (a->ulk_key_type == ULK_UADDR) {
+        return (a->ulk_pid == b->ulk_pid) &&
+               (a->ulk_addr == b->ulk_addr);
+    }
+
+    assert(a->ulk_key_type == ULK_XPROC);
+    return (a->ulk_object == b->ulk_object) &&
+           (a->ulk_offset == b->ulk_offset);
 }
 
 typedef struct ull {
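The union above means differently-typed keys can overlap in their leading bytes; the ulk_key_type tag is what keeps them apart. A minimal user-space model of that behavior, with stdint stand-ins for the kernel's user_addr_t and pid_t (names and values here are illustrative only):

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    typedef enum { ULK_INVALID = 0, ULK_UADDR, ULK_XPROC } ulk_type;

    typedef struct {
        union {
            struct __attribute__((packed)) {
                uint64_t ulk_addr;   /* stands in for user_addr_t */
                int32_t  ulk_pid;    /* stands in for pid_t */
            };
            struct __attribute__((packed)) {
                uint64_t ulk_object;
                uint64_t ulk_offset;
            };
        };
        ulk_type ulk_key_type;
    } ulk_t;

    static bool
    ull_key_match(const ulk_t *a, const ulk_t *b)
    {
        if (a->ulk_key_type != b->ulk_key_type) {
            return false;           /* tag check short-circuits everything else */
        }
        if (a->ulk_key_type == ULK_UADDR) {
            return a->ulk_pid == b->ulk_pid && a->ulk_addr == b->ulk_addr;
        }
        assert(a->ulk_key_type == ULK_XPROC);
        return a->ulk_object == b->ulk_object && a->ulk_offset == b->ulk_offset;
    }

    int
    main(void)
    {
        ulk_t a = { .ulk_addr = 0x1000, .ulk_pid = 42 };
        ulk_t b = { .ulk_object = 0x1000, .ulk_offset = 0 };
        a.ulk_key_type = ULK_UADDR;
        b.ulk_key_type = ULK_XPROC;
        /* leading bytes may collide, but the tag keeps the keys distinct */
        assert(!ull_key_match(&a, &b));
        return 0;
    }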
@@ -118,24 +149,26 @@ typedef struct ull {
 	 * i.e. it may be out of date WRT the real value in userspace.
 	 */
 	thread_t	ull_owner; /* holds +1 thread reference */
-	ulk_t		ull_key;
-	ulk_t		ull_saved_key;
-	ull_lock_t	ull_lock;
-	uint		ull_bucket_index;
-	int32_t		ull_nwaiters;
-	int32_t		ull_max_nwaiters;
-	int32_t		ull_refcount;
-	uint8_t		ull_opcode;
+	ulk_t		ull_key;
+	ull_lock_t	ull_lock;
+	uint		ull_bucket_index;
+	int32_t		ull_nwaiters;
+	int32_t		ull_refcount;
+	uint8_t		ull_opcode;
 	struct turnstile *ull_turnstile;
-	queue_chain_t	ull_hash_link;
+	queue_chain_t	ull_hash_link;
 } ull_t;
 
 extern void ulock_initialize(void);
 
-#define ULL_MUST_EXIST 0x0001
-static ull_t *ull_get(ulk_t *, uint32_t, ull_t **);
+#define ULL_MUST_EXIST 0x0001
 static void ull_put(ull_t *);
 
+static uint32_t ulock_adaptive_spin_usecs = 20;
+
+SYSCTL_INT(_kern, OID_AUTO, ulock_adaptive_spin_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
+    &ulock_adaptive_spin_usecs, 0, "ulock adaptive spin duration");
+
 #if DEVELOPMENT || DEBUG
 static int ull_simulate_copyin_fault = 0;
 
@@ -143,12 +176,22 @@ static void
 ull_dump(ull_t *ull)
 {
 	kprintf("ull\t%p\n", ull);
-	kprintf("ull_key.ulk_pid\t%d\n", ull->ull_key.ulk_pid);
-	kprintf("ull_key.ulk_addr\t%p\n", (void *)(ull->ull_key.ulk_addr));
-	kprintf("ull_saved_key.ulk_pid\t%d\n", ull->ull_saved_key.ulk_pid);
-	kprintf("ull_saved_key.ulk_addr\t%p\n", (void *)(ull->ull_saved_key.ulk_addr));
+	switch (ull->ull_key.ulk_key_type) {
+	case ULK_UADDR:
+		kprintf("ull_key.ulk_key_type\tULK_UADDR\n");
+		kprintf("ull_key.ulk_pid\t%d\n", ull->ull_key.ulk_pid);
+		kprintf("ull_key.ulk_addr\t%p\n", (void *)(ull->ull_key.ulk_addr));
+		break;
+	case ULK_XPROC:
+		kprintf("ull_key.ulk_key_type\tULK_XPROC\n");
+		kprintf("ull_key.ulk_object\t%p\n", (void *)(ull->ull_key.ulk_object));
+		kprintf("ull_key.ulk_offset\t%p\n", (void *)(ull->ull_key.ulk_offset));
+		break;
+	default:
+		kprintf("ull_key.ulk_key_type\tUNKNOWN %d\n", ull->ull_key.ulk_key_type);
+		break;
+	}
 	kprintf("ull_nwaiters\t%d\n", ull->ull_nwaiters);
-	kprintf("ull_max_nwaiters\t%d\n", ull->ull_max_nwaiters);
 	kprintf("ull_refcount\t%d\n", ull->ull_refcount);
 	kprintf("ull_opcode\t%d\n\n", ull->ull_opcode);
 	kprintf("ull_owner\t0x%llx\n\n", thread_tid(ull->ull_owner));
@@ -166,26 +209,20 @@ static ull_bucket_t *ull_bucket;
 static uint32_t ull_nzalloc = 0;
 static zone_t ull_zone;
 
-#define ull_bucket_lock(i)   lck_spin_lock(&ull_bucket[i].ulb_lock)
+#define ull_bucket_lock(i)   lck_spin_lock_grp(&ull_bucket[i].ulb_lock, ull_lck_grp)
 #define ull_bucket_unlock(i) lck_spin_unlock(&ull_bucket[i].ulb_lock)
 
 static __inline__ uint32_t
-ull_hash_index(char *key, size_t length)
+ull_hash_index(const void *key, size_t length)
 {
-	uint32_t hash = jenkins_hash(key, length);
+	uint32_t hash = os_hash_jenkins(key, length);
 	hash &= (ull_hash_buckets - 1);
 	return hash;
 }
 
-/* Ensure that the key structure is packed,
- * so that no undefined memory is passed to
- * ull_hash_index()
- */
-static_assert(sizeof(ulk_t) == sizeof(user_addr_t) + sizeof(pid_t));
-
-#define ULL_INDEX(keyp) ull_hash_index((char *)keyp, sizeof *keyp)
+#define ULL_INDEX(keyp) ull_hash_index(keyp, keyp->ulk_key_type == ULK_UADDR ? ULK_UADDR_LEN : ULK_XPROC_LEN)
 
 void
 ulock_initialize(void)
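ULL_INDEX now hashes only the meaningful prefix of the key (ULK_UADDR_LEN or ULK_XPROC_LEN bytes), so the unused tail of the union and the tag never feed the hash, and the result is masked into a power-of-two bucket count. A sketch of the masking idea, using a toy FNV-1a in place of os_hash_jenkins (the hash function choice here is illustrative, not the kernel's):

    #include <stddef.h>
    #include <stdint.h>

    /* Toy stand-in for os_hash_jenkins(); only the masking logic matters. */
    static uint32_t
    toy_hash(const void *key, size_t length)
    {
        const uint8_t *p = key;
        uint32_t h = 2166136261u;
        for (size_t i = 0; i < length; i++) {
            h = (h ^ p[i]) * 16777619u;
        }
        return h;
    }

    static uint32_t nbuckets = 1024;    /* must stay a power of two... */

    static uint32_t
    bucket_index(const void *key, size_t length)
    {
        /* ...because `& (n - 1)` only equals `% n` when n is a power of two */
        return toy_hash(key, length) & (nbuckets - 1);
    }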
@@ -199,7 +236,7 @@ ulock_initialize(void)
 	ull_hash_buckets = (1 << (bit_ceiling(thread_max) - 2));
 
 	kprintf("%s>thread_max=%d, ull_hash_buckets=%d\n", __FUNCTION__, thread_max, ull_hash_buckets);
-	assert(ull_hash_buckets >= thread_max/4);
+	assert(ull_hash_buckets >= thread_max / 4);
 
 	ull_bucket = (ull_bucket_t *)kalloc(sizeof(ull_bucket_t) * ull_hash_buckets);
 	assert(ull_bucket != NULL);
@@ -210,10 +247,11 @@ ulock_initialize(void)
 	}
 
 	ull_zone = zinit(sizeof(ull_t),
-			 thread_max * sizeof(ull_t),
-			 0, "ulocks");
+	    thread_max * sizeof(ull_t),
+	    0, "ulocks");
 
 	zone_change(ull_zone, Z_NOENCRYPT, TRUE);
+	zone_change(ull_zone, Z_CACHING_ENABLED, TRUE);
 }
 
 #if DEVELOPMENT || DEBUG
@@ -236,7 +274,7 @@ ull_hash_dump(pid_t pid)
 			kprintf("%s>index %d:\n", __FUNCTION__, i);
 		}
 		qe_foreach_element(elem, &ull_bucket[i].ulb_head, ull_hash_link) {
-			if ((pid == 0) || (pid == elem->ull_key.ulk_pid)) {
+			if ((pid == 0) || ((elem->ull_key.ulk_key_type == ULK_UADDR) && (pid == elem->ull_key.ulk_pid))) {
				ull_dump(elem);
				count++;
			}
@@ -260,10 +298,8 @@ ull_alloc(ulk_t *key)
 	ull->ull_refcount = 1;
 	ull->ull_key = *key;
-	ull->ull_saved_key = *key;
 	ull->ull_bucket_index = ULL_INDEX(key);
 	ull->ull_nwaiters = 0;
-	ull->ull_max_nwaiters = 0;
 	ull->ull_opcode = 0;
 
 	ull->ull_owner = THREAD_NULL;
@@ -350,7 +386,7 @@ ull_put(ull_t *ull)
 {
 	ull_assert_owned(ull);
 	int refcount = --ull->ull_refcount;
-	assert(refcount == 0 ? (ull->ull_key.ulk_pid == 0 && ull->ull_key.ulk_addr == 0) : 1);
+	assert(refcount == 0 ? (ull->ull_key.ulk_key_type == ULK_INVALID) : 1);
 	ull_unlock(ull);
 
 	if (refcount > 0) {
@@ -364,6 +400,31 @@ ull_put(ull_t *ull)
 	ull_free(ull);
 }
 
+extern kern_return_t vm_map_page_info(vm_map_t map, vm_map_offset_t offset, vm_page_info_flavor_t flavor, vm_page_info_t info, mach_msg_type_number_t *count);
+extern vm_map_t current_map(void);
+extern boolean_t machine_thread_on_core(thread_t thread);
+
+static int
+uaddr_findobj(user_addr_t uaddr, uint64_t *objectp, uint64_t *offsetp)
+{
+	kern_return_t ret;
+	vm_page_info_basic_data_t info;
+	mach_msg_type_number_t count = VM_PAGE_INFO_BASIC_COUNT;
+	ret = vm_map_page_info(current_map(), uaddr, VM_PAGE_INFO_BASIC, (vm_page_info_t)&info, &count);
+	if (ret != KERN_SUCCESS) {
+		return EINVAL;
+	}
+
+	if (objectp != NULL) {
+		*objectp = (uint64_t)info.object_id;
+	}
+	if (offsetp != NULL) {
+		*offsetp = (uint64_t)info.offset;
+	}
+
+	return 0;
+}
+
 static void ulock_wait_continue(void *, wait_result_t);
 static void ulock_wait_cleanup(ull_t *, thread_t, thread_t, int32_t *);
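uaddr_findobj() turns a user address into a (VM object id, offset) pair, which is what lets one key match across processes that map the same memory. A hypothetical user-space probe of the same identity, assuming mach_vm_page_info() is exported to user space on the build in question (file path and error handling are illustrative; the pages must be resident for the basic info to be populated):

    #include <fcntl.h>
    #include <mach/mach.h>
    #include <mach/mach_vm.h>
    #include <stdio.h>
    #include <sys/mman.h>
    #include <unistd.h>

    static void
    probe(const char *tag, void *addr)
    {
        vm_page_info_basic_data_t info;
        mach_msg_type_number_t count = VM_PAGE_INFO_BASIC_COUNT;
        kern_return_t kr = mach_vm_page_info(mach_task_self(),
            (mach_vm_address_t)addr, VM_PAGE_INFO_BASIC,
            (vm_page_info_t)&info, &count);
        if (kr == KERN_SUCCESS) {
            printf("%s: object_id=0x%llx offset=0x%llx\n", tag,
                (unsigned long long)info.object_id,
                (unsigned long long)info.offset);
        }
    }

    int
    main(void)
    {
        int fd = open("/tmp/ulock-demo", O_RDWR | O_CREAT, 0600);
        ftruncate(fd, 4096);
        char *a = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        char *b = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        a[0] = 1;               /* fault both mappings in first */
        b[0] = 2;
        probe("mapping a", a);  /* both probes should report the same identity */
        probe("mapping b", b);
        return 0;
    }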
@@ -388,6 +449,24 @@ wait_result_to_return_code(wait_result_t wr)
 	return ret;
 }
 
+static int
+ulock_resolve_owner(uint32_t value, thread_t *owner)
+{
+	mach_port_name_t owner_name = ulock_owner_value_to_port_name(value);
+
+	*owner = port_name_to_thread(owner_name,
+	    PORT_TO_THREAD_IN_CURRENT_TASK |
+	    PORT_TO_THREAD_NOT_CURRENT_THREAD);
+	if (*owner == THREAD_NULL) {
+		/*
+		 * Translation failed - even though the lock value is up to date,
+		 * whatever was stored in the lock wasn't actually a thread port.
+		 */
+		return owner_name == MACH_PORT_DEAD ? ESRCH : EOWNERDEAD;
+	}
+	return 0;
+}
+
 int
 ulock_wait(struct proc *p, struct ulock_wait_args *args, int32_t *retval)
 {
@@ -413,29 +492,96 @@ ulock_wait(struct proc *p, struct ulock_wait_args *args, int32_t *retval)
 		goto munge_retval;
 	}
 
-	boolean_t set_owner = FALSE;
+	bool set_owner = false;
+	bool xproc = false;
+	size_t lock_size = sizeof(uint32_t);
+	int copy_ret;
 
 	switch (opcode) {
 	case UL_UNFAIR_LOCK:
-		set_owner = TRUE;
+		set_owner = true;
 		break;
 	case UL_COMPARE_AND_WAIT:
 		break;
+	case UL_COMPARE_AND_WAIT64:
+		lock_size = sizeof(uint64_t);
+		break;
+	case UL_COMPARE_AND_WAIT_SHARED:
+		xproc = true;
+		break;
+	case UL_COMPARE_AND_WAIT64_SHARED:
+		xproc = true;
+		lock_size = sizeof(uint64_t);
+		break;
 	default:
 		ret = EINVAL;
 		goto munge_retval;
 	}
 
-	/* 32-bit lock type for UL_COMPARE_AND_WAIT and UL_UNFAIR_LOCK */
-	uint32_t value = 0;
+	uint64_t value = 0;
 
-	if ((args->addr == 0) || (args->addr % _Alignof(_Atomic(typeof(value))))) {
+	if ((args->addr == 0) || (args->addr & (lock_size - 1))) {
 		ret = EINVAL;
 		goto munge_retval;
 	}
 
-	key.ulk_pid = p->p_pid;
-	key.ulk_addr = args->addr;
+	if (xproc) {
+		uint64_t object = 0;
+		uint64_t offset = 0;
+
+		ret = uaddr_findobj(args->addr, &object, &offset);
+		if (ret) {
+			ret = EINVAL;
+			goto munge_retval;
+		}
+		key.ulk_key_type = ULK_XPROC;
+		key.ulk_object = object;
+		key.ulk_offset = offset;
+	} else {
+		key.ulk_key_type = ULK_UADDR;
+		key.ulk_pid = p->p_pid;
+		key.ulk_addr = args->addr;
+	}
+
+	if ((flags & ULF_WAIT_ADAPTIVE_SPIN) && set_owner) {
+		/*
+		 * Attempt the copyin outside of the lock once,
+		 *
+		 * If it doesn't match (which is common), return right away.
+		 *
+		 * If it matches, resolve the current owner, and if it is on core,
+		 * spin a bit waiting for the value to change. If the owner isn't on
+		 * core, or if the value stays stable, then go on with the regular
+		 * blocking code.
+		 */
+		uint64_t end = 0;
+		uint32_t u32;
+
+		ret = copyin_atomic32(args->addr, &u32);
+		if (ret || u32 != args->value) {
+			goto munge_retval;
+		}
+		for (;;) {
+			if (owner_thread == NULL && ulock_resolve_owner(u32, &owner_thread) != 0) {
+				break;
+			}
+
+			/* owner_thread may have a +1 starting here */
+
+			if (!machine_thread_on_core(owner_thread)) {
+				break;
+			}
+			if (end == 0) {
+				clock_interval_to_deadline(ulock_adaptive_spin_usecs,
+				    NSEC_PER_USEC, &end);
+			} else if (mach_absolute_time() > end) {
+				break;
+			}
+			if (copyin_atomic32_wait_if_equals(args->addr, u32) != 0) {
+				goto munge_retval;
+			}
+		}
+	}
 
 	ull_t *ull = ull_get(&key, 0, &unused_ull);
 	if (ull == NULL) {
@@ -446,10 +592,6 @@ ulock_wait(struct proc *p, struct ulock_wait_args *args, int32_t *retval)
 
 	ull->ull_nwaiters++;
 
-	if (ull->ull_nwaiters > ull->ull_max_nwaiters) {
-		ull->ull_max_nwaiters = ull->ull_nwaiters;
-	}
-
 	if (ull->ull_opcode == 0) {
 		ull->ull_opcode = opcode;
 	} else if (ull->ull_opcode != opcode) {
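The ULF_WAIT_ADAPTIVE_SPIN path above spins for at most ulock_adaptive_spin_usecs before blocking, and only while the lock word still holds the expected value. A user-space model of the loop's shape, leaving out the kernel-only machine_thread_on_core() check and substituting a plain relaxed load for copyin_atomic32_wait_if_equals() (a sketch, not the kernel's implementation):

    #include <mach/mach_time.h>
    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>

    #define SPIN_USECS 20   /* mirrors the ulock_adaptive_spin_usecs default */

    /* Returns true if the word moved (retry the lock), false if it is time
     * to fall back to the blocking path. */
    static bool
    adaptive_spin(_Atomic uint32_t *lock, uint32_t expected)
    {
        mach_timebase_info_data_t tb;
        mach_timebase_info(&tb);
        /* convert the microsecond budget into mach_absolute_time() ticks */
        uint64_t budget = SPIN_USECS * 1000ull * tb.denom / tb.numer;
        uint64_t end = mach_absolute_time() + budget;

        while (mach_absolute_time() < end) {
            if (atomic_load_explicit(lock, memory_order_relaxed) != expected) {
                return true;    /* value changed while we spun */
            }
        }
        return false;           /* still contended: block instead */
    }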
@@ -465,17 +607,22 @@ ulock_wait(struct proc *p, struct ulock_wait_args *args, int32_t *retval)
 	 * holding the ull spinlock across copyin forces any
 	 * vm_fault we encounter to fail.
 	 */
-	uint64_t val64; /* copyin_word always zero-extends to 64-bits */
-	int copy_ret = copyin_word(args->addr, &val64, sizeof(value));
+	/* copyin_atomicXX always checks alignment */
 
-	value = (uint32_t)val64;
+	if (lock_size == 4) {
+		uint32_t u32;
+		copy_ret = copyin_atomic32(args->addr, &u32);
+		value = u32;
+	} else {
+		copy_ret = copyin_atomic64(args->addr, &value);
+	}
 
 #if DEVELOPMENT || DEBUG
 	/* Occasionally simulate copyin finding the user address paged out */
 	if (((ull_simulate_copyin_fault == p->p_pid) || (ull_simulate_copyin_fault == 1)) && (copy_ret == 0)) {
 		static _Atomic int fault_inject = 0;
-		if (__c11_atomic_fetch_add(&fault_inject, 1, __ATOMIC_RELAXED) % 73 == 0) {
+		if (os_atomic_inc_orig(&fault_inject, relaxed) % 73 == 0) {
 			copy_ret = EFAULT;
 		}
 	}
 #endif
@@ -494,17 +641,17 @@ ulock_wait(struct proc *p, struct ulock_wait_args *args, int32_t *retval)
 	}
 
 	if (set_owner) {
-		mach_port_name_t owner_name = ulock_owner_value_to_port_name(args->value);
-		owner_thread = port_name_to_thread_for_ulock(owner_name);
-
-		/* HACK: don't bail on MACH_PORT_DEAD, to avoid blowing up the no-tsd pthread lock */
-		if (owner_name != MACH_PORT_DEAD && owner_thread == THREAD_NULL) {
-			/*
-			 * Translation failed - even though the lock value is up to date,
-			 * whatever was stored in the lock wasn't actually a thread port.
-			 */
-			ret = EOWNERDEAD;
-			goto out_locked;
+		if (owner_thread == THREAD_NULL) {
+			ret = ulock_resolve_owner(args->value, &owner_thread);
+			if (ret == EOWNERDEAD) {
+				/*
+				 * Translation failed - even though the lock value is up to date,
+				 * whatever was stored in the lock wasn't actually a thread port.
+				 */
+				goto out_locked;
+			}
+			/* HACK: don't bail on MACH_PORT_DEAD, to avoid blowing up the no-tsd pthread lock */
+			ret = 0;
 		}
 
 		/* owner_thread has a +1 reference */
@@ -539,7 +686,7 @@ ulock_wait(struct proc *p, struct ulock_wait_args *args, int32_t *retval)
 	struct turnstile *ts;
 
 	ts = turnstile_prepare((uintptr_t)ull, &ull->ull_turnstile,
-			TURNSTILE_NULL, TURNSTILE_ULOCK);
+	    TURNSTILE_NULL, TURNSTILE_ULOCK);
 	thread_set_pending_block_hint(self, kThreadWaitUserLock);
 
 	if (flags & ULF_WAIT_WORKQ_DATA_CONTENTION) {
@@ -551,10 +698,10 @@ ulock_wait(struct proc *p, struct ulock_wait_args *args, int32_t *retval)
 	}
 
 	turnstile_update_inheritor(ts, owner_thread,
-			(TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));
+	    (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));
 
 	wr = waitq_assert_wait64(&ts->ts_waitq, CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
-			interruptible, deadline);
+	    interruptible, deadline);
 
 	ull_unlock(ull);
@@ -583,10 +730,11 @@ ulock_wait(struct proc *p, struct ulock_wait_args *args, int32_t *retval)
 	ret = wait_result_to_return_code(wr);
 
 	ull_lock(ull);
-	turnstile_complete((uintptr_t)ull, &ull->ull_turnstile, NULL);
+	turnstile_complete((uintptr_t)ull, &ull->ull_turnstile, NULL, TURNSTILE_ULOCK);
 
 out_locked:
 	ulock_wait_cleanup(ull, owner_thread, old_owner, retval);
+	owner_thread = NULL;
 
 	if (unused_ull) {
 		ull_free(unused_ull);
@@ -596,6 +744,12 @@ out_locked:
 	assert(*retval >= 0);
 
 munge_retval:
+	if (owner_thread) {
+		thread_deallocate(owner_thread);
+	}
+	if (ret == ESTALE) {
+		ret = 0;
+	}
 	if ((flags & ULF_NO_ERRNO) && (ret != 0)) {
 		*retval = -ret;
 		ret = 0;
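The munge_retval convention is worth spelling out: with ULF_NO_ERRNO the trap itself returns 0 and the error travels back negated in the returned value. A hedged sketch of a caller-side wrapper, hand-declaring the private __ulock_wait trap (signature as used by libsystem_kernel; this is not public API and the declaration here is an assumption):

    #include <stdint.h>

    /* private trap, declared by hand for illustration only */
    extern int __ulock_wait(uint32_t operation, void *addr, uint64_t value,
        uint32_t timeout_us);

    #define ULF_NO_ERRNO 0x01000000 /* value from bsd/sys/ulock.h */

    static int
    wait_no_errno(uint32_t op, void *addr, uint64_t value, uint32_t timeout_us)
    {
        int rc = __ulock_wait(op | ULF_NO_ERRNO, addr, value, timeout_us);
        if (rc < 0) {
            return -rc;     /* negated errno, e.g. EINTR, ETIMEDOUT, EOWNERDEAD */
        }
        return 0;           /* rc >= 0: woken; value hints at remaining waiters */
    }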
@@ -623,8 +777,7 @@ ulock_wait_cleanup(ull_t *ull, thread_t owner_thread, thread_t old_owner, int32_
 		old_lingering_owner = ull->ull_owner;
 		ull->ull_owner = THREAD_NULL;
 
-		ull->ull_key.ulk_pid = 0;
-		ull->ull_key.ulk_addr = 0;
+		memset(&ull->ull_key, 0, sizeof ull->ull_key);
 		ull->ull_refcount--;
 		assert(ull->ull_refcount > 0);
 	}
@@ -665,7 +818,7 @@ ulock_wait_continue(void * parameter, wait_result_t wr)
 	ret = wait_result_to_return_code(wr);
 
 	ull_lock(ull);
-	turnstile_complete((uintptr_t)ull, &ull->ull_turnstile, NULL);
+	turnstile_complete((uintptr_t)ull, &ull->ull_turnstile, NULL, TURNSTILE_ULOCK);
 
 	ulock_wait_cleanup(ull, owner_thread, old_owner, retval);
@@ -687,12 +840,6 @@ ulock_wake(struct proc *p, struct ulock_wake_args *args, __unused int32_t *retva
 
 	/* involved threads - each variable holds +1 ref if not null */
 	thread_t wake_thread = THREAD_NULL;
-	thread_t old_owner = THREAD_NULL;
-
-	if ((flags & ULF_WAKE_MASK) != flags) {
-		ret = EINVAL;
-		goto munge_retval;
-	}
 
 #if DEVELOPMENT || DEBUG
 	if (opcode == UL_DEBUG_HASH_DUMP_PID) {
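Both the wait and wake paths follow the same turnstile choreography. An outline of the call sequence, using the functions exactly as they appear in this diff (kernel context, error handling elided; a reading aid, not compilable on its own):

    /* bind the ull to a turnstile while the ull spinlock is held */
    ts = turnstile_prepare((uintptr_t)ull, &ull->ull_turnstile,
        TURNSTILE_NULL, TURNSTILE_ULOCK);

    /* point priority inheritance at the current owner (wait path) */
    turnstile_update_inheritor(ts, owner_thread,
        TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD);

    /* enqueue this thread on the turnstile's waitq, then block */
    wr = waitq_assert_wait64(&ts->ts_waitq,
        CAST_EVENT64_T(ULOCK_TO_EVENT(ull)), interruptible, deadline);

    /* ... block, wake ... */

    /* sever the ull <-> turnstile binding, again under the ull lock */
    turnstile_complete((uintptr_t)ull, &ull->ull_turnstile, NULL,
        TURNSTILE_ULOCK);

    /* and only after the interlock is dropped: */
    turnstile_cleanup();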
+ */ + goto out_ull_put; + } + } else { assert(ull->ull_owner == THREAD_NULL); } - struct turnstile *ts; ts = turnstile_prepare((uintptr_t)ull, &ull->ull_turnstile, - TURNSTILE_NULL, TURNSTILE_ULOCK); + TURNSTILE_NULL, TURNSTILE_ULOCK); + assert(ts != TURNSTILE_NULL); - if (flags & ULF_WAKE_ALL) { - waitq_wakeup64_all(&ts->ts_waitq, CAST_EVENT64_T(ULOCK_TO_EVENT(ull)), - THREAD_AWAKENED, 0); - } else if (flags & ULF_WAKE_THREAD) { - kern_return_t kr = waitq_wakeup64_thread(&ts->ts_waitq, CAST_EVENT64_T(ULOCK_TO_EVENT(ull)), - wake_thread, THREAD_AWAKENED); + if (flags & ULF_WAKE_THREAD) { + kern_return_t kr = waitq_wakeup64_thread(&ts->ts_waitq, + CAST_EVENT64_T(ULOCK_TO_EVENT(ull)), + wake_thread, THREAD_AWAKENED); if (kr != KERN_SUCCESS) { assert(kr == KERN_NOT_WAITING); ret = EALREADY; } - } else { + } else if (flags & ULF_WAKE_ALL) { + if (set_owner) { + turnstile_update_inheritor(ts, THREAD_NULL, + TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD); + } + waitq_wakeup64_all(&ts->ts_waitq, CAST_EVENT64_T(ULOCK_TO_EVENT(ull)), + THREAD_AWAKENED, 0); + } else if (set_owner) { /* - * TODO: WAITQ_SELECT_MAX_PRI forces a linear scan of the (hashed) global waitq. - * Move to a ulock-private, priority sorted waitq (i.e. SYNC_POLICY_FIXED_PRIORITY) to avoid that. - * - * TODO: 'owner is not current_thread (or null)' likely means we can avoid this wakeup - * + * The turnstile waitq is priority ordered, + * and will wake up the highest priority waiter + * and set it as the inheritor for us. */ + new_owner = waitq_wakeup64_identify(&ts->ts_waitq, + CAST_EVENT64_T(ULOCK_TO_EVENT(ull)), + THREAD_AWAKENED, WAITQ_PROMOTE_ON_WAKE); + } else { waitq_wakeup64_one(&ts->ts_waitq, CAST_EVENT64_T(ULOCK_TO_EVENT(ull)), - THREAD_AWAKENED, WAITQ_SELECT_MAX_PRI); + THREAD_AWAKENED, WAITQ_ALL_PRIORITIES); } - /* - * Reaching this point means I previously moved the lock to 'unowned' state in userspace. - * Therefore I need to relinquish my promotion. - * - * However, someone else could have locked it after I unlocked, and then had a third thread - * block on the lock, causing a promotion of some other owner. - * - * I don't want to stomp over that, so only remove the promotion if I'm the current owner. 
- */ - - if (ull->ull_owner == current_thread()) { - turnstile_update_inheritor(ts, THREAD_NULL, - (TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD)); + if (set_owner) { turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD); - old_owner = ull->ull_owner; - ull->ull_owner = THREAD_NULL; + cleanup_thread = ull->ull_owner; + ull->ull_owner = new_owner; } - turnstile_complete((uintptr_t)ull, &ull->ull_turnstile, NULL); + turnstile_complete((uintptr_t)ull, &ull->ull_turnstile, NULL, TURNSTILE_ULOCK); -out_locked: +out_ull_put: ull_put(ull); - /* Need to be called after dropping the interlock */ - turnstile_cleanup(); - - if (wake_thread != THREAD_NULL) { - thread_deallocate(wake_thread); + if (ts != TURNSTILE_NULL) { + /* Need to be called after dropping the interlock */ + turnstile_cleanup(); } - if (old_owner != THREAD_NULL) { - thread_deallocate(old_owner); + if (cleanup_thread != THREAD_NULL) { + thread_deallocate(cleanup_thread); } munge_retval: + if (wake_thread != THREAD_NULL) { + thread_deallocate(wake_thread); + } + if ((flags & ULF_NO_ERRNO) && (ret != 0)) { *retval = -ret; ret = 0; @@ -834,14 +1020,22 @@ kdp_ulock_find_owner(__unused struct waitq * waitq, event64_t event, thread_wait ull_t *ull = EVENT_TO_ULOCK(event); assert(kdp_is_in_zone(ull, "ulocks")); - if (ull->ull_opcode == UL_UNFAIR_LOCK) {// owner is only set if it's an os_unfair_lock - waitinfo->owner = thread_tid(ull->ull_owner); + switch (ull->ull_opcode) { + case UL_UNFAIR_LOCK: + case UL_UNFAIR_LOCK64_SHARED: + waitinfo->owner = thread_tid(ull->ull_owner); waitinfo->context = ull->ull_key.ulk_addr; - } else if (ull->ull_opcode == UL_COMPARE_AND_WAIT) { // otherwise, this is a spinlock - waitinfo->owner = 0; + break; + case UL_COMPARE_AND_WAIT: + case UL_COMPARE_AND_WAIT64: + case UL_COMPARE_AND_WAIT_SHARED: + case UL_COMPARE_AND_WAIT64_SHARED: + waitinfo->owner = 0; waitinfo->context = ull->ull_key.ulk_addr; - } else { + break; + default: panic("%s: Invalid ulock opcode %d addr %p", __FUNCTION__, ull->ull_opcode, (void*)ull); + break; } return; }